[X86][Haswell] Updating HSW instruction scheduling information

This patch completely replaces the instruction scheduling information for the Haswell architecture target by modifying the file X86SchedHaswell.td located under the X86 Target.
We used scheduling information obtained from the Haswell architects to replace and modify the existing scheduling information.
The patch continues the scheduling replacement effort started with the SNB target in r307529 and r310792.
The information includes the latency, the number of micro-ops, and the ports used by each HSW instruction.

Please expect some performance fluctuations due to code alignment effects.

Reviewers: RKSimon, zvi, aymanmus, craig.topper, m_zuckerman, igorb, dim, chandlerc, aaboud

Differential Revision: https://reviews.llvm.org/D36663

llvm-svn: 311879
This commit is contained in:
Gadi Haber 2017-08-28 10:04:16 +00:00
parent 60608a8ae5
commit d76f7b824e
40 changed files with 14920 additions and 13892 deletions

File diff suppressed because it is too large Load Diff

View File

@ -32,7 +32,7 @@ define <2 x i64> @test_aesdec(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> *%a2) {
; HASWELL: # BB#0:
; HASWELL-NEXT: vaesdec %xmm1, %xmm0, %xmm0 # sched: [7:1.00]
; HASWELL-NEXT: vaesdec (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
; HASWELL-NEXT: retq # sched: [1:1.00]
; HASWELL-NEXT: retq # sched: [2:1.00]
;
; BTVER2-LABEL: test_aesdec:
; BTVER2: # BB#0:
@ -75,7 +75,7 @@ define <2 x i64> @test_aesdeclast(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> *%a2)
; HASWELL: # BB#0:
; HASWELL-NEXT: vaesdeclast %xmm1, %xmm0, %xmm0 # sched: [7:1.00]
; HASWELL-NEXT: vaesdeclast (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
; HASWELL-NEXT: retq # sched: [1:1.00]
; HASWELL-NEXT: retq # sched: [2:1.00]
;
; BTVER2-LABEL: test_aesdeclast:
; BTVER2: # BB#0:
@ -118,7 +118,7 @@ define <2 x i64> @test_aesenc(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> *%a2) {
; HASWELL: # BB#0:
; HASWELL-NEXT: vaesenc %xmm1, %xmm0, %xmm0 # sched: [7:1.00]
; HASWELL-NEXT: vaesenc (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
; HASWELL-NEXT: retq # sched: [1:1.00]
; HASWELL-NEXT: retq # sched: [2:1.00]
;
; BTVER2-LABEL: test_aesenc:
; BTVER2: # BB#0:
@ -161,7 +161,7 @@ define <2 x i64> @test_aesenclast(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> *%a2)
; HASWELL: # BB#0:
; HASWELL-NEXT: vaesenclast %xmm1, %xmm0, %xmm0 # sched: [7:1.00]
; HASWELL-NEXT: vaesenclast (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
; HASWELL-NEXT: retq # sched: [1:1.00]
; HASWELL-NEXT: retq # sched: [2:1.00]
;
; BTVER2-LABEL: test_aesenclast:
; BTVER2: # BB#0:
@ -208,7 +208,7 @@ define <2 x i64> @test_aesimc(<2 x i64> %a0, <2 x i64> *%a1) {
; HASWELL-NEXT: vaesimc %xmm0, %xmm0 # sched: [14:2.00]
; HASWELL-NEXT: vaesimc (%rdi), %xmm1 # sched: [14:2.00]
; HASWELL-NEXT: vpor %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
; HASWELL-NEXT: retq # sched: [1:1.00]
; HASWELL-NEXT: retq # sched: [2:1.00]
;
; BTVER2-LABEL: test_aesimc:
; BTVER2: # BB#0:
@ -255,10 +255,10 @@ define <2 x i64> @test_aeskeygenassist(<2 x i64> %a0, <2 x i64> *%a1) {
;
; HASWELL-LABEL: test_aeskeygenassist:
; HASWELL: # BB#0:
; HASWELL-NEXT: vaeskeygenassist $7, %xmm0, %xmm0 # sched: [10:8.00]
; HASWELL-NEXT: vaeskeygenassist $7, (%rdi), %xmm1 # sched: [10:7.00]
; HASWELL-NEXT: vaeskeygenassist $7, %xmm0, %xmm0 # sched: [29:7.00]
; HASWELL-NEXT: vaeskeygenassist $7, (%rdi), %xmm1 # sched: [28:7.00]
; HASWELL-NEXT: vpor %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
; HASWELL-NEXT: retq # sched: [1:1.00]
; HASWELL-NEXT: retq # sched: [2:1.00]
;
; BTVER2-LABEL: test_aeskeygenassist:
; BTVER2: # BB#0:

View File

@ -23,8 +23,8 @@ define <4 x double> @test_addpd(<4 x double> %a0, <4 x double> %a1, <4 x double>
; HASWELL-LABEL: test_addpd:
; HASWELL: # BB#0:
; HASWELL-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
; HASWELL-NEXT: vaddpd (%rdi), %ymm0, %ymm0 # sched: [7:1.00]
; HASWELL-NEXT: retq # sched: [1:1.00]
; HASWELL-NEXT: vaddpd (%rdi), %ymm0, %ymm0 # sched: [3:1.00]
; HASWELL-NEXT: retq # sched: [2:1.00]
;
; BTVER2-LABEL: test_addpd:
; BTVER2: # BB#0:
@ -59,8 +59,8 @@ define <8 x float> @test_addps(<8 x float> %a0, <8 x float> %a1, <8 x float> *%a
; HASWELL-LABEL: test_addps:
; HASWELL: # BB#0:
; HASWELL-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
; HASWELL-NEXT: vaddps (%rdi), %ymm0, %ymm0 # sched: [7:1.00]
; HASWELL-NEXT: retq # sched: [1:1.00]
; HASWELL-NEXT: vaddps (%rdi), %ymm0, %ymm0 # sched: [3:1.00]
; HASWELL-NEXT: retq # sched: [2:1.00]
;
; BTVER2-LABEL: test_addps:
; BTVER2: # BB#0:
@ -95,8 +95,8 @@ define <4 x double> @test_addsubpd(<4 x double> %a0, <4 x double> %a1, <4 x doub
; HASWELL-LABEL: test_addsubpd:
; HASWELL: # BB#0:
; HASWELL-NEXT: vaddsubpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
; HASWELL-NEXT: vaddsubpd (%rdi), %ymm0, %ymm0 # sched: [7:1.00]
; HASWELL-NEXT: retq # sched: [1:1.00]
; HASWELL-NEXT: vaddsubpd (%rdi), %ymm0, %ymm0 # sched: [3:1.00]
; HASWELL-NEXT: retq # sched: [2:1.00]
;
; BTVER2-LABEL: test_addsubpd:
; BTVER2: # BB#0:
@ -132,8 +132,8 @@ define <8 x float> @test_addsubps(<8 x float> %a0, <8 x float> %a1, <8 x float>
; HASWELL-LABEL: test_addsubps:
; HASWELL: # BB#0:
; HASWELL-NEXT: vaddsubps %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
; HASWELL-NEXT: vaddsubps (%rdi), %ymm0, %ymm0 # sched: [7:1.00]
; HASWELL-NEXT: retq # sched: [1:1.00]
; HASWELL-NEXT: vaddsubps (%rdi), %ymm0, %ymm0 # sched: [3:1.00]
; HASWELL-NEXT: retq # sched: [2:1.00]
;
; BTVER2-LABEL: test_addsubps:
; BTVER2: # BB#0:
@ -171,9 +171,9 @@ define <4 x double> @test_andnotpd(<4 x double> %a0, <4 x double> %a1, <4 x doub
; HASWELL-LABEL: test_andnotpd:
; HASWELL: # BB#0:
; HASWELL-NEXT: vandnpd %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
; HASWELL-NEXT: vandnpd (%rdi), %ymm0, %ymm0 # sched: [5:1.00]
; HASWELL-NEXT: vandnpd (%rdi), %ymm0, %ymm0 # sched: [1:1.00]
; HASWELL-NEXT: vaddpd %ymm0, %ymm1, %ymm0 # sched: [3:1.00]
; HASWELL-NEXT: retq # sched: [1:1.00]
; HASWELL-NEXT: retq # sched: [2:1.00]
;
; BTVER2-LABEL: test_andnotpd:
; BTVER2: # BB#0:
@ -219,9 +219,9 @@ define <8 x float> @test_andnotps(<8 x float> %a0, <8 x float> %a1, <8 x float>
; HASWELL-LABEL: test_andnotps:
; HASWELL: # BB#0:
; HASWELL-NEXT: vandnps %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
; HASWELL-NEXT: vandnps (%rdi), %ymm0, %ymm0 # sched: [5:1.00]
; HASWELL-NEXT: vandnps (%rdi), %ymm0, %ymm0 # sched: [1:1.00]
; HASWELL-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [3:1.00]
; HASWELL-NEXT: retq # sched: [1:1.00]
; HASWELL-NEXT: retq # sched: [2:1.00]
;
; BTVER2-LABEL: test_andnotps:
; BTVER2: # BB#0:
@ -267,9 +267,9 @@ define <4 x double> @test_andpd(<4 x double> %a0, <4 x double> %a1, <4 x double>
; HASWELL-LABEL: test_andpd:
; HASWELL: # BB#0:
; HASWELL-NEXT: vandpd %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
; HASWELL-NEXT: vandpd (%rdi), %ymm0, %ymm0 # sched: [5:1.00]
; HASWELL-NEXT: vandpd (%rdi), %ymm0, %ymm0 # sched: [1:1.00]
; HASWELL-NEXT: vaddpd %ymm0, %ymm1, %ymm0 # sched: [3:1.00]
; HASWELL-NEXT: retq # sched: [1:1.00]
; HASWELL-NEXT: retq # sched: [2:1.00]
;
; BTVER2-LABEL: test_andpd:
; BTVER2: # BB#0:
@ -313,9 +313,9 @@ define <8 x float> @test_andps(<8 x float> %a0, <8 x float> %a1, <8 x float> *%a
; HASWELL-LABEL: test_andps:
; HASWELL: # BB#0:
; HASWELL-NEXT: vandps %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
; HASWELL-NEXT: vandps (%rdi), %ymm0, %ymm0 # sched: [5:1.00]
; HASWELL-NEXT: vandps (%rdi), %ymm0, %ymm0 # sched: [1:1.00]
; HASWELL-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [3:1.00]
; HASWELL-NEXT: retq # sched: [1:1.00]
; HASWELL-NEXT: retq # sched: [2:1.00]
;
; BTVER2-LABEL: test_andps:
; BTVER2: # BB#0:
@ -360,8 +360,8 @@ define <4 x double> @test_blendpd(<4 x double> %a0, <4 x double> %a1, <4 x doubl
; HASWELL: # BB#0:
; HASWELL-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2],ymm0[3] sched: [1:0.33]
; HASWELL-NEXT: vaddpd %ymm0, %ymm1, %ymm0 # sched: [3:1.00]
; HASWELL-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0],mem[1,2],ymm0[3] sched: [5:0.50]
; HASWELL-NEXT: retq # sched: [1:1.00]
; HASWELL-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0],mem[1,2],ymm0[3] sched: [1:0.50]
; HASWELL-NEXT: retq # sched: [2:1.00]
;
; BTVER2-LABEL: test_blendpd:
; BTVER2: # BB#0:
@ -399,8 +399,8 @@ define <8 x float> @test_blendps(<8 x float> %a0, <8 x float> %a1, <8 x float> *
; HASWELL-LABEL: test_blendps:
; HASWELL: # BB#0:
; HASWELL-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1,2],ymm0[3,4,5,6,7] sched: [1:0.33]
; HASWELL-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0,1],mem[2],ymm0[3],mem[4,5,6],ymm0[7] sched: [5:0.50]
; HASWELL-NEXT: retq # sched: [1:1.00]
; HASWELL-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0,1],mem[2],ymm0[3],mem[4,5,6],ymm0[7] sched: [1:0.50]
; HASWELL-NEXT: retq # sched: [2:1.00]
;
; BTVER2-LABEL: test_blendps:
; BTVER2: # BB#0:
@ -435,8 +435,8 @@ define <4 x double> @test_blendvpd(<4 x double> %a0, <4 x double> %a1, <4 x doub
; HASWELL-LABEL: test_blendvpd:
; HASWELL: # BB#0:
; HASWELL-NEXT: vblendvpd %ymm2, %ymm1, %ymm0, %ymm0 # sched: [2:2.00]
; HASWELL-NEXT: vblendvpd %ymm2, (%rdi), %ymm0, %ymm0 # sched: [6:2.00]
; HASWELL-NEXT: retq # sched: [1:1.00]
; HASWELL-NEXT: vblendvpd %ymm2, (%rdi), %ymm0, %ymm0 # sched: [2:2.00]
; HASWELL-NEXT: retq # sched: [2:1.00]
;
; BTVER2-LABEL: test_blendvpd:
; BTVER2: # BB#0:
@ -472,8 +472,8 @@ define <8 x float> @test_blendvps(<8 x float> %a0, <8 x float> %a1, <8 x float>
; HASWELL-LABEL: test_blendvps:
; HASWELL: # BB#0:
; HASWELL-NEXT: vblendvps %ymm2, %ymm1, %ymm0, %ymm0 # sched: [2:2.00]
; HASWELL-NEXT: vblendvps %ymm2, (%rdi), %ymm0, %ymm0 # sched: [6:2.00]
; HASWELL-NEXT: retq # sched: [1:1.00]
; HASWELL-NEXT: vblendvps %ymm2, (%rdi), %ymm0, %ymm0 # sched: [2:2.00]
; HASWELL-NEXT: retq # sched: [2:1.00]
;
; BTVER2-LABEL: test_blendvps:
; BTVER2: # BB#0:
@ -506,8 +506,8 @@ define <8 x float> @test_broadcastf128(<4 x float> *%a0) {
;
; HASWELL-LABEL: test_broadcastf128:
; HASWELL: # BB#0:
; HASWELL-NEXT: vbroadcastf128 {{.*#+}} ymm0 = mem[0,1,0,1] sched: [4:0.50]
; HASWELL-NEXT: retq # sched: [1:1.00]
; HASWELL-NEXT: vbroadcastf128 {{.*#+}} ymm0 = mem[0,1,0,1] sched: [1:0.50]
; HASWELL-NEXT: retq # sched: [2:1.00]
;
; BTVER2-LABEL: test_broadcastf128:
; BTVER2: # BB#0:
@ -536,8 +536,8 @@ define <4 x double> @test_broadcastsd_ymm(double *%a0) {
;
; HASWELL-LABEL: test_broadcastsd_ymm:
; HASWELL: # BB#0:
; HASWELL-NEXT: vbroadcastsd (%rdi), %ymm0 # sched: [5:1.00]
; HASWELL-NEXT: retq # sched: [1:1.00]
; HASWELL-NEXT: vbroadcastsd (%rdi), %ymm0 # sched: [1:0.50]
; HASWELL-NEXT: retq # sched: [2:1.00]
;
; BTVER2-LABEL: test_broadcastsd_ymm:
; BTVER2: # BB#0:
@ -567,8 +567,8 @@ define <4 x float> @test_broadcastss(float *%a0) {
;
; HASWELL-LABEL: test_broadcastss:
; HASWELL: # BB#0:
; HASWELL-NEXT: vbroadcastss (%rdi), %xmm0 # sched: [4:0.50]
; HASWELL-NEXT: retq # sched: [1:1.00]
; HASWELL-NEXT: vbroadcastss (%rdi), %xmm0 # sched: [1:0.50]
; HASWELL-NEXT: retq # sched: [2:1.00]
;
; BTVER2-LABEL: test_broadcastss:
; BTVER2: # BB#0:
@ -598,8 +598,8 @@ define <8 x float> @test_broadcastss_ymm(float *%a0) {
;
; HASWELL-LABEL: test_broadcastss_ymm:
; HASWELL: # BB#0:
; HASWELL-NEXT: vbroadcastss (%rdi), %ymm0 # sched: [5:1.00]
; HASWELL-NEXT: retq # sched: [1:1.00]
; HASWELL-NEXT: vbroadcastss (%rdi), %ymm0 # sched: [1:0.50]
; HASWELL-NEXT: retq # sched: [2:1.00]
;
; BTVER2-LABEL: test_broadcastss_ymm:
; BTVER2: # BB#0:
@ -634,9 +634,9 @@ define <4 x double> @test_cmppd(<4 x double> %a0, <4 x double> %a1, <4 x double>
; HASWELL-LABEL: test_cmppd:
; HASWELL: # BB#0:
; HASWELL-NEXT: vcmpeqpd %ymm1, %ymm0, %ymm1 # sched: [3:1.00]
; HASWELL-NEXT: vcmpeqpd (%rdi), %ymm0, %ymm0 # sched: [7:1.00]
; HASWELL-NEXT: vcmpeqpd (%rdi), %ymm0, %ymm0 # sched: [3:1.00]
; HASWELL-NEXT: vorpd %ymm0, %ymm1, %ymm0 # sched: [1:1.00]
; HASWELL-NEXT: retq # sched: [1:1.00]
; HASWELL-NEXT: retq # sched: [2:1.00]
;
; BTVER2-LABEL: test_cmppd:
; BTVER2: # BB#0:
@ -679,9 +679,9 @@ define <8 x float> @test_cmpps(<8 x float> %a0, <8 x float> %a1, <8 x float> *%a
; HASWELL-LABEL: test_cmpps:
; HASWELL: # BB#0:
; HASWELL-NEXT: vcmpeqps %ymm1, %ymm0, %ymm1 # sched: [3:1.00]
; HASWELL-NEXT: vcmpeqps (%rdi), %ymm0, %ymm0 # sched: [7:1.00]
; HASWELL-NEXT: vcmpeqps (%rdi), %ymm0, %ymm0 # sched: [3:1.00]
; HASWELL-NEXT: vorps %ymm0, %ymm1, %ymm0 # sched: [1:1.00]
; HASWELL-NEXT: retq # sched: [1:1.00]
; HASWELL-NEXT: retq # sched: [2:1.00]
;
; BTVER2-LABEL: test_cmpps:
; BTVER2: # BB#0:
@ -724,9 +724,9 @@ define <4 x double> @test_cvtdq2pd(<4 x i32> %a0, <4 x i32> *%a1) {
; HASWELL-LABEL: test_cvtdq2pd:
; HASWELL: # BB#0:
; HASWELL-NEXT: vcvtdq2pd %xmm0, %ymm0 # sched: [6:1.00]
; HASWELL-NEXT: vcvtdq2pd (%rdi), %ymm1 # sched: [8:1.00]
; HASWELL-NEXT: vcvtdq2pd (%rdi), %ymm1 # sched: [6:1.00]
; HASWELL-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
; HASWELL-NEXT: retq # sched: [1:1.00]
; HASWELL-NEXT: retq # sched: [2:1.00]
;
; BTVER2-LABEL: test_cvtdq2pd:
; BTVER2: # BB#0:
@ -767,10 +767,10 @@ define <8 x float> @test_cvtdq2ps(<8 x i32> %a0, <8 x i32> *%a1) {
;
; HASWELL-LABEL: test_cvtdq2ps:
; HASWELL: # BB#0:
; HASWELL-NEXT: vcvtdq2ps %ymm0, %ymm0 # sched: [4:1.00]
; HASWELL-NEXT: vcvtdq2ps (%rdi), %ymm1 # sched: [8:1.00]
; HASWELL-NEXT: vcvtdq2ps %ymm0, %ymm0 # sched: [3:1.00]
; HASWELL-NEXT: vcvtdq2ps (%rdi), %ymm1 # sched: [3:1.00]
; HASWELL-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
; HASWELL-NEXT: retq # sched: [1:1.00]
; HASWELL-NEXT: retq # sched: [2:1.00]
;
; BTVER2-LABEL: test_cvtdq2ps:
; BTVER2: # BB#0:
@ -810,9 +810,9 @@ define <8 x i32> @test_cvtpd2dq(<4 x double> %a0, <4 x double> *%a1) {
; HASWELL-LABEL: test_cvtpd2dq:
; HASWELL: # BB#0:
; HASWELL-NEXT: vcvttpd2dq %ymm0, %xmm0 # sched: [6:1.00]
; HASWELL-NEXT: vcvttpd2dqy (%rdi), %xmm1 # sched: [10:1.00]
; HASWELL-NEXT: vcvttpd2dqy (%rdi), %xmm1 # sched: [7:1.00]
; HASWELL-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 # sched: [3:1.00]
; HASWELL-NEXT: retq # sched: [1:1.00]
; HASWELL-NEXT: retq # sched: [2:1.00]
;
; BTVER2-LABEL: test_cvtpd2dq:
; BTVER2: # BB#0:
@ -851,10 +851,10 @@ define <8 x float> @test_cvtpd2ps(<4 x double> %a0, <4 x double> *%a1) {
;
; HASWELL-LABEL: test_cvtpd2ps:
; HASWELL: # BB#0:
; HASWELL-NEXT: vcvtpd2ps %ymm0, %xmm0 # sched: [5:1.00]
; HASWELL-NEXT: vcvtpd2psy (%rdi), %xmm1 # sched: [9:1.00]
; HASWELL-NEXT: vcvtpd2ps %ymm0, %xmm0 # sched: [6:1.00]
; HASWELL-NEXT: vcvtpd2psy (%rdi), %xmm1 # sched: [7:1.00]
; HASWELL-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 # sched: [3:1.00]
; HASWELL-NEXT: retq # sched: [1:1.00]
; HASWELL-NEXT: retq # sched: [2:1.00]
;
; BTVER2-LABEL: test_cvtpd2ps:
; BTVER2: # BB#0:
@ -894,9 +894,9 @@ define <8 x i32> @test_cvtps2dq(<8 x float> %a0, <8 x float> *%a1) {
; HASWELL-LABEL: test_cvtps2dq:
; HASWELL: # BB#0:
; HASWELL-NEXT: vcvttps2dq %ymm0, %ymm0 # sched: [3:1.00]
; HASWELL-NEXT: vcvttps2dq (%rdi), %ymm1 # sched: [7:1.00]
; HASWELL-NEXT: vcvttps2dq (%rdi), %ymm1 # sched: [3:1.00]
; HASWELL-NEXT: vorps %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
; HASWELL-NEXT: retq # sched: [1:1.00]
; HASWELL-NEXT: retq # sched: [2:1.00]
;
; BTVER2-LABEL: test_cvtps2dq:
; BTVER2: # BB#0:
@ -933,9 +933,9 @@ define <4 x double> @test_divpd(<4 x double> %a0, <4 x double> %a1, <4 x double>
;
; HASWELL-LABEL: test_divpd:
; HASWELL: # BB#0:
; HASWELL-NEXT: vdivpd %ymm1, %ymm0, %ymm0 # sched: [27:2.00]
; HASWELL-NEXT: vdivpd (%rdi), %ymm0, %ymm0 # sched: [31:2.00]
; HASWELL-NEXT: retq # sched: [1:1.00]
; HASWELL-NEXT: vdivpd %ymm1, %ymm0, %ymm0 # sched: [35:2.00]
; HASWELL-NEXT: vdivpd (%rdi), %ymm0, %ymm0 # sched: [35:2.00]
; HASWELL-NEXT: retq # sched: [2:1.00]
;
; BTVER2-LABEL: test_divpd:
; BTVER2: # BB#0:
@ -969,9 +969,9 @@ define <8 x float> @test_divps(<8 x float> %a0, <8 x float> %a1, <8 x float> *%a
;
; HASWELL-LABEL: test_divps:
; HASWELL: # BB#0:
; HASWELL-NEXT: vdivps %ymm1, %ymm0, %ymm0 # sched: [19:2.00]
; HASWELL-NEXT: vdivps (%rdi), %ymm0, %ymm0 # sched: [23:2.00]
; HASWELL-NEXT: retq # sched: [1:1.00]
; HASWELL-NEXT: vdivps %ymm1, %ymm0, %ymm0 # sched: [21:2.00]
; HASWELL-NEXT: vdivps (%rdi), %ymm0, %ymm0 # sched: [21:2.00]
; HASWELL-NEXT: retq # sched: [2:1.00]
;
; BTVER2-LABEL: test_divps:
; BTVER2: # BB#0:
@ -1006,8 +1006,8 @@ define <8 x float> @test_dpps(<8 x float> %a0, <8 x float> %a1, <8 x float> *%a2
; HASWELL-LABEL: test_dpps:
; HASWELL: # BB#0:
; HASWELL-NEXT: vdpps $7, %ymm1, %ymm0, %ymm0 # sched: [14:2.00]
; HASWELL-NEXT: vdpps $7, (%rdi), %ymm0, %ymm0 # sched: [18:2.00]
; HASWELL-NEXT: retq # sched: [1:1.00]
; HASWELL-NEXT: vdpps $7, (%rdi), %ymm0, %ymm0 # sched: [14:2.00]
; HASWELL-NEXT: retq # sched: [2:1.00]
;
; BTVER2-LABEL: test_dpps:
; BTVER2: # BB#0:
@ -1045,9 +1045,9 @@ define <4 x float> @test_extractf128(<8 x float> %a0, <8 x float> %a1, <4 x floa
; HASWELL-LABEL: test_extractf128:
; HASWELL: # BB#0:
; HASWELL-NEXT: vextractf128 $1, %ymm0, %xmm0 # sched: [3:1.00]
; HASWELL-NEXT: vextractf128 $1, %ymm1, (%rdi) # sched: [4:1.00]
; HASWELL-NEXT: vzeroupper # sched: [1:?]
; HASWELL-NEXT: retq # sched: [1:1.00]
; HASWELL-NEXT: vextractf128 $1, %ymm1, (%rdi) # sched: [1:1.00]
; HASWELL-NEXT: vzeroupper # sched: [4:1.00]
; HASWELL-NEXT: retq # sched: [2:1.00]
;
; BTVER2-LABEL: test_extractf128:
; BTVER2: # BB#0:
@ -1083,8 +1083,8 @@ define <4 x double> @test_haddpd(<4 x double> %a0, <4 x double> %a1, <4 x double
; HASWELL-LABEL: test_haddpd:
; HASWELL: # BB#0:
; HASWELL-NEXT: vhaddpd %ymm1, %ymm0, %ymm0 # sched: [5:2.00]
; HASWELL-NEXT: vhaddpd (%rdi), %ymm0, %ymm0 # sched: [9:2.00]
; HASWELL-NEXT: retq # sched: [1:1.00]
; HASWELL-NEXT: vhaddpd (%rdi), %ymm0, %ymm0 # sched: [5:2.00]
; HASWELL-NEXT: retq # sched: [2:1.00]
;
; BTVER2-LABEL: test_haddpd:
; BTVER2: # BB#0:
@ -1120,8 +1120,8 @@ define <8 x float> @test_haddps(<8 x float> %a0, <8 x float> %a1, <8 x float> *%
; HASWELL-LABEL: test_haddps:
; HASWELL: # BB#0:
; HASWELL-NEXT: vhaddps %ymm1, %ymm0, %ymm0 # sched: [5:2.00]
; HASWELL-NEXT: vhaddps (%rdi), %ymm0, %ymm0 # sched: [9:2.00]
; HASWELL-NEXT: retq # sched: [1:1.00]
; HASWELL-NEXT: vhaddps (%rdi), %ymm0, %ymm0 # sched: [5:2.00]
; HASWELL-NEXT: retq # sched: [2:1.00]
;
; BTVER2-LABEL: test_haddps:
; BTVER2: # BB#0:
@ -1157,8 +1157,8 @@ define <4 x double> @test_hsubpd(<4 x double> %a0, <4 x double> %a1, <4 x double
; HASWELL-LABEL: test_hsubpd:
; HASWELL: # BB#0:
; HASWELL-NEXT: vhsubpd %ymm1, %ymm0, %ymm0 # sched: [5:2.00]
; HASWELL-NEXT: vhsubpd (%rdi), %ymm0, %ymm0 # sched: [9:2.00]
; HASWELL-NEXT: retq # sched: [1:1.00]
; HASWELL-NEXT: vhsubpd (%rdi), %ymm0, %ymm0 # sched: [5:2.00]
; HASWELL-NEXT: retq # sched: [2:1.00]
;
; BTVER2-LABEL: test_hsubpd:
; BTVER2: # BB#0:
@ -1194,8 +1194,8 @@ define <8 x float> @test_hsubps(<8 x float> %a0, <8 x float> %a1, <8 x float> *%
; HASWELL-LABEL: test_hsubps:
; HASWELL: # BB#0:
; HASWELL-NEXT: vhsubps %ymm1, %ymm0, %ymm0 # sched: [5:2.00]
; HASWELL-NEXT: vhsubps (%rdi), %ymm0, %ymm0 # sched: [9:2.00]
; HASWELL-NEXT: retq # sched: [1:1.00]
; HASWELL-NEXT: vhsubps (%rdi), %ymm0, %ymm0 # sched: [5:2.00]
; HASWELL-NEXT: retq # sched: [2:1.00]
;
; BTVER2-LABEL: test_hsubps:
; BTVER2: # BB#0:
@ -1233,9 +1233,9 @@ define <8 x float> @test_insertf128(<8 x float> %a0, <4 x float> %a1, <4 x float
; HASWELL-LABEL: test_insertf128:
; HASWELL: # BB#0:
; HASWELL-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm1 # sched: [3:1.00]
; HASWELL-NEXT: vinsertf128 $1, (%rdi), %ymm0, %ymm0 # sched: [3:1.00]
; HASWELL-NEXT: vinsertf128 $1, (%rdi), %ymm0, %ymm0 # sched: [1:0.50]
; HASWELL-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [3:1.00]
; HASWELL-NEXT: retq # sched: [1:1.00]
; HASWELL-NEXT: retq # sched: [2:1.00]
;
; BTVER2-LABEL: test_insertf128:
; BTVER2: # BB#0:
@ -1272,8 +1272,8 @@ define <32 x i8> @test_lddqu(i8* %a0) {
;
; HASWELL-LABEL: test_lddqu:
; HASWELL: # BB#0:
; HASWELL-NEXT: vlddqu (%rdi), %ymm0 # sched: [4:0.50]
; HASWELL-NEXT: retq # sched: [1:1.00]
; HASWELL-NEXT: vlddqu (%rdi), %ymm0 # sched: [1:0.50]
; HASWELL-NEXT: retq # sched: [2:1.00]
;
; BTVER2-LABEL: test_lddqu:
; BTVER2: # BB#0:
@ -1306,10 +1306,10 @@ define <2 x double> @test_maskmovpd(i8* %a0, <2 x i64> %a1, <2 x double> %a2) {
;
; HASWELL-LABEL: test_maskmovpd:
; HASWELL: # BB#0:
; HASWELL-NEXT: vmaskmovpd (%rdi), %xmm0, %xmm2 # sched: [4:2.00]
; HASWELL-NEXT: vmaskmovpd %xmm1, %xmm0, (%rdi) # sched: [13:1.00]
; HASWELL-NEXT: vmaskmovpd (%rdi), %xmm0, %xmm2 # sched: [2:2.00]
; HASWELL-NEXT: vmaskmovpd %xmm1, %xmm0, (%rdi) # sched: [4:1.00]
; HASWELL-NEXT: vmovapd %xmm2, %xmm0 # sched: [1:1.00]
; HASWELL-NEXT: retq # sched: [1:1.00]
; HASWELL-NEXT: retq # sched: [2:1.00]
;
; BTVER2-LABEL: test_maskmovpd:
; BTVER2: # BB#0:
@ -1348,10 +1348,10 @@ define <4 x double> @test_maskmovpd_ymm(i8* %a0, <4 x i64> %a1, <4 x double> %a2
;
; HASWELL-LABEL: test_maskmovpd_ymm:
; HASWELL: # BB#0:
; HASWELL-NEXT: vmaskmovpd (%rdi), %ymm0, %ymm2 # sched: [4:2.00]
; HASWELL-NEXT: vmaskmovpd %ymm1, %ymm0, (%rdi) # sched: [14:1.00]
; HASWELL-NEXT: vmaskmovpd (%rdi), %ymm0, %ymm2 # sched: [2:2.00]
; HASWELL-NEXT: vmaskmovpd %ymm1, %ymm0, (%rdi) # sched: [4:1.00]
; HASWELL-NEXT: vmovapd %ymm2, %ymm0 # sched: [1:1.00]
; HASWELL-NEXT: retq # sched: [1:1.00]
; HASWELL-NEXT: retq # sched: [2:1.00]
;
; BTVER2-LABEL: test_maskmovpd_ymm:
; BTVER2: # BB#0:
@ -1390,10 +1390,10 @@ define <4 x float> @test_maskmovps(i8* %a0, <4 x i32> %a1, <4 x float> %a2) {
;
; HASWELL-LABEL: test_maskmovps:
; HASWELL: # BB#0:
; HASWELL-NEXT: vmaskmovps (%rdi), %xmm0, %xmm2 # sched: [4:2.00]
; HASWELL-NEXT: vmaskmovps %xmm1, %xmm0, (%rdi) # sched: [13:1.00]
; HASWELL-NEXT: vmaskmovps (%rdi), %xmm0, %xmm2 # sched: [2:2.00]
; HASWELL-NEXT: vmaskmovps %xmm1, %xmm0, (%rdi) # sched: [4:1.00]
; HASWELL-NEXT: vmovaps %xmm2, %xmm0 # sched: [1:1.00]
; HASWELL-NEXT: retq # sched: [1:1.00]
; HASWELL-NEXT: retq # sched: [2:1.00]
;
; BTVER2-LABEL: test_maskmovps:
; BTVER2: # BB#0:
@ -1432,10 +1432,10 @@ define <8 x float> @test_maskmovps_ymm(i8* %a0, <8 x i32> %a1, <8 x float> %a2)
;
; HASWELL-LABEL: test_maskmovps_ymm:
; HASWELL: # BB#0:
; HASWELL-NEXT: vmaskmovps (%rdi), %ymm0, %ymm2 # sched: [4:2.00]
; HASWELL-NEXT: vmaskmovps %ymm1, %ymm0, (%rdi) # sched: [14:1.00]
; HASWELL-NEXT: vmaskmovps (%rdi), %ymm0, %ymm2 # sched: [2:2.00]
; HASWELL-NEXT: vmaskmovps %ymm1, %ymm0, (%rdi) # sched: [4:1.00]
; HASWELL-NEXT: vmovaps %ymm2, %ymm0 # sched: [1:1.00]
; HASWELL-NEXT: retq # sched: [1:1.00]
; HASWELL-NEXT: retq # sched: [2:1.00]
;
; BTVER2-LABEL: test_maskmovps_ymm:
; BTVER2: # BB#0:
@ -1473,8 +1473,8 @@ define <4 x double> @test_maxpd(<4 x double> %a0, <4 x double> %a1, <4 x double>
; HASWELL-LABEL: test_maxpd:
; HASWELL: # BB#0:
; HASWELL-NEXT: vmaxpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
; HASWELL-NEXT: vmaxpd (%rdi), %ymm0, %ymm0 # sched: [7:1.00]
; HASWELL-NEXT: retq # sched: [1:1.00]
; HASWELL-NEXT: vmaxpd (%rdi), %ymm0, %ymm0 # sched: [3:1.00]
; HASWELL-NEXT: retq # sched: [2:1.00]
;
; BTVER2-LABEL: test_maxpd:
; BTVER2: # BB#0:
@ -1510,8 +1510,8 @@ define <8 x float> @test_maxps(<8 x float> %a0, <8 x float> %a1, <8 x float> *%a
; HASWELL-LABEL: test_maxps:
; HASWELL: # BB#0:
; HASWELL-NEXT: vmaxps %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
; HASWELL-NEXT: vmaxps (%rdi), %ymm0, %ymm0 # sched: [7:1.00]
; HASWELL-NEXT: retq # sched: [1:1.00]
; HASWELL-NEXT: vmaxps (%rdi), %ymm0, %ymm0 # sched: [3:1.00]
; HASWELL-NEXT: retq # sched: [2:1.00]
;
; BTVER2-LABEL: test_maxps:
; BTVER2: # BB#0:
@ -1547,8 +1547,8 @@ define <4 x double> @test_minpd(<4 x double> %a0, <4 x double> %a1, <4 x double>
; HASWELL-LABEL: test_minpd:
; HASWELL: # BB#0:
; HASWELL-NEXT: vminpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
; HASWELL-NEXT: vminpd (%rdi), %ymm0, %ymm0 # sched: [7:1.00]
; HASWELL-NEXT: retq # sched: [1:1.00]
; HASWELL-NEXT: vminpd (%rdi), %ymm0, %ymm0 # sched: [3:1.00]
; HASWELL-NEXT: retq # sched: [2:1.00]
;
; BTVER2-LABEL: test_minpd:
; BTVER2: # BB#0:
@ -1584,8 +1584,8 @@ define <8 x float> @test_minps(<8 x float> %a0, <8 x float> %a1, <8 x float> *%a
; HASWELL-LABEL: test_minps:
; HASWELL: # BB#0:
; HASWELL-NEXT: vminps %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
; HASWELL-NEXT: vminps (%rdi), %ymm0, %ymm0 # sched: [7:1.00]
; HASWELL-NEXT: retq # sched: [1:1.00]
; HASWELL-NEXT: vminps (%rdi), %ymm0, %ymm0 # sched: [3:1.00]
; HASWELL-NEXT: retq # sched: [2:1.00]
;
; BTVER2-LABEL: test_minps:
; BTVER2: # BB#0:
@ -1622,10 +1622,10 @@ define <4 x double> @test_movapd(<4 x double> *%a0, <4 x double> *%a1) {
;
; HASWELL-LABEL: test_movapd:
; HASWELL: # BB#0:
; HASWELL-NEXT: vmovapd (%rdi), %ymm0 # sched: [4:0.50]
; HASWELL-NEXT: vmovapd (%rdi), %ymm0 # sched: [1:0.50]
; HASWELL-NEXT: vaddpd %ymm0, %ymm0, %ymm0 # sched: [3:1.00]
; HASWELL-NEXT: vmovapd %ymm0, (%rsi) # sched: [1:1.00]
; HASWELL-NEXT: retq # sched: [1:1.00]
; HASWELL-NEXT: retq # sched: [2:1.00]
;
; BTVER2-LABEL: test_movapd:
; BTVER2: # BB#0:
@ -1663,10 +1663,10 @@ define <8 x float> @test_movaps(<8 x float> *%a0, <8 x float> *%a1) {
;
; HASWELL-LABEL: test_movaps:
; HASWELL: # BB#0:
; HASWELL-NEXT: vmovaps (%rdi), %ymm0 # sched: [4:0.50]
; HASWELL-NEXT: vmovaps (%rdi), %ymm0 # sched: [1:0.50]
; HASWELL-NEXT: vaddps %ymm0, %ymm0, %ymm0 # sched: [3:1.00]
; HASWELL-NEXT: vmovaps %ymm0, (%rsi) # sched: [1:1.00]
; HASWELL-NEXT: retq # sched: [1:1.00]
; HASWELL-NEXT: retq # sched: [2:1.00]
;
; BTVER2-LABEL: test_movaps:
; BTVER2: # BB#0:
@ -1705,9 +1705,9 @@ define <4 x double> @test_movddup(<4 x double> %a0, <4 x double> *%a1) {
; HASWELL-LABEL: test_movddup:
; HASWELL: # BB#0:
; HASWELL-NEXT: vmovddup {{.*#+}} ymm0 = ymm0[0,0,2,2] sched: [1:1.00]
; HASWELL-NEXT: vmovddup {{.*#+}} ymm1 = mem[0,0,2,2] sched: [4:0.50]
; HASWELL-NEXT: vmovddup {{.*#+}} ymm1 = mem[0,0,2,2] sched: [1:0.50]
; HASWELL-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
; HASWELL-NEXT: retq # sched: [1:1.00]
; HASWELL-NEXT: retq # sched: [2:1.00]
;
; BTVER2-LABEL: test_movddup:
; BTVER2: # BB#0:
@ -1744,9 +1744,9 @@ define i32 @test_movmskpd(<4 x double> %a0) {
;
; HASWELL-LABEL: test_movmskpd:
; HASWELL: # BB#0:
; HASWELL-NEXT: vmovmskpd %ymm0, %eax # sched: [2:1.00]
; HASWELL-NEXT: vzeroupper # sched: [1:?]
; HASWELL-NEXT: retq # sched: [1:1.00]
; HASWELL-NEXT: vmovmskpd %ymm0, %eax # sched: [3:1.00]
; HASWELL-NEXT: vzeroupper # sched: [4:1.00]
; HASWELL-NEXT: retq # sched: [2:1.00]
;
; BTVER2-LABEL: test_movmskpd:
; BTVER2: # BB#0:
@ -1778,9 +1778,9 @@ define i32 @test_movmskps(<8 x float> %a0) {
;
; HASWELL-LABEL: test_movmskps:
; HASWELL: # BB#0:
; HASWELL-NEXT: vmovmskps %ymm0, %eax # sched: [2:1.00]
; HASWELL-NEXT: vzeroupper # sched: [1:?]
; HASWELL-NEXT: retq # sched: [1:1.00]
; HASWELL-NEXT: vmovmskps %ymm0, %eax # sched: [3:1.00]
; HASWELL-NEXT: vzeroupper # sched: [4:1.00]
; HASWELL-NEXT: retq # sched: [2:1.00]
;
; BTVER2-LABEL: test_movmskps:
; BTVER2: # BB#0:
@ -1814,7 +1814,7 @@ define <4 x double> @test_movntpd(<4 x double> %a0, <4 x double> *%a1) {
; HASWELL: # BB#0:
; HASWELL-NEXT: vaddpd %ymm0, %ymm0, %ymm0 # sched: [3:1.00]
; HASWELL-NEXT: vmovntpd %ymm0, (%rdi) # sched: [1:1.00]
; HASWELL-NEXT: retq # sched: [1:1.00]
; HASWELL-NEXT: retq # sched: [2:1.00]
;
; BTVER2-LABEL: test_movntpd:
; BTVER2: # BB#0:
@ -1849,7 +1849,7 @@ define <8 x float> @test_movntps(<8 x float> %a0, <8 x float> *%a1) {
; HASWELL: # BB#0:
; HASWELL-NEXT: vaddps %ymm0, %ymm0, %ymm0 # sched: [3:1.00]
; HASWELL-NEXT: vmovntps %ymm0, (%rdi) # sched: [1:1.00]
; HASWELL-NEXT: retq # sched: [1:1.00]
; HASWELL-NEXT: retq # sched: [2:1.00]
;
; BTVER2-LABEL: test_movntps:
; BTVER2: # BB#0:
@ -1885,9 +1885,9 @@ define <8 x float> @test_movshdup(<8 x float> %a0, <8 x float> *%a1) {
; HASWELL-LABEL: test_movshdup:
; HASWELL: # BB#0:
; HASWELL-NEXT: vmovshdup {{.*#+}} ymm0 = ymm0[1,1,3,3,5,5,7,7] sched: [1:1.00]
; HASWELL-NEXT: vmovshdup {{.*#+}} ymm1 = mem[1,1,3,3,5,5,7,7] sched: [4:0.50]
; HASWELL-NEXT: vmovshdup {{.*#+}} ymm1 = mem[1,1,3,3,5,5,7,7] sched: [1:0.50]
; HASWELL-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
; HASWELL-NEXT: retq # sched: [1:1.00]
; HASWELL-NEXT: retq # sched: [2:1.00]
;
; BTVER2-LABEL: test_movshdup:
; BTVER2: # BB#0:
@ -1927,9 +1927,9 @@ define <8 x float> @test_movsldup(<8 x float> %a0, <8 x float> *%a1) {
; HASWELL-LABEL: test_movsldup:
; HASWELL: # BB#0:
; HASWELL-NEXT: vmovsldup {{.*#+}} ymm0 = ymm0[0,0,2,2,4,4,6,6] sched: [1:1.00]
; HASWELL-NEXT: vmovsldup {{.*#+}} ymm1 = mem[0,0,2,2,4,4,6,6] sched: [4:0.50]
; HASWELL-NEXT: vmovsldup {{.*#+}} ymm1 = mem[0,0,2,2,4,4,6,6] sched: [1:0.50]
; HASWELL-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
; HASWELL-NEXT: retq # sched: [1:1.00]
; HASWELL-NEXT: retq # sched: [2:1.00]
;
; BTVER2-LABEL: test_movsldup:
; BTVER2: # BB#0:
@ -1970,10 +1970,10 @@ define <4 x double> @test_movupd(<4 x double> *%a0, <4 x double> *%a1) {
;
; HASWELL-LABEL: test_movupd:
; HASWELL: # BB#0:
; HASWELL-NEXT: vmovupd (%rdi), %ymm0 # sched: [4:0.50]
; HASWELL-NEXT: vmovupd (%rdi), %ymm0 # sched: [1:0.50]
; HASWELL-NEXT: vaddpd %ymm0, %ymm0, %ymm0 # sched: [3:1.00]
; HASWELL-NEXT: vmovupd %ymm0, (%rsi) # sched: [1:1.00]
; HASWELL-NEXT: retq # sched: [1:1.00]
; HASWELL-NEXT: retq # sched: [2:1.00]
;
; BTVER2-LABEL: test_movupd:
; BTVER2: # BB#0:
@ -2013,10 +2013,10 @@ define <8 x float> @test_movups(<8 x float> *%a0, <8 x float> *%a1) {
;
; HASWELL-LABEL: test_movups:
; HASWELL: # BB#0:
; HASWELL-NEXT: vmovups (%rdi), %ymm0 # sched: [4:0.50]
; HASWELL-NEXT: vmovups (%rdi), %ymm0 # sched: [1:0.50]
; HASWELL-NEXT: vaddps %ymm0, %ymm0, %ymm0 # sched: [3:1.00]
; HASWELL-NEXT: vmovups %ymm0, (%rsi) # sched: [1:1.00]
; HASWELL-NEXT: retq # sched: [1:1.00]
; HASWELL-NEXT: retq # sched: [2:1.00]
;
; BTVER2-LABEL: test_movups:
; BTVER2: # BB#0:
@ -2052,9 +2052,9 @@ define <4 x double> @test_mulpd(<4 x double> %a0, <4 x double> %a1, <4 x double>
;
; HASWELL-LABEL: test_mulpd:
; HASWELL: # BB#0:
; HASWELL-NEXT: vmulpd %ymm1, %ymm0, %ymm0 # sched: [5:1.00]
; HASWELL-NEXT: vmulpd (%rdi), %ymm0, %ymm0 # sched: [9:1.00]
; HASWELL-NEXT: retq # sched: [1:1.00]
; HASWELL-NEXT: vmulpd %ymm1, %ymm0, %ymm0 # sched: [5:0.50]
; HASWELL-NEXT: vmulpd (%rdi), %ymm0, %ymm0 # sched: [5:0.50]
; HASWELL-NEXT: retq # sched: [2:1.00]
;
; BTVER2-LABEL: test_mulpd:
; BTVER2: # BB#0:
@ -2088,9 +2088,9 @@ define <8 x float> @test_mulps(<8 x float> %a0, <8 x float> %a1, <8 x float> *%a
;
; HASWELL-LABEL: test_mulps:
; HASWELL: # BB#0:
; HASWELL-NEXT: vmulps %ymm1, %ymm0, %ymm0 # sched: [5:1.00]
; HASWELL-NEXT: vmulps (%rdi), %ymm0, %ymm0 # sched: [9:1.00]
; HASWELL-NEXT: retq # sched: [1:1.00]
; HASWELL-NEXT: vmulps %ymm1, %ymm0, %ymm0 # sched: [5:0.50]
; HASWELL-NEXT: vmulps (%rdi), %ymm0, %ymm0 # sched: [5:0.50]
; HASWELL-NEXT: retq # sched: [2:1.00]
;
; BTVER2-LABEL: test_mulps:
; BTVER2: # BB#0:
@ -2127,9 +2127,9 @@ define <4 x double> @orpd(<4 x double> %a0, <4 x double> %a1, <4 x double> *%a2)
; HASWELL-LABEL: orpd:
; HASWELL: # BB#0:
; HASWELL-NEXT: vorpd %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
; HASWELL-NEXT: vorpd (%rdi), %ymm0, %ymm0 # sched: [5:1.00]
; HASWELL-NEXT: vorpd (%rdi), %ymm0, %ymm0 # sched: [1:1.00]
; HASWELL-NEXT: vaddpd %ymm0, %ymm1, %ymm0 # sched: [3:1.00]
; HASWELL-NEXT: retq # sched: [1:1.00]
; HASWELL-NEXT: retq # sched: [2:1.00]
;
; BTVER2-LABEL: orpd:
; BTVER2: # BB#0:
@ -2173,9 +2173,9 @@ define <8 x float> @test_orps(<8 x float> %a0, <8 x float> %a1, <8 x float> *%a2
; HASWELL-LABEL: test_orps:
; HASWELL: # BB#0:
; HASWELL-NEXT: vorps %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
; HASWELL-NEXT: vorps (%rdi), %ymm0, %ymm0 # sched: [5:1.00]
; HASWELL-NEXT: vorps (%rdi), %ymm0, %ymm0 # sched: [1:1.00]
; HASWELL-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [3:1.00]
; HASWELL-NEXT: retq # sched: [1:1.00]
; HASWELL-NEXT: retq # sched: [2:1.00]
;
; BTVER2-LABEL: test_orps:
; BTVER2: # BB#0:
@ -2219,9 +2219,9 @@ define <2 x double> @test_permilpd(<2 x double> %a0, <2 x double> *%a1) {
; HASWELL-LABEL: test_permilpd:
; HASWELL: # BB#0:
; HASWELL-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0] sched: [1:1.00]
; HASWELL-NEXT: vpermilpd {{.*#+}} xmm1 = mem[1,0] sched: [5:1.00]
; HASWELL-NEXT: vpermilpd {{.*#+}} xmm1 = mem[1,0] sched: [1:1.00]
; HASWELL-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; HASWELL-NEXT: retq # sched: [1:1.00]
; HASWELL-NEXT: retq # sched: [2:1.00]
;
; BTVER2-LABEL: test_permilpd:
; BTVER2: # BB#0:
@ -2261,9 +2261,9 @@ define <4 x double> @test_permilpd_ymm(<4 x double> %a0, <4 x double> *%a1) {
; HASWELL-LABEL: test_permilpd_ymm:
; HASWELL: # BB#0:
; HASWELL-NEXT: vpermilpd {{.*#+}} ymm0 = ymm0[1,0,2,3] sched: [1:1.00]
; HASWELL-NEXT: vpermilpd {{.*#+}} ymm1 = mem[1,0,2,3] sched: [5:1.00]
; HASWELL-NEXT: vpermilpd {{.*#+}} ymm1 = mem[1,0,2,3] sched: [1:1.00]
; HASWELL-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
; HASWELL-NEXT: retq # sched: [1:1.00]
; HASWELL-NEXT: retq # sched: [2:1.00]
;
; BTVER2-LABEL: test_permilpd_ymm:
; BTVER2: # BB#0:
@ -2303,9 +2303,9 @@ define <4 x float> @test_permilps(<4 x float> %a0, <4 x float> *%a1) {
; HASWELL-LABEL: test_permilps:
; HASWELL: # BB#0:
; HASWELL-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,2,1,0] sched: [1:1.00]
; HASWELL-NEXT: vpermilps {{.*#+}} xmm1 = mem[3,2,1,0] sched: [5:1.00]
; HASWELL-NEXT: vpermilps {{.*#+}} xmm1 = mem[3,2,1,0] sched: [1:1.00]
; HASWELL-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; HASWELL-NEXT: retq # sched: [1:1.00]
; HASWELL-NEXT: retq # sched: [2:1.00]
;
; BTVER2-LABEL: test_permilps:
; BTVER2: # BB#0:
@ -2345,9 +2345,9 @@ define <8 x float> @test_permilps_ymm(<8 x float> %a0, <8 x float> *%a1) {
; HASWELL-LABEL: test_permilps_ymm:
; HASWELL: # BB#0:
; HASWELL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4] sched: [1:1.00]
; HASWELL-NEXT: vpermilps {{.*#+}} ymm1 = mem[3,2,1,0,7,6,5,4] sched: [5:1.00]
; HASWELL-NEXT: vpermilps {{.*#+}} ymm1 = mem[3,2,1,0,7,6,5,4] sched: [1:1.00]
; HASWELL-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
; HASWELL-NEXT: retq # sched: [1:1.00]
; HASWELL-NEXT: retq # sched: [2:1.00]
;
; BTVER2-LABEL: test_permilps_ymm:
; BTVER2: # BB#0:
@ -2385,8 +2385,8 @@ define <2 x double> @test_permilvarpd(<2 x double> %a0, <2 x i64> %a1, <2 x i64>
; HASWELL-LABEL: test_permilvarpd:
; HASWELL: # BB#0:
; HASWELL-NEXT: vpermilpd %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
; HASWELL-NEXT: vpermilpd (%rdi), %xmm0, %xmm0 # sched: [5:1.00]
; HASWELL-NEXT: retq # sched: [1:1.00]
; HASWELL-NEXT: vpermilpd (%rdi), %xmm0, %xmm0 # sched: [1:1.00]
; HASWELL-NEXT: retq # sched: [2:1.00]
;
; BTVER2-LABEL: test_permilvarpd:
; BTVER2: # BB#0:
@ -2422,8 +2422,8 @@ define <4 x double> @test_permilvarpd_ymm(<4 x double> %a0, <4 x i64> %a1, <4 x
; HASWELL-LABEL: test_permilvarpd_ymm:
; HASWELL: # BB#0:
; HASWELL-NEXT: vpermilpd %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
; HASWELL-NEXT: vpermilpd (%rdi), %ymm0, %ymm0 # sched: [5:1.00]
; HASWELL-NEXT: retq # sched: [1:1.00]
; HASWELL-NEXT: vpermilpd (%rdi), %ymm0, %ymm0 # sched: [1:1.00]
; HASWELL-NEXT: retq # sched: [2:1.00]
;
; BTVER2-LABEL: test_permilvarpd_ymm:
; BTVER2: # BB#0:
@ -2459,8 +2459,8 @@ define <4 x float> @test_permilvarps(<4 x float> %a0, <4 x i32> %a1, <4 x i32> *
; HASWELL-LABEL: test_permilvarps:
; HASWELL: # BB#0:
; HASWELL-NEXT: vpermilps %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
; HASWELL-NEXT: vpermilps (%rdi), %xmm0, %xmm0 # sched: [5:1.00]
; HASWELL-NEXT: retq # sched: [1:1.00]
; HASWELL-NEXT: vpermilps (%rdi), %xmm0, %xmm0 # sched: [1:1.00]
; HASWELL-NEXT: retq # sched: [2:1.00]
;
; BTVER2-LABEL: test_permilvarps:
; BTVER2: # BB#0:
@ -2496,8 +2496,8 @@ define <8 x float> @test_permilvarps_ymm(<8 x float> %a0, <8 x i32> %a1, <8 x i3
; HASWELL-LABEL: test_permilvarps_ymm:
; HASWELL: # BB#0:
; HASWELL-NEXT: vpermilps %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
; HASWELL-NEXT: vpermilps (%rdi), %ymm0, %ymm0 # sched: [5:1.00]
; HASWELL-NEXT: retq # sched: [1:1.00]
; HASWELL-NEXT: vpermilps (%rdi), %ymm0, %ymm0 # sched: [1:1.00]
; HASWELL-NEXT: retq # sched: [2:1.00]
;
; BTVER2-LABEL: test_permilvarps_ymm:
; BTVER2: # BB#0:
@ -2535,9 +2535,9 @@ define <8 x float> @test_rcpps(<8 x float> %a0, <8 x float> *%a1) {
; HASWELL-LABEL: test_rcpps:
; HASWELL: # BB#0:
; HASWELL-NEXT: vrcpps (%rdi), %ymm1 # sched: [11:2.00]
; HASWELL-NEXT: vrcpps %ymm0, %ymm0 # sched: [7:2.00]
; HASWELL-NEXT: vrcpps %ymm0, %ymm0 # sched: [11:2.00]
; HASWELL-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
; HASWELL-NEXT: retq # sched: [1:1.00]
; HASWELL-NEXT: retq # sched: [2:1.00]
;
; BTVER2-LABEL: test_rcpps:
; BTVER2: # BB#0:
@ -2577,10 +2577,10 @@ define <4 x double> @test_roundpd(<4 x double> %a0, <4 x double> *%a1) {
;
; HASWELL-LABEL: test_roundpd:
; HASWELL: # BB#0:
; HASWELL-NEXT: vroundpd $7, %ymm0, %ymm0 # sched: [6:2.00]
; HASWELL-NEXT: vroundpd $7, (%rdi), %ymm1 # sched: [10:2.00]
; HASWELL-NEXT: vroundpd $7, %ymm0, %ymm0 # sched: [5:1.25]
; HASWELL-NEXT: vroundpd $7, (%rdi), %ymm1 # sched: [6:2.00]
; HASWELL-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
; HASWELL-NEXT: retq # sched: [1:1.00]
; HASWELL-NEXT: retq # sched: [2:1.00]
;
; BTVER2-LABEL: test_roundpd:
; BTVER2: # BB#0:
@ -2620,10 +2620,10 @@ define <8 x float> @test_roundps(<8 x float> %a0, <8 x float> *%a1) {
;
; HASWELL-LABEL: test_roundps:
; HASWELL: # BB#0:
; HASWELL-NEXT: vroundps $7, %ymm0, %ymm0 # sched: [6:2.00]
; HASWELL-NEXT: vroundps $7, (%rdi), %ymm1 # sched: [10:2.00]
; HASWELL-NEXT: vroundps $7, %ymm0, %ymm0 # sched: [5:1.25]
; HASWELL-NEXT: vroundps $7, (%rdi), %ymm1 # sched: [6:2.00]
; HASWELL-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
; HASWELL-NEXT: retq # sched: [1:1.00]
; HASWELL-NEXT: retq # sched: [2:1.00]
;
; BTVER2-LABEL: test_roundps:
; BTVER2: # BB#0:
@ -2664,9 +2664,9 @@ define <8 x float> @test_rsqrtps(<8 x float> %a0, <8 x float> *%a1) {
; HASWELL-LABEL: test_rsqrtps:
; HASWELL: # BB#0:
; HASWELL-NEXT: vrsqrtps (%rdi), %ymm1 # sched: [11:2.00]
; HASWELL-NEXT: vrsqrtps %ymm0, %ymm0 # sched: [7:2.00]
; HASWELL-NEXT: vrsqrtps %ymm0, %ymm0 # sched: [11:2.00]
; HASWELL-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
; HASWELL-NEXT: retq # sched: [1:1.00]
; HASWELL-NEXT: retq # sched: [2:1.00]
;
; BTVER2-LABEL: test_rsqrtps:
; BTVER2: # BB#0:
@ -2707,9 +2707,9 @@ define <4 x double> @test_shufpd(<4 x double> %a0, <4 x double> %a1, <4 x double
; HASWELL-LABEL: test_shufpd:
; HASWELL: # BB#0:
; HASWELL-NEXT: vshufpd {{.*#+}} ymm0 = ymm0[1],ymm1[0],ymm0[2],ymm1[3] sched: [1:1.00]
; HASWELL-NEXT: vshufpd {{.*#+}} ymm1 = ymm1[1],mem[0],ymm1[2],mem[3] sched: [5:1.00]
; HASWELL-NEXT: vshufpd {{.*#+}} ymm1 = ymm1[1],mem[0],ymm1[2],mem[3] sched: [1:1.00]
; HASWELL-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
; HASWELL-NEXT: retq # sched: [1:1.00]
; HASWELL-NEXT: retq # sched: [2:1.00]
;
; BTVER2-LABEL: test_shufpd:
; BTVER2: # BB#0:
@ -2747,8 +2747,8 @@ define <8 x float> @test_shufps(<8 x float> %a0, <8 x float> %a1, <8 x float> *%
; HASWELL-LABEL: test_shufps:
; HASWELL: # BB#0:
; HASWELL-NEXT: vshufps {{.*#+}} ymm0 = ymm0[0,0],ymm1[0,0],ymm0[4,4],ymm1[4,4] sched: [1:1.00]
; HASWELL-NEXT: vshufps {{.*#+}} ymm0 = ymm0[0,3],mem[0,0],ymm0[4,7],mem[4,4] sched: [5:1.00]
; HASWELL-NEXT: retq # sched: [1:1.00]
; HASWELL-NEXT: vshufps {{.*#+}} ymm0 = ymm0[0,3],mem[0,0],ymm0[4,7],mem[4,4] sched: [1:1.00]
; HASWELL-NEXT: retq # sched: [2:1.00]
;
; BTVER2-LABEL: test_shufps:
; BTVER2: # BB#0:
@ -2784,10 +2784,10 @@ define <4 x double> @test_sqrtpd(<4 x double> %a0, <4 x double> *%a1) {
;
; HASWELL-LABEL: test_sqrtpd:
; HASWELL: # BB#0:
; HASWELL-NEXT: vsqrtpd (%rdi), %ymm1 # sched: [32:2.00]
; HASWELL-NEXT: vsqrtpd %ymm0, %ymm0 # sched: [28:2.00]
; HASWELL-NEXT: vsqrtpd (%rdi), %ymm1 # sched: [35:2.00]
; HASWELL-NEXT: vsqrtpd %ymm0, %ymm0 # sched: [35:2.00]
; HASWELL-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
; HASWELL-NEXT: retq # sched: [1:1.00]
; HASWELL-NEXT: retq # sched: [2:1.00]
;
; BTVER2-LABEL: test_sqrtpd:
; BTVER2: # BB#0:
@ -2827,10 +2827,10 @@ define <8 x float> @test_sqrtps(<8 x float> %a0, <8 x float> *%a1) {
;
; HASWELL-LABEL: test_sqrtps:
; HASWELL: # BB#0:
; HASWELL-NEXT: vsqrtps (%rdi), %ymm1 # sched: [23:2.00]
; HASWELL-NEXT: vsqrtps %ymm0, %ymm0 # sched: [19:2.00]
; HASWELL-NEXT: vsqrtps (%rdi), %ymm1 # sched: [21:2.00]
; HASWELL-NEXT: vsqrtps %ymm0, %ymm0 # sched: [21:2.00]
; HASWELL-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
; HASWELL-NEXT: retq # sched: [1:1.00]
; HASWELL-NEXT: retq # sched: [2:1.00]
;
; BTVER2-LABEL: test_sqrtps:
; BTVER2: # BB#0:
@ -2869,8 +2869,8 @@ define <4 x double> @test_subpd(<4 x double> %a0, <4 x double> %a1, <4 x double>
; HASWELL-LABEL: test_subpd:
; HASWELL: # BB#0:
; HASWELL-NEXT: vsubpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
; HASWELL-NEXT: vsubpd (%rdi), %ymm0, %ymm0 # sched: [7:1.00]
; HASWELL-NEXT: retq # sched: [1:1.00]
; HASWELL-NEXT: vsubpd (%rdi), %ymm0, %ymm0 # sched: [3:1.00]
; HASWELL-NEXT: retq # sched: [2:1.00]
;
; BTVER2-LABEL: test_subpd:
; BTVER2: # BB#0:
@ -2905,8 +2905,8 @@ define <8 x float> @test_subps(<8 x float> %a0, <8 x float> %a1, <8 x float> *%a
; HASWELL-LABEL: test_subps:
; HASWELL: # BB#0:
; HASWELL-NEXT: vsubps %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
; HASWELL-NEXT: vsubps (%rdi), %ymm0, %ymm0 # sched: [7:1.00]
; HASWELL-NEXT: retq # sched: [1:1.00]
; HASWELL-NEXT: vsubps (%rdi), %ymm0, %ymm0 # sched: [3:1.00]
; HASWELL-NEXT: retq # sched: [2:1.00]
;
; BTVER2-LABEL: test_subps:
; BTVER2: # BB#0:
@ -2947,11 +2947,11 @@ define i32 @test_testpd(<2 x double> %a0, <2 x double> %a1, <2 x double> *%a2) {
; HASWELL-LABEL: test_testpd:
; HASWELL: # BB#0:
; HASWELL-NEXT: xorl %eax, %eax # sched: [1:0.25]
; HASWELL-NEXT: vtestpd %xmm1, %xmm0 # sched: [1:0.33]
; HASWELL-NEXT: vtestpd %xmm1, %xmm0 # sched: [1:1.00]
; HASWELL-NEXT: setb %al # sched: [1:0.50]
; HASWELL-NEXT: vtestpd (%rdi), %xmm0 # sched: [5:0.50]
; HASWELL-NEXT: vtestpd (%rdi), %xmm0 # sched: [1:1.00]
; HASWELL-NEXT: adcl $0, %eax # sched: [2:0.50]
; HASWELL-NEXT: retq # sched: [1:1.00]
; HASWELL-NEXT: retq # sched: [2:1.00]
;
; BTVER2-LABEL: test_testpd:
; BTVER2: # BB#0:
@ -3002,12 +3002,12 @@ define i32 @test_testpd_ymm(<4 x double> %a0, <4 x double> %a1, <4 x double> *%a
; HASWELL-LABEL: test_testpd_ymm:
; HASWELL: # BB#0:
; HASWELL-NEXT: xorl %eax, %eax # sched: [1:0.25]
; HASWELL-NEXT: vtestpd %ymm1, %ymm0 # sched: [1:0.33]
; HASWELL-NEXT: vtestpd %ymm1, %ymm0 # sched: [1:1.00]
; HASWELL-NEXT: setb %al # sched: [1:0.50]
; HASWELL-NEXT: vtestpd (%rdi), %ymm0 # sched: [5:0.50]
; HASWELL-NEXT: vtestpd (%rdi), %ymm0 # sched: [1:1.00]
; HASWELL-NEXT: adcl $0, %eax # sched: [2:0.50]
; HASWELL-NEXT: vzeroupper # sched: [1:?]
; HASWELL-NEXT: retq # sched: [1:1.00]
; HASWELL-NEXT: vzeroupper # sched: [4:1.00]
; HASWELL-NEXT: retq # sched: [2:1.00]
;
; BTVER2-LABEL: test_testpd_ymm:
; BTVER2: # BB#0:
@ -3057,11 +3057,11 @@ define i32 @test_testps(<4 x float> %a0, <4 x float> %a1, <4 x float> *%a2) {
; HASWELL-LABEL: test_testps:
; HASWELL: # BB#0:
; HASWELL-NEXT: xorl %eax, %eax # sched: [1:0.25]
; HASWELL-NEXT: vtestps %xmm1, %xmm0 # sched: [1:0.33]
; HASWELL-NEXT: vtestps %xmm1, %xmm0 # sched: [1:1.00]
; HASWELL-NEXT: setb %al # sched: [1:0.50]
; HASWELL-NEXT: vtestps (%rdi), %xmm0 # sched: [5:0.50]
; HASWELL-NEXT: vtestps (%rdi), %xmm0 # sched: [1:1.00]
; HASWELL-NEXT: adcl $0, %eax # sched: [2:0.50]
; HASWELL-NEXT: retq # sched: [1:1.00]
; HASWELL-NEXT: retq # sched: [2:1.00]
;
; BTVER2-LABEL: test_testps:
; BTVER2: # BB#0:
@ -3112,12 +3112,12 @@ define i32 @test_testps_ymm(<8 x float> %a0, <8 x float> %a1, <8 x float> *%a2)
; HASWELL-LABEL: test_testps_ymm:
; HASWELL: # BB#0:
; HASWELL-NEXT: xorl %eax, %eax # sched: [1:0.25]
; HASWELL-NEXT: vtestps %ymm1, %ymm0 # sched: [1:0.33]
; HASWELL-NEXT: vtestps %ymm1, %ymm0 # sched: [1:1.00]
; HASWELL-NEXT: setb %al # sched: [1:0.50]
; HASWELL-NEXT: vtestps (%rdi), %ymm0 # sched: [5:0.50]
; HASWELL-NEXT: vtestps (%rdi), %ymm0 # sched: [1:1.00]
; HASWELL-NEXT: adcl $0, %eax # sched: [2:0.50]
; HASWELL-NEXT: vzeroupper # sched: [1:?]
; HASWELL-NEXT: retq # sched: [1:1.00]
; HASWELL-NEXT: vzeroupper # sched: [4:1.00]
; HASWELL-NEXT: retq # sched: [2:1.00]
;
; BTVER2-LABEL: test_testps_ymm:
; BTVER2: # BB#0:
@ -3163,9 +3163,9 @@ define <4 x double> @test_unpckhpd(<4 x double> %a0, <4 x double> %a1, <4 x doub
; HASWELL-LABEL: test_unpckhpd:
; HASWELL: # BB#0:
; HASWELL-NEXT: vunpckhpd {{.*#+}} ymm0 = ymm0[1],ymm1[1],ymm0[3],ymm1[3] sched: [1:1.00]
; HASWELL-NEXT: vunpckhpd {{.*#+}} ymm1 = ymm1[1],mem[1],ymm1[3],mem[3] sched: [5:1.00]
; HASWELL-NEXT: vunpckhpd {{.*#+}} ymm1 = ymm1[1],mem[1],ymm1[3],mem[3] sched: [1:1.00]
; HASWELL-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
; HASWELL-NEXT: retq # sched: [1:1.00]
; HASWELL-NEXT: retq # sched: [2:1.00]
;
; BTVER2-LABEL: test_unpckhpd:
; BTVER2: # BB#0:
@ -3203,8 +3203,8 @@ define <8 x float> @test_unpckhps(<8 x float> %a0, <8 x float> %a1, <8 x float>
; HASWELL-LABEL: test_unpckhps:
; HASWELL: # BB#0:
; HASWELL-NEXT: vunpckhps {{.*#+}} ymm0 = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7] sched: [1:1.00]
; HASWELL-NEXT: vunpckhps {{.*#+}} ymm0 = ymm0[2],mem[2],ymm0[3],mem[3],ymm0[6],mem[6],ymm0[7],mem[7] sched: [5:1.00]
; HASWELL-NEXT: retq # sched: [1:1.00]
; HASWELL-NEXT: vunpckhps {{.*#+}} ymm0 = ymm0[2],mem[2],ymm0[3],mem[3],ymm0[6],mem[6],ymm0[7],mem[7] sched: [1:1.00]
; HASWELL-NEXT: retq # sched: [2:1.00]
;
; BTVER2-LABEL: test_unpckhps:
; BTVER2: # BB#0:
@ -3241,9 +3241,9 @@ define <4 x double> @test_unpcklpd(<4 x double> %a0, <4 x double> %a1, <4 x doub
; HASWELL-LABEL: test_unpcklpd:
; HASWELL: # BB#0:
; HASWELL-NEXT: vunpcklpd {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[2],ymm1[2] sched: [1:1.00]
; HASWELL-NEXT: vunpcklpd {{.*#+}} ymm1 = ymm1[0],mem[0],ymm1[2],mem[2] sched: [5:1.00]
; HASWELL-NEXT: vunpcklpd {{.*#+}} ymm1 = ymm1[0],mem[0],ymm1[2],mem[2] sched: [1:1.00]
; HASWELL-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
; HASWELL-NEXT: retq # sched: [1:1.00]
; HASWELL-NEXT: retq # sched: [2:1.00]
;
; BTVER2-LABEL: test_unpcklpd:
; BTVER2: # BB#0:
@ -3281,8 +3281,8 @@ define <8 x float> @test_unpcklps(<8 x float> %a0, <8 x float> %a1, <8 x float>
; HASWELL-LABEL: test_unpcklps:
; HASWELL: # BB#0:
; HASWELL-NEXT: vunpcklps {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5] sched: [1:1.00]
; HASWELL-NEXT: vunpcklps {{.*#+}} ymm0 = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[4],mem[4],ymm0[5],mem[5] sched: [5:1.00]
; HASWELL-NEXT: retq # sched: [1:1.00]
; HASWELL-NEXT: vunpcklps {{.*#+}} ymm0 = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[4],mem[4],ymm0[5],mem[5] sched: [1:1.00]
; HASWELL-NEXT: retq # sched: [2:1.00]
;
; BTVER2-LABEL: test_unpcklps:
; BTVER2: # BB#0:
@ -3319,9 +3319,9 @@ define <4 x double> @test_xorpd(<4 x double> %a0, <4 x double> %a1, <4 x double>
; HASWELL-LABEL: test_xorpd:
; HASWELL: # BB#0:
; HASWELL-NEXT: vxorpd %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
; HASWELL-NEXT: vxorpd (%rdi), %ymm0, %ymm0 # sched: [5:1.00]
; HASWELL-NEXT: vxorpd (%rdi), %ymm0, %ymm0 # sched: [1:1.00]
; HASWELL-NEXT: vaddpd %ymm0, %ymm1, %ymm0 # sched: [3:1.00]
; HASWELL-NEXT: retq # sched: [1:1.00]
; HASWELL-NEXT: retq # sched: [2:1.00]
;
; BTVER2-LABEL: test_xorpd:
; BTVER2: # BB#0:
@ -3365,9 +3365,9 @@ define <8 x float> @test_xorps(<8 x float> %a0, <8 x float> %a1, <8 x float> *%a
; HASWELL-LABEL: test_xorps:
; HASWELL: # BB#0:
; HASWELL-NEXT: vxorps %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
; HASWELL-NEXT: vxorps (%rdi), %ymm0, %ymm0 # sched: [5:1.00]
; HASWELL-NEXT: vxorps (%rdi), %ymm0, %ymm0 # sched: [1:1.00]
; HASWELL-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [3:1.00]
; HASWELL-NEXT: retq # sched: [1:1.00]
; HASWELL-NEXT: retq # sched: [2:1.00]
;
; BTVER2-LABEL: test_xorps:
; BTVER2: # BB#0:
@ -3406,8 +3406,8 @@ define void @test_zeroall() {
;
; HASWELL-LABEL: test_zeroall:
; HASWELL: # BB#0:
; HASWELL-NEXT: vzeroall # sched: [1:?]
; HASWELL-NEXT: retq # sched: [1:1.00]
; HASWELL-NEXT: vzeroall # sched: [16:16.00]
; HASWELL-NEXT: retq # sched: [2:1.00]
;
; BTVER2-LABEL: test_zeroall:
; BTVER2: # BB#0:
@ -3436,8 +3436,8 @@ define void @test_zeroupper() {
;
; HASWELL-LABEL: test_zeroupper:
; HASWELL: # BB#0:
; HASWELL-NEXT: vzeroupper # sched: [1:?]
; HASWELL-NEXT: retq # sched: [1:1.00]
; HASWELL-NEXT: vzeroupper # sched: [4:1.00]
; HASWELL-NEXT: retq # sched: [2:1.00]
;
; BTVER2-LABEL: test_zeroupper:
; BTVER2: # BB#0:

View File

@ -15,9 +15,9 @@ define <32 x i8> @test_pabsb(<32 x i8> %a0, <32 x i8> *%a1) {
; HASWELL-LABEL: test_pabsb:
; HASWELL: # BB#0:
; HASWELL-NEXT: vpabsb %ymm0, %ymm0 # sched: [1:0.50]
; HASWELL-NEXT: vpabsb (%rdi), %ymm1 # sched: [5:0.50]
; HASWELL-NEXT: vpabsb (%rdi), %ymm1 # sched: [1:0.50]
; HASWELL-NEXT: vpor %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
; HASWELL-NEXT: retq # sched: [1:1.00]
; HASWELL-NEXT: retq # sched: [2:1.00]
;
; ZNVER1-LABEL: test_pabsb:
; ZNVER1: # BB#0:
@ -44,9 +44,9 @@ define <8 x i32> @test_pabsd(<8 x i32> %a0, <8 x i32> *%a1) {
; HASWELL-LABEL: test_pabsd:
; HASWELL: # BB#0:
; HASWELL-NEXT: vpabsd %ymm0, %ymm0 # sched: [1:0.50]
; HASWELL-NEXT: vpabsd (%rdi), %ymm1 # sched: [5:0.50]
; HASWELL-NEXT: vpabsd (%rdi), %ymm1 # sched: [1:0.50]
; HASWELL-NEXT: vpor %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
; HASWELL-NEXT: retq # sched: [1:1.00]
; HASWELL-NEXT: retq # sched: [2:1.00]
;
; ZNVER1-LABEL: test_pabsd:
; ZNVER1: # BB#0:
@ -73,9 +73,9 @@ define <16 x i16> @test_pabsw(<16 x i16> %a0, <16 x i16> *%a1) {
; HASWELL-LABEL: test_pabsw:
; HASWELL: # BB#0:
; HASWELL-NEXT: vpabsw %ymm0, %ymm0 # sched: [1:0.50]
; HASWELL-NEXT: vpabsw (%rdi), %ymm1 # sched: [5:0.50]
; HASWELL-NEXT: vpabsw (%rdi), %ymm1 # sched: [1:0.50]
; HASWELL-NEXT: vpor %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
; HASWELL-NEXT: retq # sched: [1:1.00]
; HASWELL-NEXT: retq # sched: [2:1.00]
;
; ZNVER1-LABEL: test_pabsw:
; ZNVER1: # BB#0:
@ -101,8 +101,8 @@ define <32 x i8> @test_paddb(<32 x i8> %a0, <32 x i8> %a1, <32 x i8> *%a2) {
; HASWELL-LABEL: test_paddb:
; HASWELL: # BB#0:
; HASWELL-NEXT: vpaddb %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
; HASWELL-NEXT: vpaddb (%rdi), %ymm0, %ymm0 # sched: [5:0.50]
; HASWELL-NEXT: retq # sched: [1:1.00]
; HASWELL-NEXT: vpaddb (%rdi), %ymm0, %ymm0 # sched: [1:0.50]
; HASWELL-NEXT: retq # sched: [2:1.00]
;
; ZNVER1-LABEL: test_paddb:
; ZNVER1: # BB#0:
@ -125,8 +125,8 @@ define <8 x i32> @test_paddd(<8 x i32> %a0, <8 x i32> %a1, <8 x i32> *%a2) {
; HASWELL-LABEL: test_paddd:
; HASWELL: # BB#0:
; HASWELL-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
; HASWELL-NEXT: vpaddd (%rdi), %ymm0, %ymm0 # sched: [5:0.50]
; HASWELL-NEXT: retq # sched: [1:1.00]
; HASWELL-NEXT: vpaddd (%rdi), %ymm0, %ymm0 # sched: [1:0.50]
; HASWELL-NEXT: retq # sched: [2:1.00]
;
; ZNVER1-LABEL: test_paddd:
; ZNVER1: # BB#0:
@ -149,8 +149,8 @@ define <4 x i64> @test_paddq(<4 x i64> %a0, <4 x i64> %a1, <4 x i64> *%a2) {
; HASWELL-LABEL: test_paddq:
; HASWELL: # BB#0:
; HASWELL-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
; HASWELL-NEXT: vpaddq (%rdi), %ymm0, %ymm0 # sched: [5:0.50]
; HASWELL-NEXT: retq # sched: [1:1.00]
; HASWELL-NEXT: vpaddq (%rdi), %ymm0, %ymm0 # sched: [1:0.50]
; HASWELL-NEXT: retq # sched: [2:1.00]
;
; ZNVER1-LABEL: test_paddq:
; ZNVER1: # BB#0:
@ -173,8 +173,8 @@ define <16 x i16> @test_paddw(<16 x i16> %a0, <16 x i16> %a1, <16 x i16> *%a2) {
; HASWELL-LABEL: test_paddw:
; HASWELL: # BB#0:
; HASWELL-NEXT: vpaddw %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
; HASWELL-NEXT: vpaddw (%rdi), %ymm0, %ymm0 # sched: [5:0.50]
; HASWELL-NEXT: retq # sched: [1:1.00]
; HASWELL-NEXT: vpaddw (%rdi), %ymm0, %ymm0 # sched: [1:0.50]
; HASWELL-NEXT: retq # sched: [2:1.00]
;
; ZNVER1-LABEL: test_paddw:
; ZNVER1: # BB#0:
@ -198,9 +198,9 @@ define <4 x i64> @test_pand(<4 x i64> %a0, <4 x i64> %a1, <4 x i64> *%a2) {
; HASWELL-LABEL: test_pand:
; HASWELL: # BB#0:
; HASWELL-NEXT: vpand %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
; HASWELL-NEXT: vpand (%rdi), %ymm0, %ymm0 # sched: [5:0.50]
; HASWELL-NEXT: vpand (%rdi), %ymm0, %ymm0 # sched: [1:0.50]
; HASWELL-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
; HASWELL-NEXT: retq # sched: [1:1.00]
; HASWELL-NEXT: retq # sched: [2:1.00]
;
; ZNVER1-LABEL: test_pand:
; ZNVER1: # BB#0:
@ -226,9 +226,9 @@ define <4 x i64> @test_pandn(<4 x i64> %a0, <4 x i64> %a1, <4 x i64> *%a2) {
; HASWELL-LABEL: test_pandn:
; HASWELL: # BB#0:
; HASWELL-NEXT: vpandn %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
; HASWELL-NEXT: vpandn (%rdi), %ymm0, %ymm1 # sched: [5:0.50]
; HASWELL-NEXT: vpandn (%rdi), %ymm0, %ymm1 # sched: [1:0.50]
; HASWELL-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
; HASWELL-NEXT: retq # sched: [1:1.00]
; HASWELL-NEXT: retq # sched: [2:1.00]
;
; ZNVER1-LABEL: test_pandn:
; ZNVER1: # BB#0:
@ -256,7 +256,7 @@ define <8 x i32> @test_pmulld(<8 x i32> %a0, <8 x i32> %a1, <8 x i32> *%a2) {
; HASWELL: # BB#0:
; HASWELL-NEXT: vpmulld %ymm1, %ymm0, %ymm0 # sched: [10:2.00]
; HASWELL-NEXT: vpmulld (%rdi), %ymm0, %ymm0 # sched: [10:2.00]
; HASWELL-NEXT: retq # sched: [1:1.00]
; HASWELL-NEXT: retq # sched: [2:1.00]
;
; ZNVER1-LABEL: test_pmulld:
; ZNVER1: # BB#0:
@ -279,8 +279,8 @@ define <16 x i16> @test_pmullw(<16 x i16> %a0, <16 x i16> %a1, <16 x i16> *%a2)
; HASWELL-LABEL: test_pmullw:
; HASWELL: # BB#0:
; HASWELL-NEXT: vpmullw %ymm1, %ymm0, %ymm0 # sched: [5:1.00]
; HASWELL-NEXT: vpmullw (%rdi), %ymm0, %ymm0 # sched: [9:1.00]
; HASWELL-NEXT: retq # sched: [1:1.00]
; HASWELL-NEXT: vpmullw (%rdi), %ymm0, %ymm0 # sched: [5:1.00]
; HASWELL-NEXT: retq # sched: [2:1.00]
;
; ZNVER1-LABEL: test_pmullw:
; ZNVER1: # BB#0:
@ -304,9 +304,9 @@ define <4 x i64> @test_por(<4 x i64> %a0, <4 x i64> %a1, <4 x i64> *%a2) {
; HASWELL-LABEL: test_por:
; HASWELL: # BB#0:
; HASWELL-NEXT: vpor %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
; HASWELL-NEXT: vpor (%rdi), %ymm0, %ymm0 # sched: [5:0.50]
; HASWELL-NEXT: vpor (%rdi), %ymm0, %ymm0 # sched: [1:0.50]
; HASWELL-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
; HASWELL-NEXT: retq # sched: [1:1.00]
; HASWELL-NEXT: retq # sched: [2:1.00]
;
; ZNVER1-LABEL: test_por:
; ZNVER1: # BB#0:
@ -331,8 +331,8 @@ define <32 x i8> @test_psubb(<32 x i8> %a0, <32 x i8> %a1, <32 x i8> *%a2) {
; HASWELL-LABEL: test_psubb:
; HASWELL: # BB#0:
; HASWELL-NEXT: vpsubb %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
; HASWELL-NEXT: vpsubb (%rdi), %ymm0, %ymm0 # sched: [5:0.50]
; HASWELL-NEXT: retq # sched: [1:1.00]
; HASWELL-NEXT: vpsubb (%rdi), %ymm0, %ymm0 # sched: [1:0.50]
; HASWELL-NEXT: retq # sched: [2:1.00]
;
; ZNVER1-LABEL: test_psubb:
; ZNVER1: # BB#0:
@ -355,8 +355,8 @@ define <8 x i32> @test_psubd(<8 x i32> %a0, <8 x i32> %a1, <8 x i32> *%a2) {
; HASWELL-LABEL: test_psubd:
; HASWELL: # BB#0:
; HASWELL-NEXT: vpsubd %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
; HASWELL-NEXT: vpsubd (%rdi), %ymm0, %ymm0 # sched: [5:0.50]
; HASWELL-NEXT: retq # sched: [1:1.00]
; HASWELL-NEXT: vpsubd (%rdi), %ymm0, %ymm0 # sched: [1:0.50]
; HASWELL-NEXT: retq # sched: [2:1.00]
;
; ZNVER1-LABEL: test_psubd:
; ZNVER1: # BB#0:
@ -379,8 +379,8 @@ define <4 x i64> @test_psubq(<4 x i64> %a0, <4 x i64> %a1, <4 x i64> *%a2) {
; HASWELL-LABEL: test_psubq:
; HASWELL: # BB#0:
; HASWELL-NEXT: vpsubq %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
; HASWELL-NEXT: vpsubq (%rdi), %ymm0, %ymm0 # sched: [5:0.50]
; HASWELL-NEXT: retq # sched: [1:1.00]
; HASWELL-NEXT: vpsubq (%rdi), %ymm0, %ymm0 # sched: [1:0.50]
; HASWELL-NEXT: retq # sched: [2:1.00]
;
; ZNVER1-LABEL: test_psubq:
; ZNVER1: # BB#0:
@ -403,8 +403,8 @@ define <16 x i16> @test_psubw(<16 x i16> %a0, <16 x i16> %a1, <16 x i16> *%a2) {
; HASWELL-LABEL: test_psubw:
; HASWELL: # BB#0:
; HASWELL-NEXT: vpsubw %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
; HASWELL-NEXT: vpsubw (%rdi), %ymm0, %ymm0 # sched: [5:0.50]
; HASWELL-NEXT: retq # sched: [1:1.00]
; HASWELL-NEXT: vpsubw (%rdi), %ymm0, %ymm0 # sched: [1:0.50]
; HASWELL-NEXT: retq # sched: [2:1.00]
;
; ZNVER1-LABEL: test_psubw:
; ZNVER1: # BB#0:
@ -428,9 +428,9 @@ define <4 x i64> @test_pxor(<4 x i64> %a0, <4 x i64> %a1, <4 x i64> *%a2) {
; HASWELL-LABEL: test_pxor:
; HASWELL: # BB#0:
; HASWELL-NEXT: vpxor %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
; HASWELL-NEXT: vpxor (%rdi), %ymm0, %ymm0 # sched: [5:0.50]
; HASWELL-NEXT: vpxor (%rdi), %ymm0, %ymm0 # sched: [1:0.50]
; HASWELL-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
; HASWELL-NEXT: retq # sched: [1:1.00]
; HASWELL-NEXT: retq # sched: [2:1.00]
;
; ZNVER1-LABEL: test_pxor:
; ZNVER1: # BB#0:

View File

@ -126,11 +126,11 @@ entry:
define i32 @test8(i32 %a1, i32 %a2, i32 %a3) {
; ALL-LABEL: test8:
; ALL: ## BB#0:
; ALL-NEXT: notl %edi
; ALL-NEXT: xorl $-2147483648, %esi ## imm = 0x80000000
; ALL-NEXT: testl %edx, %edx
; ALL-NEXT: movl $1, %eax
; ALL-NEXT: cmovel %eax, %edx
; ALL-NEXT: notl %edi
; ALL-NEXT: orl %edi, %esi
; ALL-NEXT: cmovnel %edx, %eax
; ALL-NEXT: retq

View File

@ -1530,19 +1530,19 @@ define <4 x double> @uitofp_4i1_double(<4 x i32> %a) {
}
define <2 x float> @uitofp_2i1_float(<2 x i32> %a) {
; NOVL-LABEL: uitofp_2i1_float:
; NOVL: # BB#0:
; NOVL-NEXT: vpxor %xmm1, %xmm1, %xmm1
; NOVL-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3]
; NOVL-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
; NOVL-NEXT: vpextrb $8, %xmm0, %eax
; NOVL-NEXT: andl $1, %eax
; NOVL-NEXT: vcvtsi2ssl %eax, %xmm2, %xmm1
; NOVL-NEXT: vpextrb $0, %xmm0, %eax
; NOVL-NEXT: andl $1, %eax
; NOVL-NEXT: vcvtsi2ssl %eax, %xmm2, %xmm0
; NOVL-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[2,3]
; NOVL-NEXT: retq
; KNL-LABEL: uitofp_2i1_float:
; KNL: # BB#0:
; KNL-NEXT: vpxor %xmm1, %xmm1, %xmm1
; KNL-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3]
; KNL-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
; KNL-NEXT: vpextrb $8, %xmm0, %eax
; KNL-NEXT: andl $1, %eax
; KNL-NEXT: vpextrb $0, %xmm0, %ecx
; KNL-NEXT: vcvtsi2ssl %eax, %xmm2, %xmm0
; KNL-NEXT: andl $1, %ecx
; KNL-NEXT: vcvtsi2ssl %ecx, %xmm2, %xmm1
; KNL-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[2,3]
; KNL-NEXT: retq
;
; VL-LABEL: uitofp_2i1_float:
; VL: # BB#0:
@ -1552,6 +1552,34 @@ define <2 x float> @uitofp_2i1_float(<2 x i32> %a) {
; VL-NEXT: vpbroadcastd {{.*}}(%rip), %xmm0 {%k1} {z}
; VL-NEXT: vcvtudq2ps %xmm0, %xmm0
; VL-NEXT: retq
;
; AVX512DQ-LABEL: uitofp_2i1_float:
; AVX512DQ: # BB#0:
; AVX512DQ-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX512DQ-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3]
; AVX512DQ-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
; AVX512DQ-NEXT: vpextrb $8, %xmm0, %eax
; AVX512DQ-NEXT: andl $1, %eax
; AVX512DQ-NEXT: vcvtsi2ssl %eax, %xmm2, %xmm1
; AVX512DQ-NEXT: vpextrb $0, %xmm0, %eax
; AVX512DQ-NEXT: andl $1, %eax
; AVX512DQ-NEXT: vcvtsi2ssl %eax, %xmm2, %xmm0
; AVX512DQ-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[2,3]
; AVX512DQ-NEXT: retq
;
; AVX512BW-LABEL: uitofp_2i1_float:
; AVX512BW: # BB#0:
; AVX512BW-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX512BW-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3]
; AVX512BW-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
; AVX512BW-NEXT: vpextrb $8, %xmm0, %eax
; AVX512BW-NEXT: andl $1, %eax
; AVX512BW-NEXT: vcvtsi2ssl %eax, %xmm2, %xmm1
; AVX512BW-NEXT: vpextrb $0, %xmm0, %eax
; AVX512BW-NEXT: andl $1, %eax
; AVX512BW-NEXT: vcvtsi2ssl %eax, %xmm2, %xmm0
; AVX512BW-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[2,3]
; AVX512BW-NEXT: retq
%mask = icmp ult <2 x i32> %a, zeroinitializer
%1 = uitofp <2 x i1> %mask to <2 x float>
ret <2 x float> %1

View File

@ -48,8 +48,8 @@ define <8 x i16> @sext_8x8mem_to_8x16(<8 x i8> *%i , <8 x i1> %mask) nounwind re
define <16 x i16> @zext_16x8mem_to_16x16(<16 x i8> *%i , <16 x i1> %mask) nounwind readnone {
; KNL-LABEL: zext_16x8mem_to_16x16:
; KNL: # BB#0:
; KNL-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
; KNL-NEXT: vpmovzxbw {{.*#+}} ymm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero
; KNL-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
; KNL-NEXT: vpsllw $15, %ymm0, %ymm0
; KNL-NEXT: vpsraw $15, %ymm0, %ymm0
; KNL-NEXT: vpand %ymm1, %ymm0, %ymm0
@ -70,8 +70,8 @@ define <16 x i16> @zext_16x8mem_to_16x16(<16 x i8> *%i , <16 x i1> %mask) nounwi
define <16 x i16> @sext_16x8mem_to_16x16(<16 x i8> *%i , <16 x i1> %mask) nounwind readnone {
; KNL-LABEL: sext_16x8mem_to_16x16:
; KNL: # BB#0:
; KNL-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
; KNL-NEXT: vpmovsxbw (%rdi), %ymm1
; KNL-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
; KNL-NEXT: vpsllw $15, %ymm0, %ymm0
; KNL-NEXT: vpsraw $15, %ymm0, %ymm0
; KNL-NEXT: vpand %ymm1, %ymm0, %ymm0

View File

@ -936,7 +936,6 @@ define i32 @test_insertelement_v32i1(i32 %a, i32 %b, <32 x i32> %x , <32 x i32>
; KNL-NEXT: subq $32, %rsp
; KNL-NEXT: xorl %eax, %eax
; KNL-NEXT: cmpl %esi, %edi
; KNL-NEXT: setb %al
; KNL-NEXT: vpcmpltud %zmm3, %zmm1, %k0
; KNL-NEXT: kshiftlw $14, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
@ -1062,6 +1061,7 @@ define i32 @test_insertelement_v32i1(i32 %a, i32 %b, <32 x i32> %x , <32 x i32>
; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: kmovw %k1, %ecx
; KNL-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
; KNL-NEXT: setb %al
; KNL-NEXT: kshiftrw $15, %k0, %k0
; KNL-NEXT: kmovw %k0, %ecx
; KNL-NEXT: vpinsrb $15, %ecx, %xmm0, %xmm0
@ -1112,23 +1112,23 @@ define i8 @test_iinsertelement_v4i1(i32 %a, i32 %b, <4 x i32> %x , <4 x i32> %y)
; KNL-LABEL: test_iinsertelement_v4i1:
; KNL: ## BB#0:
; KNL-NEXT: cmpl %esi, %edi
; KNL-NEXT: setb %al
; KNL-NEXT: vpbroadcastd {{.*#+}} xmm2 = [2147483648,2147483648,2147483648,2147483648]
; KNL-NEXT: vpxor %xmm2, %xmm0, %xmm0
; KNL-NEXT: vpxor %xmm2, %xmm1, %xmm1
; KNL-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
; KNL-NEXT: vpextrb $4, %xmm0, %ecx
; KNL-NEXT: kmovw %ecx, %k1
; KNL-NEXT: vpextrb $4, %xmm0, %eax
; KNL-NEXT: setb %cl
; KNL-NEXT: kmovw %eax, %k1
; KNL-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; KNL-NEXT: vpextrb $0, %xmm0, %ecx
; KNL-NEXT: kmovw %ecx, %k1
; KNL-NEXT: vpextrb $0, %xmm0, %eax
; KNL-NEXT: kmovw %eax, %k1
; KNL-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; KNL-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,8,2,3,4,5,6,7]
; KNL-NEXT: vpermi2q %zmm1, %zmm2, %zmm3
; KNL-NEXT: vpsllq $63, %zmm3, %zmm1
; KNL-NEXT: vptestmq %zmm1, %zmm1, %k1
; KNL-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; KNL-NEXT: kmovw %eax, %k1
; KNL-NEXT: kmovw %ecx, %k1
; KNL-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; KNL-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,1,8,3,4,5,6,7]
; KNL-NEXT: vpermi2q %zmm2, %zmm1, %zmm3
@ -1902,14 +1902,23 @@ define i16 @test_extractelement_variable_v32i16(<32 x i16> %t1, i32 %index) {
}
define i8 @test_extractelement_variable_v16i8(<16 x i8> %t1, i32 %index) {
; CHECK-LABEL: test_extractelement_variable_v16i8:
; CHECK: ## BB#0:
; CHECK-NEXT: ## kill: %EDI<def> %EDI<kill> %RDI<def>
; CHECK-NEXT: vmovaps %xmm0, -{{[0-9]+}}(%rsp)
; CHECK-NEXT: andl $15, %edi
; CHECK-NEXT: leaq -{{[0-9]+}}(%rsp), %rax
; CHECK-NEXT: movb (%rdi,%rax), %al
; CHECK-NEXT: retq
; KNL-LABEL: test_extractelement_variable_v16i8:
; KNL: ## BB#0:
; KNL-NEXT: vmovaps %xmm0, -{{[0-9]+}}(%rsp)
; KNL-NEXT: ## kill: %EDI<def> %EDI<kill> %RDI<def>
; KNL-NEXT: andl $15, %edi
; KNL-NEXT: leaq -{{[0-9]+}}(%rsp), %rax
; KNL-NEXT: movb (%rdi,%rax), %al
; KNL-NEXT: retq
;
; SKX-LABEL: test_extractelement_variable_v16i8:
; SKX: ## BB#0:
; SKX-NEXT: ## kill: %EDI<def> %EDI<kill> %RDI<def>
; SKX-NEXT: vmovaps %xmm0, -{{[0-9]+}}(%rsp)
; SKX-NEXT: andl $15, %edi
; SKX-NEXT: leaq -{{[0-9]+}}(%rsp), %rax
; SKX-NEXT: movb (%rdi,%rax), %al
; SKX-NEXT: retq
%t2 = extractelement <16 x i8> %t1, i32 %index
ret i8 %t2
}
@ -1927,8 +1936,8 @@ define i8 @test_extractelement_variable_v32i8(<32 x i8> %t1, i32 %index) {
; KNL-NEXT: .cfi_def_cfa_register %rbp
; KNL-NEXT: andq $-32, %rsp
; KNL-NEXT: subq $64, %rsp
; KNL-NEXT: ## kill: %EDI<def> %EDI<kill> %RDI<def>
; KNL-NEXT: vmovaps %ymm0, (%rsp)
; KNL-NEXT: ## kill: %EDI<def> %EDI<kill> %RDI<def>
; KNL-NEXT: andl $31, %edi
; KNL-NEXT: movq %rsp, %rax
; KNL-NEXT: movb (%rdi,%rax), %al
@ -1975,9 +1984,9 @@ define i8 @test_extractelement_variable_v64i8(<64 x i8> %t1, i32 %index) {
; KNL-NEXT: .cfi_def_cfa_register %rbp
; KNL-NEXT: andq $-64, %rsp
; KNL-NEXT: subq $128, %rsp
; KNL-NEXT: ## kill: %EDI<def> %EDI<kill> %RDI<def>
; KNL-NEXT: vmovaps %ymm1, {{[0-9]+}}(%rsp)
; KNL-NEXT: vmovaps %ymm0, (%rsp)
; KNL-NEXT: ## kill: %EDI<def> %EDI<kill> %RDI<def>
; KNL-NEXT: andl $63, %edi
; KNL-NEXT: movq %rsp, %rax
; KNL-NEXT: movb (%rdi,%rax), %al
@ -2066,12 +2075,12 @@ define i8 @test_extractelement_variable_v64i8_indexi8(<64 x i8> %t1, i8 %index)
define zeroext i8 @test_extractelement_varible_v2i1(<2 x i64> %a, <2 x i64> %b, i32 %index) {
; KNL-LABEL: test_extractelement_varible_v2i1:
; KNL: ## BB#0:
; KNL-NEXT: ## kill: %EDI<def> %EDI<kill> %RDI<def>
; KNL-NEXT: vmovdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
; KNL-NEXT: vpxor %xmm2, %xmm1, %xmm1
; KNL-NEXT: vpxor %xmm2, %xmm0, %xmm0
; KNL-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0
; KNL-NEXT: vmovdqa %xmm0, -{{[0-9]+}}(%rsp)
; KNL-NEXT: ## kill: %EDI<def> %EDI<kill> %RDI<def>
; KNL-NEXT: andl $1, %edi
; KNL-NEXT: movl -24(%rsp,%rdi,8), %eax
; KNL-NEXT: andl $1, %eax
@ -2096,12 +2105,12 @@ define zeroext i8 @test_extractelement_varible_v2i1(<2 x i64> %a, <2 x i64> %b,
define zeroext i8 @test_extractelement_varible_v4i1(<4 x i32> %a, <4 x i32> %b, i32 %index) {
; KNL-LABEL: test_extractelement_varible_v4i1:
; KNL: ## BB#0:
; KNL-NEXT: ## kill: %EDI<def> %EDI<kill> %RDI<def>
; KNL-NEXT: vpbroadcastd {{.*#+}} xmm2 = [2147483648,2147483648,2147483648,2147483648]
; KNL-NEXT: vpxor %xmm2, %xmm1, %xmm1
; KNL-NEXT: vpxor %xmm2, %xmm0, %xmm0
; KNL-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0
; KNL-NEXT: vmovdqa %xmm0, -{{[0-9]+}}(%rsp)
; KNL-NEXT: ## kill: %EDI<def> %EDI<kill> %RDI<def>
; KNL-NEXT: andl $3, %edi
; KNL-NEXT: movl -24(%rsp,%rdi,4), %eax
; KNL-NEXT: andl $1, %eax

View File

@ -2880,7 +2880,6 @@ declare <8 x i64> @llvm.x86.avx512.mask.pmulu.dq.512(<16 x i32>, <16 x i32>, <8
define <4 x float> @test_mask_vextractf32x4(<4 x float> %b, <16 x float> %a, i8 %mask) {
; CHECK-LABEL: test_mask_vextractf32x4:
; CHECK: ## BB#0:
; CHECK-NEXT: vextractf32x4 $2, %zmm1, %xmm1
; CHECK-NEXT: kmovw %edi, %k0
; CHECK-NEXT: kshiftlw $12, %k0, %k1
; CHECK-NEXT: kshiftrw $15, %k1, %k1
@ -2898,6 +2897,7 @@ define <4 x float> @test_mask_vextractf32x4(<4 x float> %b, <16 x float> %a, i8
; CHECK-NEXT: vpinsrb $8, %eax, %xmm2, %xmm2
; CHECK-NEXT: kmovw %k1, %eax
; CHECK-NEXT: vpinsrb $12, %eax, %xmm2, %xmm2
; CHECK-NEXT: vextractf32x4 $2, %zmm1, %xmm1
; CHECK-NEXT: vpslld $31, %xmm2, %xmm2
; CHECK-NEXT: vblendvps %xmm2, %xmm1, %xmm0, %xmm0
; CHECK-NEXT: retq
@ -2941,7 +2941,6 @@ declare <4 x i64> @llvm.x86.avx512.mask.vextracti64x4.512(<8 x i64>, i32, <4 x i
define <4 x i32> @test_maskz_vextracti32x4(<16 x i32> %a, i8 %mask) {
; CHECK-LABEL: test_maskz_vextracti32x4:
; CHECK: ## BB#0:
; CHECK-NEXT: vextracti32x4 $2, %zmm0, %xmm0
; CHECK-NEXT: kmovw %edi, %k0
; CHECK-NEXT: kshiftlw $12, %k0, %k1
; CHECK-NEXT: kshiftrw $15, %k1, %k1
@ -2959,6 +2958,7 @@ define <4 x i32> @test_maskz_vextracti32x4(<16 x i32> %a, i8 %mask) {
; CHECK-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
; CHECK-NEXT: kmovw %k1, %eax
; CHECK-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
; CHECK-NEXT: vextracti32x4 $2, %zmm0, %xmm0
; CHECK-NEXT: vpslld $31, %xmm1, %xmm1
; CHECK-NEXT: vpsrad $31, %xmm1, %xmm1
; CHECK-NEXT: vpand %xmm0, %xmm1, %xmm0

View File

@ -1835,73 +1835,8 @@ define void @ktest_2(<32 x float> %in, float * %base) {
; KNL-NEXT: .cfi_def_cfa_register %rbp
; KNL-NEXT: andq $-32, %rsp
; KNL-NEXT: subq $32, %rsp
; KNL-NEXT: vmovups (%rdi), %zmm2
; KNL-NEXT: vmovups 64(%rdi), %zmm3
; KNL-NEXT: vcmpltps %zmm1, %zmm3, %k1
; KNL-NEXT: kshiftlw $14, %k1, %k0
; KNL-NEXT: kshiftrw $15, %k0, %k0
; KNL-NEXT: kmovw %k0, %eax
; KNL-NEXT: kshiftlw $15, %k1, %k0
; KNL-NEXT: kshiftrw $15, %k0, %k0
; KNL-NEXT: kmovw %k0, %ecx
; KNL-NEXT: vmovd %ecx, %xmm3
; KNL-NEXT: vpinsrb $1, %eax, %xmm3, %xmm3
; KNL-NEXT: kshiftlw $13, %k1, %k0
; KNL-NEXT: kshiftrw $15, %k0, %k0
; KNL-NEXT: kmovw %k0, %eax
; KNL-NEXT: vpinsrb $2, %eax, %xmm3, %xmm3
; KNL-NEXT: kshiftlw $12, %k1, %k0
; KNL-NEXT: kshiftrw $15, %k0, %k0
; KNL-NEXT: kmovw %k0, %eax
; KNL-NEXT: vpinsrb $3, %eax, %xmm3, %xmm3
; KNL-NEXT: kshiftlw $11, %k1, %k0
; KNL-NEXT: kshiftrw $15, %k0, %k0
; KNL-NEXT: kmovw %k0, %eax
; KNL-NEXT: vpinsrb $4, %eax, %xmm3, %xmm3
; KNL-NEXT: kshiftlw $10, %k1, %k0
; KNL-NEXT: kshiftrw $15, %k0, %k0
; KNL-NEXT: kmovw %k0, %eax
; KNL-NEXT: vpinsrb $5, %eax, %xmm3, %xmm3
; KNL-NEXT: kshiftlw $9, %k1, %k0
; KNL-NEXT: kshiftrw $15, %k0, %k0
; KNL-NEXT: kmovw %k0, %eax
; KNL-NEXT: vpinsrb $6, %eax, %xmm3, %xmm3
; KNL-NEXT: kshiftlw $8, %k1, %k0
; KNL-NEXT: kshiftrw $15, %k0, %k0
; KNL-NEXT: kmovw %k0, %eax
; KNL-NEXT: vpinsrb $7, %eax, %xmm3, %xmm3
; KNL-NEXT: kshiftlw $7, %k1, %k0
; KNL-NEXT: kshiftrw $15, %k0, %k0
; KNL-NEXT: kmovw %k0, %eax
; KNL-NEXT: vpinsrb $8, %eax, %xmm3, %xmm3
; KNL-NEXT: kshiftlw $6, %k1, %k0
; KNL-NEXT: kshiftrw $15, %k0, %k0
; KNL-NEXT: kmovw %k0, %eax
; KNL-NEXT: vpinsrb $9, %eax, %xmm3, %xmm3
; KNL-NEXT: kshiftlw $5, %k1, %k0
; KNL-NEXT: kshiftrw $15, %k0, %k0
; KNL-NEXT: kmovw %k0, %eax
; KNL-NEXT: vpinsrb $10, %eax, %xmm3, %xmm3
; KNL-NEXT: kshiftlw $4, %k1, %k0
; KNL-NEXT: kshiftrw $15, %k0, %k0
; KNL-NEXT: kmovw %k0, %eax
; KNL-NEXT: vpinsrb $11, %eax, %xmm3, %xmm3
; KNL-NEXT: kshiftlw $3, %k1, %k0
; KNL-NEXT: kshiftrw $15, %k0, %k0
; KNL-NEXT: kmovw %k0, %eax
; KNL-NEXT: vpinsrb $12, %eax, %xmm3, %xmm3
; KNL-NEXT: kshiftlw $2, %k1, %k0
; KNL-NEXT: kshiftrw $15, %k0, %k0
; KNL-NEXT: kmovw %k0, %eax
; KNL-NEXT: vpinsrb $13, %eax, %xmm3, %xmm3
; KNL-NEXT: kshiftlw $1, %k1, %k0
; KNL-NEXT: kshiftrw $15, %k0, %k0
; KNL-NEXT: kmovw %k0, %eax
; KNL-NEXT: vpinsrb $14, %eax, %xmm3, %xmm3
; KNL-NEXT: kshiftrw $15, %k1, %k0
; KNL-NEXT: kmovw %k0, %eax
; KNL-NEXT: vpinsrb $15, %eax, %xmm3, %xmm3
; KNL-NEXT: vcmpltps %zmm0, %zmm2, %k2
; KNL-NEXT: vmovups 64(%rdi), %zmm2
; KNL-NEXT: vcmpltps %zmm1, %zmm2, %k2
; KNL-NEXT: kshiftlw $14, %k2, %k0
; KNL-NEXT: kshiftrw $15, %k0, %k0
; KNL-NEXT: kmovw %k0, %eax
@ -1965,138 +1900,203 @@ define void @ktest_2(<32 x float> %in, float * %base) {
; KNL-NEXT: kshiftrw $15, %k2, %k0
; KNL-NEXT: kmovw %k0, %eax
; KNL-NEXT: vpinsrb $15, %eax, %xmm2, %xmm2
; KNL-NEXT: vinserti128 $1, %xmm3, %ymm2, %ymm2
; KNL-NEXT: vmovups 4(%rdi), %zmm3 {%k2} {z}
; KNL-NEXT: vmovups 68(%rdi), %zmm4 {%k1} {z}
; KNL-NEXT: vmovups (%rdi), %zmm3
; KNL-NEXT: vcmpltps %zmm0, %zmm3, %k1
; KNL-NEXT: kshiftlw $14, %k1, %k0
; KNL-NEXT: kshiftrw $15, %k0, %k0
; KNL-NEXT: kmovw %k0, %eax
; KNL-NEXT: kshiftlw $15, %k1, %k0
; KNL-NEXT: kshiftrw $15, %k0, %k0
; KNL-NEXT: kmovw %k0, %ecx
; KNL-NEXT: vmovd %ecx, %xmm3
; KNL-NEXT: vpinsrb $1, %eax, %xmm3, %xmm3
; KNL-NEXT: kshiftlw $13, %k1, %k0
; KNL-NEXT: kshiftrw $15, %k0, %k0
; KNL-NEXT: kmovw %k0, %eax
; KNL-NEXT: vpinsrb $2, %eax, %xmm3, %xmm3
; KNL-NEXT: kshiftlw $12, %k1, %k0
; KNL-NEXT: kshiftrw $15, %k0, %k0
; KNL-NEXT: kmovw %k0, %eax
; KNL-NEXT: vpinsrb $3, %eax, %xmm3, %xmm3
; KNL-NEXT: kshiftlw $11, %k1, %k0
; KNL-NEXT: kshiftrw $15, %k0, %k0
; KNL-NEXT: kmovw %k0, %eax
; KNL-NEXT: vpinsrb $4, %eax, %xmm3, %xmm3
; KNL-NEXT: kshiftlw $10, %k1, %k0
; KNL-NEXT: kshiftrw $15, %k0, %k0
; KNL-NEXT: kmovw %k0, %eax
; KNL-NEXT: vpinsrb $5, %eax, %xmm3, %xmm3
; KNL-NEXT: kshiftlw $9, %k1, %k0
; KNL-NEXT: kshiftrw $15, %k0, %k0
; KNL-NEXT: kmovw %k0, %eax
; KNL-NEXT: vpinsrb $6, %eax, %xmm3, %xmm3
; KNL-NEXT: kshiftlw $8, %k1, %k0
; KNL-NEXT: kshiftrw $15, %k0, %k0
; KNL-NEXT: kmovw %k0, %eax
; KNL-NEXT: vpinsrb $7, %eax, %xmm3, %xmm3
; KNL-NEXT: kshiftlw $7, %k1, %k0
; KNL-NEXT: kshiftrw $15, %k0, %k0
; KNL-NEXT: kmovw %k0, %eax
; KNL-NEXT: vpinsrb $8, %eax, %xmm3, %xmm3
; KNL-NEXT: kshiftlw $6, %k1, %k0
; KNL-NEXT: kshiftrw $15, %k0, %k0
; KNL-NEXT: kmovw %k0, %eax
; KNL-NEXT: vpinsrb $9, %eax, %xmm3, %xmm3
; KNL-NEXT: kshiftlw $5, %k1, %k0
; KNL-NEXT: kshiftrw $15, %k0, %k0
; KNL-NEXT: kmovw %k0, %eax
; KNL-NEXT: vpinsrb $10, %eax, %xmm3, %xmm3
; KNL-NEXT: kshiftlw $4, %k1, %k0
; KNL-NEXT: kshiftrw $15, %k0, %k0
; KNL-NEXT: kmovw %k0, %eax
; KNL-NEXT: vpinsrb $11, %eax, %xmm3, %xmm3
; KNL-NEXT: kshiftlw $3, %k1, %k0
; KNL-NEXT: kshiftrw $15, %k0, %k0
; KNL-NEXT: kmovw %k0, %eax
; KNL-NEXT: vpinsrb $12, %eax, %xmm3, %xmm3
; KNL-NEXT: kshiftlw $2, %k1, %k0
; KNL-NEXT: kshiftrw $15, %k0, %k0
; KNL-NEXT: kmovw %k0, %eax
; KNL-NEXT: vpinsrb $13, %eax, %xmm3, %xmm3
; KNL-NEXT: kshiftlw $1, %k1, %k0
; KNL-NEXT: kshiftrw $15, %k0, %k0
; KNL-NEXT: kmovw %k0, %eax
; KNL-NEXT: vpinsrb $14, %eax, %xmm3, %xmm3
; KNL-NEXT: kshiftrw $15, %k1, %k0
; KNL-NEXT: kmovw %k0, %eax
; KNL-NEXT: vpinsrb $15, %eax, %xmm3, %xmm3
; KNL-NEXT: vmovups 68(%rdi), %zmm4 {%k2} {z}
; KNL-NEXT: vcmpltps %zmm4, %zmm1, %k0
; KNL-NEXT: kshiftlw $14, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: kmovw %k1, %eax
; KNL-NEXT: kshiftlw $15, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: kmovw %k1, %ecx
; KNL-NEXT: kshiftlw $14, %k0, %k2
; KNL-NEXT: kshiftrw $15, %k2, %k2
; KNL-NEXT: kmovw %k2, %eax
; KNL-NEXT: kshiftlw $15, %k0, %k2
; KNL-NEXT: kshiftrw $15, %k2, %k2
; KNL-NEXT: kmovw %k2, %ecx
; KNL-NEXT: vmovd %ecx, %xmm4
; KNL-NEXT: vpinsrb $1, %eax, %xmm4, %xmm4
; KNL-NEXT: kshiftlw $13, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: kmovw %k1, %eax
; KNL-NEXT: kshiftlw $13, %k0, %k2
; KNL-NEXT: kshiftrw $15, %k2, %k2
; KNL-NEXT: kmovw %k2, %eax
; KNL-NEXT: vpinsrb $2, %eax, %xmm4, %xmm4
; KNL-NEXT: kshiftlw $12, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: kmovw %k1, %eax
; KNL-NEXT: kshiftlw $12, %k0, %k2
; KNL-NEXT: kshiftrw $15, %k2, %k2
; KNL-NEXT: kmovw %k2, %eax
; KNL-NEXT: vpinsrb $3, %eax, %xmm4, %xmm4
; KNL-NEXT: kshiftlw $11, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: kmovw %k1, %eax
; KNL-NEXT: kshiftlw $11, %k0, %k2
; KNL-NEXT: kshiftrw $15, %k2, %k2
; KNL-NEXT: kmovw %k2, %eax
; KNL-NEXT: vpinsrb $4, %eax, %xmm4, %xmm4
; KNL-NEXT: kshiftlw $10, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: kmovw %k1, %eax
; KNL-NEXT: kshiftlw $10, %k0, %k2
; KNL-NEXT: kshiftrw $15, %k2, %k2
; KNL-NEXT: kmovw %k2, %eax
; KNL-NEXT: vpinsrb $5, %eax, %xmm4, %xmm4
; KNL-NEXT: kshiftlw $9, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: kmovw %k1, %eax
; KNL-NEXT: kshiftlw $9, %k0, %k2
; KNL-NEXT: kshiftrw $15, %k2, %k2
; KNL-NEXT: kmovw %k2, %eax
; KNL-NEXT: vpinsrb $6, %eax, %xmm4, %xmm4
; KNL-NEXT: kshiftlw $8, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: kmovw %k1, %eax
; KNL-NEXT: kshiftlw $8, %k0, %k2
; KNL-NEXT: kshiftrw $15, %k2, %k2
; KNL-NEXT: kmovw %k2, %eax
; KNL-NEXT: vpinsrb $7, %eax, %xmm4, %xmm4
; KNL-NEXT: kshiftlw $7, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: kmovw %k1, %eax
; KNL-NEXT: kshiftlw $7, %k0, %k2
; KNL-NEXT: kshiftrw $15, %k2, %k2
; KNL-NEXT: kmovw %k2, %eax
; KNL-NEXT: vpinsrb $8, %eax, %xmm4, %xmm4
; KNL-NEXT: kshiftlw $6, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: kmovw %k1, %eax
; KNL-NEXT: kshiftlw $6, %k0, %k2
; KNL-NEXT: kshiftrw $15, %k2, %k2
; KNL-NEXT: kmovw %k2, %eax
; KNL-NEXT: vpinsrb $9, %eax, %xmm4, %xmm4
; KNL-NEXT: kshiftlw $5, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: kmovw %k1, %eax
; KNL-NEXT: kshiftlw $5, %k0, %k2
; KNL-NEXT: kshiftrw $15, %k2, %k2
; KNL-NEXT: kmovw %k2, %eax
; KNL-NEXT: vpinsrb $10, %eax, %xmm4, %xmm4
; KNL-NEXT: kshiftlw $4, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: kmovw %k1, %eax
; KNL-NEXT: kshiftlw $4, %k0, %k2
; KNL-NEXT: kshiftrw $15, %k2, %k2
; KNL-NEXT: kmovw %k2, %eax
; KNL-NEXT: vpinsrb $11, %eax, %xmm4, %xmm4
; KNL-NEXT: kshiftlw $3, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: kmovw %k1, %eax
; KNL-NEXT: kshiftlw $3, %k0, %k2
; KNL-NEXT: kshiftrw $15, %k2, %k2
; KNL-NEXT: kmovw %k2, %eax
; KNL-NEXT: vpinsrb $12, %eax, %xmm4, %xmm4
; KNL-NEXT: kshiftlw $2, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: kmovw %k1, %eax
; KNL-NEXT: kshiftlw $2, %k0, %k2
; KNL-NEXT: kshiftrw $15, %k2, %k2
; KNL-NEXT: kmovw %k2, %eax
; KNL-NEXT: vpinsrb $13, %eax, %xmm4, %xmm4
; KNL-NEXT: kshiftlw $1, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: kmovw %k1, %eax
; KNL-NEXT: kshiftlw $1, %k0, %k2
; KNL-NEXT: kshiftrw $15, %k2, %k2
; KNL-NEXT: kmovw %k2, %eax
; KNL-NEXT: vpinsrb $14, %eax, %xmm4, %xmm4
; KNL-NEXT: kshiftrw $15, %k0, %k0
; KNL-NEXT: kmovw %k0, %eax
; KNL-NEXT: vpinsrb $15, %eax, %xmm4, %xmm4
; KNL-NEXT: vcmpltps %zmm3, %zmm0, %k0
; KNL-NEXT: vmovups 4(%rdi), %zmm5 {%k1} {z}
; KNL-NEXT: vcmpltps %zmm5, %zmm0, %k0
; KNL-NEXT: kshiftlw $14, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: kmovw %k1, %eax
; KNL-NEXT: kshiftlw $15, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: kmovw %k1, %ecx
; KNL-NEXT: vmovd %ecx, %xmm3
; KNL-NEXT: vpinsrb $1, %eax, %xmm3, %xmm3
; KNL-NEXT: vmovd %ecx, %xmm5
; KNL-NEXT: vpinsrb $1, %eax, %xmm5, %xmm5
; KNL-NEXT: kshiftlw $13, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: kmovw %k1, %eax
; KNL-NEXT: vpinsrb $2, %eax, %xmm3, %xmm3
; KNL-NEXT: vpinsrb $2, %eax, %xmm5, %xmm5
; KNL-NEXT: kshiftlw $12, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: kmovw %k1, %eax
; KNL-NEXT: vpinsrb $3, %eax, %xmm3, %xmm3
; KNL-NEXT: vpinsrb $3, %eax, %xmm5, %xmm5
; KNL-NEXT: kshiftlw $11, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: kmovw %k1, %eax
; KNL-NEXT: vpinsrb $4, %eax, %xmm3, %xmm3
; KNL-NEXT: vpinsrb $4, %eax, %xmm5, %xmm5
; KNL-NEXT: kshiftlw $10, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: kmovw %k1, %eax
; KNL-NEXT: vpinsrb $5, %eax, %xmm3, %xmm3
; KNL-NEXT: vpinsrb $5, %eax, %xmm5, %xmm5
; KNL-NEXT: kshiftlw $9, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: kmovw %k1, %eax
; KNL-NEXT: vpinsrb $6, %eax, %xmm3, %xmm3
; KNL-NEXT: vpinsrb $6, %eax, %xmm5, %xmm5
; KNL-NEXT: kshiftlw $8, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: kmovw %k1, %eax
; KNL-NEXT: vpinsrb $7, %eax, %xmm3, %xmm3
; KNL-NEXT: vpinsrb $7, %eax, %xmm5, %xmm5
; KNL-NEXT: kshiftlw $7, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: kmovw %k1, %eax
; KNL-NEXT: vpinsrb $8, %eax, %xmm3, %xmm3
; KNL-NEXT: vpinsrb $8, %eax, %xmm5, %xmm5
; KNL-NEXT: kshiftlw $6, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: kmovw %k1, %eax
; KNL-NEXT: vpinsrb $9, %eax, %xmm3, %xmm3
; KNL-NEXT: vpinsrb $9, %eax, %xmm5, %xmm5
; KNL-NEXT: kshiftlw $5, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: kmovw %k1, %eax
; KNL-NEXT: vpinsrb $10, %eax, %xmm3, %xmm3
; KNL-NEXT: vpinsrb $10, %eax, %xmm5, %xmm5
; KNL-NEXT: kshiftlw $4, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: kmovw %k1, %eax
; KNL-NEXT: vpinsrb $11, %eax, %xmm3, %xmm3
; KNL-NEXT: vpinsrb $11, %eax, %xmm5, %xmm5
; KNL-NEXT: kshiftlw $3, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: kmovw %k1, %eax
; KNL-NEXT: vpinsrb $12, %eax, %xmm3, %xmm3
; KNL-NEXT: vpinsrb $12, %eax, %xmm5, %xmm5
; KNL-NEXT: kshiftlw $2, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: kmovw %k1, %eax
; KNL-NEXT: vpinsrb $13, %eax, %xmm3, %xmm3
; KNL-NEXT: vpinsrb $13, %eax, %xmm5, %xmm5
; KNL-NEXT: kshiftlw $1, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: kmovw %k1, %eax
; KNL-NEXT: vpinsrb $14, %eax, %xmm3, %xmm3
; KNL-NEXT: vpinsrb $14, %eax, %xmm5, %xmm5
; KNL-NEXT: kshiftrw $15, %k0, %k0
; KNL-NEXT: kmovw %k0, %eax
; KNL-NEXT: vpinsrb $15, %eax, %xmm3, %xmm3
; KNL-NEXT: vinserti128 $1, %xmm4, %ymm3, %ymm3
; KNL-NEXT: vpinsrb $15, %eax, %xmm5, %xmm5
; KNL-NEXT: vinserti128 $1, %xmm2, %ymm3, %ymm2
; KNL-NEXT: vinserti128 $1, %xmm4, %ymm5, %ymm3
; KNL-NEXT: vpor %ymm3, %ymm2, %ymm2
; KNL-NEXT: vextracti128 $1, %ymm2, %xmm3
; KNL-NEXT: vpmovsxbd %xmm3, %zmm3
@ -2941,36 +2941,6 @@ define void @store_64i1(<64 x i1>* %a, <64 x i1> %v) {
;
; KNL-LABEL: store_64i1:
; KNL: ## BB#0:
; KNL-NEXT: pushq %rbp
; KNL-NEXT: Lcfi9:
; KNL-NEXT: .cfi_def_cfa_offset 16
; KNL-NEXT: pushq %r15
; KNL-NEXT: Lcfi10:
; KNL-NEXT: .cfi_def_cfa_offset 24
; KNL-NEXT: pushq %r14
; KNL-NEXT: Lcfi11:
; KNL-NEXT: .cfi_def_cfa_offset 32
; KNL-NEXT: pushq %r13
; KNL-NEXT: Lcfi12:
; KNL-NEXT: .cfi_def_cfa_offset 40
; KNL-NEXT: pushq %r12
; KNL-NEXT: Lcfi13:
; KNL-NEXT: .cfi_def_cfa_offset 48
; KNL-NEXT: pushq %rbx
; KNL-NEXT: Lcfi14:
; KNL-NEXT: .cfi_def_cfa_offset 56
; KNL-NEXT: Lcfi15:
; KNL-NEXT: .cfi_offset %rbx, -56
; KNL-NEXT: Lcfi16:
; KNL-NEXT: .cfi_offset %r12, -48
; KNL-NEXT: Lcfi17:
; KNL-NEXT: .cfi_offset %r13, -40
; KNL-NEXT: Lcfi18:
; KNL-NEXT: .cfi_offset %r14, -32
; KNL-NEXT: Lcfi19:
; KNL-NEXT: .cfi_offset %r15, -24
; KNL-NEXT: Lcfi20:
; KNL-NEXT: .cfi_offset %rbp, -16
; KNL-NEXT: vpmovsxbd %xmm0, %zmm0
; KNL-NEXT: vpslld $31, %zmm0, %zmm0
; KNL-NEXT: vpmovsxbd %xmm1, %zmm1
@ -2982,66 +2952,66 @@ define void @store_64i1(<64 x i1>* %a, <64 x i1> %v) {
; KNL-NEXT: vptestmd %zmm3, %zmm3, %k0
; KNL-NEXT: kshiftlw $14, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: kmovw %k1, %r8d
; KNL-NEXT: kmovw %k1, %eax
; KNL-NEXT: kshiftlw $15, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: kmovw %k1, %r9d
; KNL-NEXT: kmovw %k1, %ecx
; KNL-NEXT: kshiftlw $13, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: kmovw %k1, %r10d
; KNL-NEXT: kmovw %k1, %edx
; KNL-NEXT: kshiftlw $12, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: kmovw %k1, %r11d
; KNL-NEXT: vmovd %ecx, %xmm3
; KNL-NEXT: kmovw %k1, %ecx
; KNL-NEXT: kshiftlw $11, %k0, %k1
; KNL-NEXT: vpinsrb $1, %eax, %xmm3, %xmm3
; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: kmovw %k1, %r14d
; KNL-NEXT: kmovw %k1, %eax
; KNL-NEXT: kshiftlw $10, %k0, %k1
; KNL-NEXT: vpinsrb $2, %edx, %xmm3, %xmm3
; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: kmovw %k1, %r15d
; KNL-NEXT: kmovw %k1, %edx
; KNL-NEXT: kshiftlw $9, %k0, %k1
; KNL-NEXT: vpinsrb $3, %ecx, %xmm3, %xmm3
; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: kmovw %k1, %r12d
; KNL-NEXT: kmovw %k1, %ecx
; KNL-NEXT: kshiftlw $8, %k0, %k1
; KNL-NEXT: vpinsrb $4, %eax, %xmm3, %xmm3
; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: kmovw %k1, %r13d
; KNL-NEXT: kmovw %k1, %eax
; KNL-NEXT: kshiftlw $7, %k0, %k1
; KNL-NEXT: vpinsrb $5, %edx, %xmm3, %xmm3
; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: kmovw %k1, %ebx
; KNL-NEXT: kmovw %k1, %edx
; KNL-NEXT: kshiftlw $6, %k0, %k1
; KNL-NEXT: vpinsrb $6, %ecx, %xmm3, %xmm3
; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: kmovw %k1, %ebp
; KNL-NEXT: kmovw %k1, %ecx
; KNL-NEXT: kshiftlw $5, %k0, %k1
; KNL-NEXT: vpinsrb $7, %eax, %xmm3, %xmm3
; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: kmovw %k1, %eax
; KNL-NEXT: kshiftlw $4, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: kmovw %k1, %ecx
; KNL-NEXT: kshiftlw $3, %k0, %k1
; KNL-NEXT: vpinsrb $8, %edx, %xmm3, %xmm3
; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: kmovw %k1, %edx
; KNL-NEXT: kshiftlw $3, %k0, %k1
; KNL-NEXT: vpinsrb $9, %ecx, %xmm3, %xmm3
; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: kmovw %k1, %ecx
; KNL-NEXT: kshiftlw $2, %k0, %k1
; KNL-NEXT: vpinsrb $10, %eax, %xmm3, %xmm3
; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: kmovw %k1, %esi
; KNL-NEXT: kmovw %k1, %eax
; KNL-NEXT: kshiftlw $1, %k0, %k1
; KNL-NEXT: vpinsrb $11, %edx, %xmm3, %xmm3
; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: vmovd %r9d, %xmm3
; KNL-NEXT: kmovw %k1, %r9d
; KNL-NEXT: kmovw %k1, %edx
; KNL-NEXT: vptestmd %zmm2, %zmm2, %k2
; KNL-NEXT: vpinsrb $12, %ecx, %xmm3, %xmm2
; KNL-NEXT: vpinsrb $13, %eax, %xmm2, %xmm2
; KNL-NEXT: vpinsrb $14, %edx, %xmm2, %xmm2
; KNL-NEXT: kshiftrw $15, %k0, %k0
; KNL-NEXT: vpinsrb $1, %r8d, %xmm3, %xmm2
; KNL-NEXT: vpinsrb $2, %r10d, %xmm2, %xmm2
; KNL-NEXT: vpinsrb $3, %r11d, %xmm2, %xmm2
; KNL-NEXT: vpinsrb $4, %r14d, %xmm2, %xmm2
; KNL-NEXT: vpinsrb $5, %r15d, %xmm2, %xmm2
; KNL-NEXT: vpinsrb $6, %r12d, %xmm2, %xmm2
; KNL-NEXT: vpinsrb $7, %r13d, %xmm2, %xmm2
; KNL-NEXT: vpinsrb $8, %ebx, %xmm2, %xmm2
; KNL-NEXT: vpinsrb $9, %ebp, %xmm2, %xmm2
; KNL-NEXT: vpinsrb $10, %eax, %xmm2, %xmm2
; KNL-NEXT: vpinsrb $11, %ecx, %xmm2, %xmm2
; KNL-NEXT: vpinsrb $12, %edx, %xmm2, %xmm2
; KNL-NEXT: vpinsrb $13, %esi, %xmm2, %xmm2
; KNL-NEXT: vpinsrb $14, %r9d, %xmm2, %xmm2
; KNL-NEXT: kmovw %k0, %eax
; KNL-NEXT: vpinsrb $15, %eax, %xmm2, %xmm2
; KNL-NEXT: vpmovsxbd %xmm2, %zmm2
@ -3050,66 +3020,66 @@ define void @store_64i1(<64 x i1>* %a, <64 x i1> %v) {
; KNL-NEXT: kmovw %k0, 6(%rdi)
; KNL-NEXT: kshiftlw $14, %k2, %k0
; KNL-NEXT: kshiftrw $15, %k0, %k0
; KNL-NEXT: kmovw %k0, %r8d
; KNL-NEXT: kmovw %k0, %eax
; KNL-NEXT: kshiftlw $15, %k2, %k0
; KNL-NEXT: kshiftrw $15, %k0, %k0
; KNL-NEXT: kmovw %k0, %r10d
; KNL-NEXT: kmovw %k0, %ecx
; KNL-NEXT: kshiftlw $13, %k2, %k0
; KNL-NEXT: kshiftrw $15, %k0, %k0
; KNL-NEXT: kmovw %k0, %r9d
; KNL-NEXT: kmovw %k0, %edx
; KNL-NEXT: kshiftlw $12, %k2, %k0
; KNL-NEXT: kshiftrw $15, %k0, %k0
; KNL-NEXT: kmovw %k0, %r11d
; KNL-NEXT: vmovd %ecx, %xmm2
; KNL-NEXT: kmovw %k0, %ecx
; KNL-NEXT: kshiftlw $11, %k2, %k0
; KNL-NEXT: kshiftrw $15, %k0, %k0
; KNL-NEXT: kmovw %k0, %r14d
; KNL-NEXT: vpinsrb $1, %eax, %xmm2, %xmm2
; KNL-NEXT: kmovw %k0, %eax
; KNL-NEXT: kshiftlw $10, %k2, %k0
; KNL-NEXT: kshiftrw $15, %k0, %k0
; KNL-NEXT: kmovw %k0, %r15d
; KNL-NEXT: vpinsrb $2, %edx, %xmm2, %xmm2
; KNL-NEXT: kmovw %k0, %edx
; KNL-NEXT: kshiftlw $9, %k2, %k0
; KNL-NEXT: kshiftrw $15, %k0, %k0
; KNL-NEXT: kmovw %k0, %r12d
; KNL-NEXT: vpinsrb $3, %ecx, %xmm2, %xmm2
; KNL-NEXT: kmovw %k0, %ecx
; KNL-NEXT: kshiftlw $8, %k2, %k0
; KNL-NEXT: kshiftrw $15, %k0, %k0
; KNL-NEXT: kmovw %k0, %r13d
; KNL-NEXT: vpinsrb $4, %eax, %xmm2, %xmm2
; KNL-NEXT: kmovw %k0, %eax
; KNL-NEXT: kshiftlw $7, %k2, %k0
; KNL-NEXT: kshiftrw $15, %k0, %k0
; KNL-NEXT: kmovw %k0, %ecx
; KNL-NEXT: vpinsrb $5, %edx, %xmm2, %xmm2
; KNL-NEXT: kmovw %k0, %edx
; KNL-NEXT: kshiftlw $6, %k2, %k0
; KNL-NEXT: kshiftrw $15, %k0, %k0
; KNL-NEXT: kmovw %k0, %esi
; KNL-NEXT: vpinsrb $6, %ecx, %xmm2, %xmm2
; KNL-NEXT: kmovw %k0, %ecx
; KNL-NEXT: kshiftlw $5, %k2, %k0
; KNL-NEXT: kshiftrw $15, %k0, %k0
; KNL-NEXT: kmovw %k0, %ebp
; KNL-NEXT: vpinsrb $7, %eax, %xmm2, %xmm2
; KNL-NEXT: kmovw %k0, %eax
; KNL-NEXT: kshiftlw $4, %k2, %k0
; KNL-NEXT: kshiftrw $15, %k0, %k0
; KNL-NEXT: kmovw %k0, %ebx
; KNL-NEXT: vpinsrb $8, %edx, %xmm2, %xmm2
; KNL-NEXT: kmovw %k0, %edx
; KNL-NEXT: kshiftlw $3, %k2, %k0
; KNL-NEXT: kshiftrw $15, %k0, %k0
; KNL-NEXT: kmovw %k0, %eax
; KNL-NEXT: vpinsrb $9, %ecx, %xmm2, %xmm2
; KNL-NEXT: kmovw %k0, %ecx
; KNL-NEXT: kshiftlw $2, %k2, %k0
; KNL-NEXT: kshiftrw $15, %k0, %k0
; KNL-NEXT: kmovw %k0, %edx
; KNL-NEXT: vpinsrb $10, %eax, %xmm2, %xmm2
; KNL-NEXT: kmovw %k0, %eax
; KNL-NEXT: kshiftlw $1, %k2, %k0
; KNL-NEXT: kshiftrw $15, %k0, %k0
; KNL-NEXT: vmovd %r10d, %xmm2
; KNL-NEXT: kmovw %k0, %r10d
; KNL-NEXT: vpinsrb $11, %edx, %xmm2, %xmm2
; KNL-NEXT: kmovw %k0, %edx
; KNL-NEXT: vptestmd %zmm1, %zmm1, %k1
; KNL-NEXT: kshiftrw $15, %k2, %k0
; KNL-NEXT: vpinsrb $1, %r8d, %xmm2, %xmm1
; KNL-NEXT: vpinsrb $2, %r9d, %xmm1, %xmm1
; KNL-NEXT: vpinsrb $3, %r11d, %xmm1, %xmm1
; KNL-NEXT: vpinsrb $4, %r14d, %xmm1, %xmm1
; KNL-NEXT: vpinsrb $5, %r15d, %xmm1, %xmm1
; KNL-NEXT: vpinsrb $6, %r12d, %xmm1, %xmm1
; KNL-NEXT: vpinsrb $7, %r13d, %xmm1, %xmm1
; KNL-NEXT: vpinsrb $8, %ecx, %xmm1, %xmm1
; KNL-NEXT: vpinsrb $9, %esi, %xmm1, %xmm1
; KNL-NEXT: vpinsrb $10, %ebp, %xmm1, %xmm1
; KNL-NEXT: vpinsrb $11, %ebx, %xmm1, %xmm1
; KNL-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
; KNL-NEXT: vpinsrb $13, %edx, %xmm1, %xmm1
; KNL-NEXT: vpinsrb $14, %r10d, %xmm1, %xmm1
; KNL-NEXT: vpinsrb $12, %ecx, %xmm2, %xmm1
; KNL-NEXT: vpinsrb $13, %eax, %xmm1, %xmm1
; KNL-NEXT: vpinsrb $14, %edx, %xmm1, %xmm1
; KNL-NEXT: kmovw %k0, %eax
; KNL-NEXT: vpinsrb $15, %eax, %xmm1, %xmm1
; KNL-NEXT: vpmovsxbd %xmm1, %zmm1
@ -3118,145 +3088,139 @@ define void @store_64i1(<64 x i1>* %a, <64 x i1> %v) {
; KNL-NEXT: kmovw %k0, 4(%rdi)
; KNL-NEXT: kshiftlw $14, %k1, %k0
; KNL-NEXT: kshiftrw $15, %k0, %k0
; KNL-NEXT: kmovw %k0, %r8d
; KNL-NEXT: kmovw %k0, %eax
; KNL-NEXT: kshiftlw $15, %k1, %k0
; KNL-NEXT: kshiftrw $15, %k0, %k0
; KNL-NEXT: kmovw %k0, %r10d
; KNL-NEXT: kmovw %k0, %ecx
; KNL-NEXT: kshiftlw $13, %k1, %k0
; KNL-NEXT: kshiftrw $15, %k0, %k0
; KNL-NEXT: kmovw %k0, %r9d
; KNL-NEXT: kmovw %k0, %edx
; KNL-NEXT: kshiftlw $12, %k1, %k0
; KNL-NEXT: kshiftrw $15, %k0, %k0
; KNL-NEXT: kmovw %k0, %r11d
; KNL-NEXT: vmovd %ecx, %xmm1
; KNL-NEXT: kmovw %k0, %ecx
; KNL-NEXT: kshiftlw $11, %k1, %k0
; KNL-NEXT: kshiftrw $15, %k0, %k0
; KNL-NEXT: kmovw %k0, %r14d
; KNL-NEXT: vpinsrb $1, %eax, %xmm1, %xmm1
; KNL-NEXT: kmovw %k0, %eax
; KNL-NEXT: kshiftlw $10, %k1, %k0
; KNL-NEXT: kshiftrw $15, %k0, %k0
; KNL-NEXT: kmovw %k0, %r15d
; KNL-NEXT: vpinsrb $2, %edx, %xmm1, %xmm1
; KNL-NEXT: kmovw %k0, %edx
; KNL-NEXT: kshiftlw $9, %k1, %k0
; KNL-NEXT: kshiftrw $15, %k0, %k0
; KNL-NEXT: kmovw %k0, %r12d
; KNL-NEXT: vpinsrb $3, %ecx, %xmm1, %xmm1
; KNL-NEXT: kmovw %k0, %ecx
; KNL-NEXT: kshiftlw $8, %k1, %k0
; KNL-NEXT: kshiftrw $15, %k0, %k0
; KNL-NEXT: kmovw %k0, %r13d
; KNL-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
; KNL-NEXT: kmovw %k0, %eax
; KNL-NEXT: kshiftlw $7, %k1, %k0
; KNL-NEXT: kshiftrw $15, %k0, %k0
; KNL-NEXT: kmovw %k0, %ecx
; KNL-NEXT: vpinsrb $5, %edx, %xmm1, %xmm1
; KNL-NEXT: kmovw %k0, %edx
; KNL-NEXT: kshiftlw $6, %k1, %k0
; KNL-NEXT: kshiftrw $15, %k0, %k0
; KNL-NEXT: kmovw %k0, %esi
; KNL-NEXT: vpinsrb $6, %ecx, %xmm1, %xmm1
; KNL-NEXT: kmovw %k0, %ecx
; KNL-NEXT: kshiftlw $5, %k1, %k0
; KNL-NEXT: kshiftrw $15, %k0, %k0
; KNL-NEXT: kmovw %k0, %ebp
; KNL-NEXT: vpinsrb $7, %eax, %xmm1, %xmm1
; KNL-NEXT: kmovw %k0, %eax
; KNL-NEXT: kshiftlw $4, %k1, %k0
; KNL-NEXT: kshiftrw $15, %k0, %k0
; KNL-NEXT: kmovw %k0, %ebx
; KNL-NEXT: vpinsrb $8, %edx, %xmm1, %xmm1
; KNL-NEXT: kmovw %k0, %edx
; KNL-NEXT: kshiftlw $3, %k1, %k0
; KNL-NEXT: kshiftrw $15, %k0, %k0
; KNL-NEXT: kmovw %k0, %eax
; KNL-NEXT: vpinsrb $9, %ecx, %xmm1, %xmm1
; KNL-NEXT: kmovw %k0, %ecx
; KNL-NEXT: kshiftlw $2, %k1, %k0
; KNL-NEXT: kshiftrw $15, %k0, %k0
; KNL-NEXT: kmovw %k0, %edx
; KNL-NEXT: vpinsrb $10, %eax, %xmm1, %xmm1
; KNL-NEXT: kmovw %k0, %eax
; KNL-NEXT: kshiftlw $1, %k1, %k0
; KNL-NEXT: kshiftrw $15, %k0, %k0
; KNL-NEXT: vmovd %r10d, %xmm1
; KNL-NEXT: kmovw %k0, %r10d
; KNL-NEXT: vpinsrb $11, %edx, %xmm1, %xmm1
; KNL-NEXT: kmovw %k0, %edx
; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0
; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: vpinsrb $1, %r8d, %xmm1, %xmm0
; KNL-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
; KNL-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
; KNL-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
; KNL-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
; KNL-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
; KNL-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
; KNL-NEXT: vpinsrb $8, %ecx, %xmm0, %xmm0
; KNL-NEXT: vpinsrb $9, %esi, %xmm0, %xmm0
; KNL-NEXT: vpinsrb $10, %ebp, %xmm0, %xmm0
; KNL-NEXT: vpinsrb $11, %ebx, %xmm0, %xmm0
; KNL-NEXT: vpinsrb $12, %eax, %xmm0, %xmm0
; KNL-NEXT: vpinsrb $13, %edx, %xmm0, %xmm0
; KNL-NEXT: vpinsrb $14, %r10d, %xmm0, %xmm0
; KNL-NEXT: kmovw %k1, %eax
; KNL-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
; KNL-NEXT: vpmovsxbd %xmm0, %zmm0
; KNL-NEXT: vpslld $31, %zmm0, %zmm0
; KNL-NEXT: vptestmd %zmm0, %zmm0, %k1
; KNL-NEXT: kmovw %k1, 2(%rdi)
; KNL-NEXT: vpinsrb $12, %ecx, %xmm1, %xmm0
; KNL-NEXT: kmovw %k1, %ecx
; KNL-NEXT: kshiftlw $14, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: kmovw %k1, %r8d
; KNL-NEXT: vpinsrb $13, %eax, %xmm0, %xmm0
; KNL-NEXT: kmovw %k1, %eax
; KNL-NEXT: kshiftlw $15, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: kmovw %k1, %r9d
; KNL-NEXT: vpinsrb $14, %edx, %xmm0, %xmm0
; KNL-NEXT: kmovw %k1, %edx
; KNL-NEXT: kshiftlw $13, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: kmovw %k1, %r10d
; KNL-NEXT: vpinsrb $15, %ecx, %xmm0, %xmm0
; KNL-NEXT: kmovw %k1, %ecx
; KNL-NEXT: kshiftlw $12, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: kmovw %k1, %r11d
; KNL-NEXT: kshiftlw $11, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: kmovw %k1, %r14d
; KNL-NEXT: kshiftlw $10, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: kmovw %k1, %r15d
; KNL-NEXT: kshiftlw $9, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: kmovw %k1, %r12d
; KNL-NEXT: kshiftlw $8, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: kmovw %k1, %r13d
; KNL-NEXT: kshiftlw $7, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: vmovd %edx, %xmm1
; KNL-NEXT: kmovw %k1, %edx
; KNL-NEXT: kshiftlw $6, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: kmovw %k1, %esi
; KNL-NEXT: kshiftlw $5, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: kmovw %k1, %ebp
; KNL-NEXT: kshiftlw $4, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: kmovw %k1, %ebx
; KNL-NEXT: kshiftlw $3, %k0, %k1
; KNL-NEXT: kshiftlw $11, %k0, %k1
; KNL-NEXT: vpinsrb $1, %eax, %xmm1, %xmm1
; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: kmovw %k1, %eax
; KNL-NEXT: kshiftlw $2, %k0, %k1
; KNL-NEXT: kshiftlw $10, %k0, %k1
; KNL-NEXT: vpinsrb $2, %ecx, %xmm1, %xmm1
; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: kmovw %k1, %ecx
; KNL-NEXT: kshiftlw $1, %k0, %k1
; KNL-NEXT: kshiftlw $9, %k0, %k1
; KNL-NEXT: vpinsrb $3, %edx, %xmm1, %xmm1
; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: vmovd %r9d, %xmm0
; KNL-NEXT: kmovw %k1, %r9d
; KNL-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
; KNL-NEXT: vpinsrb $2, %r10d, %xmm0, %xmm0
; KNL-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
; KNL-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
; KNL-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
; KNL-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
; KNL-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
; KNL-NEXT: vpinsrb $8, %edx, %xmm0, %xmm0
; KNL-NEXT: vpinsrb $9, %esi, %xmm0, %xmm0
; KNL-NEXT: vpinsrb $10, %ebp, %xmm0, %xmm0
; KNL-NEXT: vpinsrb $11, %ebx, %xmm0, %xmm0
; KNL-NEXT: vpinsrb $12, %eax, %xmm0, %xmm0
; KNL-NEXT: kmovw %k1, %edx
; KNL-NEXT: kshiftlw $8, %k0, %k1
; KNL-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: kmovw %k1, %eax
; KNL-NEXT: kshiftlw $7, %k0, %k1
; KNL-NEXT: vpinsrb $5, %ecx, %xmm1, %xmm1
; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: kmovw %k1, %ecx
; KNL-NEXT: kshiftlw $6, %k0, %k1
; KNL-NEXT: vpinsrb $6, %edx, %xmm1, %xmm1
; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: kmovw %k1, %edx
; KNL-NEXT: kshiftlw $5, %k0, %k1
; KNL-NEXT: vpinsrb $7, %eax, %xmm1, %xmm1
; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: kmovw %k1, %eax
; KNL-NEXT: kshiftlw $4, %k0, %k1
; KNL-NEXT: vpinsrb $8, %ecx, %xmm1, %xmm1
; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: kmovw %k1, %ecx
; KNL-NEXT: kshiftlw $3, %k0, %k1
; KNL-NEXT: vpinsrb $9, %edx, %xmm1, %xmm1
; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: kmovw %k1, %edx
; KNL-NEXT: kshiftlw $2, %k0, %k1
; KNL-NEXT: vpinsrb $10, %eax, %xmm1, %xmm1
; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: kmovw %k1, %eax
; KNL-NEXT: kshiftlw $1, %k0, %k1
; KNL-NEXT: vpinsrb $11, %ecx, %xmm1, %xmm1
; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: kmovw %k1, %ecx
; KNL-NEXT: vpinsrb $12, %edx, %xmm1, %xmm1
; KNL-NEXT: kshiftrw $15, %k0, %k0
; KNL-NEXT: vpinsrb $13, %ecx, %xmm0, %xmm0
; KNL-NEXT: vpinsrb $14, %r9d, %xmm0, %xmm0
; KNL-NEXT: vpmovsxbd %xmm0, %zmm0
; KNL-NEXT: vpslld $31, %zmm0, %zmm0
; KNL-NEXT: vpinsrb $13, %eax, %xmm1, %xmm1
; KNL-NEXT: kmovw %k0, %eax
; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0
; KNL-NEXT: vpinsrb $14, %ecx, %xmm1, %xmm0
; KNL-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
; KNL-NEXT: kmovw %k0, 2(%rdi)
; KNL-NEXT: vpmovsxbd %xmm0, %zmm0
; KNL-NEXT: vpslld $31, %zmm0, %zmm0
; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0
; KNL-NEXT: kmovw %k0, (%rdi)
; KNL-NEXT: popq %rbx
; KNL-NEXT: popq %r12
; KNL-NEXT: popq %r13
; KNL-NEXT: popq %r14
; KNL-NEXT: popq %r15
; KNL-NEXT: popq %rbp
; KNL-NEXT: retq
;
; SKX-LABEL: store_64i1:

View File

@ -269,8 +269,6 @@ define i32 @test12_v32i32(<32 x i32> %a, <32 x i32> %b) nounwind {
; KNL-NEXT: vpinsrb $15, %eax, %xmm1, %xmm1
; KNL-NEXT: vpmovsxbd %xmm1, %zmm1
; KNL-NEXT: vpslld $31, %zmm1, %zmm1
; KNL-NEXT: vptestmd %zmm1, %zmm1, %k0
; KNL-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; KNL-NEXT: vpcmpeqd %zmm2, %zmm0, %k0
; KNL-NEXT: kshiftlw $14, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
@ -327,11 +325,13 @@ define i32 @test12_v32i32(<32 x i32> %a, <32 x i32> %b) nounwind {
; KNL-NEXT: kshiftlw $2, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: kmovw %k1, %eax
; KNL-NEXT: vpinsrb $13, %eax, %xmm0, %xmm0
; KNL-NEXT: kshiftlw $1, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: kmovw %k1, %eax
; KNL-NEXT: vpinsrb $14, %eax, %xmm0, %xmm0
; KNL-NEXT: kmovw %k1, %ecx
; KNL-NEXT: vptestmd %zmm1, %zmm1, %k1
; KNL-NEXT: vpinsrb $13, %eax, %xmm0, %xmm0
; KNL-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
; KNL-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; KNL-NEXT: kshiftrw $15, %k0, %k0
; KNL-NEXT: kmovw %k0, %eax
; KNL-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
@ -577,75 +577,75 @@ define i64 @test12_v64i16(<64 x i16> %a, <64 x i16> %b) nounwind {
; KNL-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
; KNL-NEXT: vpmovsxbd %xmm0, %zmm0
; KNL-NEXT: vpslld $31, %zmm0, %zmm0
; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0
; KNL-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; KNL-NEXT: vpcmpeqw %ymm6, %ymm2, %ymm0
; KNL-NEXT: vpmovsxwd %ymm0, %zmm0
; KNL-NEXT: vpslld $31, %zmm0, %zmm0
; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0
; KNL-NEXT: vpcmpeqw %ymm6, %ymm2, %ymm1
; KNL-NEXT: vpmovsxwd %ymm1, %zmm1
; KNL-NEXT: vpslld $31, %zmm1, %zmm1
; KNL-NEXT: vptestmd %zmm1, %zmm1, %k0
; KNL-NEXT: kshiftlw $14, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: kmovw %k1, %eax
; KNL-NEXT: kshiftlw $15, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: kmovw %k1, %ecx
; KNL-NEXT: vmovd %ecx, %xmm0
; KNL-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0
; KNL-NEXT: vmovd %ecx, %xmm1
; KNL-NEXT: vpinsrb $1, %eax, %xmm1, %xmm1
; KNL-NEXT: kshiftlw $13, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: kmovw %k1, %eax
; KNL-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
; KNL-NEXT: vpinsrb $2, %eax, %xmm1, %xmm1
; KNL-NEXT: kshiftlw $12, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: kmovw %k1, %eax
; KNL-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0
; KNL-NEXT: vpinsrb $3, %eax, %xmm1, %xmm1
; KNL-NEXT: kshiftlw $11, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: kmovw %k1, %eax
; KNL-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0
; KNL-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
; KNL-NEXT: kshiftlw $10, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: kmovw %k1, %eax
; KNL-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
; KNL-NEXT: vpinsrb $5, %eax, %xmm1, %xmm1
; KNL-NEXT: kshiftlw $9, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: kmovw %k1, %eax
; KNL-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0
; KNL-NEXT: vpinsrb $6, %eax, %xmm1, %xmm1
; KNL-NEXT: kshiftlw $8, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: kmovw %k1, %eax
; KNL-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; KNL-NEXT: vpinsrb $7, %eax, %xmm1, %xmm1
; KNL-NEXT: kshiftlw $7, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: kmovw %k1, %eax
; KNL-NEXT: vpinsrb $8, %eax, %xmm0, %xmm0
; KNL-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
; KNL-NEXT: kshiftlw $6, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: kmovw %k1, %eax
; KNL-NEXT: vpinsrb $9, %eax, %xmm0, %xmm0
; KNL-NEXT: vpinsrb $9, %eax, %xmm1, %xmm1
; KNL-NEXT: kshiftlw $5, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: kmovw %k1, %eax
; KNL-NEXT: vpinsrb $10, %eax, %xmm0, %xmm0
; KNL-NEXT: vpinsrb $10, %eax, %xmm1, %xmm1
; KNL-NEXT: kshiftlw $4, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: kmovw %k1, %eax
; KNL-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
; KNL-NEXT: vpinsrb $11, %eax, %xmm1, %xmm1
; KNL-NEXT: kshiftlw $3, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: kmovw %k1, %eax
; KNL-NEXT: vpinsrb $12, %eax, %xmm0, %xmm0
; KNL-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
; KNL-NEXT: kshiftlw $2, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: kmovw %k1, %eax
; KNL-NEXT: vpinsrb $13, %eax, %xmm0, %xmm0
; KNL-NEXT: vpinsrb $13, %eax, %xmm1, %xmm1
; KNL-NEXT: kshiftlw $1, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: kmovw %k1, %eax
; KNL-NEXT: vpinsrb $14, %eax, %xmm0, %xmm0
; KNL-NEXT: vpinsrb $14, %eax, %xmm1, %xmm1
; KNL-NEXT: vptestmd %zmm0, %zmm0, %k1
; KNL-NEXT: kshiftrw $15, %k0, %k0
; KNL-NEXT: kmovw %k0, %eax
; KNL-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
; KNL-NEXT: vpinsrb $15, %eax, %xmm1, %xmm0
; KNL-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; KNL-NEXT: vpmovsxbd %xmm0, %zmm0
; KNL-NEXT: vpslld $31, %zmm0, %zmm0
; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0

File diff suppressed because it is too large Load Diff

View File

@ -2750,23 +2750,23 @@ define <8 x i32> @test_mask_cmp_b_256(<32 x i8> %a0, <32 x i8> %a1, i32 %mask) {
; CHECK-NEXT: vpcmpgtb %ymm0, %ymm1, %k0 {%k1} ## encoding: [0x62,0xf1,0x75,0x29,0x64,0xc0]
; CHECK-NEXT: kmovd %k0, %r9d ## encoding: [0xc5,0x7b,0x93,0xc8]
; CHECK-NEXT: vpcmpleb %ymm1, %ymm0, %k0 {%k1} ## encoding: [0x62,0xf3,0x7d,0x29,0x3f,0xc1,0x02]
; CHECK-NEXT: kmovd %k0, %r10d ## encoding: [0xc5,0x7b,0x93,0xd0]
; CHECK-NEXT: kxord %k0, %k0, %k0 ## encoding: [0xc4,0xe1,0xfd,0x47,0xc0]
; CHECK-NEXT: kmovd %k0, %esi ## encoding: [0xc5,0xfb,0x93,0xf0]
; CHECK-NEXT: vpcmpneqb %ymm1, %ymm0, %k0 {%k1} ## encoding: [0x62,0xf3,0x7d,0x29,0x3f,0xc1,0x04]
; CHECK-NEXT: kmovd %k0, %eax ## encoding: [0xc5,0xfb,0x93,0xc0]
; CHECK-NEXT: vpcmpleb %ymm0, %ymm1, %k0 {%k1} ## encoding: [0x62,0xf3,0x75,0x29,0x3f,0xc0,0x02]
; CHECK-NEXT: kmovd %k0, %ecx ## encoding: [0xc5,0xfb,0x93,0xc8]
; CHECK-NEXT: vpcmpgtb %ymm1, %ymm0, %k0 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0x64,0xc1]
; CHECK-NEXT: kmovd %k0, %edx ## encoding: [0xc5,0xfb,0x93,0xd0]
; CHECK-NEXT: vmovd %eax, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc0]
; CHECK-NEXT: vpinsrd $1, %ecx, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x22,0xc1,0x01]
; CHECK-NEXT: vpinsrd $2, %edx, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x22,0xc2,0x02]
; CHECK-NEXT: vpcmpneqb %ymm1, %ymm0, %k0 {%k1} ## encoding: [0x62,0xf3,0x7d,0x29,0x3f,0xc1,0x04]
; CHECK-NEXT: kmovd %k0, %esi ## encoding: [0xc5,0xfb,0x93,0xf0]
; CHECK-NEXT: vpcmpleb %ymm0, %ymm1, %k0 {%k1} ## encoding: [0x62,0xf3,0x75,0x29,0x3f,0xc0,0x02]
; CHECK-NEXT: kmovd %k0, %eax ## encoding: [0xc5,0xfb,0x93,0xc0]
; CHECK-NEXT: vpcmpgtb %ymm1, %ymm0, %k0 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0x64,0xc1]
; CHECK-NEXT: kmovd %k0, %ecx ## encoding: [0xc5,0xfb,0x93,0xc8]
; CHECK-NEXT: vmovd %esi, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc6]
; CHECK-NEXT: vpinsrd $1, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x22,0xc0,0x01]
; CHECK-NEXT: vpinsrd $2, %ecx, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x22,0xc1,0x02]
; CHECK-NEXT: vpinsrd $3, %edi, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x22,0xc7,0x03]
; CHECK-NEXT: vmovd %r8d, %xmm1 ## EVEX TO VEX Compression encoding: [0xc4,0xc1,0x79,0x6e,0xc8]
; CHECK-NEXT: vpinsrd $1, %r9d, %xmm1, %xmm1 ## encoding: [0xc4,0xc3,0x71,0x22,0xc9,0x01]
; CHECK-NEXT: vpinsrd $2, %r10d, %xmm1, %xmm1 ## encoding: [0xc4,0xc3,0x71,0x22,0xca,0x02]
; CHECK-NEXT: vpinsrd $3, %esi, %xmm1, %xmm1 ## encoding: [0xc4,0xe3,0x71,0x22,0xce,0x03]
; CHECK-NEXT: vpinsrd $2, %edx, %xmm1, %xmm1 ## encoding: [0xc4,0xe3,0x71,0x22,0xca,0x02]
; CHECK-NEXT: kxord %k0, %k0, %k0 ## encoding: [0xc4,0xe1,0xfd,0x47,0xc0]
; CHECK-NEXT: kmovd %k0, %eax ## encoding: [0xc5,0xfb,0x93,0xc0]
; CHECK-NEXT: vpinsrd $3, %eax, %xmm1, %xmm1 ## encoding: [0xc4,0xe3,0x71,0x22,0xc8,0x03]
; CHECK-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe3,0x75,0x38,0xc0,0x01]
; CHECK-NEXT: retq ## encoding: [0xc3]
%res0 = call i32 @llvm.x86.avx512.mask.cmp.b.256(<32 x i8> %a0, <32 x i8> %a1, i32 0, i32 %mask)
@ -2848,23 +2848,23 @@ define <8 x i32> @test_mask_ucmp_b_256(<32 x i8> %a0, <32 x i8> %a1, i32 %mask)
; CHECK-NEXT: vpcmpltub %ymm1, %ymm0, %k0 {%k1} ## encoding: [0x62,0xf3,0x7d,0x29,0x3e,0xc1,0x01]
; CHECK-NEXT: kmovd %k0, %r9d ## encoding: [0xc5,0x7b,0x93,0xc8]
; CHECK-NEXT: vpcmpleub %ymm1, %ymm0, %k0 {%k1} ## encoding: [0x62,0xf3,0x7d,0x29,0x3e,0xc1,0x02]
; CHECK-NEXT: kmovd %k0, %r10d ## encoding: [0xc5,0x7b,0x93,0xd0]
; CHECK-NEXT: kxord %k0, %k0, %k0 ## encoding: [0xc4,0xe1,0xfd,0x47,0xc0]
; CHECK-NEXT: kmovd %k0, %esi ## encoding: [0xc5,0xfb,0x93,0xf0]
; CHECK-NEXT: vpcmpneqb %ymm1, %ymm0, %k0 {%k1} ## encoding: [0x62,0xf3,0x7d,0x29,0x3f,0xc1,0x04]
; CHECK-NEXT: kmovd %k0, %eax ## encoding: [0xc5,0xfb,0x93,0xc0]
; CHECK-NEXT: vpcmpnltub %ymm1, %ymm0, %k0 {%k1} ## encoding: [0x62,0xf3,0x7d,0x29,0x3e,0xc1,0x05]
; CHECK-NEXT: kmovd %k0, %ecx ## encoding: [0xc5,0xfb,0x93,0xc8]
; CHECK-NEXT: vpcmpnleub %ymm1, %ymm0, %k0 {%k1} ## encoding: [0x62,0xf3,0x7d,0x29,0x3e,0xc1,0x06]
; CHECK-NEXT: kmovd %k0, %edx ## encoding: [0xc5,0xfb,0x93,0xd0]
; CHECK-NEXT: vmovd %eax, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc0]
; CHECK-NEXT: vpinsrd $1, %ecx, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x22,0xc1,0x01]
; CHECK-NEXT: vpinsrd $2, %edx, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x22,0xc2,0x02]
; CHECK-NEXT: vpcmpneqb %ymm1, %ymm0, %k0 {%k1} ## encoding: [0x62,0xf3,0x7d,0x29,0x3f,0xc1,0x04]
; CHECK-NEXT: kmovd %k0, %esi ## encoding: [0xc5,0xfb,0x93,0xf0]
; CHECK-NEXT: vpcmpnltub %ymm1, %ymm0, %k0 {%k1} ## encoding: [0x62,0xf3,0x7d,0x29,0x3e,0xc1,0x05]
; CHECK-NEXT: kmovd %k0, %eax ## encoding: [0xc5,0xfb,0x93,0xc0]
; CHECK-NEXT: vpcmpnleub %ymm1, %ymm0, %k0 {%k1} ## encoding: [0x62,0xf3,0x7d,0x29,0x3e,0xc1,0x06]
; CHECK-NEXT: kmovd %k0, %ecx ## encoding: [0xc5,0xfb,0x93,0xc8]
; CHECK-NEXT: vmovd %esi, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc6]
; CHECK-NEXT: vpinsrd $1, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x22,0xc0,0x01]
; CHECK-NEXT: vpinsrd $2, %ecx, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x22,0xc1,0x02]
; CHECK-NEXT: vpinsrd $3, %edi, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x22,0xc7,0x03]
; CHECK-NEXT: vmovd %r8d, %xmm1 ## EVEX TO VEX Compression encoding: [0xc4,0xc1,0x79,0x6e,0xc8]
; CHECK-NEXT: vpinsrd $1, %r9d, %xmm1, %xmm1 ## encoding: [0xc4,0xc3,0x71,0x22,0xc9,0x01]
; CHECK-NEXT: vpinsrd $2, %r10d, %xmm1, %xmm1 ## encoding: [0xc4,0xc3,0x71,0x22,0xca,0x02]
; CHECK-NEXT: vpinsrd $3, %esi, %xmm1, %xmm1 ## encoding: [0xc4,0xe3,0x71,0x22,0xce,0x03]
; CHECK-NEXT: vpinsrd $2, %edx, %xmm1, %xmm1 ## encoding: [0xc4,0xe3,0x71,0x22,0xca,0x02]
; CHECK-NEXT: kxord %k0, %k0, %k0 ## encoding: [0xc4,0xe1,0xfd,0x47,0xc0]
; CHECK-NEXT: kmovd %k0, %eax ## encoding: [0xc5,0xfb,0x93,0xc0]
; CHECK-NEXT: vpinsrd $3, %eax, %xmm1, %xmm1 ## encoding: [0xc4,0xe3,0x71,0x22,0xc8,0x03]
; CHECK-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe3,0x75,0x38,0xc0,0x01]
; CHECK-NEXT: retq ## encoding: [0xc3]
%res0 = call i32 @llvm.x86.avx512.mask.ucmp.b.256(<32 x i8> %a0, <32 x i8> %a1, i32 0, i32 %mask)

View File

@ -6,7 +6,6 @@ declare <2 x double> @llvm.x86.avx512.mask.vextractf64x2.512(<8 x double>, i32,
define <2 x double>@test_int_x86_avx512_mask_vextractf64x2_512(<8 x double> %x0, <2 x double> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_vextractf64x2_512:
; CHECK: ## BB#0:
; CHECK-NEXT: vextractf32x4 $1, %zmm0, %xmm0
; CHECK-NEXT: kmovw %edi, %k0
; CHECK-NEXT: kshiftlb $7, %k0, %k1
; CHECK-NEXT: kshiftrb $7, %k1, %k1
@ -16,6 +15,7 @@ define <2 x double>@test_int_x86_avx512_mask_vextractf64x2_512(<8 x double> %x0,
; CHECK-NEXT: kmovw %k1, %ecx
; CHECK-NEXT: vmovd %ecx, %xmm2
; CHECK-NEXT: vpinsrb $8, %eax, %xmm2, %xmm2
; CHECK-NEXT: vextractf32x4 $1, %zmm0, %xmm0
; CHECK-NEXT: vpsllq $63, %xmm2, %xmm2
; CHECK-NEXT: vpsraq $63, %zmm2, %zmm2
; CHECK-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm1

View File

@ -314,8 +314,8 @@ define <4 x i64> @test256_11(<4 x i64> %x, <4 x i64>* %y.ptr, <4 x i64> %x1, <4
;
; NoVLX-LABEL: test256_11:
; NoVLX: # BB#0:
; NoVLX-NEXT: vpcmpgtq %ymm2, %ymm1, %ymm2
; NoVLX-NEXT: vpcmpgtq (%rdi), %ymm0, %ymm3
; NoVLX-NEXT: vpcmpgtq %ymm2, %ymm1, %ymm2
; NoVLX-NEXT: vpand %ymm2, %ymm3, %ymm2
; NoVLX-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
; NoVLX-NEXT: retq
@ -824,8 +824,8 @@ define <2 x i64> @test128_11(<2 x i64> %x, <2 x i64>* %y.ptr, <2 x i64> %x1, <2
;
; NoVLX-LABEL: test128_11:
; NoVLX: # BB#0:
; NoVLX-NEXT: vpcmpgtq %xmm2, %xmm1, %xmm2
; NoVLX-NEXT: vpcmpgtq (%rdi), %xmm0, %xmm3
; NoVLX-NEXT: vpcmpgtq %xmm2, %xmm1, %xmm2
; NoVLX-NEXT: vpand %xmm2, %xmm3, %xmm2
; NoVLX-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: retq

File diff suppressed because it is too large Load Diff

View File

@ -20,10 +20,10 @@ define i16 @test_andn_i16(i16 zeroext %a0, i16 zeroext %a1, i16 *%a2) {
; HASWELL: # BB#0:
; HASWELL-NEXT: andnl %esi, %edi, %eax # sched: [1:0.50]
; HASWELL-NEXT: notl %edi # sched: [1:0.25]
; HASWELL-NEXT: andw (%rdx), %di # sched: [5:0.50]
; HASWELL-NEXT: andw (%rdx), %di # sched: [1:0.50]
; HASWELL-NEXT: addl %edi, %eax # sched: [1:0.25]
; HASWELL-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; HASWELL-NEXT: retq # sched: [1:1.00]
; HASWELL-NEXT: retq # sched: [2:1.00]
;
; BTVER2-LABEL: test_andn_i16:
; BTVER2: # BB#0:
@ -61,9 +61,9 @@ define i32 @test_andn_i32(i32 %a0, i32 %a1, i32 *%a2) {
; HASWELL-LABEL: test_andn_i32:
; HASWELL: # BB#0:
; HASWELL-NEXT: andnl %esi, %edi, %ecx # sched: [1:0.50]
; HASWELL-NEXT: andnl (%rdx), %edi, %eax # sched: [4:0.50]
; HASWELL-NEXT: andnl (%rdx), %edi, %eax # sched: [1:0.50]
; HASWELL-NEXT: addl %ecx, %eax # sched: [1:0.25]
; HASWELL-NEXT: retq # sched: [1:1.00]
; HASWELL-NEXT: retq # sched: [2:1.00]
;
; BTVER2-LABEL: test_andn_i32:
; BTVER2: # BB#0:
@ -97,9 +97,9 @@ define i64 @test_andn_i64(i64 %a0, i64 %a1, i64 *%a2) {
; HASWELL-LABEL: test_andn_i64:
; HASWELL: # BB#0:
; HASWELL-NEXT: andnq %rsi, %rdi, %rcx # sched: [1:0.50]
; HASWELL-NEXT: andnq (%rdx), %rdi, %rax # sched: [4:0.50]
; HASWELL-NEXT: andnq (%rdx), %rdi, %rax # sched: [1:0.50]
; HASWELL-NEXT: addq %rcx, %rax # sched: [1:0.25]
; HASWELL-NEXT: retq # sched: [1:1.00]
; HASWELL-NEXT: retq # sched: [2:1.00]
;
; BTVER2-LABEL: test_andn_i64:
; BTVER2: # BB#0:
@ -132,10 +132,10 @@ define i32 @test_bextr_i32(i32 %a0, i32 %a1, i32 *%a2) {
;
; HASWELL-LABEL: test_bextr_i32:
; HASWELL: # BB#0:
; HASWELL-NEXT: bextrl %edi, (%rdx), %ecx # sched: [6:0.50]
; HASWELL-NEXT: bextrl %edi, (%rdx), %ecx # sched: [2:0.50]
; HASWELL-NEXT: bextrl %edi, %esi, %eax # sched: [2:0.50]
; HASWELL-NEXT: addl %ecx, %eax # sched: [1:0.25]
; HASWELL-NEXT: retq # sched: [1:1.00]
; HASWELL-NEXT: retq # sched: [2:1.00]
;
; BTVER2-LABEL: test_bextr_i32:
; BTVER2: # BB#0:
@ -168,10 +168,10 @@ define i64 @test_bextr_i64(i64 %a0, i64 %a1, i64 *%a2) {
;
; HASWELL-LABEL: test_bextr_i64:
; HASWELL: # BB#0:
; HASWELL-NEXT: bextrq %rdi, (%rdx), %rcx # sched: [6:0.50]
; HASWELL-NEXT: bextrq %rdi, (%rdx), %rcx # sched: [2:0.50]
; HASWELL-NEXT: bextrq %rdi, %rsi, %rax # sched: [2:0.50]
; HASWELL-NEXT: addq %rcx, %rax # sched: [1:0.25]
; HASWELL-NEXT: retq # sched: [1:1.00]
; HASWELL-NEXT: retq # sched: [2:1.00]
;
; BTVER2-LABEL: test_bextr_i64:
; BTVER2: # BB#0:
@ -204,10 +204,10 @@ define i32 @test_blsi_i32(i32 %a0, i32 *%a1) {
;
; HASWELL-LABEL: test_blsi_i32:
; HASWELL: # BB#0:
; HASWELL-NEXT: blsil (%rsi), %ecx # sched: [4:0.50]
; HASWELL-NEXT: blsil (%rsi), %ecx # sched: [1:0.50]
; HASWELL-NEXT: blsil %edi, %eax # sched: [1:0.50]
; HASWELL-NEXT: addl %ecx, %eax # sched: [1:0.25]
; HASWELL-NEXT: retq # sched: [1:1.00]
; HASWELL-NEXT: retq # sched: [2:1.00]
;
; BTVER2-LABEL: test_blsi_i32:
; BTVER2: # BB#0:
@ -241,10 +241,10 @@ define i64 @test_blsi_i64(i64 %a0, i64 *%a1) {
;
; HASWELL-LABEL: test_blsi_i64:
; HASWELL: # BB#0:
; HASWELL-NEXT: blsiq (%rsi), %rcx # sched: [4:0.50]
; HASWELL-NEXT: blsiq (%rsi), %rcx # sched: [1:0.50]
; HASWELL-NEXT: blsiq %rdi, %rax # sched: [1:0.50]
; HASWELL-NEXT: addq %rcx, %rax # sched: [1:0.25]
; HASWELL-NEXT: retq # sched: [1:1.00]
; HASWELL-NEXT: retq # sched: [2:1.00]
;
; BTVER2-LABEL: test_blsi_i64:
; BTVER2: # BB#0:
@ -278,10 +278,10 @@ define i32 @test_blsmsk_i32(i32 %a0, i32 *%a1) {
;
; HASWELL-LABEL: test_blsmsk_i32:
; HASWELL: # BB#0:
; HASWELL-NEXT: blsmskl (%rsi), %ecx # sched: [4:0.50]
; HASWELL-NEXT: blsmskl (%rsi), %ecx # sched: [1:0.50]
; HASWELL-NEXT: blsmskl %edi, %eax # sched: [1:0.50]
; HASWELL-NEXT: addl %ecx, %eax # sched: [1:0.25]
; HASWELL-NEXT: retq # sched: [1:1.00]
; HASWELL-NEXT: retq # sched: [2:1.00]
;
; BTVER2-LABEL: test_blsmsk_i32:
; BTVER2: # BB#0:
@ -315,10 +315,10 @@ define i64 @test_blsmsk_i64(i64 %a0, i64 *%a1) {
;
; HASWELL-LABEL: test_blsmsk_i64:
; HASWELL: # BB#0:
; HASWELL-NEXT: blsmskq (%rsi), %rcx # sched: [4:0.50]
; HASWELL-NEXT: blsmskq (%rsi), %rcx # sched: [1:0.50]
; HASWELL-NEXT: blsmskq %rdi, %rax # sched: [1:0.50]
; HASWELL-NEXT: addq %rcx, %rax # sched: [1:0.25]
; HASWELL-NEXT: retq # sched: [1:1.00]
; HASWELL-NEXT: retq # sched: [2:1.00]
;
; BTVER2-LABEL: test_blsmsk_i64:
; BTVER2: # BB#0:
@ -352,10 +352,10 @@ define i32 @test_blsr_i32(i32 %a0, i32 *%a1) {
;
; HASWELL-LABEL: test_blsr_i32:
; HASWELL: # BB#0:
; HASWELL-NEXT: blsrl (%rsi), %ecx # sched: [4:0.50]
; HASWELL-NEXT: blsrl (%rsi), %ecx # sched: [1:0.50]
; HASWELL-NEXT: blsrl %edi, %eax # sched: [1:0.50]
; HASWELL-NEXT: addl %ecx, %eax # sched: [1:0.25]
; HASWELL-NEXT: retq # sched: [1:1.00]
; HASWELL-NEXT: retq # sched: [2:1.00]
;
; BTVER2-LABEL: test_blsr_i32:
; BTVER2: # BB#0:
@ -389,10 +389,10 @@ define i64 @test_blsr_i64(i64 %a0, i64 *%a1) {
;
; HASWELL-LABEL: test_blsr_i64:
; HASWELL: # BB#0:
; HASWELL-NEXT: blsrq (%rsi), %rcx # sched: [4:0.50]
; HASWELL-NEXT: blsrq (%rsi), %rcx # sched: [1:0.50]
; HASWELL-NEXT: blsrq %rdi, %rax # sched: [1:0.50]
; HASWELL-NEXT: addq %rcx, %rax # sched: [1:0.25]
; HASWELL-NEXT: retq # sched: [1:1.00]
; HASWELL-NEXT: retq # sched: [2:1.00]
;
; BTVER2-LABEL: test_blsr_i64:
; BTVER2: # BB#0:
@ -427,11 +427,11 @@ define i16 @test_cttz_i16(i16 zeroext %a0, i16 *%a1) {
;
; HASWELL-LABEL: test_cttz_i16:
; HASWELL: # BB#0:
; HASWELL-NEXT: tzcntw (%rsi), %cx # sched: [7:1.00]
; HASWELL-NEXT: tzcntw (%rsi), %cx # sched: [3:1.00]
; HASWELL-NEXT: tzcntw %di, %ax # sched: [3:1.00]
; HASWELL-NEXT: orl %ecx, %eax # sched: [1:0.25]
; HASWELL-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; HASWELL-NEXT: retq # sched: [1:1.00]
; HASWELL-NEXT: retq # sched: [2:1.00]
;
; BTVER2-LABEL: test_cttz_i16:
; BTVER2: # BB#0:
@ -466,10 +466,10 @@ define i32 @test_cttz_i32(i32 %a0, i32 *%a1) {
;
; HASWELL-LABEL: test_cttz_i32:
; HASWELL: # BB#0:
; HASWELL-NEXT: tzcntl (%rsi), %ecx # sched: [7:1.00]
; HASWELL-NEXT: tzcntl (%rsi), %ecx # sched: [3:1.00]
; HASWELL-NEXT: tzcntl %edi, %eax # sched: [3:1.00]
; HASWELL-NEXT: orl %ecx, %eax # sched: [1:0.25]
; HASWELL-NEXT: retq # sched: [1:1.00]
; HASWELL-NEXT: retq # sched: [2:1.00]
;
; BTVER2-LABEL: test_cttz_i32:
; BTVER2: # BB#0:
@ -502,10 +502,10 @@ define i64 @test_cttz_i64(i64 %a0, i64 *%a1) {
;
; HASWELL-LABEL: test_cttz_i64:
; HASWELL: # BB#0:
; HASWELL-NEXT: tzcntq (%rsi), %rcx # sched: [7:1.00]
; HASWELL-NEXT: tzcntq (%rsi), %rcx # sched: [3:1.00]
; HASWELL-NEXT: tzcntq %rdi, %rax # sched: [3:1.00]
; HASWELL-NEXT: orq %rcx, %rax # sched: [1:0.25]
; HASWELL-NEXT: retq # sched: [1:1.00]
; HASWELL-NEXT: retq # sched: [2:1.00]
;
; BTVER2-LABEL: test_cttz_i64:
; BTVER2: # BB#0:

View File

@ -15,10 +15,10 @@ define i32 @test_bzhi_i32(i32 %a0, i32 %a1, i32 *%a2) {
;
; HASWELL-LABEL: test_bzhi_i32:
; HASWELL: # BB#0:
; HASWELL-NEXT: bzhil %edi, (%rdx), %ecx # sched: [4:0.50]
; HASWELL-NEXT: bzhil %edi, (%rdx), %ecx # sched: [1:0.50]
; HASWELL-NEXT: bzhil %edi, %esi, %eax # sched: [1:0.50]
; HASWELL-NEXT: addl %ecx, %eax # sched: [1:0.25]
; HASWELL-NEXT: retq # sched: [1:1.00]
; HASWELL-NEXT: retq # sched: [2:1.00]
;
; ZNVER1-LABEL: test_bzhi_i32:
; ZNVER1: # BB#0:
@ -44,10 +44,10 @@ define i64 @test_bzhi_i64(i64 %a0, i64 %a1, i64 *%a2) {
;
; HASWELL-LABEL: test_bzhi_i64:
; HASWELL: # BB#0:
; HASWELL-NEXT: bzhiq %rdi, (%rdx), %rcx # sched: [4:0.50]
; HASWELL-NEXT: bzhiq %rdi, (%rdx), %rcx # sched: [1:0.50]
; HASWELL-NEXT: bzhiq %rdi, %rsi, %rax # sched: [1:0.50]
; HASWELL-NEXT: addq %rcx, %rax # sched: [1:0.25]
; HASWELL-NEXT: retq # sched: [1:1.00]
; HASWELL-NEXT: retq # sched: [2:1.00]
;
; ZNVER1-LABEL: test_bzhi_i64:
; ZNVER1: # BB#0:
@ -80,9 +80,9 @@ define i64 @test_mulx_i64(i64 %a0, i64 %a1, i64 *%a2) {
; HASWELL-NEXT: movq %rdx, %rax # sched: [1:0.25]
; HASWELL-NEXT: movq %rdi, %rdx # sched: [1:0.25]
; HASWELL-NEXT: mulxq %rsi, %rsi, %rcx # sched: [4:1.00]
; HASWELL-NEXT: mulxq (%rax), %rdx, %rax # sched: [8:1.00]
; HASWELL-NEXT: mulxq (%rax), %rdx, %rax # sched: [4:1.00]
; HASWELL-NEXT: orq %rcx, %rax # sched: [1:0.25]
; HASWELL-NEXT: retq # sched: [1:1.00]
; HASWELL-NEXT: retq # sched: [2:1.00]
;
; ZNVER1-LABEL: test_mulx_i64:
; ZNVER1: # BB#0:
@ -116,10 +116,10 @@ define i32 @test_pdep_i32(i32 %a0, i32 %a1, i32 *%a2) {
;
; HASWELL-LABEL: test_pdep_i32:
; HASWELL: # BB#0:
; HASWELL-NEXT: pdepl (%rdx), %edi, %ecx # sched: [7:1.00]
; HASWELL-NEXT: pdepl (%rdx), %edi, %ecx # sched: [3:1.00]
; HASWELL-NEXT: pdepl %esi, %edi, %eax # sched: [3:1.00]
; HASWELL-NEXT: addl %ecx, %eax # sched: [1:0.25]
; HASWELL-NEXT: retq # sched: [1:1.00]
; HASWELL-NEXT: retq # sched: [2:1.00]
;
; ZNVER1-LABEL: test_pdep_i32:
; ZNVER1: # BB#0:
@ -145,10 +145,10 @@ define i64 @test_pdep_i64(i64 %a0, i64 %a1, i64 *%a2) {
;
; HASWELL-LABEL: test_pdep_i64:
; HASWELL: # BB#0:
; HASWELL-NEXT: pdepq (%rdx), %rdi, %rcx # sched: [7:1.00]
; HASWELL-NEXT: pdepq (%rdx), %rdi, %rcx # sched: [3:1.00]
; HASWELL-NEXT: pdepq %rsi, %rdi, %rax # sched: [3:1.00]
; HASWELL-NEXT: addq %rcx, %rax # sched: [1:0.25]
; HASWELL-NEXT: retq # sched: [1:1.00]
; HASWELL-NEXT: retq # sched: [2:1.00]
;
; ZNVER1-LABEL: test_pdep_i64:
; ZNVER1: # BB#0:
@ -174,10 +174,10 @@ define i32 @test_pext_i32(i32 %a0, i32 %a1, i32 *%a2) {
;
; HASWELL-LABEL: test_pext_i32:
; HASWELL: # BB#0:
; HASWELL-NEXT: pextl (%rdx), %edi, %ecx # sched: [7:1.00]
; HASWELL-NEXT: pextl (%rdx), %edi, %ecx # sched: [3:1.00]
; HASWELL-NEXT: pextl %esi, %edi, %eax # sched: [3:1.00]
; HASWELL-NEXT: addl %ecx, %eax # sched: [1:0.25]
; HASWELL-NEXT: retq # sched: [1:1.00]
; HASWELL-NEXT: retq # sched: [2:1.00]
;
; ZNVER1-LABEL: test_pext_i32:
; ZNVER1: # BB#0:
@ -203,10 +203,10 @@ define i64 @test_pext_i64(i64 %a0, i64 %a1, i64 *%a2) {
;
; HASWELL-LABEL: test_pext_i64:
; HASWELL: # BB#0:
; HASWELL-NEXT: pextq (%rdx), %rdi, %rcx # sched: [7:1.00]
; HASWELL-NEXT: pextq (%rdx), %rdi, %rcx # sched: [3:1.00]
; HASWELL-NEXT: pextq %rsi, %rdi, %rax # sched: [3:1.00]
; HASWELL-NEXT: addq %rcx, %rax # sched: [1:0.25]
; HASWELL-NEXT: retq # sched: [1:1.00]
; HASWELL-NEXT: retq # sched: [2:1.00]
;
; ZNVER1-LABEL: test_pext_i64:
; ZNVER1: # BB#0:
@ -233,9 +233,9 @@ define i32 @test_rorx_i32(i32 %a0, i32 %a1, i32 *%a2) {
; HASWELL-LABEL: test_rorx_i32:
; HASWELL: # BB#0:
; HASWELL-NEXT: rorxl $5, %edi, %ecx # sched: [1:0.50]
; HASWELL-NEXT: rorxl $5, (%rdx), %eax # sched: [5:0.50]
; HASWELL-NEXT: rorxl $5, (%rdx), %eax # sched: [1:0.50]
; HASWELL-NEXT: addl %ecx, %eax # sched: [1:0.25]
; HASWELL-NEXT: retq # sched: [1:1.00]
; HASWELL-NEXT: retq # sched: [2:1.00]
;
; ZNVER1-LABEL: test_rorx_i32:
; ZNVER1: # BB#0:
@ -265,9 +265,9 @@ define i64 @test_rorx_i64(i64 %a0, i64 %a1, i64 *%a2) {
; HASWELL-LABEL: test_rorx_i64:
; HASWELL: # BB#0:
; HASWELL-NEXT: rorxq $5, %rdi, %rcx # sched: [1:0.50]
; HASWELL-NEXT: rorxq $5, (%rdx), %rax # sched: [5:0.50]
; HASWELL-NEXT: rorxq $5, (%rdx), %rax # sched: [1:0.50]
; HASWELL-NEXT: addq %rcx, %rax # sched: [1:0.25]
; HASWELL-NEXT: retq # sched: [1:1.00]
; HASWELL-NEXT: retq # sched: [2:1.00]
;
; ZNVER1-LABEL: test_rorx_i64:
; ZNVER1: # BB#0:
@ -297,9 +297,9 @@ define i32 @test_sarx_i32(i32 %a0, i32 %a1, i32 *%a2) {
; HASWELL-LABEL: test_sarx_i32:
; HASWELL: # BB#0:
; HASWELL-NEXT: sarxl %esi, %edi, %ecx # sched: [1:0.50]
; HASWELL-NEXT: sarxl %esi, (%rdx), %eax # sched: [5:0.50]
; HASWELL-NEXT: sarxl %esi, (%rdx), %eax # sched: [1:0.50]
; HASWELL-NEXT: addl %ecx, %eax # sched: [1:0.25]
; HASWELL-NEXT: retq # sched: [1:1.00]
; HASWELL-NEXT: retq # sched: [2:1.00]
;
; ZNVER1-LABEL: test_sarx_i32:
; ZNVER1: # BB#0:
@ -325,9 +325,9 @@ define i64 @test_sarx_i64(i64 %a0, i64 %a1, i64 *%a2) {
; HASWELL-LABEL: test_sarx_i64:
; HASWELL: # BB#0:
; HASWELL-NEXT: sarxq %rsi, %rdi, %rcx # sched: [1:0.50]
; HASWELL-NEXT: sarxq %rsi, (%rdx), %rax # sched: [5:0.50]
; HASWELL-NEXT: sarxq %rsi, (%rdx), %rax # sched: [1:0.50]
; HASWELL-NEXT: addq %rcx, %rax # sched: [1:0.25]
; HASWELL-NEXT: retq # sched: [1:1.00]
; HASWELL-NEXT: retq # sched: [2:1.00]
;
; ZNVER1-LABEL: test_sarx_i64:
; ZNVER1: # BB#0:
@ -353,9 +353,9 @@ define i32 @test_shlx_i32(i32 %a0, i32 %a1, i32 *%a2) {
; HASWELL-LABEL: test_shlx_i32:
; HASWELL: # BB#0:
; HASWELL-NEXT: shlxl %esi, %edi, %ecx # sched: [1:0.50]
; HASWELL-NEXT: shlxl %esi, (%rdx), %eax # sched: [5:0.50]
; HASWELL-NEXT: shlxl %esi, (%rdx), %eax # sched: [1:0.50]
; HASWELL-NEXT: addl %ecx, %eax # sched: [1:0.25]
; HASWELL-NEXT: retq # sched: [1:1.00]
; HASWELL-NEXT: retq # sched: [2:1.00]
;
; ZNVER1-LABEL: test_shlx_i32:
; ZNVER1: # BB#0:
@ -381,9 +381,9 @@ define i64 @test_shlx_i64(i64 %a0, i64 %a1, i64 *%a2) {
; HASWELL-LABEL: test_shlx_i64:
; HASWELL: # BB#0:
; HASWELL-NEXT: shlxq %rsi, %rdi, %rcx # sched: [1:0.50]
; HASWELL-NEXT: shlxq %rsi, (%rdx), %rax # sched: [5:0.50]
; HASWELL-NEXT: shlxq %rsi, (%rdx), %rax # sched: [1:0.50]
; HASWELL-NEXT: addq %rcx, %rax # sched: [1:0.25]
; HASWELL-NEXT: retq # sched: [1:1.00]
; HASWELL-NEXT: retq # sched: [2:1.00]
;
; ZNVER1-LABEL: test_shlx_i64:
; ZNVER1: # BB#0:
@ -409,9 +409,9 @@ define i32 @test_shrx_i32(i32 %a0, i32 %a1, i32 *%a2) {
; HASWELL-LABEL: test_shrx_i32:
; HASWELL: # BB#0:
; HASWELL-NEXT: shrxl %esi, %edi, %ecx # sched: [1:0.50]
; HASWELL-NEXT: shrxl %esi, (%rdx), %eax # sched: [5:0.50]
; HASWELL-NEXT: shrxl %esi, (%rdx), %eax # sched: [1:0.50]
; HASWELL-NEXT: addl %ecx, %eax # sched: [1:0.25]
; HASWELL-NEXT: retq # sched: [1:1.00]
; HASWELL-NEXT: retq # sched: [2:1.00]
;
; ZNVER1-LABEL: test_shrx_i32:
; ZNVER1: # BB#0:
@ -437,9 +437,9 @@ define i64 @test_shrx_i64(i64 %a0, i64 %a1, i64 *%a2) {
; HASWELL-LABEL: test_shrx_i64:
; HASWELL: # BB#0:
; HASWELL-NEXT: shrxq %rsi, %rdi, %rcx # sched: [1:0.50]
; HASWELL-NEXT: shrxq %rsi, (%rdx), %rax # sched: [5:0.50]
; HASWELL-NEXT: shrxq %rsi, (%rdx), %rax # sched: [1:0.50]
; HASWELL-NEXT: addq %rcx, %rax # sched: [1:0.25]
; HASWELL-NEXT: retq # sched: [1:1.00]
; HASWELL-NEXT: retq # sched: [2:1.00]
;
; ZNVER1-LABEL: test_shrx_i64:
; ZNVER1: # BB#0:

View File

@ -23,10 +23,10 @@ define <4 x float> @test_vcvtph2ps_128(<8 x i16> %a0, <8 x i16> *%a1) {
;
; HASWELL-LABEL: test_vcvtph2ps_128:
; HASWELL: # BB#0:
; HASWELL-NEXT: vcvtph2ps (%rdi), %xmm1 # sched: [7:1.00]
; HASWELL-NEXT: vcvtph2ps %xmm0, %xmm0 # sched: [4:1.00]
; HASWELL-NEXT: vcvtph2ps (%rdi), %xmm1 # sched: [1:1.00]
; HASWELL-NEXT: vcvtph2ps %xmm0, %xmm0 # sched: [2:1.00]
; HASWELL-NEXT: vaddps %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
; HASWELL-NEXT: retq # sched: [1:1.00]
; HASWELL-NEXT: retq # sched: [2:1.00]
;
; BTVER2-LABEL: test_vcvtph2ps_128:
; BTVER2: # BB#0:
@ -66,10 +66,10 @@ define <8 x float> @test_vcvtph2ps_256(<8 x i16> %a0, <8 x i16> *%a1) {
;
; HASWELL-LABEL: test_vcvtph2ps_256:
; HASWELL: # BB#0:
; HASWELL-NEXT: vcvtph2ps (%rdi), %ymm1 # sched: [7:1.00]
; HASWELL-NEXT: vcvtph2ps %xmm0, %ymm0 # sched: [4:1.00]
; HASWELL-NEXT: vcvtph2ps (%rdi), %ymm1 # sched: [1:1.00]
; HASWELL-NEXT: vcvtph2ps %xmm0, %ymm0 # sched: [2:1.00]
; HASWELL-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [3:1.00]
; HASWELL-NEXT: retq # sched: [1:1.00]
; HASWELL-NEXT: retq # sched: [2:1.00]
;
; BTVER2-LABEL: test_vcvtph2ps_256:
; BTVER2: # BB#0:
@ -108,8 +108,8 @@ define <8 x i16> @test_vcvtps2ph_128(<4 x float> %a0, <4 x float> %a1, <4 x i16>
; HASWELL-LABEL: test_vcvtps2ph_128:
; HASWELL: # BB#0:
; HASWELL-NEXT: vcvtps2ph $0, %xmm0, %xmm0 # sched: [4:1.00]
; HASWELL-NEXT: vcvtps2ph $0, %xmm1, (%rdi) # sched: [8:1.00]
; HASWELL-NEXT: retq # sched: [1:1.00]
; HASWELL-NEXT: vcvtps2ph $0, %xmm1, (%rdi) # sched: [4:1.00]
; HASWELL-NEXT: retq # sched: [2:1.00]
;
; BTVER2-LABEL: test_vcvtps2ph_128:
; BTVER2: # BB#0:
@ -147,10 +147,10 @@ define <8 x i16> @test_vcvtps2ph_256(<8 x float> %a0, <8 x float> %a1, <8 x i16>
;
; HASWELL-LABEL: test_vcvtps2ph_256:
; HASWELL: # BB#0:
; HASWELL-NEXT: vcvtps2ph $0, %ymm0, %xmm0 # sched: [4:1.00]
; HASWELL-NEXT: vcvtps2ph $0, %ymm1, (%rdi) # sched: [8:1.00]
; HASWELL-NEXT: vzeroupper # sched: [1:?]
; HASWELL-NEXT: retq # sched: [1:1.00]
; HASWELL-NEXT: vcvtps2ph $0, %ymm0, %xmm0 # sched: [6:1.00]
; HASWELL-NEXT: vcvtps2ph $0, %ymm1, (%rdi) # sched: [6:1.00]
; HASWELL-NEXT: vzeroupper # sched: [4:1.00]
; HASWELL-NEXT: retq # sched: [2:1.00]
;
; BTVER2-LABEL: test_vcvtps2ph_256:
; BTVER2: # BB#0:

View File

@ -45,7 +45,7 @@ define i32 @test_lea_offset(i32) {
; HASWELL: # BB#0:
; HASWELL-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
; HASWELL-NEXT: leal -24(%rdi), %eax # sched: [1:0.50]
; HASWELL-NEXT: retq # sched: [1:1.00]
; HASWELL-NEXT: retq # sched: [2:1.00]
;
; BTVER2-LABEL: test_lea_offset:
; BTVER2: # BB#0:
@ -97,7 +97,7 @@ define i32 @test_lea_offset_big(i32) {
; HASWELL: # BB#0:
; HASWELL-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
; HASWELL-NEXT: leal 1024(%rdi), %eax # sched: [1:0.50]
; HASWELL-NEXT: retq # sched: [1:1.00]
; HASWELL-NEXT: retq # sched: [2:1.00]
;
; BTVER2-LABEL: test_lea_offset_big:
; BTVER2: # BB#0:
@ -155,7 +155,7 @@ define i32 @test_lea_add(i32, i32) {
; HASWELL-NEXT: # kill: %ESI<def> %ESI<kill> %RSI<def>
; HASWELL-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
; HASWELL-NEXT: leal (%rdi,%rsi), %eax # sched: [1:0.50]
; HASWELL-NEXT: retq # sched: [1:1.00]
; HASWELL-NEXT: retq # sched: [2:1.00]
;
; BTVER2-LABEL: test_lea_add:
; BTVER2: # BB#0:
@ -217,7 +217,7 @@ define i32 @test_lea_add_offset(i32, i32) {
; HASWELL-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
; HASWELL-NEXT: leal (%rdi,%rsi), %eax # sched: [1:0.50]
; HASWELL-NEXT: addl $16, %eax # sched: [1:0.25]
; HASWELL-NEXT: retq # sched: [1:1.00]
; HASWELL-NEXT: retq # sched: [2:1.00]
;
; BTVER2-LABEL: test_lea_add_offset:
; BTVER2: # BB#0:
@ -283,7 +283,7 @@ define i32 @test_lea_add_offset_big(i32, i32) {
; HASWELL-NEXT: leal (%rdi,%rsi), %eax # sched: [1:0.50]
; HASWELL-NEXT: addl $-4096, %eax # imm = 0xF000
; HASWELL-NEXT: # sched: [1:0.25]
; HASWELL-NEXT: retq # sched: [1:1.00]
; HASWELL-NEXT: retq # sched: [2:1.00]
;
; BTVER2-LABEL: test_lea_add_offset_big:
; BTVER2: # BB#0:
@ -338,7 +338,7 @@ define i32 @test_lea_mul(i32) {
; HASWELL: # BB#0:
; HASWELL-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
; HASWELL-NEXT: leal (%rdi,%rdi,2), %eax # sched: [1:0.50]
; HASWELL-NEXT: retq # sched: [1:1.00]
; HASWELL-NEXT: retq # sched: [2:1.00]
;
; BTVER2-LABEL: test_lea_mul:
; BTVER2: # BB#0:
@ -393,7 +393,7 @@ define i32 @test_lea_mul_offset(i32) {
; HASWELL-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
; HASWELL-NEXT: leal (%rdi,%rdi,2), %eax # sched: [1:0.50]
; HASWELL-NEXT: addl $-32, %eax # sched: [1:0.25]
; HASWELL-NEXT: retq # sched: [1:1.00]
; HASWELL-NEXT: retq # sched: [2:1.00]
;
; BTVER2-LABEL: test_lea_mul_offset:
; BTVER2: # BB#0:
@ -452,7 +452,7 @@ define i32 @test_lea_mul_offset_big(i32) {
; HASWELL-NEXT: leal (%rdi,%rdi,8), %eax # sched: [1:0.50]
; HASWELL-NEXT: addl $10000, %eax # imm = 0x2710
; HASWELL-NEXT: # sched: [1:0.25]
; HASWELL-NEXT: retq # sched: [1:1.00]
; HASWELL-NEXT: retq # sched: [2:1.00]
;
; BTVER2-LABEL: test_lea_mul_offset_big:
; BTVER2: # BB#0:
@ -510,7 +510,7 @@ define i32 @test_lea_add_scale(i32, i32) {
; HASWELL-NEXT: # kill: %ESI<def> %ESI<kill> %RSI<def>
; HASWELL-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
; HASWELL-NEXT: leal (%rdi,%rsi,2), %eax # sched: [1:0.50]
; HASWELL-NEXT: retq # sched: [1:1.00]
; HASWELL-NEXT: retq # sched: [2:1.00]
;
; BTVER2-LABEL: test_lea_add_scale:
; BTVER2: # BB#0:
@ -573,7 +573,7 @@ define i32 @test_lea_add_scale_offset(i32, i32) {
; HASWELL-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
; HASWELL-NEXT: leal (%rdi,%rsi,4), %eax # sched: [1:0.50]
; HASWELL-NEXT: addl $96, %eax # sched: [1:0.25]
; HASWELL-NEXT: retq # sched: [1:1.00]
; HASWELL-NEXT: retq # sched: [2:1.00]
;
; BTVER2-LABEL: test_lea_add_scale_offset:
; BTVER2: # BB#0:
@ -640,7 +640,7 @@ define i32 @test_lea_add_scale_offset_big(i32, i32) {
; HASWELL-NEXT: leal (%rdi,%rsi,8), %eax # sched: [1:0.50]
; HASWELL-NEXT: addl $-1200, %eax # imm = 0xFB50
; HASWELL-NEXT: # sched: [1:0.25]
; HASWELL-NEXT: retq # sched: [1:1.00]
; HASWELL-NEXT: retq # sched: [2:1.00]
;
; BTVER2-LABEL: test_lea_add_scale_offset_big:
; BTVER2: # BB#0:

View File

@ -40,7 +40,7 @@ define i64 @test_lea_offset(i64) {
; HASWELL-LABEL: test_lea_offset:
; HASWELL: # BB#0:
; HASWELL-NEXT: leaq -24(%rdi), %rax # sched: [1:0.50]
; HASWELL-NEXT: retq # sched: [1:1.00]
; HASWELL-NEXT: retq # sched: [2:1.00]
;
; BTVER2-LABEL: test_lea_offset:
; BTVER2: # BB#0:
@ -85,7 +85,7 @@ define i64 @test_lea_offset_big(i64) {
; HASWELL-LABEL: test_lea_offset_big:
; HASWELL: # BB#0:
; HASWELL-NEXT: leaq 1024(%rdi), %rax # sched: [1:0.50]
; HASWELL-NEXT: retq # sched: [1:1.00]
; HASWELL-NEXT: retq # sched: [2:1.00]
;
; BTVER2-LABEL: test_lea_offset_big:
; BTVER2: # BB#0:
@ -131,7 +131,7 @@ define i64 @test_lea_add(i64, i64) {
; HASWELL-LABEL: test_lea_add:
; HASWELL: # BB#0:
; HASWELL-NEXT: leaq (%rdi,%rsi), %rax # sched: [1:0.50]
; HASWELL-NEXT: retq # sched: [1:1.00]
; HASWELL-NEXT: retq # sched: [2:1.00]
;
; BTVER2-LABEL: test_lea_add:
; BTVER2: # BB#0:
@ -179,7 +179,7 @@ define i64 @test_lea_add_offset(i64, i64) {
; HASWELL: # BB#0:
; HASWELL-NEXT: leaq (%rdi,%rsi), %rax # sched: [1:0.50]
; HASWELL-NEXT: addq $16, %rax # sched: [1:0.25]
; HASWELL-NEXT: retq # sched: [1:1.00]
; HASWELL-NEXT: retq # sched: [2:1.00]
;
; BTVER2-LABEL: test_lea_add_offset:
; BTVER2: # BB#0:
@ -231,7 +231,7 @@ define i64 @test_lea_add_offset_big(i64, i64) {
; HASWELL-NEXT: leaq (%rdi,%rsi), %rax # sched: [1:0.50]
; HASWELL-NEXT: addq $-4096, %rax # imm = 0xF000
; HASWELL-NEXT: # sched: [1:0.25]
; HASWELL-NEXT: retq # sched: [1:1.00]
; HASWELL-NEXT: retq # sched: [2:1.00]
;
; BTVER2-LABEL: test_lea_add_offset_big:
; BTVER2: # BB#0:
@ -277,7 +277,7 @@ define i64 @test_lea_mul(i64) {
; HASWELL-LABEL: test_lea_mul:
; HASWELL: # BB#0:
; HASWELL-NEXT: leaq (%rdi,%rdi,2), %rax # sched: [1:0.50]
; HASWELL-NEXT: retq # sched: [1:1.00]
; HASWELL-NEXT: retq # sched: [2:1.00]
;
; BTVER2-LABEL: test_lea_mul:
; BTVER2: # BB#0:
@ -325,7 +325,7 @@ define i64 @test_lea_mul_offset(i64) {
; HASWELL: # BB#0:
; HASWELL-NEXT: leaq (%rdi,%rdi,2), %rax # sched: [1:0.50]
; HASWELL-NEXT: addq $-32, %rax # sched: [1:0.25]
; HASWELL-NEXT: retq # sched: [1:1.00]
; HASWELL-NEXT: retq # sched: [2:1.00]
;
; BTVER2-LABEL: test_lea_mul_offset:
; BTVER2: # BB#0:
@ -377,7 +377,7 @@ define i64 @test_lea_mul_offset_big(i64) {
; HASWELL-NEXT: leaq (%rdi,%rdi,8), %rax # sched: [1:0.50]
; HASWELL-NEXT: addq $10000, %rax # imm = 0x2710
; HASWELL-NEXT: # sched: [1:0.25]
; HASWELL-NEXT: retq # sched: [1:1.00]
; HASWELL-NEXT: retq # sched: [2:1.00]
;
; BTVER2-LABEL: test_lea_mul_offset_big:
; BTVER2: # BB#0:
@ -423,7 +423,7 @@ define i64 @test_lea_add_scale(i64, i64) {
; HASWELL-LABEL: test_lea_add_scale:
; HASWELL: # BB#0:
; HASWELL-NEXT: leaq (%rdi,%rsi,2), %rax # sched: [1:0.50]
; HASWELL-NEXT: retq # sched: [1:1.00]
; HASWELL-NEXT: retq # sched: [2:1.00]
;
; BTVER2-LABEL: test_lea_add_scale:
; BTVER2: # BB#0:
@ -472,7 +472,7 @@ define i64 @test_lea_add_scale_offset(i64, i64) {
; HASWELL: # BB#0:
; HASWELL-NEXT: leaq (%rdi,%rsi,4), %rax # sched: [1:0.50]
; HASWELL-NEXT: addq $96, %rax # sched: [1:0.25]
; HASWELL-NEXT: retq # sched: [1:1.00]
; HASWELL-NEXT: retq # sched: [2:1.00]
;
; BTVER2-LABEL: test_lea_add_scale_offset:
; BTVER2: # BB#0:
@ -525,7 +525,7 @@ define i64 @test_lea_add_scale_offset_big(i64, i64) {
; HASWELL-NEXT: leaq (%rdi,%rsi,8), %rax # sched: [1:0.50]
; HASWELL-NEXT: addq $-1200, %rax # imm = 0xFB50
; HASWELL-NEXT: # sched: [1:0.25]
; HASWELL-NEXT: retq # sched: [1:1.00]
; HASWELL-NEXT: retq # sched: [2:1.00]
;
; BTVER2-LABEL: test_lea_add_scale_offset_big:
; BTVER2: # BB#0:

View File

@ -17,11 +17,11 @@ define i16 @test_ctlz_i16(i16 zeroext %a0, i16 *%a1) {
;
; HASWELL-LABEL: test_ctlz_i16:
; HASWELL: # BB#0:
; HASWELL-NEXT: lzcntw (%rsi), %cx
; HASWELL-NEXT: lzcntw %di, %ax
; HASWELL-NEXT: lzcntw (%rsi), %cx # sched: [3:1.00]
; HASWELL-NEXT: lzcntw %di, %ax # sched: [3:1.00]
; HASWELL-NEXT: orl %ecx, %eax # sched: [1:0.25]
; HASWELL-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; HASWELL-NEXT: retq # sched: [1:1.00]
; HASWELL-NEXT: retq # sched: [2:1.00]
;
; BTVER2-LABEL: test_ctlz_i16:
; BTVER2: # BB#0:
@ -56,10 +56,10 @@ define i32 @test_ctlz_i32(i32 %a0, i32 *%a1) {
;
; HASWELL-LABEL: test_ctlz_i32:
; HASWELL: # BB#0:
; HASWELL-NEXT: lzcntl (%rsi), %ecx
; HASWELL-NEXT: lzcntl %edi, %eax
; HASWELL-NEXT: lzcntl (%rsi), %ecx # sched: [3:1.00]
; HASWELL-NEXT: lzcntl %edi, %eax # sched: [3:1.00]
; HASWELL-NEXT: orl %ecx, %eax # sched: [1:0.25]
; HASWELL-NEXT: retq # sched: [1:1.00]
; HASWELL-NEXT: retq # sched: [2:1.00]
;
; BTVER2-LABEL: test_ctlz_i32:
; BTVER2: # BB#0:
@ -92,10 +92,10 @@ define i64 @test_ctlz_i64(i64 %a0, i64 *%a1) {
;
; HASWELL-LABEL: test_ctlz_i64:
; HASWELL: # BB#0:
; HASWELL-NEXT: lzcntq (%rsi), %rcx
; HASWELL-NEXT: lzcntq %rdi, %rax
; HASWELL-NEXT: lzcntq (%rsi), %rcx # sched: [3:1.00]
; HASWELL-NEXT: lzcntq %rdi, %rax # sched: [3:1.00]
; HASWELL-NEXT: orq %rcx, %rax # sched: [1:0.25]
; HASWELL-NEXT: retq # sched: [1:1.00]
; HASWELL-NEXT: retq # sched: [2:1.00]
;
; BTVER2-LABEL: test_ctlz_i64:
; BTVER2: # BB#0:

View File

@ -33,9 +33,9 @@ define i16 @test_ctlz_i16(i16 *%a0, i16 %a1, i16 *%a2) {
;
; HASWELL-LABEL: test_ctlz_i16:
; HASWELL: # BB#0:
; HASWELL-NEXT: movbew (%rdi), %ax # sched: [6:0.50]
; HASWELL-NEXT: movbew (%rdi), %ax # sched: [1:0.50]
; HASWELL-NEXT: movbew %si, (%rdx) # sched: [1:1.00]
; HASWELL-NEXT: retq # sched: [1:1.00]
; HASWELL-NEXT: retq # sched: [2:1.00]
;
; BTVER2-LABEL: test_ctlz_i16:
; BTVER2: # BB#0:
@ -83,7 +83,7 @@ define i32 @test_ctlz_i32(i32 *%a0, i32 %a1, i32 *%a2) {
; HASWELL: # BB#0:
; HASWELL-NEXT: movbel (%rdi), %eax # sched: [1:0.50]
; HASWELL-NEXT: movbel %esi, (%rdx) # sched: [1:1.00]
; HASWELL-NEXT: retq # sched: [1:1.00]
; HASWELL-NEXT: retq # sched: [2:1.00]
;
; BTVER2-LABEL: test_ctlz_i32:
; BTVER2: # BB#0:
@ -129,9 +129,9 @@ define i64 @test_ctlz_i64(i64 *%a0, i64 %a1, i64 *%a2) {
;
; HASWELL-LABEL: test_ctlz_i64:
; HASWELL: # BB#0:
; HASWELL-NEXT: movbeq (%rdi), %rax # sched: [6:0.50]
; HASWELL-NEXT: movbeq (%rdi), %rax # sched: [1:0.50]
; HASWELL-NEXT: movbeq %rsi, (%rdx) # sched: [1:1.00]
; HASWELL-NEXT: retq # sched: [1:1.00]
; HASWELL-NEXT: retq # sched: [2:1.00]
;
; BTVER2-LABEL: test_ctlz_i64:
; BTVER2: # BB#0:

View File

@ -17,7 +17,7 @@ define i32 @test_mul_by_1(i32 %x) {
; X64-HSW-LABEL: test_mul_by_1:
; X64-HSW: # BB#0:
; X64-HSW-NEXT: movl %edi, %eax # sched: [1:0.25]
; X64-HSW-NEXT: retq # sched: [1:1.00]
; X64-HSW-NEXT: retq # sched: [2:1.00]
;
; X64-JAG-LABEL: test_mul_by_1:
; X64-JAG: # BB#0:
@ -32,7 +32,7 @@ define i32 @test_mul_by_1(i32 %x) {
; HSW-NOOPT-LABEL: test_mul_by_1:
; HSW-NOOPT: # BB#0:
; HSW-NOOPT-NEXT: movl %edi, %eax # sched: [1:0.25]
; HSW-NOOPT-NEXT: retq # sched: [1:1.00]
; HSW-NOOPT-NEXT: retq # sched: [2:1.00]
;
; JAG-NOOPT-LABEL: test_mul_by_1:
; JAG-NOOPT: # BB#0:
@ -63,7 +63,7 @@ define i32 @test_mul_by_2(i32 %x) {
; X64-HSW: # BB#0:
; X64-HSW-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
; X64-HSW-NEXT: leal (%rdi,%rdi), %eax # sched: [1:0.50]
; X64-HSW-NEXT: retq # sched: [1:1.00]
; X64-HSW-NEXT: retq # sched: [2:1.00]
;
; X64-JAG-LABEL: test_mul_by_2:
; X64-JAG: # BB#0:
@ -81,7 +81,7 @@ define i32 @test_mul_by_2(i32 %x) {
; HSW-NOOPT: # BB#0:
; HSW-NOOPT-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
; HSW-NOOPT-NEXT: leal (%rdi,%rdi), %eax # sched: [1:0.50]
; HSW-NOOPT-NEXT: retq # sched: [1:1.00]
; HSW-NOOPT-NEXT: retq # sched: [2:1.00]
;
; JAG-NOOPT-LABEL: test_mul_by_2:
; JAG-NOOPT: # BB#0:
@ -114,7 +114,7 @@ define i32 @test_mul_by_3(i32 %x) {
; X64-HSW: # BB#0:
; X64-HSW-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
; X64-HSW-NEXT: leal (%rdi,%rdi,2), %eax # sched: [1:0.50]
; X64-HSW-NEXT: retq # sched: [1:1.00]
; X64-HSW-NEXT: retq # sched: [2:1.00]
;
; X64-JAG-LABEL: test_mul_by_3:
; X64-JAG: # BB#0:
@ -131,7 +131,7 @@ define i32 @test_mul_by_3(i32 %x) {
; HSW-NOOPT: # BB#0:
; HSW-NOOPT-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
; HSW-NOOPT-NEXT: leal (%rdi,%rdi,2), %eax # sched: [1:0.50]
; HSW-NOOPT-NEXT: retq # sched: [1:1.00]
; HSW-NOOPT-NEXT: retq # sched: [2:1.00]
;
; JAG-NOOPT-LABEL: test_mul_by_3:
; JAG-NOOPT: # BB#0:
@ -165,7 +165,7 @@ define i32 @test_mul_by_4(i32 %x) {
; X64-HSW: # BB#0:
; X64-HSW-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
; X64-HSW-NEXT: leal (,%rdi,4), %eax # sched: [1:0.50]
; X64-HSW-NEXT: retq # sched: [1:1.00]
; X64-HSW-NEXT: retq # sched: [2:1.00]
;
; X64-JAG-LABEL: test_mul_by_4:
; X64-JAG: # BB#0:
@ -183,7 +183,7 @@ define i32 @test_mul_by_4(i32 %x) {
; HSW-NOOPT: # BB#0:
; HSW-NOOPT-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
; HSW-NOOPT-NEXT: leal (,%rdi,4), %eax # sched: [1:0.50]
; HSW-NOOPT-NEXT: retq # sched: [1:1.00]
; HSW-NOOPT-NEXT: retq # sched: [2:1.00]
;
; JAG-NOOPT-LABEL: test_mul_by_4:
; JAG-NOOPT: # BB#0:
@ -216,7 +216,7 @@ define i32 @test_mul_by_5(i32 %x) {
; X64-HSW: # BB#0:
; X64-HSW-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
; X64-HSW-NEXT: leal (%rdi,%rdi,4), %eax # sched: [1:0.50]
; X64-HSW-NEXT: retq # sched: [1:1.00]
; X64-HSW-NEXT: retq # sched: [2:1.00]
;
; X64-JAG-LABEL: test_mul_by_5:
; X64-JAG: # BB#0:
@ -233,7 +233,7 @@ define i32 @test_mul_by_5(i32 %x) {
; HSW-NOOPT: # BB#0:
; HSW-NOOPT-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
; HSW-NOOPT-NEXT: leal (%rdi,%rdi,4), %eax # sched: [1:0.50]
; HSW-NOOPT-NEXT: retq # sched: [1:1.00]
; HSW-NOOPT-NEXT: retq # sched: [2:1.00]
;
; JAG-NOOPT-LABEL: test_mul_by_5:
; JAG-NOOPT: # BB#0:
@ -269,7 +269,7 @@ define i32 @test_mul_by_6(i32 %x) {
; X64-HSW-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
; X64-HSW-NEXT: addl %edi, %edi # sched: [1:0.25]
; X64-HSW-NEXT: leal (%rdi,%rdi,2), %eax # sched: [1:0.50]
; X64-HSW-NEXT: retq # sched: [1:1.00]
; X64-HSW-NEXT: retq # sched: [2:1.00]
;
; X64-JAG-LABEL: test_mul_by_6:
; X64-JAG: # BB#0:
@ -285,8 +285,8 @@ define i32 @test_mul_by_6(i32 %x) {
;
; HSW-NOOPT-LABEL: test_mul_by_6:
; HSW-NOOPT: # BB#0:
; HSW-NOOPT-NEXT: imull $6, %edi, %eax # sched: [4:1.00]
; HSW-NOOPT-NEXT: retq # sched: [1:1.00]
; HSW-NOOPT-NEXT: imull $6, %edi, %eax # sched: [3:1.00]
; HSW-NOOPT-NEXT: retq # sched: [2:1.00]
;
; JAG-NOOPT-LABEL: test_mul_by_6:
; JAG-NOOPT: # BB#0:
@ -321,7 +321,7 @@ define i32 @test_mul_by_7(i32 %x) {
; X64-HSW-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
; X64-HSW-NEXT: leal (,%rdi,8), %eax # sched: [1:0.50]
; X64-HSW-NEXT: subl %edi, %eax # sched: [1:0.25]
; X64-HSW-NEXT: retq # sched: [1:1.00]
; X64-HSW-NEXT: retq # sched: [2:1.00]
;
; X64-JAG-LABEL: test_mul_by_7:
; X64-JAG: # BB#0:
@ -337,8 +337,8 @@ define i32 @test_mul_by_7(i32 %x) {
;
; HSW-NOOPT-LABEL: test_mul_by_7:
; HSW-NOOPT: # BB#0:
; HSW-NOOPT-NEXT: imull $7, %edi, %eax # sched: [4:1.00]
; HSW-NOOPT-NEXT: retq # sched: [1:1.00]
; HSW-NOOPT-NEXT: imull $7, %edi, %eax # sched: [3:1.00]
; HSW-NOOPT-NEXT: retq # sched: [2:1.00]
;
; JAG-NOOPT-LABEL: test_mul_by_7:
; JAG-NOOPT: # BB#0:
@ -371,7 +371,7 @@ define i32 @test_mul_by_8(i32 %x) {
; X64-HSW: # BB#0:
; X64-HSW-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
; X64-HSW-NEXT: leal (,%rdi,8), %eax # sched: [1:0.50]
; X64-HSW-NEXT: retq # sched: [1:1.00]
; X64-HSW-NEXT: retq # sched: [2:1.00]
;
; X64-JAG-LABEL: test_mul_by_8:
; X64-JAG: # BB#0:
@ -389,7 +389,7 @@ define i32 @test_mul_by_8(i32 %x) {
; HSW-NOOPT: # BB#0:
; HSW-NOOPT-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
; HSW-NOOPT-NEXT: leal (,%rdi,8), %eax # sched: [1:0.50]
; HSW-NOOPT-NEXT: retq # sched: [1:1.00]
; HSW-NOOPT-NEXT: retq # sched: [2:1.00]
;
; JAG-NOOPT-LABEL: test_mul_by_8:
; JAG-NOOPT: # BB#0:
@ -422,7 +422,7 @@ define i32 @test_mul_by_9(i32 %x) {
; X64-HSW: # BB#0:
; X64-HSW-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
; X64-HSW-NEXT: leal (%rdi,%rdi,8), %eax # sched: [1:0.50]
; X64-HSW-NEXT: retq # sched: [1:1.00]
; X64-HSW-NEXT: retq # sched: [2:1.00]
;
; X64-JAG-LABEL: test_mul_by_9:
; X64-JAG: # BB#0:
@ -439,7 +439,7 @@ define i32 @test_mul_by_9(i32 %x) {
; HSW-NOOPT: # BB#0:
; HSW-NOOPT-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
; HSW-NOOPT-NEXT: leal (%rdi,%rdi,8), %eax # sched: [1:0.50]
; HSW-NOOPT-NEXT: retq # sched: [1:1.00]
; HSW-NOOPT-NEXT: retq # sched: [2:1.00]
;
; JAG-NOOPT-LABEL: test_mul_by_9:
; JAG-NOOPT: # BB#0:
@ -475,7 +475,7 @@ define i32 @test_mul_by_10(i32 %x) {
; X64-HSW-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
; X64-HSW-NEXT: addl %edi, %edi # sched: [1:0.25]
; X64-HSW-NEXT: leal (%rdi,%rdi,4), %eax # sched: [1:0.50]
; X64-HSW-NEXT: retq # sched: [1:1.00]
; X64-HSW-NEXT: retq # sched: [2:1.00]
;
; X64-JAG-LABEL: test_mul_by_10:
; X64-JAG: # BB#0:
@ -491,8 +491,8 @@ define i32 @test_mul_by_10(i32 %x) {
;
; HSW-NOOPT-LABEL: test_mul_by_10:
; HSW-NOOPT: # BB#0:
; HSW-NOOPT-NEXT: imull $10, %edi, %eax # sched: [4:1.00]
; HSW-NOOPT-NEXT: retq # sched: [1:1.00]
; HSW-NOOPT-NEXT: imull $10, %edi, %eax # sched: [3:1.00]
; HSW-NOOPT-NEXT: retq # sched: [2:1.00]
;
; JAG-NOOPT-LABEL: test_mul_by_10:
; JAG-NOOPT: # BB#0:
@ -527,7 +527,7 @@ define i32 @test_mul_by_11(i32 %x) {
; X64-HSW-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
; X64-HSW-NEXT: leal (%rdi,%rdi,4), %eax # sched: [1:0.50]
; X64-HSW-NEXT: leal (%rdi,%rax,2), %eax # sched: [1:0.50]
; X64-HSW-NEXT: retq # sched: [1:1.00]
; X64-HSW-NEXT: retq # sched: [2:1.00]
;
; X64-JAG-LABEL: test_mul_by_11:
; X64-JAG: # BB#0:
@ -543,8 +543,8 @@ define i32 @test_mul_by_11(i32 %x) {
;
; HSW-NOOPT-LABEL: test_mul_by_11:
; HSW-NOOPT: # BB#0:
; HSW-NOOPT-NEXT: imull $11, %edi, %eax # sched: [4:1.00]
; HSW-NOOPT-NEXT: retq # sched: [1:1.00]
; HSW-NOOPT-NEXT: imull $11, %edi, %eax # sched: [3:1.00]
; HSW-NOOPT-NEXT: retq # sched: [2:1.00]
;
; JAG-NOOPT-LABEL: test_mul_by_11:
; JAG-NOOPT: # BB#0:
@ -577,7 +577,7 @@ define i32 @test_mul_by_12(i32 %x) {
; X64-HSW-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
; X64-HSW-NEXT: shll $2, %edi # sched: [1:0.50]
; X64-HSW-NEXT: leal (%rdi,%rdi,2), %eax # sched: [1:0.50]
; X64-HSW-NEXT: retq # sched: [1:1.00]
; X64-HSW-NEXT: retq # sched: [2:1.00]
;
; X64-JAG-LABEL: test_mul_by_12:
; X64-JAG: # BB#0:
@ -593,8 +593,8 @@ define i32 @test_mul_by_12(i32 %x) {
;
; HSW-NOOPT-LABEL: test_mul_by_12:
; HSW-NOOPT: # BB#0:
; HSW-NOOPT-NEXT: imull $12, %edi, %eax # sched: [4:1.00]
; HSW-NOOPT-NEXT: retq # sched: [1:1.00]
; HSW-NOOPT-NEXT: imull $12, %edi, %eax # sched: [3:1.00]
; HSW-NOOPT-NEXT: retq # sched: [2:1.00]
;
; JAG-NOOPT-LABEL: test_mul_by_12:
; JAG-NOOPT: # BB#0:
@ -629,7 +629,7 @@ define i32 @test_mul_by_13(i32 %x) {
; X64-HSW-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
; X64-HSW-NEXT: leal (%rdi,%rdi,2), %eax # sched: [1:0.50]
; X64-HSW-NEXT: leal (%rdi,%rax,4), %eax # sched: [1:0.50]
; X64-HSW-NEXT: retq # sched: [1:1.00]
; X64-HSW-NEXT: retq # sched: [2:1.00]
;
; X64-JAG-LABEL: test_mul_by_13:
; X64-JAG: # BB#0:
@ -645,8 +645,8 @@ define i32 @test_mul_by_13(i32 %x) {
;
; HSW-NOOPT-LABEL: test_mul_by_13:
; HSW-NOOPT: # BB#0:
; HSW-NOOPT-NEXT: imull $13, %edi, %eax # sched: [4:1.00]
; HSW-NOOPT-NEXT: retq # sched: [1:1.00]
; HSW-NOOPT-NEXT: imull $13, %edi, %eax # sched: [3:1.00]
; HSW-NOOPT-NEXT: retq # sched: [2:1.00]
;
; JAG-NOOPT-LABEL: test_mul_by_13:
; JAG-NOOPT: # BB#0:
@ -681,7 +681,7 @@ define i32 @test_mul_by_14(i32 %x) {
; X64-HSW-NEXT: leal (%rdi,%rdi,2), %eax # sched: [1:0.50]
; X64-HSW-NEXT: leal (%rdi,%rax,4), %eax # sched: [1:0.50]
; X64-HSW-NEXT: addl %edi, %eax # sched: [1:0.25]
; X64-HSW-NEXT: retq # sched: [1:1.00]
; X64-HSW-NEXT: retq # sched: [2:1.00]
;
; X64-JAG-LABEL: test_mul_by_14:
; X64-JAG: # BB#0:
@ -698,8 +698,8 @@ define i32 @test_mul_by_14(i32 %x) {
;
; HSW-NOOPT-LABEL: test_mul_by_14:
; HSW-NOOPT: # BB#0:
; HSW-NOOPT-NEXT: imull $14, %edi, %eax # sched: [4:1.00]
; HSW-NOOPT-NEXT: retq # sched: [1:1.00]
; HSW-NOOPT-NEXT: imull $14, %edi, %eax # sched: [3:1.00]
; HSW-NOOPT-NEXT: retq # sched: [2:1.00]
;
; JAG-NOOPT-LABEL: test_mul_by_14:
; JAG-NOOPT: # BB#0:
@ -732,7 +732,7 @@ define i32 @test_mul_by_15(i32 %x) {
; X64-HSW-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
; X64-HSW-NEXT: leal (%rdi,%rdi,4), %eax # sched: [1:0.50]
; X64-HSW-NEXT: leal (%rax,%rax,2), %eax # sched: [1:0.50]
; X64-HSW-NEXT: retq # sched: [1:1.00]
; X64-HSW-NEXT: retq # sched: [2:1.00]
;
; X64-JAG-LABEL: test_mul_by_15:
; X64-JAG: # BB#0:
@ -748,8 +748,8 @@ define i32 @test_mul_by_15(i32 %x) {
;
; HSW-NOOPT-LABEL: test_mul_by_15:
; HSW-NOOPT: # BB#0:
; HSW-NOOPT-NEXT: imull $15, %edi, %eax # sched: [4:1.00]
; HSW-NOOPT-NEXT: retq # sched: [1:1.00]
; HSW-NOOPT-NEXT: imull $15, %edi, %eax # sched: [3:1.00]
; HSW-NOOPT-NEXT: retq # sched: [2:1.00]
;
; JAG-NOOPT-LABEL: test_mul_by_15:
; JAG-NOOPT: # BB#0:
@ -782,7 +782,7 @@ define i32 @test_mul_by_16(i32 %x) {
; X64-HSW: # BB#0:
; X64-HSW-NEXT: shll $4, %edi # sched: [1:0.50]
; X64-HSW-NEXT: movl %edi, %eax # sched: [1:0.25]
; X64-HSW-NEXT: retq # sched: [1:1.00]
; X64-HSW-NEXT: retq # sched: [2:1.00]
;
; X64-JAG-LABEL: test_mul_by_16:
; X64-JAG: # BB#0:
@ -800,7 +800,7 @@ define i32 @test_mul_by_16(i32 %x) {
; HSW-NOOPT: # BB#0:
; HSW-NOOPT-NEXT: shll $4, %edi # sched: [1:0.50]
; HSW-NOOPT-NEXT: movl %edi, %eax # sched: [1:0.25]
; HSW-NOOPT-NEXT: retq # sched: [1:1.00]
; HSW-NOOPT-NEXT: retq # sched: [2:1.00]
;
; JAG-NOOPT-LABEL: test_mul_by_16:
; JAG-NOOPT: # BB#0:
@ -838,7 +838,7 @@ define i32 @test_mul_by_17(i32 %x) {
; X64-HSW-NEXT: movl %edi, %eax # sched: [1:0.25]
; X64-HSW-NEXT: shll $4, %eax # sched: [1:0.50]
; X64-HSW-NEXT: leal (%rax,%rdi), %eax # sched: [1:0.50]
; X64-HSW-NEXT: retq # sched: [1:1.00]
; X64-HSW-NEXT: retq # sched: [2:1.00]
;
; X64-JAG-LABEL: test_mul_by_17:
; X64-JAG: # BB#0:
@ -855,8 +855,8 @@ define i32 @test_mul_by_17(i32 %x) {
;
; HSW-NOOPT-LABEL: test_mul_by_17:
; HSW-NOOPT: # BB#0:
; HSW-NOOPT-NEXT: imull $17, %edi, %eax # sched: [4:1.00]
; HSW-NOOPT-NEXT: retq # sched: [1:1.00]
; HSW-NOOPT-NEXT: imull $17, %edi, %eax # sched: [3:1.00]
; HSW-NOOPT-NEXT: retq # sched: [2:1.00]
;
; JAG-NOOPT-LABEL: test_mul_by_17:
; JAG-NOOPT: # BB#0:
@ -892,7 +892,7 @@ define i32 @test_mul_by_18(i32 %x) {
; X64-HSW-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
; X64-HSW-NEXT: addl %edi, %edi # sched: [1:0.25]
; X64-HSW-NEXT: leal (%rdi,%rdi,8), %eax # sched: [1:0.50]
; X64-HSW-NEXT: retq # sched: [1:1.00]
; X64-HSW-NEXT: retq # sched: [2:1.00]
;
; X64-JAG-LABEL: test_mul_by_18:
; X64-JAG: # BB#0:
@ -908,8 +908,8 @@ define i32 @test_mul_by_18(i32 %x) {
;
; HSW-NOOPT-LABEL: test_mul_by_18:
; HSW-NOOPT: # BB#0:
; HSW-NOOPT-NEXT: imull $18, %edi, %eax # sched: [4:1.00]
; HSW-NOOPT-NEXT: retq # sched: [1:1.00]
; HSW-NOOPT-NEXT: imull $18, %edi, %eax # sched: [3:1.00]
; HSW-NOOPT-NEXT: retq # sched: [2:1.00]
;
; JAG-NOOPT-LABEL: test_mul_by_18:
; JAG-NOOPT: # BB#0:
@ -946,7 +946,7 @@ define i32 @test_mul_by_19(i32 %x) {
; X64-HSW-NEXT: leal (%rdi,%rdi,4), %eax # sched: [1:0.50]
; X64-HSW-NEXT: shll $2, %eax # sched: [1:0.50]
; X64-HSW-NEXT: subl %edi, %eax # sched: [1:0.25]
; X64-HSW-NEXT: retq # sched: [1:1.00]
; X64-HSW-NEXT: retq # sched: [2:1.00]
;
; X64-JAG-LABEL: test_mul_by_19:
; X64-JAG: # BB#0:
@ -963,8 +963,8 @@ define i32 @test_mul_by_19(i32 %x) {
;
; HSW-NOOPT-LABEL: test_mul_by_19:
; HSW-NOOPT: # BB#0:
; HSW-NOOPT-NEXT: imull $19, %edi, %eax # sched: [4:1.00]
; HSW-NOOPT-NEXT: retq # sched: [1:1.00]
; HSW-NOOPT-NEXT: imull $19, %edi, %eax # sched: [3:1.00]
; HSW-NOOPT-NEXT: retq # sched: [2:1.00]
;
; JAG-NOOPT-LABEL: test_mul_by_19:
; JAG-NOOPT: # BB#0:
@ -997,7 +997,7 @@ define i32 @test_mul_by_20(i32 %x) {
; X64-HSW-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
; X64-HSW-NEXT: shll $2, %edi # sched: [1:0.50]
; X64-HSW-NEXT: leal (%rdi,%rdi,4), %eax # sched: [1:0.50]
; X64-HSW-NEXT: retq # sched: [1:1.00]
; X64-HSW-NEXT: retq # sched: [2:1.00]
;
; X64-JAG-LABEL: test_mul_by_20:
; X64-JAG: # BB#0:
@ -1013,8 +1013,8 @@ define i32 @test_mul_by_20(i32 %x) {
;
; HSW-NOOPT-LABEL: test_mul_by_20:
; HSW-NOOPT: # BB#0:
; HSW-NOOPT-NEXT: imull $20, %edi, %eax # sched: [4:1.00]
; HSW-NOOPT-NEXT: retq # sched: [1:1.00]
; HSW-NOOPT-NEXT: imull $20, %edi, %eax # sched: [3:1.00]
; HSW-NOOPT-NEXT: retq # sched: [2:1.00]
;
; JAG-NOOPT-LABEL: test_mul_by_20:
; JAG-NOOPT: # BB#0:
@ -1049,7 +1049,7 @@ define i32 @test_mul_by_21(i32 %x) {
; X64-HSW-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
; X64-HSW-NEXT: leal (%rdi,%rdi,4), %eax # sched: [1:0.50]
; X64-HSW-NEXT: leal (%rdi,%rax,4), %eax # sched: [1:0.50]
; X64-HSW-NEXT: retq # sched: [1:1.00]
; X64-HSW-NEXT: retq # sched: [2:1.00]
;
; X64-JAG-LABEL: test_mul_by_21:
; X64-JAG: # BB#0:
@ -1065,8 +1065,8 @@ define i32 @test_mul_by_21(i32 %x) {
;
; HSW-NOOPT-LABEL: test_mul_by_21:
; HSW-NOOPT: # BB#0:
; HSW-NOOPT-NEXT: imull $21, %edi, %eax # sched: [4:1.00]
; HSW-NOOPT-NEXT: retq # sched: [1:1.00]
; HSW-NOOPT-NEXT: imull $21, %edi, %eax # sched: [3:1.00]
; HSW-NOOPT-NEXT: retq # sched: [2:1.00]
;
; JAG-NOOPT-LABEL: test_mul_by_21:
; JAG-NOOPT: # BB#0:
@ -1101,7 +1101,7 @@ define i32 @test_mul_by_22(i32 %x) {
; X64-HSW-NEXT: leal (%rdi,%rdi,4), %eax # sched: [1:0.50]
; X64-HSW-NEXT: leal (%rdi,%rax,4), %eax # sched: [1:0.50]
; X64-HSW-NEXT: addl %edi, %eax # sched: [1:0.25]
; X64-HSW-NEXT: retq # sched: [1:1.00]
; X64-HSW-NEXT: retq # sched: [2:1.00]
;
; X64-JAG-LABEL: test_mul_by_22:
; X64-JAG: # BB#0:
@ -1118,8 +1118,8 @@ define i32 @test_mul_by_22(i32 %x) {
;
; HSW-NOOPT-LABEL: test_mul_by_22:
; HSW-NOOPT: # BB#0:
; HSW-NOOPT-NEXT: imull $22, %edi, %eax # sched: [4:1.00]
; HSW-NOOPT-NEXT: retq # sched: [1:1.00]
; HSW-NOOPT-NEXT: imull $22, %edi, %eax # sched: [3:1.00]
; HSW-NOOPT-NEXT: retq # sched: [2:1.00]
;
; JAG-NOOPT-LABEL: test_mul_by_22:
; JAG-NOOPT: # BB#0:
@ -1154,7 +1154,7 @@ define i32 @test_mul_by_23(i32 %x) {
; X64-HSW-NEXT: leal (%rdi,%rdi,2), %eax # sched: [1:0.50]
; X64-HSW-NEXT: shll $3, %eax # sched: [1:0.50]
; X64-HSW-NEXT: subl %edi, %eax # sched: [1:0.25]
; X64-HSW-NEXT: retq # sched: [1:1.00]
; X64-HSW-NEXT: retq # sched: [2:1.00]
;
; X64-JAG-LABEL: test_mul_by_23:
; X64-JAG: # BB#0:
@ -1171,8 +1171,8 @@ define i32 @test_mul_by_23(i32 %x) {
;
; HSW-NOOPT-LABEL: test_mul_by_23:
; HSW-NOOPT: # BB#0:
; HSW-NOOPT-NEXT: imull $23, %edi, %eax # sched: [4:1.00]
; HSW-NOOPT-NEXT: retq # sched: [1:1.00]
; HSW-NOOPT-NEXT: imull $23, %edi, %eax # sched: [3:1.00]
; HSW-NOOPT-NEXT: retq # sched: [2:1.00]
;
; JAG-NOOPT-LABEL: test_mul_by_23:
; JAG-NOOPT: # BB#0:
@ -1205,7 +1205,7 @@ define i32 @test_mul_by_24(i32 %x) {
; X64-HSW-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
; X64-HSW-NEXT: shll $3, %edi # sched: [1:0.50]
; X64-HSW-NEXT: leal (%rdi,%rdi,2), %eax # sched: [1:0.50]
; X64-HSW-NEXT: retq # sched: [1:1.00]
; X64-HSW-NEXT: retq # sched: [2:1.00]
;
; X64-JAG-LABEL: test_mul_by_24:
; X64-JAG: # BB#0:
@ -1221,8 +1221,8 @@ define i32 @test_mul_by_24(i32 %x) {
;
; HSW-NOOPT-LABEL: test_mul_by_24:
; HSW-NOOPT: # BB#0:
; HSW-NOOPT-NEXT: imull $24, %edi, %eax # sched: [4:1.00]
; HSW-NOOPT-NEXT: retq # sched: [1:1.00]
; HSW-NOOPT-NEXT: imull $24, %edi, %eax # sched: [3:1.00]
; HSW-NOOPT-NEXT: retq # sched: [2:1.00]
;
; JAG-NOOPT-LABEL: test_mul_by_24:
; JAG-NOOPT: # BB#0:
@ -1257,7 +1257,7 @@ define i32 @test_mul_by_25(i32 %x) {
; X64-HSW-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
; X64-HSW-NEXT: leal (%rdi,%rdi,4), %eax # sched: [1:0.50]
; X64-HSW-NEXT: leal (%rax,%rax,4), %eax # sched: [1:0.50]
; X64-HSW-NEXT: retq # sched: [1:1.00]
; X64-HSW-NEXT: retq # sched: [2:1.00]
;
; X64-JAG-LABEL: test_mul_by_25:
; X64-JAG: # BB#0:
@ -1273,8 +1273,8 @@ define i32 @test_mul_by_25(i32 %x) {
;
; HSW-NOOPT-LABEL: test_mul_by_25:
; HSW-NOOPT: # BB#0:
; HSW-NOOPT-NEXT: imull $25, %edi, %eax # sched: [4:1.00]
; HSW-NOOPT-NEXT: retq # sched: [1:1.00]
; HSW-NOOPT-NEXT: imull $25, %edi, %eax # sched: [3:1.00]
; HSW-NOOPT-NEXT: retq # sched: [2:1.00]
;
; JAG-NOOPT-LABEL: test_mul_by_25:
; JAG-NOOPT: # BB#0:
@ -1311,7 +1311,7 @@ define i32 @test_mul_by_26(i32 %x) {
; X64-HSW-NEXT: leal (%rdi,%rdi,8), %eax # sched: [1:0.50]
; X64-HSW-NEXT: leal (%rax,%rax,2), %eax # sched: [1:0.50]
; X64-HSW-NEXT: subl %edi, %eax # sched: [1:0.25]
; X64-HSW-NEXT: retq # sched: [1:1.00]
; X64-HSW-NEXT: retq # sched: [2:1.00]
;
; X64-JAG-LABEL: test_mul_by_26:
; X64-JAG: # BB#0:
@ -1328,8 +1328,8 @@ define i32 @test_mul_by_26(i32 %x) {
;
; HSW-NOOPT-LABEL: test_mul_by_26:
; HSW-NOOPT: # BB#0:
; HSW-NOOPT-NEXT: imull $26, %edi, %eax # sched: [4:1.00]
; HSW-NOOPT-NEXT: retq # sched: [1:1.00]
; HSW-NOOPT-NEXT: imull $26, %edi, %eax # sched: [3:1.00]
; HSW-NOOPT-NEXT: retq # sched: [2:1.00]
;
; JAG-NOOPT-LABEL: test_mul_by_26:
; JAG-NOOPT: # BB#0:
@ -1362,7 +1362,7 @@ define i32 @test_mul_by_27(i32 %x) {
; X64-HSW-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
; X64-HSW-NEXT: leal (%rdi,%rdi,8), %eax # sched: [1:0.50]
; X64-HSW-NEXT: leal (%rax,%rax,2), %eax # sched: [1:0.50]
; X64-HSW-NEXT: retq # sched: [1:1.00]
; X64-HSW-NEXT: retq # sched: [2:1.00]
;
; X64-JAG-LABEL: test_mul_by_27:
; X64-JAG: # BB#0:
@ -1378,8 +1378,8 @@ define i32 @test_mul_by_27(i32 %x) {
;
; HSW-NOOPT-LABEL: test_mul_by_27:
; HSW-NOOPT: # BB#0:
; HSW-NOOPT-NEXT: imull $27, %edi, %eax # sched: [4:1.00]
; HSW-NOOPT-NEXT: retq # sched: [1:1.00]
; HSW-NOOPT-NEXT: imull $27, %edi, %eax # sched: [3:1.00]
; HSW-NOOPT-NEXT: retq # sched: [2:1.00]
;
; JAG-NOOPT-LABEL: test_mul_by_27:
; JAG-NOOPT: # BB#0:
@ -1416,7 +1416,7 @@ define i32 @test_mul_by_28(i32 %x) {
; X64-HSW-NEXT: leal (%rdi,%rdi,8), %eax # sched: [1:0.50]
; X64-HSW-NEXT: leal (%rax,%rax,2), %eax # sched: [1:0.50]
; X64-HSW-NEXT: addl %edi, %eax # sched: [1:0.25]
; X64-HSW-NEXT: retq # sched: [1:1.00]
; X64-HSW-NEXT: retq # sched: [2:1.00]
;
; X64-JAG-LABEL: test_mul_by_28:
; X64-JAG: # BB#0:
@ -1433,8 +1433,8 @@ define i32 @test_mul_by_28(i32 %x) {
;
; HSW-NOOPT-LABEL: test_mul_by_28:
; HSW-NOOPT: # BB#0:
; HSW-NOOPT-NEXT: imull $28, %edi, %eax # sched: [4:1.00]
; HSW-NOOPT-NEXT: retq # sched: [1:1.00]
; HSW-NOOPT-NEXT: imull $28, %edi, %eax # sched: [3:1.00]
; HSW-NOOPT-NEXT: retq # sched: [2:1.00]
;
; JAG-NOOPT-LABEL: test_mul_by_28:
; JAG-NOOPT: # BB#0:
@ -1471,7 +1471,7 @@ define i32 @test_mul_by_29(i32 %x) {
; X64-HSW-NEXT: leal (%rax,%rax,2), %eax # sched: [1:0.50]
; X64-HSW-NEXT: addl %edi, %eax # sched: [1:0.25]
; X64-HSW-NEXT: addl %edi, %eax # sched: [1:0.25]
; X64-HSW-NEXT: retq # sched: [1:1.00]
; X64-HSW-NEXT: retq # sched: [2:1.00]
;
; X64-JAG-LABEL: test_mul_by_29:
; X64-JAG: # BB#0:
@ -1489,8 +1489,8 @@ define i32 @test_mul_by_29(i32 %x) {
;
; HSW-NOOPT-LABEL: test_mul_by_29:
; HSW-NOOPT: # BB#0:
; HSW-NOOPT-NEXT: imull $29, %edi, %eax # sched: [4:1.00]
; HSW-NOOPT-NEXT: retq # sched: [1:1.00]
; HSW-NOOPT-NEXT: imull $29, %edi, %eax # sched: [3:1.00]
; HSW-NOOPT-NEXT: retq # sched: [2:1.00]
;
; JAG-NOOPT-LABEL: test_mul_by_29:
; JAG-NOOPT: # BB#0:
@ -1526,7 +1526,7 @@ define i32 @test_mul_by_30(i32 %x) {
; X64-HSW-NEXT: shll $5, %eax # sched: [1:0.50]
; X64-HSW-NEXT: subl %edi, %eax # sched: [1:0.25]
; X64-HSW-NEXT: subl %edi, %eax # sched: [1:0.25]
; X64-HSW-NEXT: retq # sched: [1:1.00]
; X64-HSW-NEXT: retq # sched: [2:1.00]
;
; X64-JAG-LABEL: test_mul_by_30:
; X64-JAG: # BB#0:
@ -1543,8 +1543,8 @@ define i32 @test_mul_by_30(i32 %x) {
;
; HSW-NOOPT-LABEL: test_mul_by_30:
; HSW-NOOPT: # BB#0:
; HSW-NOOPT-NEXT: imull $30, %edi, %eax # sched: [4:1.00]
; HSW-NOOPT-NEXT: retq # sched: [1:1.00]
; HSW-NOOPT-NEXT: imull $30, %edi, %eax # sched: [3:1.00]
; HSW-NOOPT-NEXT: retq # sched: [2:1.00]
;
; JAG-NOOPT-LABEL: test_mul_by_30:
; JAG-NOOPT: # BB#0:
@ -1578,7 +1578,7 @@ define i32 @test_mul_by_31(i32 %x) {
; X64-HSW-NEXT: movl %edi, %eax # sched: [1:0.25]
; X64-HSW-NEXT: shll $5, %eax # sched: [1:0.50]
; X64-HSW-NEXT: subl %edi, %eax # sched: [1:0.25]
; X64-HSW-NEXT: retq # sched: [1:1.00]
; X64-HSW-NEXT: retq # sched: [2:1.00]
;
; X64-JAG-LABEL: test_mul_by_31:
; X64-JAG: # BB#0:
@ -1594,8 +1594,8 @@ define i32 @test_mul_by_31(i32 %x) {
;
; HSW-NOOPT-LABEL: test_mul_by_31:
; HSW-NOOPT: # BB#0:
; HSW-NOOPT-NEXT: imull $31, %edi, %eax # sched: [4:1.00]
; HSW-NOOPT-NEXT: retq # sched: [1:1.00]
; HSW-NOOPT-NEXT: imull $31, %edi, %eax # sched: [3:1.00]
; HSW-NOOPT-NEXT: retq # sched: [2:1.00]
;
; JAG-NOOPT-LABEL: test_mul_by_31:
; JAG-NOOPT: # BB#0:
@ -1628,7 +1628,7 @@ define i32 @test_mul_by_32(i32 %x) {
; X64-HSW: # BB#0:
; X64-HSW-NEXT: shll $5, %edi # sched: [1:0.50]
; X64-HSW-NEXT: movl %edi, %eax # sched: [1:0.25]
; X64-HSW-NEXT: retq # sched: [1:1.00]
; X64-HSW-NEXT: retq # sched: [2:1.00]
;
; X64-JAG-LABEL: test_mul_by_32:
; X64-JAG: # BB#0:
@ -1646,7 +1646,7 @@ define i32 @test_mul_by_32(i32 %x) {
; HSW-NOOPT: # BB#0:
; HSW-NOOPT-NEXT: shll $5, %edi # sched: [1:0.50]
; HSW-NOOPT-NEXT: movl %edi, %eax # sched: [1:0.25]
; HSW-NOOPT-NEXT: retq # sched: [1:1.00]
; HSW-NOOPT-NEXT: retq # sched: [2:1.00]
;
; JAG-NOOPT-LABEL: test_mul_by_32:
; JAG-NOOPT: # BB#0:
@ -1687,7 +1687,7 @@ define i32 @test_mul_spec(i32 %x) nounwind {
; X64-HSW-NEXT: leal (%rdi,%rdi,4), %eax # sched: [1:0.50]
; X64-HSW-NEXT: addl $2, %eax # sched: [1:0.25]
; X64-HSW-NEXT: imull %ecx, %eax # sched: [4:1.00]
; X64-HSW-NEXT: retq # sched: [1:1.00]
; X64-HSW-NEXT: retq # sched: [2:1.00]
;
; X64-JAG-LABEL: test_mul_spec:
; X64-JAG: # BB#0:
@ -1713,7 +1713,7 @@ define i32 @test_mul_spec(i32 %x) nounwind {
; HSW-NOOPT-NEXT: leal (%rdi,%rdi,4), %eax # sched: [1:0.50]
; HSW-NOOPT-NEXT: addl $2, %eax # sched: [1:0.25]
; HSW-NOOPT-NEXT: imull %ecx, %eax # sched: [4:1.00]
; HSW-NOOPT-NEXT: retq # sched: [1:1.00]
; HSW-NOOPT-NEXT: retq # sched: [2:1.00]
;
; JAG-NOOPT-LABEL: test_mul_spec:
; JAG-NOOPT: # BB#0:

View File

@ -18,7 +18,7 @@ define i64 @test_mul_by_1(i64 %x) nounwind {
; X64-HSW-LABEL: test_mul_by_1:
; X64-HSW: # BB#0:
; X64-HSW-NEXT: movq %rdi, %rax # sched: [1:0.25]
; X64-HSW-NEXT: retq # sched: [1:1.00]
; X64-HSW-NEXT: retq # sched: [2:1.00]
;
; X64-JAG-LABEL: test_mul_by_1:
; X64-JAG: # BB#0:
@ -34,7 +34,7 @@ define i64 @test_mul_by_1(i64 %x) nounwind {
; HSW-NOOPT-LABEL: test_mul_by_1:
; HSW-NOOPT: # BB#0:
; HSW-NOOPT-NEXT: movq %rdi, %rax # sched: [1:0.25]
; HSW-NOOPT-NEXT: retq # sched: [1:1.00]
; HSW-NOOPT-NEXT: retq # sched: [2:1.00]
;
; JAG-NOOPT-LABEL: test_mul_by_1:
; JAG-NOOPT: # BB#0:
@ -66,7 +66,7 @@ define i64 @test_mul_by_2(i64 %x) {
; X64-HSW-LABEL: test_mul_by_2:
; X64-HSW: # BB#0:
; X64-HSW-NEXT: leaq (%rdi,%rdi), %rax # sched: [1:0.50]
; X64-HSW-NEXT: retq # sched: [1:1.00]
; X64-HSW-NEXT: retq # sched: [2:1.00]
;
; X64-JAG-LABEL: test_mul_by_2:
; X64-JAG: # BB#0:
@ -84,7 +84,7 @@ define i64 @test_mul_by_2(i64 %x) {
; HSW-NOOPT-LABEL: test_mul_by_2:
; HSW-NOOPT: # BB#0:
; HSW-NOOPT-NEXT: leaq (%rdi,%rdi), %rax # sched: [1:0.50]
; HSW-NOOPT-NEXT: retq # sched: [1:1.00]
; HSW-NOOPT-NEXT: retq # sched: [2:1.00]
;
; JAG-NOOPT-LABEL: test_mul_by_2:
; JAG-NOOPT: # BB#0:
@ -116,7 +116,7 @@ define i64 @test_mul_by_3(i64 %x) {
; X64-HSW-LABEL: test_mul_by_3:
; X64-HSW: # BB#0:
; X64-HSW-NEXT: leaq (%rdi,%rdi,2), %rax # sched: [1:0.50]
; X64-HSW-NEXT: retq # sched: [1:1.00]
; X64-HSW-NEXT: retq # sched: [2:1.00]
;
; X64-JAG-LABEL: test_mul_by_3:
; X64-JAG: # BB#0:
@ -134,7 +134,7 @@ define i64 @test_mul_by_3(i64 %x) {
; HSW-NOOPT-LABEL: test_mul_by_3:
; HSW-NOOPT: # BB#0:
; HSW-NOOPT-NEXT: leaq (%rdi,%rdi,2), %rax # sched: [1:0.50]
; HSW-NOOPT-NEXT: retq # sched: [1:1.00]
; HSW-NOOPT-NEXT: retq # sched: [2:1.00]
;
; JAG-NOOPT-LABEL: test_mul_by_3:
; JAG-NOOPT: # BB#0:
@ -166,7 +166,7 @@ define i64 @test_mul_by_4(i64 %x) {
; X64-HSW-LABEL: test_mul_by_4:
; X64-HSW: # BB#0:
; X64-HSW-NEXT: leaq (,%rdi,4), %rax # sched: [1:0.50]
; X64-HSW-NEXT: retq # sched: [1:1.00]
; X64-HSW-NEXT: retq # sched: [2:1.00]
;
; X64-JAG-LABEL: test_mul_by_4:
; X64-JAG: # BB#0:
@ -184,7 +184,7 @@ define i64 @test_mul_by_4(i64 %x) {
; HSW-NOOPT-LABEL: test_mul_by_4:
; HSW-NOOPT: # BB#0:
; HSW-NOOPT-NEXT: leaq (,%rdi,4), %rax # sched: [1:0.50]
; HSW-NOOPT-NEXT: retq # sched: [1:1.00]
; HSW-NOOPT-NEXT: retq # sched: [2:1.00]
;
; JAG-NOOPT-LABEL: test_mul_by_4:
; JAG-NOOPT: # BB#0:
@ -216,7 +216,7 @@ define i64 @test_mul_by_5(i64 %x) {
; X64-HSW-LABEL: test_mul_by_5:
; X64-HSW: # BB#0:
; X64-HSW-NEXT: leaq (%rdi,%rdi,4), %rax # sched: [1:0.50]
; X64-HSW-NEXT: retq # sched: [1:1.00]
; X64-HSW-NEXT: retq # sched: [2:1.00]
;
; X64-JAG-LABEL: test_mul_by_5:
; X64-JAG: # BB#0:
@ -234,7 +234,7 @@ define i64 @test_mul_by_5(i64 %x) {
; HSW-NOOPT-LABEL: test_mul_by_5:
; HSW-NOOPT: # BB#0:
; HSW-NOOPT-NEXT: leaq (%rdi,%rdi,4), %rax # sched: [1:0.50]
; HSW-NOOPT-NEXT: retq # sched: [1:1.00]
; HSW-NOOPT-NEXT: retq # sched: [2:1.00]
;
; JAG-NOOPT-LABEL: test_mul_by_5:
; JAG-NOOPT: # BB#0:
@ -268,7 +268,7 @@ define i64 @test_mul_by_6(i64 %x) {
; X64-HSW: # BB#0:
; X64-HSW-NEXT: addq %rdi, %rdi # sched: [1:0.25]
; X64-HSW-NEXT: leaq (%rdi,%rdi,2), %rax # sched: [1:0.50]
; X64-HSW-NEXT: retq # sched: [1:1.00]
; X64-HSW-NEXT: retq # sched: [2:1.00]
;
; X64-JAG-LABEL: test_mul_by_6:
; X64-JAG: # BB#0:
@ -287,7 +287,7 @@ define i64 @test_mul_by_6(i64 %x) {
; HSW-NOOPT-LABEL: test_mul_by_6:
; HSW-NOOPT: # BB#0:
; HSW-NOOPT-NEXT: imulq $6, %rdi, %rax # sched: [3:1.00]
; HSW-NOOPT-NEXT: retq # sched: [1:1.00]
; HSW-NOOPT-NEXT: retq # sched: [2:1.00]
;
; JAG-NOOPT-LABEL: test_mul_by_6:
; JAG-NOOPT: # BB#0:
@ -323,7 +323,7 @@ define i64 @test_mul_by_7(i64 %x) {
; X64-HSW: # BB#0:
; X64-HSW-NEXT: leaq (,%rdi,8), %rax # sched: [1:0.50]
; X64-HSW-NEXT: subq %rdi, %rax # sched: [1:0.25]
; X64-HSW-NEXT: retq # sched: [1:1.00]
; X64-HSW-NEXT: retq # sched: [2:1.00]
;
; X64-JAG-LABEL: test_mul_by_7:
; X64-JAG: # BB#0:
@ -342,7 +342,7 @@ define i64 @test_mul_by_7(i64 %x) {
; HSW-NOOPT-LABEL: test_mul_by_7:
; HSW-NOOPT: # BB#0:
; HSW-NOOPT-NEXT: imulq $7, %rdi, %rax # sched: [3:1.00]
; HSW-NOOPT-NEXT: retq # sched: [1:1.00]
; HSW-NOOPT-NEXT: retq # sched: [2:1.00]
;
; JAG-NOOPT-LABEL: test_mul_by_7:
; JAG-NOOPT: # BB#0:
@ -375,7 +375,7 @@ define i64 @test_mul_by_8(i64 %x) {
; X64-HSW-LABEL: test_mul_by_8:
; X64-HSW: # BB#0:
; X64-HSW-NEXT: leaq (,%rdi,8), %rax # sched: [1:0.50]
; X64-HSW-NEXT: retq # sched: [1:1.00]
; X64-HSW-NEXT: retq # sched: [2:1.00]
;
; X64-JAG-LABEL: test_mul_by_8:
; X64-JAG: # BB#0:
@ -393,7 +393,7 @@ define i64 @test_mul_by_8(i64 %x) {
; HSW-NOOPT-LABEL: test_mul_by_8:
; HSW-NOOPT: # BB#0:
; HSW-NOOPT-NEXT: leaq (,%rdi,8), %rax # sched: [1:0.50]
; HSW-NOOPT-NEXT: retq # sched: [1:1.00]
; HSW-NOOPT-NEXT: retq # sched: [2:1.00]
;
; JAG-NOOPT-LABEL: test_mul_by_8:
; JAG-NOOPT: # BB#0:
@ -425,7 +425,7 @@ define i64 @test_mul_by_9(i64 %x) {
; X64-HSW-LABEL: test_mul_by_9:
; X64-HSW: # BB#0:
; X64-HSW-NEXT: leaq (%rdi,%rdi,8), %rax # sched: [1:0.50]
; X64-HSW-NEXT: retq # sched: [1:1.00]
; X64-HSW-NEXT: retq # sched: [2:1.00]
;
; X64-JAG-LABEL: test_mul_by_9:
; X64-JAG: # BB#0:
@ -443,7 +443,7 @@ define i64 @test_mul_by_9(i64 %x) {
; HSW-NOOPT-LABEL: test_mul_by_9:
; HSW-NOOPT: # BB#0:
; HSW-NOOPT-NEXT: leaq (%rdi,%rdi,8), %rax # sched: [1:0.50]
; HSW-NOOPT-NEXT: retq # sched: [1:1.00]
; HSW-NOOPT-NEXT: retq # sched: [2:1.00]
;
; JAG-NOOPT-LABEL: test_mul_by_9:
; JAG-NOOPT: # BB#0:
@ -477,7 +477,7 @@ define i64 @test_mul_by_10(i64 %x) {
; X64-HSW: # BB#0:
; X64-HSW-NEXT: addq %rdi, %rdi # sched: [1:0.25]
; X64-HSW-NEXT: leaq (%rdi,%rdi,4), %rax # sched: [1:0.50]
; X64-HSW-NEXT: retq # sched: [1:1.00]
; X64-HSW-NEXT: retq # sched: [2:1.00]
;
; X64-JAG-LABEL: test_mul_by_10:
; X64-JAG: # BB#0:
@ -496,7 +496,7 @@ define i64 @test_mul_by_10(i64 %x) {
; HSW-NOOPT-LABEL: test_mul_by_10:
; HSW-NOOPT: # BB#0:
; HSW-NOOPT-NEXT: imulq $10, %rdi, %rax # sched: [3:1.00]
; HSW-NOOPT-NEXT: retq # sched: [1:1.00]
; HSW-NOOPT-NEXT: retq # sched: [2:1.00]
;
; JAG-NOOPT-LABEL: test_mul_by_10:
; JAG-NOOPT: # BB#0:
@ -532,7 +532,7 @@ define i64 @test_mul_by_11(i64 %x) {
; X64-HSW: # BB#0:
; X64-HSW-NEXT: leaq (%rdi,%rdi,4), %rax # sched: [1:0.50]
; X64-HSW-NEXT: leaq (%rdi,%rax,2), %rax # sched: [1:0.50]
; X64-HSW-NEXT: retq # sched: [1:1.00]
; X64-HSW-NEXT: retq # sched: [2:1.00]
;
; X64-JAG-LABEL: test_mul_by_11:
; X64-JAG: # BB#0:
@ -551,7 +551,7 @@ define i64 @test_mul_by_11(i64 %x) {
; HSW-NOOPT-LABEL: test_mul_by_11:
; HSW-NOOPT: # BB#0:
; HSW-NOOPT-NEXT: imulq $11, %rdi, %rax # sched: [3:1.00]
; HSW-NOOPT-NEXT: retq # sched: [1:1.00]
; HSW-NOOPT-NEXT: retq # sched: [2:1.00]
;
; JAG-NOOPT-LABEL: test_mul_by_11:
; JAG-NOOPT: # BB#0:
@ -585,7 +585,7 @@ define i64 @test_mul_by_12(i64 %x) {
; X64-HSW: # BB#0:
; X64-HSW-NEXT: shlq $2, %rdi # sched: [1:0.50]
; X64-HSW-NEXT: leaq (%rdi,%rdi,2), %rax # sched: [1:0.50]
; X64-HSW-NEXT: retq # sched: [1:1.00]
; X64-HSW-NEXT: retq # sched: [2:1.00]
;
; X64-JAG-LABEL: test_mul_by_12:
; X64-JAG: # BB#0:
@ -604,7 +604,7 @@ define i64 @test_mul_by_12(i64 %x) {
; HSW-NOOPT-LABEL: test_mul_by_12:
; HSW-NOOPT: # BB#0:
; HSW-NOOPT-NEXT: imulq $12, %rdi, %rax # sched: [3:1.00]
; HSW-NOOPT-NEXT: retq # sched: [1:1.00]
; HSW-NOOPT-NEXT: retq # sched: [2:1.00]
;
; JAG-NOOPT-LABEL: test_mul_by_12:
; JAG-NOOPT: # BB#0:
@ -640,7 +640,7 @@ define i64 @test_mul_by_13(i64 %x) {
; X64-HSW: # BB#0:
; X64-HSW-NEXT: leaq (%rdi,%rdi,2), %rax # sched: [1:0.50]
; X64-HSW-NEXT: leaq (%rdi,%rax,4), %rax # sched: [1:0.50]
; X64-HSW-NEXT: retq # sched: [1:1.00]
; X64-HSW-NEXT: retq # sched: [2:1.00]
;
; X64-JAG-LABEL: test_mul_by_13:
; X64-JAG: # BB#0:
@ -659,7 +659,7 @@ define i64 @test_mul_by_13(i64 %x) {
; HSW-NOOPT-LABEL: test_mul_by_13:
; HSW-NOOPT: # BB#0:
; HSW-NOOPT-NEXT: imulq $13, %rdi, %rax # sched: [3:1.00]
; HSW-NOOPT-NEXT: retq # sched: [1:1.00]
; HSW-NOOPT-NEXT: retq # sched: [2:1.00]
;
; JAG-NOOPT-LABEL: test_mul_by_13:
; JAG-NOOPT: # BB#0:
@ -696,7 +696,7 @@ define i64 @test_mul_by_14(i64 %x) {
; X64-HSW-NEXT: leaq (%rdi,%rdi,2), %rax # sched: [1:0.50]
; X64-HSW-NEXT: leaq (%rdi,%rax,4), %rax # sched: [1:0.50]
; X64-HSW-NEXT: addq %rdi, %rax # sched: [1:0.25]
; X64-HSW-NEXT: retq # sched: [1:1.00]
; X64-HSW-NEXT: retq # sched: [2:1.00]
;
; X64-JAG-LABEL: test_mul_by_14:
; X64-JAG: # BB#0:
@ -716,7 +716,7 @@ define i64 @test_mul_by_14(i64 %x) {
; HSW-NOOPT-LABEL: test_mul_by_14:
; HSW-NOOPT: # BB#0:
; HSW-NOOPT-NEXT: imulq $14, %rdi, %rax # sched: [3:1.00]
; HSW-NOOPT-NEXT: retq # sched: [1:1.00]
; HSW-NOOPT-NEXT: retq # sched: [2:1.00]
;
; JAG-NOOPT-LABEL: test_mul_by_14:
; JAG-NOOPT: # BB#0:
@ -751,7 +751,7 @@ define i64 @test_mul_by_15(i64 %x) {
; X64-HSW: # BB#0:
; X64-HSW-NEXT: leaq (%rdi,%rdi,4), %rax # sched: [1:0.50]
; X64-HSW-NEXT: leaq (%rax,%rax,2), %rax # sched: [1:0.50]
; X64-HSW-NEXT: retq # sched: [1:1.00]
; X64-HSW-NEXT: retq # sched: [2:1.00]
;
; X64-JAG-LABEL: test_mul_by_15:
; X64-JAG: # BB#0:
@ -770,7 +770,7 @@ define i64 @test_mul_by_15(i64 %x) {
; HSW-NOOPT-LABEL: test_mul_by_15:
; HSW-NOOPT: # BB#0:
; HSW-NOOPT-NEXT: imulq $15, %rdi, %rax # sched: [3:1.00]
; HSW-NOOPT-NEXT: retq # sched: [1:1.00]
; HSW-NOOPT-NEXT: retq # sched: [2:1.00]
;
; JAG-NOOPT-LABEL: test_mul_by_15:
; JAG-NOOPT: # BB#0:
@ -804,7 +804,7 @@ define i64 @test_mul_by_16(i64 %x) {
; X64-HSW: # BB#0:
; X64-HSW-NEXT: shlq $4, %rdi # sched: [1:0.50]
; X64-HSW-NEXT: movq %rdi, %rax # sched: [1:0.25]
; X64-HSW-NEXT: retq # sched: [1:1.00]
; X64-HSW-NEXT: retq # sched: [2:1.00]
;
; X64-JAG-LABEL: test_mul_by_16:
; X64-JAG: # BB#0:
@ -824,7 +824,7 @@ define i64 @test_mul_by_16(i64 %x) {
; HSW-NOOPT: # BB#0:
; HSW-NOOPT-NEXT: shlq $4, %rdi # sched: [1:0.50]
; HSW-NOOPT-NEXT: movq %rdi, %rax # sched: [1:0.25]
; HSW-NOOPT-NEXT: retq # sched: [1:1.00]
; HSW-NOOPT-NEXT: retq # sched: [2:1.00]
;
; JAG-NOOPT-LABEL: test_mul_by_16:
; JAG-NOOPT: # BB#0:
@ -864,7 +864,7 @@ define i64 @test_mul_by_17(i64 %x) {
; X64-HSW-NEXT: movq %rdi, %rax # sched: [1:0.25]
; X64-HSW-NEXT: shlq $4, %rax # sched: [1:0.50]
; X64-HSW-NEXT: leaq (%rax,%rdi), %rax # sched: [1:0.50]
; X64-HSW-NEXT: retq # sched: [1:1.00]
; X64-HSW-NEXT: retq # sched: [2:1.00]
;
; X64-JAG-LABEL: test_mul_by_17:
; X64-JAG: # BB#0:
@ -884,7 +884,7 @@ define i64 @test_mul_by_17(i64 %x) {
; HSW-NOOPT-LABEL: test_mul_by_17:
; HSW-NOOPT: # BB#0:
; HSW-NOOPT-NEXT: imulq $17, %rdi, %rax # sched: [3:1.00]
; HSW-NOOPT-NEXT: retq # sched: [1:1.00]
; HSW-NOOPT-NEXT: retq # sched: [2:1.00]
;
; JAG-NOOPT-LABEL: test_mul_by_17:
; JAG-NOOPT: # BB#0:
@ -920,7 +920,7 @@ define i64 @test_mul_by_18(i64 %x) {
; X64-HSW: # BB#0:
; X64-HSW-NEXT: addq %rdi, %rdi # sched: [1:0.25]
; X64-HSW-NEXT: leaq (%rdi,%rdi,8), %rax # sched: [1:0.50]
; X64-HSW-NEXT: retq # sched: [1:1.00]
; X64-HSW-NEXT: retq # sched: [2:1.00]
;
; X64-JAG-LABEL: test_mul_by_18:
; X64-JAG: # BB#0:
@ -939,7 +939,7 @@ define i64 @test_mul_by_18(i64 %x) {
; HSW-NOOPT-LABEL: test_mul_by_18:
; HSW-NOOPT: # BB#0:
; HSW-NOOPT-NEXT: imulq $18, %rdi, %rax # sched: [3:1.00]
; HSW-NOOPT-NEXT: retq # sched: [1:1.00]
; HSW-NOOPT-NEXT: retq # sched: [2:1.00]
;
; JAG-NOOPT-LABEL: test_mul_by_18:
; JAG-NOOPT: # BB#0:
@ -977,7 +977,7 @@ define i64 @test_mul_by_19(i64 %x) {
; X64-HSW-NEXT: leaq (%rdi,%rdi,4), %rax # sched: [1:0.50]
; X64-HSW-NEXT: shlq $2, %rax # sched: [1:0.50]
; X64-HSW-NEXT: subq %rdi, %rax # sched: [1:0.25]
; X64-HSW-NEXT: retq # sched: [1:1.00]
; X64-HSW-NEXT: retq # sched: [2:1.00]
;
; X64-JAG-LABEL: test_mul_by_19:
; X64-JAG: # BB#0:
@ -997,7 +997,7 @@ define i64 @test_mul_by_19(i64 %x) {
; HSW-NOOPT-LABEL: test_mul_by_19:
; HSW-NOOPT: # BB#0:
; HSW-NOOPT-NEXT: imulq $19, %rdi, %rax # sched: [3:1.00]
; HSW-NOOPT-NEXT: retq # sched: [1:1.00]
; HSW-NOOPT-NEXT: retq # sched: [2:1.00]
;
; JAG-NOOPT-LABEL: test_mul_by_19:
; JAG-NOOPT: # BB#0:
@ -1031,7 +1031,7 @@ define i64 @test_mul_by_20(i64 %x) {
; X64-HSW: # BB#0:
; X64-HSW-NEXT: shlq $2, %rdi # sched: [1:0.50]
; X64-HSW-NEXT: leaq (%rdi,%rdi,4), %rax # sched: [1:0.50]
; X64-HSW-NEXT: retq # sched: [1:1.00]
; X64-HSW-NEXT: retq # sched: [2:1.00]
;
; X64-JAG-LABEL: test_mul_by_20:
; X64-JAG: # BB#0:
@ -1050,7 +1050,7 @@ define i64 @test_mul_by_20(i64 %x) {
; HSW-NOOPT-LABEL: test_mul_by_20:
; HSW-NOOPT: # BB#0:
; HSW-NOOPT-NEXT: imulq $20, %rdi, %rax # sched: [3:1.00]
; HSW-NOOPT-NEXT: retq # sched: [1:1.00]
; HSW-NOOPT-NEXT: retq # sched: [2:1.00]
;
; JAG-NOOPT-LABEL: test_mul_by_20:
; JAG-NOOPT: # BB#0:
@ -1086,7 +1086,7 @@ define i64 @test_mul_by_21(i64 %x) {
; X64-HSW: # BB#0:
; X64-HSW-NEXT: leaq (%rdi,%rdi,4), %rax # sched: [1:0.50]
; X64-HSW-NEXT: leaq (%rdi,%rax,4), %rax # sched: [1:0.50]
; X64-HSW-NEXT: retq # sched: [1:1.00]
; X64-HSW-NEXT: retq # sched: [2:1.00]
;
; X64-JAG-LABEL: test_mul_by_21:
; X64-JAG: # BB#0:
@ -1105,7 +1105,7 @@ define i64 @test_mul_by_21(i64 %x) {
; HSW-NOOPT-LABEL: test_mul_by_21:
; HSW-NOOPT: # BB#0:
; HSW-NOOPT-NEXT: imulq $21, %rdi, %rax # sched: [3:1.00]
; HSW-NOOPT-NEXT: retq # sched: [1:1.00]
; HSW-NOOPT-NEXT: retq # sched: [2:1.00]
;
; JAG-NOOPT-LABEL: test_mul_by_21:
; JAG-NOOPT: # BB#0:
@ -1142,7 +1142,7 @@ define i64 @test_mul_by_22(i64 %x) {
; X64-HSW-NEXT: leaq (%rdi,%rdi,4), %rax # sched: [1:0.50]
; X64-HSW-NEXT: leaq (%rdi,%rax,4), %rax # sched: [1:0.50]
; X64-HSW-NEXT: addq %rdi, %rax # sched: [1:0.25]
; X64-HSW-NEXT: retq # sched: [1:1.00]
; X64-HSW-NEXT: retq # sched: [2:1.00]
;
; X64-JAG-LABEL: test_mul_by_22:
; X64-JAG: # BB#0:
@ -1162,7 +1162,7 @@ define i64 @test_mul_by_22(i64 %x) {
; HSW-NOOPT-LABEL: test_mul_by_22:
; HSW-NOOPT: # BB#0:
; HSW-NOOPT-NEXT: imulq $22, %rdi, %rax # sched: [3:1.00]
; HSW-NOOPT-NEXT: retq # sched: [1:1.00]
; HSW-NOOPT-NEXT: retq # sched: [2:1.00]
;
; JAG-NOOPT-LABEL: test_mul_by_22:
; JAG-NOOPT: # BB#0:
@ -1199,7 +1199,7 @@ define i64 @test_mul_by_23(i64 %x) {
; X64-HSW-NEXT: leaq (%rdi,%rdi,2), %rax # sched: [1:0.50]
; X64-HSW-NEXT: shlq $3, %rax # sched: [1:0.50]
; X64-HSW-NEXT: subq %rdi, %rax # sched: [1:0.25]
; X64-HSW-NEXT: retq # sched: [1:1.00]
; X64-HSW-NEXT: retq # sched: [2:1.00]
;
; X64-JAG-LABEL: test_mul_by_23:
; X64-JAG: # BB#0:
@ -1219,7 +1219,7 @@ define i64 @test_mul_by_23(i64 %x) {
; HSW-NOOPT-LABEL: test_mul_by_23:
; HSW-NOOPT: # BB#0:
; HSW-NOOPT-NEXT: imulq $23, %rdi, %rax # sched: [3:1.00]
; HSW-NOOPT-NEXT: retq # sched: [1:1.00]
; HSW-NOOPT-NEXT: retq # sched: [2:1.00]
;
; JAG-NOOPT-LABEL: test_mul_by_23:
; JAG-NOOPT: # BB#0:
@ -1253,7 +1253,7 @@ define i64 @test_mul_by_24(i64 %x) {
; X64-HSW: # BB#0:
; X64-HSW-NEXT: shlq $3, %rdi # sched: [1:0.50]
; X64-HSW-NEXT: leaq (%rdi,%rdi,2), %rax # sched: [1:0.50]
; X64-HSW-NEXT: retq # sched: [1:1.00]
; X64-HSW-NEXT: retq # sched: [2:1.00]
;
; X64-JAG-LABEL: test_mul_by_24:
; X64-JAG: # BB#0:
@ -1272,7 +1272,7 @@ define i64 @test_mul_by_24(i64 %x) {
; HSW-NOOPT-LABEL: test_mul_by_24:
; HSW-NOOPT: # BB#0:
; HSW-NOOPT-NEXT: imulq $24, %rdi, %rax # sched: [3:1.00]
; HSW-NOOPT-NEXT: retq # sched: [1:1.00]
; HSW-NOOPT-NEXT: retq # sched: [2:1.00]
;
; JAG-NOOPT-LABEL: test_mul_by_24:
; JAG-NOOPT: # BB#0:
@ -1308,7 +1308,7 @@ define i64 @test_mul_by_25(i64 %x) {
; X64-HSW: # BB#0:
; X64-HSW-NEXT: leaq (%rdi,%rdi,4), %rax # sched: [1:0.50]
; X64-HSW-NEXT: leaq (%rax,%rax,4), %rax # sched: [1:0.50]
; X64-HSW-NEXT: retq # sched: [1:1.00]
; X64-HSW-NEXT: retq # sched: [2:1.00]
;
; X64-JAG-LABEL: test_mul_by_25:
; X64-JAG: # BB#0:
@ -1327,7 +1327,7 @@ define i64 @test_mul_by_25(i64 %x) {
; HSW-NOOPT-LABEL: test_mul_by_25:
; HSW-NOOPT: # BB#0:
; HSW-NOOPT-NEXT: imulq $25, %rdi, %rax # sched: [3:1.00]
; HSW-NOOPT-NEXT: retq # sched: [1:1.00]
; HSW-NOOPT-NEXT: retq # sched: [2:1.00]
;
; JAG-NOOPT-LABEL: test_mul_by_25:
; JAG-NOOPT: # BB#0:
@ -1365,7 +1365,7 @@ define i64 @test_mul_by_26(i64 %x) {
; X64-HSW-NEXT: leaq (%rdi,%rdi,8), %rax # sched: [1:0.50]
; X64-HSW-NEXT: leaq (%rax,%rax,2), %rax # sched: [1:0.50]
; X64-HSW-NEXT: subq %rdi, %rax # sched: [1:0.25]
; X64-HSW-NEXT: retq # sched: [1:1.00]
; X64-HSW-NEXT: retq # sched: [2:1.00]
;
; X64-JAG-LABEL: test_mul_by_26:
; X64-JAG: # BB#0:
@ -1385,7 +1385,7 @@ define i64 @test_mul_by_26(i64 %x) {
; HSW-NOOPT-LABEL: test_mul_by_26:
; HSW-NOOPT: # BB#0:
; HSW-NOOPT-NEXT: imulq $26, %rdi, %rax # sched: [3:1.00]
; HSW-NOOPT-NEXT: retq # sched: [1:1.00]
; HSW-NOOPT-NEXT: retq # sched: [2:1.00]
;
; JAG-NOOPT-LABEL: test_mul_by_26:
; JAG-NOOPT: # BB#0:
@ -1420,7 +1420,7 @@ define i64 @test_mul_by_27(i64 %x) {
; X64-HSW: # BB#0:
; X64-HSW-NEXT: leaq (%rdi,%rdi,8), %rax # sched: [1:0.50]
; X64-HSW-NEXT: leaq (%rax,%rax,2), %rax # sched: [1:0.50]
; X64-HSW-NEXT: retq # sched: [1:1.00]
; X64-HSW-NEXT: retq # sched: [2:1.00]
;
; X64-JAG-LABEL: test_mul_by_27:
; X64-JAG: # BB#0:
@ -1439,7 +1439,7 @@ define i64 @test_mul_by_27(i64 %x) {
; HSW-NOOPT-LABEL: test_mul_by_27:
; HSW-NOOPT: # BB#0:
; HSW-NOOPT-NEXT: imulq $27, %rdi, %rax # sched: [3:1.00]
; HSW-NOOPT-NEXT: retq # sched: [1:1.00]
; HSW-NOOPT-NEXT: retq # sched: [2:1.00]
;
; JAG-NOOPT-LABEL: test_mul_by_27:
; JAG-NOOPT: # BB#0:
@ -1477,7 +1477,7 @@ define i64 @test_mul_by_28(i64 %x) {
; X64-HSW-NEXT: leaq (%rdi,%rdi,8), %rax # sched: [1:0.50]
; X64-HSW-NEXT: leaq (%rax,%rax,2), %rax # sched: [1:0.50]
; X64-HSW-NEXT: addq %rdi, %rax # sched: [1:0.25]
; X64-HSW-NEXT: retq # sched: [1:1.00]
; X64-HSW-NEXT: retq # sched: [2:1.00]
;
; X64-JAG-LABEL: test_mul_by_28:
; X64-JAG: # BB#0:
@ -1497,7 +1497,7 @@ define i64 @test_mul_by_28(i64 %x) {
; HSW-NOOPT-LABEL: test_mul_by_28:
; HSW-NOOPT: # BB#0:
; HSW-NOOPT-NEXT: imulq $28, %rdi, %rax # sched: [3:1.00]
; HSW-NOOPT-NEXT: retq # sched: [1:1.00]
; HSW-NOOPT-NEXT: retq # sched: [2:1.00]
;
; JAG-NOOPT-LABEL: test_mul_by_28:
; JAG-NOOPT: # BB#0:
@ -1536,7 +1536,7 @@ define i64 @test_mul_by_29(i64 %x) {
; X64-HSW-NEXT: leaq (%rax,%rax,2), %rax # sched: [1:0.50]
; X64-HSW-NEXT: addq %rdi, %rax # sched: [1:0.25]
; X64-HSW-NEXT: addq %rdi, %rax # sched: [1:0.25]
; X64-HSW-NEXT: retq # sched: [1:1.00]
; X64-HSW-NEXT: retq # sched: [2:1.00]
;
; X64-JAG-LABEL: test_mul_by_29:
; X64-JAG: # BB#0:
@ -1557,7 +1557,7 @@ define i64 @test_mul_by_29(i64 %x) {
; HSW-NOOPT-LABEL: test_mul_by_29:
; HSW-NOOPT: # BB#0:
; HSW-NOOPT-NEXT: imulq $29, %rdi, %rax # sched: [3:1.00]
; HSW-NOOPT-NEXT: retq # sched: [1:1.00]
; HSW-NOOPT-NEXT: retq # sched: [2:1.00]
;
; JAG-NOOPT-LABEL: test_mul_by_29:
; JAG-NOOPT: # BB#0:
@ -1596,7 +1596,7 @@ define i64 @test_mul_by_30(i64 %x) {
; X64-HSW-NEXT: shlq $5, %rax # sched: [1:0.50]
; X64-HSW-NEXT: subq %rdi, %rax # sched: [1:0.25]
; X64-HSW-NEXT: subq %rdi, %rax # sched: [1:0.25]
; X64-HSW-NEXT: retq # sched: [1:1.00]
; X64-HSW-NEXT: retq # sched: [2:1.00]
;
; X64-JAG-LABEL: test_mul_by_30:
; X64-JAG: # BB#0:
@ -1617,7 +1617,7 @@ define i64 @test_mul_by_30(i64 %x) {
; HSW-NOOPT-LABEL: test_mul_by_30:
; HSW-NOOPT: # BB#0:
; HSW-NOOPT-NEXT: imulq $30, %rdi, %rax # sched: [3:1.00]
; HSW-NOOPT-NEXT: retq # sched: [1:1.00]
; HSW-NOOPT-NEXT: retq # sched: [2:1.00]
;
; JAG-NOOPT-LABEL: test_mul_by_30:
; JAG-NOOPT: # BB#0:
@ -1654,7 +1654,7 @@ define i64 @test_mul_by_31(i64 %x) {
; X64-HSW-NEXT: movq %rdi, %rax # sched: [1:0.25]
; X64-HSW-NEXT: shlq $5, %rax # sched: [1:0.50]
; X64-HSW-NEXT: subq %rdi, %rax # sched: [1:0.25]
; X64-HSW-NEXT: retq # sched: [1:1.00]
; X64-HSW-NEXT: retq # sched: [2:1.00]
;
; X64-JAG-LABEL: test_mul_by_31:
; X64-JAG: # BB#0:
@ -1674,7 +1674,7 @@ define i64 @test_mul_by_31(i64 %x) {
; HSW-NOOPT-LABEL: test_mul_by_31:
; HSW-NOOPT: # BB#0:
; HSW-NOOPT-NEXT: imulq $31, %rdi, %rax # sched: [3:1.00]
; HSW-NOOPT-NEXT: retq # sched: [1:1.00]
; HSW-NOOPT-NEXT: retq # sched: [2:1.00]
;
; JAG-NOOPT-LABEL: test_mul_by_31:
; JAG-NOOPT: # BB#0:
@ -1709,7 +1709,7 @@ define i64 @test_mul_by_32(i64 %x) {
; X64-HSW: # BB#0:
; X64-HSW-NEXT: shlq $5, %rdi # sched: [1:0.50]
; X64-HSW-NEXT: movq %rdi, %rax # sched: [1:0.25]
; X64-HSW-NEXT: retq # sched: [1:1.00]
; X64-HSW-NEXT: retq # sched: [2:1.00]
;
; X64-JAG-LABEL: test_mul_by_32:
; X64-JAG: # BB#0:
@ -1729,7 +1729,7 @@ define i64 @test_mul_by_32(i64 %x) {
; HSW-NOOPT: # BB#0:
; HSW-NOOPT-NEXT: shlq $5, %rdi # sched: [1:0.50]
; HSW-NOOPT-NEXT: movq %rdi, %rax # sched: [1:0.25]
; HSW-NOOPT-NEXT: retq # sched: [1:1.00]
; HSW-NOOPT-NEXT: retq # sched: [2:1.00]
;
; JAG-NOOPT-LABEL: test_mul_by_32:
; JAG-NOOPT: # BB#0:
@ -1792,8 +1792,8 @@ define i64 @test_mul_spec(i64 %x) nounwind {
; X64-HSW-NEXT: addq $42, %rcx # sched: [1:0.25]
; X64-HSW-NEXT: leaq (%rdi,%rdi,4), %rax # sched: [1:0.50]
; X64-HSW-NEXT: addq $2, %rax # sched: [1:0.25]
; X64-HSW-NEXT: imulq %rcx, %rax # sched: [3:1.00]
; X64-HSW-NEXT: retq # sched: [1:1.00]
; X64-HSW-NEXT: imulq %rcx, %rax # sched: [4:1.00]
; X64-HSW-NEXT: retq # sched: [2:1.00]
;
; X64-JAG-LABEL: test_mul_spec:
; X64-JAG: # BB#0:
@ -1840,8 +1840,8 @@ define i64 @test_mul_spec(i64 %x) nounwind {
; HSW-NOOPT-NEXT: addq $42, %rcx # sched: [1:0.25]
; HSW-NOOPT-NEXT: leaq (%rdi,%rdi,4), %rax # sched: [1:0.50]
; HSW-NOOPT-NEXT: addq $2, %rax # sched: [1:0.25]
; HSW-NOOPT-NEXT: imulq %rcx, %rax # sched: [3:1.00]
; HSW-NOOPT-NEXT: retq # sched: [1:1.00]
; HSW-NOOPT-NEXT: imulq %rcx, %rax # sched: [4:1.00]
; HSW-NOOPT-NEXT: retq # sched: [2:1.00]
;
; JAG-NOOPT-LABEL: test_mul_spec:
; JAG-NOOPT: # BB#0:

View File

@ -37,11 +37,11 @@ define i16 @test_ctpop_i16(i16 zeroext %a0, i16 *%a1) {
;
; HASWELL-LABEL: test_ctpop_i16:
; HASWELL: # BB#0:
; HASWELL-NEXT: popcntw (%rsi), %cx # sched: [7:1.00]
; HASWELL-NEXT: popcntw (%rsi), %cx # sched: [3:1.00]
; HASWELL-NEXT: popcntw %di, %ax # sched: [3:1.00]
; HASWELL-NEXT: orl %ecx, %eax # sched: [1:0.25]
; HASWELL-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; HASWELL-NEXT: retq # sched: [1:1.00]
; HASWELL-NEXT: retq # sched: [2:1.00]
;
; BTVER2-LABEL: test_ctpop_i16:
; BTVER2: # BB#0:
@ -90,10 +90,10 @@ define i32 @test_ctpop_i32(i32 %a0, i32 *%a1) {
;
; HASWELL-LABEL: test_ctpop_i32:
; HASWELL: # BB#0:
; HASWELL-NEXT: popcntl (%rsi), %ecx # sched: [7:1.00]
; HASWELL-NEXT: popcntl (%rsi), %ecx # sched: [3:1.00]
; HASWELL-NEXT: popcntl %edi, %eax # sched: [3:1.00]
; HASWELL-NEXT: orl %ecx, %eax # sched: [1:0.25]
; HASWELL-NEXT: retq # sched: [1:1.00]
; HASWELL-NEXT: retq # sched: [2:1.00]
;
; BTVER2-LABEL: test_ctpop_i32:
; BTVER2: # BB#0:
@ -140,10 +140,10 @@ define i64 @test_ctpop_i64(i64 %a0, i64 *%a1) {
;
; HASWELL-LABEL: test_ctpop_i64:
; HASWELL: # BB#0:
; HASWELL-NEXT: popcntq (%rsi), %rcx # sched: [7:1.00]
; HASWELL-NEXT: popcntq (%rsi), %rcx # sched: [3:1.00]
; HASWELL-NEXT: popcntq %rdi, %rax # sched: [3:1.00]
; HASWELL-NEXT: orq %rcx, %rax # sched: [1:0.25]
; HASWELL-NEXT: retq # sched: [1:1.00]
; HASWELL-NEXT: retq # sched: [2:1.00]
;
; BTVER2-LABEL: test_ctpop_i64:
; BTVER2: # BB#0:

View File

@ -36,33 +36,33 @@ define void @foo() local_unnamed_addr {
; X86-NEXT: .cfi_offset %ebx, -12
; X86-NEXT: .Lcfi7:
; X86-NEXT: .cfi_offset %ebp, -8
; X86-NEXT: movl obj, %edx
; X86-NEXT: movsbl var_27, %eax
; X86-NEXT: movzwl var_2, %esi
; X86-NEXT: movl var_310, %ecx
; X86-NEXT: imull %eax, %ecx
; X86-NEXT: movl obj, %esi
; X86-NEXT: addl var_24, %ecx
; X86-NEXT: andl $4194303, %edx # imm = 0x3FFFFF
; X86-NEXT: leal (%edx,%edx), %ebx
; X86-NEXT: subl %eax, %ebx
; X86-NEXT: movl %ebx, %edi
; X86-NEXT: subl %esi, %edi
; X86-NEXT: imull %edi, %ecx
; X86-NEXT: movzwl var_2, %edi
; X86-NEXT: andl $4194303, %esi # imm = 0x3FFFFF
; X86-NEXT: leal (%esi,%esi), %edx
; X86-NEXT: subl %eax, %edx
; X86-NEXT: movl %edx, %ebx
; X86-NEXT: subl %edi, %ebx
; X86-NEXT: imull %ebx, %ecx
; X86-NEXT: addl $-1437483407, %ecx # imm = 0xAA51BE71
; X86-NEXT: movl $9, %esi
; X86-NEXT: movl $9, %edi
; X86-NEXT: xorl %ebp, %ebp
; X86-NEXT: shldl %cl, %esi, %ebp
; X86-NEXT: shlxl %ecx, %esi, %esi
; X86-NEXT: shldl %cl, %edi, %ebp
; X86-NEXT: shlxl %ecx, %edi, %edi
; X86-NEXT: testb $32, %cl
; X86-NEXT: cmovnel %esi, %ebp
; X86-NEXT: cmovnel %edi, %ebp
; X86-NEXT: movl $0, %ecx
; X86-NEXT: cmovnel %ecx, %esi
; X86-NEXT: cmpl %edx, %edi
; X86-NEXT: cmovnel %ecx, %edi
; X86-NEXT: movl %ebp, var_50+4
; X86-NEXT: movl %esi, var_50
; X86-NEXT: cmpl %esi, %ebx
; X86-NEXT: setge var_205
; X86-NEXT: imull %eax, %ebx
; X86-NEXT: movb %bl, var_218
; X86-NEXT: imull %eax, %edx
; X86-NEXT: movl %edi, var_50
; X86-NEXT: movb %dl, var_218
; X86-NEXT: popl %esi
; X86-NEXT: popl %edi
; X86-NEXT: popl %ebx
@ -71,25 +71,25 @@ define void @foo() local_unnamed_addr {
;
; X64-LABEL: foo:
; X64: # BB#0: # %entry
; X64-NEXT: movl {{.*}}(%rip), %eax
; X64-NEXT: movsbl {{.*}}(%rip), %r9d
; X64-NEXT: movsbl {{.*}}(%rip), %eax
; X64-NEXT: movl {{.*}}(%rip), %ecx
; X64-NEXT: imull %eax, %ecx
; X64-NEXT: movl {{.*}}(%rip), %edx
; X64-NEXT: addl {{.*}}(%rip), %ecx
; X64-NEXT: movzwl {{.*}}(%rip), %r8d
; X64-NEXT: movl {{.*}}(%rip), %esi
; X64-NEXT: imull %r9d, %esi
; X64-NEXT: addl {{.*}}(%rip), %esi
; X64-NEXT: andl $4194303, %eax # imm = 0x3FFFFF
; X64-NEXT: leal (%rax,%rax), %edi
; X64-NEXT: subl %r9d, %edi
; X64-NEXT: movl %edi, %edx
; X64-NEXT: subl %r8d, %edx
; X64-NEXT: imull %edx, %esi
; X64-NEXT: addl $-1437483407, %esi # imm = 0xAA51BE71
; X64-NEXT: movl $9, %ecx
; X64-NEXT: shlxq %rsi, %rcx, %rcx
; X64-NEXT: movq %rcx, {{.*}}(%rip)
; X64-NEXT: cmpl %eax, %edx
; X64-NEXT: andl $4194303, %edx # imm = 0x3FFFFF
; X64-NEXT: leal (%rdx,%rdx), %edi
; X64-NEXT: subl %eax, %edi
; X64-NEXT: movl %edi, %esi
; X64-NEXT: subl %r8d, %esi
; X64-NEXT: imull %esi, %ecx
; X64-NEXT: addl $-1437483407, %ecx # imm = 0xAA51BE71
; X64-NEXT: movl $9, %r8d
; X64-NEXT: cmpl %edx, %esi
; X64-NEXT: setge {{.*}}(%rip)
; X64-NEXT: imull %r9d, %edi
; X64-NEXT: shlxq %rcx, %r8, %rcx
; X64-NEXT: imull %eax, %edi
; X64-NEXT: movq %rcx, {{.*}}(%rip)
; X64-NEXT: movb %dil, {{.*}}(%rip)
; X64-NEXT: retq
entry:

View File

@ -51,9 +51,9 @@ define float @f32_no_estimate(float %x) #0 {
;
; HASWELL-LABEL: f32_no_estimate:
; HASWELL: # BB#0:
; HASWELL-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [4:0.50]
; HASWELL-NEXT: vdivss %xmm0, %xmm1, %xmm0 # sched: [12:1.00]
; HASWELL-NEXT: retq # sched: [1:1.00]
; HASWELL-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [1:0.50]
; HASWELL-NEXT: vdivss %xmm0, %xmm1, %xmm0 # sched: [13:1.00]
; HASWELL-NEXT: retq # sched: [2:1.00]
;
; HASWELL-NO-FMA-LABEL: f32_no_estimate:
; HASWELL-NO-FMA: # BB#0:
@ -63,9 +63,9 @@ define float @f32_no_estimate(float %x) #0 {
;
; AVX512-LABEL: f32_no_estimate:
; AVX512: # BB#0:
; AVX512-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [4:0.50]
; AVX512-NEXT: vdivss %xmm0, %xmm1, %xmm0 # sched: [12:1.00]
; AVX512-NEXT: retq # sched: [1:1.00]
; AVX512-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [1:0.50]
; AVX512-NEXT: vdivss %xmm0, %xmm1, %xmm0 # sched: [13:1.00]
; AVX512-NEXT: retq # sched: [2:1.00]
%div = fdiv fast float 1.0, %x
ret float %div
}
@ -122,9 +122,9 @@ define float @f32_one_step(float %x) #1 {
; HASWELL-LABEL: f32_one_step:
; HASWELL: # BB#0:
; HASWELL-NEXT: vrcpss %xmm0, %xmm0, %xmm1 # sched: [5:1.00]
; HASWELL-NEXT: vfnmadd213ss {{.*}}(%rip), %xmm1, %xmm0
; HASWELL-NEXT: vfmadd132ss %xmm1, %xmm1, %xmm0
; HASWELL-NEXT: retq # sched: [1:1.00]
; HASWELL-NEXT: vfnmadd213ss {{.*}}(%rip), %xmm1, %xmm0 # sched: [5:0.50]
; HASWELL-NEXT: vfmadd132ss %xmm1, %xmm1, %xmm0 # sched: [5:0.50]
; HASWELL-NEXT: retq # sched: [2:1.00]
;
; HASWELL-NO-FMA-LABEL: f32_one_step:
; HASWELL-NO-FMA: # BB#0:
@ -139,9 +139,9 @@ define float @f32_one_step(float %x) #1 {
; AVX512-LABEL: f32_one_step:
; AVX512: # BB#0:
; AVX512-NEXT: vrcp14ss %xmm0, %xmm0, %xmm1
; AVX512-NEXT: vfnmadd213ss {{.*}}(%rip), %xmm1, %xmm0
; AVX512-NEXT: vfmadd132ss %xmm1, %xmm1, %xmm0
; AVX512-NEXT: retq # sched: [1:1.00]
; AVX512-NEXT: vfnmadd213ss {{.*}}(%rip), %xmm1, %xmm0 # sched: [5:0.50]
; AVX512-NEXT: vfmadd132ss %xmm1, %xmm1, %xmm0 # sched: [5:0.50]
; AVX512-NEXT: retq # sched: [2:1.00]
%div = fdiv fast float 1.0, %x
ret float %div
}
@ -220,13 +220,13 @@ define float @f32_two_step(float %x) #2 {
; HASWELL-LABEL: f32_two_step:
; HASWELL: # BB#0:
; HASWELL-NEXT: vrcpss %xmm0, %xmm0, %xmm1 # sched: [5:1.00]
; HASWELL-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero sched: [4:0.50]
; HASWELL-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero sched: [1:0.50]
; HASWELL-NEXT: vmovaps %xmm1, %xmm3 # sched: [1:1.00]
; HASWELL-NEXT: vfnmadd213ss %xmm2, %xmm0, %xmm3
; HASWELL-NEXT: vfmadd132ss %xmm1, %xmm1, %xmm3
; HASWELL-NEXT: vfnmadd213ss %xmm2, %xmm3, %xmm0
; HASWELL-NEXT: vfmadd132ss %xmm3, %xmm3, %xmm0
; HASWELL-NEXT: retq # sched: [1:1.00]
; HASWELL-NEXT: vfnmadd213ss %xmm2, %xmm0, %xmm3 # sched: [5:0.50]
; HASWELL-NEXT: vfmadd132ss %xmm1, %xmm1, %xmm3 # sched: [5:0.50]
; HASWELL-NEXT: vfnmadd213ss %xmm2, %xmm3, %xmm0 # sched: [5:0.50]
; HASWELL-NEXT: vfmadd132ss %xmm3, %xmm3, %xmm0 # sched: [5:0.50]
; HASWELL-NEXT: retq # sched: [2:1.00]
;
; HASWELL-NO-FMA-LABEL: f32_two_step:
; HASWELL-NO-FMA: # BB#0:
@ -245,13 +245,13 @@ define float @f32_two_step(float %x) #2 {
; AVX512-LABEL: f32_two_step:
; AVX512: # BB#0:
; AVX512-NEXT: vrcp14ss %xmm0, %xmm0, %xmm1
; AVX512-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero sched: [4:0.50]
; AVX512-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero sched: [1:0.50]
; AVX512-NEXT: vmovaps %xmm1, %xmm3 # sched: [1:1.00]
; AVX512-NEXT: vfnmadd213ss %xmm2, %xmm0, %xmm3
; AVX512-NEXT: vfmadd132ss %xmm1, %xmm1, %xmm3
; AVX512-NEXT: vfnmadd213ss %xmm2, %xmm3, %xmm0
; AVX512-NEXT: vfmadd132ss %xmm3, %xmm3, %xmm0
; AVX512-NEXT: retq # sched: [1:1.00]
; AVX512-NEXT: vfnmadd213ss %xmm2, %xmm0, %xmm3 # sched: [5:0.50]
; AVX512-NEXT: vfmadd132ss %xmm1, %xmm1, %xmm3 # sched: [5:0.50]
; AVX512-NEXT: vfnmadd213ss %xmm2, %xmm3, %xmm0 # sched: [5:0.50]
; AVX512-NEXT: vfmadd132ss %xmm3, %xmm3, %xmm0 # sched: [5:0.50]
; AVX512-NEXT: retq # sched: [2:1.00]
%div = fdiv fast float 1.0, %x
ret float %div
}
@ -290,9 +290,9 @@ define <4 x float> @v4f32_no_estimate(<4 x float> %x) #0 {
;
; HASWELL-LABEL: v4f32_no_estimate:
; HASWELL: # BB#0:
; HASWELL-NEXT: vbroadcastss {{.*#+}} xmm1 = [1,1,1,1] sched: [4:0.50]
; HASWELL-NEXT: vdivps %xmm0, %xmm1, %xmm0 # sched: [12:1.00]
; HASWELL-NEXT: retq # sched: [1:1.00]
; HASWELL-NEXT: vbroadcastss {{.*#+}} xmm1 = [1,1,1,1] sched: [1:0.50]
; HASWELL-NEXT: vdivps %xmm0, %xmm1, %xmm0 # sched: [13:1.00]
; HASWELL-NEXT: retq # sched: [2:1.00]
;
; HASWELL-NO-FMA-LABEL: v4f32_no_estimate:
; HASWELL-NO-FMA: # BB#0:
@ -302,9 +302,9 @@ define <4 x float> @v4f32_no_estimate(<4 x float> %x) #0 {
;
; AVX512-LABEL: v4f32_no_estimate:
; AVX512: # BB#0:
; AVX512-NEXT: vbroadcastss {{.*#+}} xmm1 = [1,1,1,1] sched: [4:0.50]
; AVX512-NEXT: vdivps %xmm0, %xmm1, %xmm0 # sched: [12:1.00]
; AVX512-NEXT: retq # sched: [1:1.00]
; AVX512-NEXT: vbroadcastss {{.*#+}} xmm1 = [1,1,1,1] sched: [1:0.50]
; AVX512-NEXT: vdivps %xmm0, %xmm1, %xmm0 # sched: [13:1.00]
; AVX512-NEXT: retq # sched: [2:1.00]
%div = fdiv fast <4 x float> <float 1.0, float 1.0, float 1.0, float 1.0>, %x
ret <4 x float> %div
}
@ -361,10 +361,10 @@ define <4 x float> @v4f32_one_step(<4 x float> %x) #1 {
; HASWELL-LABEL: v4f32_one_step:
; HASWELL: # BB#0:
; HASWELL-NEXT: vrcpps %xmm0, %xmm1 # sched: [5:1.00]
; HASWELL-NEXT: vbroadcastss {{.*#+}} xmm2 = [1,1,1,1] sched: [4:0.50]
; HASWELL-NEXT: vfnmadd213ps %xmm2, %xmm1, %xmm0
; HASWELL-NEXT: vfmadd132ps %xmm1, %xmm1, %xmm0
; HASWELL-NEXT: retq # sched: [1:1.00]
; HASWELL-NEXT: vbroadcastss {{.*#+}} xmm2 = [1,1,1,1] sched: [1:0.50]
; HASWELL-NEXT: vfnmadd213ps %xmm2, %xmm1, %xmm0 # sched: [5:0.50]
; HASWELL-NEXT: vfmadd132ps %xmm1, %xmm1, %xmm0 # sched: [5:0.50]
; HASWELL-NEXT: retq # sched: [2:1.00]
;
; HASWELL-NO-FMA-LABEL: v4f32_one_step:
; HASWELL-NO-FMA: # BB#0:
@ -379,17 +379,17 @@ define <4 x float> @v4f32_one_step(<4 x float> %x) #1 {
; KNL-LABEL: v4f32_one_step:
; KNL: # BB#0:
; KNL-NEXT: vrcpps %xmm0, %xmm1 # sched: [5:1.00]
; KNL-NEXT: vbroadcastss {{.*#+}} xmm2 = [1,1,1,1] sched: [4:0.50]
; KNL-NEXT: vfnmadd213ps %xmm2, %xmm1, %xmm0
; KNL-NEXT: vfmadd132ps %xmm1, %xmm1, %xmm0
; KNL-NEXT: retq # sched: [1:1.00]
; KNL-NEXT: vbroadcastss {{.*#+}} xmm2 = [1,1,1,1] sched: [1:0.50]
; KNL-NEXT: vfnmadd213ps %xmm2, %xmm1, %xmm0 # sched: [5:0.50]
; KNL-NEXT: vfmadd132ps %xmm1, %xmm1, %xmm0 # sched: [5:0.50]
; KNL-NEXT: retq # sched: [2:1.00]
;
; SKX-LABEL: v4f32_one_step:
; SKX: # BB#0:
; SKX-NEXT: vrcp14ps %xmm0, %xmm1
; SKX-NEXT: vfnmadd213ps {{.*}}(%rip){1to4}, %xmm1, %xmm0
; SKX-NEXT: vfmadd132ps %xmm1, %xmm1, %xmm0
; SKX-NEXT: retq # sched: [1:1.00]
; SKX-NEXT: vfmadd132ps %xmm1, %xmm1, %xmm0 # sched: [5:0.50]
; SKX-NEXT: retq # sched: [2:1.00]
%div = fdiv fast <4 x float> <float 1.0, float 1.0, float 1.0, float 1.0>, %x
ret <4 x float> %div
}
@ -468,13 +468,13 @@ define <4 x float> @v4f32_two_step(<4 x float> %x) #2 {
; HASWELL-LABEL: v4f32_two_step:
; HASWELL: # BB#0:
; HASWELL-NEXT: vrcpps %xmm0, %xmm1 # sched: [5:1.00]
; HASWELL-NEXT: vbroadcastss {{.*#+}} xmm2 = [1,1,1,1] sched: [4:0.50]
; HASWELL-NEXT: vbroadcastss {{.*#+}} xmm2 = [1,1,1,1] sched: [1:0.50]
; HASWELL-NEXT: vmovaps %xmm1, %xmm3 # sched: [1:1.00]
; HASWELL-NEXT: vfnmadd213ps %xmm2, %xmm0, %xmm3
; HASWELL-NEXT: vfmadd132ps %xmm1, %xmm1, %xmm3
; HASWELL-NEXT: vfnmadd213ps %xmm2, %xmm3, %xmm0
; HASWELL-NEXT: vfmadd132ps %xmm3, %xmm3, %xmm0
; HASWELL-NEXT: retq # sched: [1:1.00]
; HASWELL-NEXT: vfnmadd213ps %xmm2, %xmm0, %xmm3 # sched: [5:0.50]
; HASWELL-NEXT: vfmadd132ps %xmm1, %xmm1, %xmm3 # sched: [5:0.50]
; HASWELL-NEXT: vfnmadd213ps %xmm2, %xmm3, %xmm0 # sched: [5:0.50]
; HASWELL-NEXT: vfmadd132ps %xmm3, %xmm3, %xmm0 # sched: [5:0.50]
; HASWELL-NEXT: retq # sched: [2:1.00]
;
; HASWELL-NO-FMA-LABEL: v4f32_two_step:
; HASWELL-NO-FMA: # BB#0:
@ -493,24 +493,24 @@ define <4 x float> @v4f32_two_step(<4 x float> %x) #2 {
; KNL-LABEL: v4f32_two_step:
; KNL: # BB#0:
; KNL-NEXT: vrcpps %xmm0, %xmm1 # sched: [5:1.00]
; KNL-NEXT: vbroadcastss {{.*#+}} xmm2 = [1,1,1,1] sched: [4:0.50]
; KNL-NEXT: vbroadcastss {{.*#+}} xmm2 = [1,1,1,1] sched: [1:0.50]
; KNL-NEXT: vmovaps %xmm1, %xmm3 # sched: [1:1.00]
; KNL-NEXT: vfnmadd213ps %xmm2, %xmm0, %xmm3
; KNL-NEXT: vfmadd132ps %xmm1, %xmm1, %xmm3
; KNL-NEXT: vfnmadd213ps %xmm2, %xmm3, %xmm0
; KNL-NEXT: vfmadd132ps %xmm3, %xmm3, %xmm0
; KNL-NEXT: retq # sched: [1:1.00]
; KNL-NEXT: vfnmadd213ps %xmm2, %xmm0, %xmm3 # sched: [5:0.50]
; KNL-NEXT: vfmadd132ps %xmm1, %xmm1, %xmm3 # sched: [5:0.50]
; KNL-NEXT: vfnmadd213ps %xmm2, %xmm3, %xmm0 # sched: [5:0.50]
; KNL-NEXT: vfmadd132ps %xmm3, %xmm3, %xmm0 # sched: [5:0.50]
; KNL-NEXT: retq # sched: [2:1.00]
;
; SKX-LABEL: v4f32_two_step:
; SKX: # BB#0:
; SKX-NEXT: vrcp14ps %xmm0, %xmm1
; SKX-NEXT: vbroadcastss {{.*#+}} xmm2 = [1,1,1,1] sched: [4:0.50]
; SKX-NEXT: vbroadcastss {{.*#+}} xmm2 = [1,1,1,1] sched: [1:0.50]
; SKX-NEXT: vmovaps %xmm1, %xmm3 # sched: [1:1.00]
; SKX-NEXT: vfnmadd213ps %xmm2, %xmm0, %xmm3
; SKX-NEXT: vfmadd132ps %xmm1, %xmm1, %xmm3
; SKX-NEXT: vfnmadd213ps %xmm2, %xmm3, %xmm0
; SKX-NEXT: vfmadd132ps %xmm3, %xmm3, %xmm0
; SKX-NEXT: retq # sched: [1:1.00]
; SKX-NEXT: vfnmadd213ps %xmm2, %xmm0, %xmm3 # sched: [5:0.50]
; SKX-NEXT: vfmadd132ps %xmm1, %xmm1, %xmm3 # sched: [5:0.50]
; SKX-NEXT: vfnmadd213ps %xmm2, %xmm3, %xmm0 # sched: [5:0.50]
; SKX-NEXT: vfmadd132ps %xmm3, %xmm3, %xmm0 # sched: [5:0.50]
; SKX-NEXT: retq # sched: [2:1.00]
%div = fdiv fast <4 x float> <float 1.0, float 1.0, float 1.0, float 1.0>, %x
ret <4 x float> %div
}
@ -552,9 +552,9 @@ define <8 x float> @v8f32_no_estimate(<8 x float> %x) #0 {
;
; HASWELL-LABEL: v8f32_no_estimate:
; HASWELL: # BB#0:
; HASWELL-NEXT: vbroadcastss {{.*#+}} ymm1 = [1,1,1,1,1,1,1,1] sched: [5:1.00]
; HASWELL-NEXT: vdivps %ymm0, %ymm1, %ymm0 # sched: [19:2.00]
; HASWELL-NEXT: retq # sched: [1:1.00]
; HASWELL-NEXT: vbroadcastss {{.*#+}} ymm1 = [1,1,1,1,1,1,1,1] sched: [1:0.50]
; HASWELL-NEXT: vdivps %ymm0, %ymm1, %ymm0 # sched: [21:2.00]
; HASWELL-NEXT: retq # sched: [2:1.00]
;
; HASWELL-NO-FMA-LABEL: v8f32_no_estimate:
; HASWELL-NO-FMA: # BB#0:
@ -564,9 +564,9 @@ define <8 x float> @v8f32_no_estimate(<8 x float> %x) #0 {
;
; AVX512-LABEL: v8f32_no_estimate:
; AVX512: # BB#0:
; AVX512-NEXT: vbroadcastss {{.*#+}} ymm1 = [1,1,1,1,1,1,1,1] sched: [5:1.00]
; AVX512-NEXT: vdivps %ymm0, %ymm1, %ymm0 # sched: [19:2.00]
; AVX512-NEXT: retq # sched: [1:1.00]
; AVX512-NEXT: vbroadcastss {{.*#+}} ymm1 = [1,1,1,1,1,1,1,1] sched: [1:0.50]
; AVX512-NEXT: vdivps %ymm0, %ymm1, %ymm0 # sched: [21:2.00]
; AVX512-NEXT: retq # sched: [2:1.00]
%div = fdiv fast <8 x float> <float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0>, %x
ret <8 x float> %div
}
@ -629,11 +629,11 @@ define <8 x float> @v8f32_one_step(<8 x float> %x) #1 {
;
; HASWELL-LABEL: v8f32_one_step:
; HASWELL: # BB#0:
; HASWELL-NEXT: vrcpps %ymm0, %ymm1 # sched: [7:2.00]
; HASWELL-NEXT: vbroadcastss {{.*#+}} ymm2 = [1,1,1,1,1,1,1,1] sched: [5:1.00]
; HASWELL-NEXT: vfnmadd213ps %ymm2, %ymm1, %ymm0
; HASWELL-NEXT: vfmadd132ps %ymm1, %ymm1, %ymm0
; HASWELL-NEXT: retq # sched: [1:1.00]
; HASWELL-NEXT: vrcpps %ymm0, %ymm1 # sched: [11:2.00]
; HASWELL-NEXT: vbroadcastss {{.*#+}} ymm2 = [1,1,1,1,1,1,1,1] sched: [1:0.50]
; HASWELL-NEXT: vfnmadd213ps %ymm2, %ymm1, %ymm0 # sched: [5:0.50]
; HASWELL-NEXT: vfmadd132ps %ymm1, %ymm1, %ymm0 # sched: [5:0.50]
; HASWELL-NEXT: retq # sched: [2:1.00]
;
; HASWELL-NO-FMA-LABEL: v8f32_one_step:
; HASWELL-NO-FMA: # BB#0:
@ -647,18 +647,18 @@ define <8 x float> @v8f32_one_step(<8 x float> %x) #1 {
;
; KNL-LABEL: v8f32_one_step:
; KNL: # BB#0:
; KNL-NEXT: vrcpps %ymm0, %ymm1 # sched: [7:2.00]
; KNL-NEXT: vbroadcastss {{.*#+}} ymm2 = [1,1,1,1,1,1,1,1] sched: [5:1.00]
; KNL-NEXT: vfnmadd213ps %ymm2, %ymm1, %ymm0
; KNL-NEXT: vfmadd132ps %ymm1, %ymm1, %ymm0
; KNL-NEXT: retq # sched: [1:1.00]
; KNL-NEXT: vrcpps %ymm0, %ymm1 # sched: [11:2.00]
; KNL-NEXT: vbroadcastss {{.*#+}} ymm2 = [1,1,1,1,1,1,1,1] sched: [1:0.50]
; KNL-NEXT: vfnmadd213ps %ymm2, %ymm1, %ymm0 # sched: [5:0.50]
; KNL-NEXT: vfmadd132ps %ymm1, %ymm1, %ymm0 # sched: [5:0.50]
; KNL-NEXT: retq # sched: [2:1.00]
;
; SKX-LABEL: v8f32_one_step:
; SKX: # BB#0:
; SKX-NEXT: vrcp14ps %ymm0, %ymm1
; SKX-NEXT: vfnmadd213ps {{.*}}(%rip){1to8}, %ymm1, %ymm0
; SKX-NEXT: vfmadd132ps %ymm1, %ymm1, %ymm0
; SKX-NEXT: retq # sched: [1:1.00]
; SKX-NEXT: vfmadd132ps %ymm1, %ymm1, %ymm0 # sched: [5:0.50]
; SKX-NEXT: retq # sched: [2:1.00]
%div = fdiv fast <8 x float> <float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0>, %x
ret <8 x float> %div
}
@ -749,14 +749,14 @@ define <8 x float> @v8f32_two_step(<8 x float> %x) #2 {
;
; HASWELL-LABEL: v8f32_two_step:
; HASWELL: # BB#0:
; HASWELL-NEXT: vrcpps %ymm0, %ymm1 # sched: [7:2.00]
; HASWELL-NEXT: vbroadcastss {{.*#+}} ymm2 = [1,1,1,1,1,1,1,1] sched: [5:1.00]
; HASWELL-NEXT: vrcpps %ymm0, %ymm1 # sched: [11:2.00]
; HASWELL-NEXT: vbroadcastss {{.*#+}} ymm2 = [1,1,1,1,1,1,1,1] sched: [1:0.50]
; HASWELL-NEXT: vmovaps %ymm1, %ymm3 # sched: [1:1.00]
; HASWELL-NEXT: vfnmadd213ps %ymm2, %ymm0, %ymm3
; HASWELL-NEXT: vfmadd132ps %ymm1, %ymm1, %ymm3
; HASWELL-NEXT: vfnmadd213ps %ymm2, %ymm3, %ymm0
; HASWELL-NEXT: vfmadd132ps %ymm3, %ymm3, %ymm0
; HASWELL-NEXT: retq # sched: [1:1.00]
; HASWELL-NEXT: vfnmadd213ps %ymm2, %ymm0, %ymm3 # sched: [5:0.50]
; HASWELL-NEXT: vfmadd132ps %ymm1, %ymm1, %ymm3 # sched: [5:0.50]
; HASWELL-NEXT: vfnmadd213ps %ymm2, %ymm3, %ymm0 # sched: [5:0.50]
; HASWELL-NEXT: vfmadd132ps %ymm3, %ymm3, %ymm0 # sched: [5:0.50]
; HASWELL-NEXT: retq # sched: [2:1.00]
;
; HASWELL-NO-FMA-LABEL: v8f32_two_step:
; HASWELL-NO-FMA: # BB#0:
@ -774,25 +774,25 @@ define <8 x float> @v8f32_two_step(<8 x float> %x) #2 {
;
; KNL-LABEL: v8f32_two_step:
; KNL: # BB#0:
; KNL-NEXT: vrcpps %ymm0, %ymm1 # sched: [7:2.00]
; KNL-NEXT: vbroadcastss {{.*#+}} ymm2 = [1,1,1,1,1,1,1,1] sched: [5:1.00]
; KNL-NEXT: vrcpps %ymm0, %ymm1 # sched: [11:2.00]
; KNL-NEXT: vbroadcastss {{.*#+}} ymm2 = [1,1,1,1,1,1,1,1] sched: [1:0.50]
; KNL-NEXT: vmovaps %ymm1, %ymm3 # sched: [1:1.00]
; KNL-NEXT: vfnmadd213ps %ymm2, %ymm0, %ymm3
; KNL-NEXT: vfmadd132ps %ymm1, %ymm1, %ymm3
; KNL-NEXT: vfnmadd213ps %ymm2, %ymm3, %ymm0
; KNL-NEXT: vfmadd132ps %ymm3, %ymm3, %ymm0
; KNL-NEXT: retq # sched: [1:1.00]
; KNL-NEXT: vfnmadd213ps %ymm2, %ymm0, %ymm3 # sched: [5:0.50]
; KNL-NEXT: vfmadd132ps %ymm1, %ymm1, %ymm3 # sched: [5:0.50]
; KNL-NEXT: vfnmadd213ps %ymm2, %ymm3, %ymm0 # sched: [5:0.50]
; KNL-NEXT: vfmadd132ps %ymm3, %ymm3, %ymm0 # sched: [5:0.50]
; KNL-NEXT: retq # sched: [2:1.00]
;
; SKX-LABEL: v8f32_two_step:
; SKX: # BB#0:
; SKX-NEXT: vrcp14ps %ymm0, %ymm1
; SKX-NEXT: vbroadcastss {{.*#+}} ymm2 = [1,1,1,1,1,1,1,1] sched: [5:1.00]
; SKX-NEXT: vbroadcastss {{.*#+}} ymm2 = [1,1,1,1,1,1,1,1] sched: [1:0.50]
; SKX-NEXT: vmovaps %ymm1, %ymm3 # sched: [1:1.00]
; SKX-NEXT: vfnmadd213ps %ymm2, %ymm0, %ymm3
; SKX-NEXT: vfmadd132ps %ymm1, %ymm1, %ymm3
; SKX-NEXT: vfnmadd213ps %ymm2, %ymm3, %ymm0
; SKX-NEXT: vfmadd132ps %ymm3, %ymm3, %ymm0
; SKX-NEXT: retq # sched: [1:1.00]
; SKX-NEXT: vfnmadd213ps %ymm2, %ymm0, %ymm3 # sched: [5:0.50]
; SKX-NEXT: vfmadd132ps %ymm1, %ymm1, %ymm3 # sched: [5:0.50]
; SKX-NEXT: vfnmadd213ps %ymm2, %ymm3, %ymm0 # sched: [5:0.50]
; SKX-NEXT: vfmadd132ps %ymm3, %ymm3, %ymm0 # sched: [5:0.50]
; SKX-NEXT: retq # sched: [2:1.00]
%div = fdiv fast <8 x float> <float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0>, %x
ret <8 x float> %div
}

View File

@ -45,20 +45,20 @@ define float @f32_no_step_2(float %x) #3 {
; HASWELL-LABEL: f32_no_step_2:
; HASWELL: # BB#0:
; HASWELL-NEXT: vrcpss %xmm0, %xmm0, %xmm0 # sched: [5:1.00]
; HASWELL-NEXT: vmulss {{.*}}(%rip), %xmm0, %xmm0 # sched: [9:0.50]
; HASWELL-NEXT: retq # sched: [1:1.00]
; HASWELL-NEXT: vmulss {{.*}}(%rip), %xmm0, %xmm0 # sched: [5:0.50]
; HASWELL-NEXT: retq # sched: [2:1.00]
;
; HASWELL-NO-FMA-LABEL: f32_no_step_2:
; HASWELL-NO-FMA: # BB#0:
; HASWELL-NO-FMA-NEXT: vrcpss %xmm0, %xmm0, %xmm0 # sched: [5:1.00]
; HASWELL-NO-FMA-NEXT: vmulss {{.*}}(%rip), %xmm0, %xmm0 # sched: [9:0.50]
; HASWELL-NO-FMA-NEXT: retq # sched: [1:1.00]
; HASWELL-NO-FMA-NEXT: vmulss {{.*}}(%rip), %xmm0, %xmm0 # sched: [5:0.50]
; HASWELL-NO-FMA-NEXT: retq # sched: [2:1.00]
;
; AVX512-LABEL: f32_no_step_2:
; AVX512: # BB#0:
; AVX512-NEXT: vrcp14ss %xmm0, %xmm0, %xmm0
; AVX512-NEXT: vmulss {{.*}}(%rip), %xmm0, %xmm0 # sched: [9:0.50]
; AVX512-NEXT: retq # sched: [1:1.00]
; AVX512-NEXT: vmulss {{.*}}(%rip), %xmm0, %xmm0 # sched: [5:0.50]
; AVX512-NEXT: retq # sched: [2:1.00]
%div = fdiv fast float 1234.0, %x
ret float %div
}
@ -120,29 +120,29 @@ define float @f32_one_step_2(float %x) #1 {
; HASWELL-LABEL: f32_one_step_2:
; HASWELL: # BB#0:
; HASWELL-NEXT: vrcpss %xmm0, %xmm0, %xmm1 # sched: [5:1.00]
; HASWELL-NEXT: vfnmadd213ss {{.*}}(%rip), %xmm1, %xmm0
; HASWELL-NEXT: vfmadd132ss %xmm1, %xmm1, %xmm0
; HASWELL-NEXT: vmulss {{.*}}(%rip), %xmm0, %xmm0 # sched: [9:0.50]
; HASWELL-NEXT: retq # sched: [1:1.00]
; HASWELL-NEXT: vfnmadd213ss {{.*}}(%rip), %xmm1, %xmm0 # sched: [5:0.50]
; HASWELL-NEXT: vfmadd132ss %xmm1, %xmm1, %xmm0 # sched: [5:0.50]
; HASWELL-NEXT: vmulss {{.*}}(%rip), %xmm0, %xmm0 # sched: [5:0.50]
; HASWELL-NEXT: retq # sched: [2:1.00]
;
; HASWELL-NO-FMA-LABEL: f32_one_step_2:
; HASWELL-NO-FMA: # BB#0:
; HASWELL-NO-FMA-NEXT: vrcpss %xmm0, %xmm0, %xmm1 # sched: [5:1.00]
; HASWELL-NO-FMA-NEXT: vmulss %xmm1, %xmm0, %xmm0 # sched: [5:0.50]
; HASWELL-NO-FMA-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero sched: [4:0.50]
; HASWELL-NO-FMA-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero sched: [1:0.50]
; HASWELL-NO-FMA-NEXT: vsubss %xmm0, %xmm2, %xmm0 # sched: [3:1.00]
; HASWELL-NO-FMA-NEXT: vmulss %xmm0, %xmm1, %xmm0 # sched: [5:0.50]
; HASWELL-NO-FMA-NEXT: vaddss %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
; HASWELL-NO-FMA-NEXT: vmulss {{.*}}(%rip), %xmm0, %xmm0 # sched: [9:0.50]
; HASWELL-NO-FMA-NEXT: retq # sched: [1:1.00]
; HASWELL-NO-FMA-NEXT: vmulss {{.*}}(%rip), %xmm0, %xmm0 # sched: [5:0.50]
; HASWELL-NO-FMA-NEXT: retq # sched: [2:1.00]
;
; AVX512-LABEL: f32_one_step_2:
; AVX512: # BB#0:
; AVX512-NEXT: vrcp14ss %xmm0, %xmm0, %xmm1
; AVX512-NEXT: vfnmadd213ss {{.*}}(%rip), %xmm1, %xmm0
; AVX512-NEXT: vfmadd132ss %xmm1, %xmm1, %xmm0
; AVX512-NEXT: vmulss {{.*}}(%rip), %xmm0, %xmm0 # sched: [9:0.50]
; AVX512-NEXT: retq # sched: [1:1.00]
; AVX512-NEXT: vfnmadd213ss {{.*}}(%rip), %xmm1, %xmm0 # sched: [5:0.50]
; AVX512-NEXT: vfmadd132ss %xmm1, %xmm1, %xmm0 # sched: [5:0.50]
; AVX512-NEXT: vmulss {{.*}}(%rip), %xmm0, %xmm0 # sched: [5:0.50]
; AVX512-NEXT: retq # sched: [2:1.00]
%div = fdiv fast float 3456.0, %x
ret float %div
}
@ -209,32 +209,32 @@ define float @f32_one_step_2_divs(float %x) #1 {
; HASWELL-LABEL: f32_one_step_2_divs:
; HASWELL: # BB#0:
; HASWELL-NEXT: vrcpss %xmm0, %xmm0, %xmm1 # sched: [5:1.00]
; HASWELL-NEXT: vfnmadd213ss {{.*}}(%rip), %xmm1, %xmm0
; HASWELL-NEXT: vfmadd132ss %xmm1, %xmm1, %xmm0
; HASWELL-NEXT: vmulss {{.*}}(%rip), %xmm0, %xmm1 # sched: [9:0.50]
; HASWELL-NEXT: vfnmadd213ss {{.*}}(%rip), %xmm1, %xmm0 # sched: [5:0.50]
; HASWELL-NEXT: vfmadd132ss %xmm1, %xmm1, %xmm0 # sched: [5:0.50]
; HASWELL-NEXT: vmulss {{.*}}(%rip), %xmm0, %xmm1 # sched: [5:0.50]
; HASWELL-NEXT: vmulss %xmm0, %xmm1, %xmm0 # sched: [5:0.50]
; HASWELL-NEXT: retq # sched: [1:1.00]
; HASWELL-NEXT: retq # sched: [2:1.00]
;
; HASWELL-NO-FMA-LABEL: f32_one_step_2_divs:
; HASWELL-NO-FMA: # BB#0:
; HASWELL-NO-FMA-NEXT: vrcpss %xmm0, %xmm0, %xmm1 # sched: [5:1.00]
; HASWELL-NO-FMA-NEXT: vmulss %xmm1, %xmm0, %xmm0 # sched: [5:0.50]
; HASWELL-NO-FMA-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero sched: [4:0.50]
; HASWELL-NO-FMA-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero sched: [1:0.50]
; HASWELL-NO-FMA-NEXT: vsubss %xmm0, %xmm2, %xmm0 # sched: [3:1.00]
; HASWELL-NO-FMA-NEXT: vmulss %xmm0, %xmm1, %xmm0 # sched: [5:0.50]
; HASWELL-NO-FMA-NEXT: vaddss %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
; HASWELL-NO-FMA-NEXT: vmulss {{.*}}(%rip), %xmm0, %xmm1 # sched: [9:0.50]
; HASWELL-NO-FMA-NEXT: vmulss {{.*}}(%rip), %xmm0, %xmm1 # sched: [5:0.50]
; HASWELL-NO-FMA-NEXT: vmulss %xmm0, %xmm1, %xmm0 # sched: [5:0.50]
; HASWELL-NO-FMA-NEXT: retq # sched: [1:1.00]
; HASWELL-NO-FMA-NEXT: retq # sched: [2:1.00]
;
; AVX512-LABEL: f32_one_step_2_divs:
; AVX512: # BB#0:
; AVX512-NEXT: vrcp14ss %xmm0, %xmm0, %xmm1
; AVX512-NEXT: vfnmadd213ss {{.*}}(%rip), %xmm1, %xmm0
; AVX512-NEXT: vfmadd132ss %xmm1, %xmm1, %xmm0
; AVX512-NEXT: vmulss {{.*}}(%rip), %xmm0, %xmm1 # sched: [9:0.50]
; AVX512-NEXT: vfnmadd213ss {{.*}}(%rip), %xmm1, %xmm0 # sched: [5:0.50]
; AVX512-NEXT: vfmadd132ss %xmm1, %xmm1, %xmm0 # sched: [5:0.50]
; AVX512-NEXT: vmulss {{.*}}(%rip), %xmm0, %xmm1 # sched: [5:0.50]
; AVX512-NEXT: vmulss %xmm0, %xmm1, %xmm0 # sched: [5:0.50]
; AVX512-NEXT: retq # sched: [1:1.00]
; AVX512-NEXT: retq # sched: [2:1.00]
%div = fdiv fast float 3456.0, %x
%div2 = fdiv fast float %div, %x
ret float %div2
@ -319,20 +319,20 @@ define float @f32_two_step_2(float %x) #2 {
; HASWELL-LABEL: f32_two_step_2:
; HASWELL: # BB#0:
; HASWELL-NEXT: vrcpss %xmm0, %xmm0, %xmm1 # sched: [5:1.00]
; HASWELL-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero sched: [4:0.50]
; HASWELL-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero sched: [1:0.50]
; HASWELL-NEXT: vmovaps %xmm1, %xmm3 # sched: [1:1.00]
; HASWELL-NEXT: vfnmadd213ss %xmm2, %xmm0, %xmm3
; HASWELL-NEXT: vfmadd132ss %xmm1, %xmm1, %xmm3
; HASWELL-NEXT: vfnmadd213ss %xmm2, %xmm3, %xmm0
; HASWELL-NEXT: vfmadd132ss %xmm3, %xmm3, %xmm0
; HASWELL-NEXT: vmulss {{.*}}(%rip), %xmm0, %xmm0 # sched: [9:0.50]
; HASWELL-NEXT: retq # sched: [1:1.00]
; HASWELL-NEXT: vfnmadd213ss %xmm2, %xmm0, %xmm3 # sched: [5:0.50]
; HASWELL-NEXT: vfmadd132ss %xmm1, %xmm1, %xmm3 # sched: [5:0.50]
; HASWELL-NEXT: vfnmadd213ss %xmm2, %xmm3, %xmm0 # sched: [5:0.50]
; HASWELL-NEXT: vfmadd132ss %xmm3, %xmm3, %xmm0 # sched: [5:0.50]
; HASWELL-NEXT: vmulss {{.*}}(%rip), %xmm0, %xmm0 # sched: [5:0.50]
; HASWELL-NEXT: retq # sched: [2:1.00]
;
; HASWELL-NO-FMA-LABEL: f32_two_step_2:
; HASWELL-NO-FMA: # BB#0:
; HASWELL-NO-FMA-NEXT: vrcpss %xmm0, %xmm0, %xmm1 # sched: [5:1.00]
; HASWELL-NO-FMA-NEXT: vmulss %xmm1, %xmm0, %xmm2 # sched: [5:0.50]
; HASWELL-NO-FMA-NEXT: vmovss {{.*#+}} xmm3 = mem[0],zero,zero,zero sched: [4:0.50]
; HASWELL-NO-FMA-NEXT: vmovss {{.*#+}} xmm3 = mem[0],zero,zero,zero sched: [1:0.50]
; HASWELL-NO-FMA-NEXT: vsubss %xmm2, %xmm3, %xmm2 # sched: [3:1.00]
; HASWELL-NO-FMA-NEXT: vmulss %xmm2, %xmm1, %xmm2 # sched: [5:0.50]
; HASWELL-NO-FMA-NEXT: vaddss %xmm2, %xmm1, %xmm1 # sched: [3:1.00]
@ -340,20 +340,20 @@ define float @f32_two_step_2(float %x) #2 {
; HASWELL-NO-FMA-NEXT: vsubss %xmm0, %xmm3, %xmm0 # sched: [3:1.00]
; HASWELL-NO-FMA-NEXT: vmulss %xmm0, %xmm1, %xmm0 # sched: [5:0.50]
; HASWELL-NO-FMA-NEXT: vaddss %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
; HASWELL-NO-FMA-NEXT: vmulss {{.*}}(%rip), %xmm0, %xmm0 # sched: [9:0.50]
; HASWELL-NO-FMA-NEXT: retq # sched: [1:1.00]
; HASWELL-NO-FMA-NEXT: vmulss {{.*}}(%rip), %xmm0, %xmm0 # sched: [5:0.50]
; HASWELL-NO-FMA-NEXT: retq # sched: [2:1.00]
;
; AVX512-LABEL: f32_two_step_2:
; AVX512: # BB#0:
; AVX512-NEXT: vrcp14ss %xmm0, %xmm0, %xmm1
; AVX512-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero sched: [4:0.50]
; AVX512-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero sched: [1:0.50]
; AVX512-NEXT: vmovaps %xmm1, %xmm3 # sched: [1:1.00]
; AVX512-NEXT: vfnmadd213ss %xmm2, %xmm0, %xmm3
; AVX512-NEXT: vfmadd132ss %xmm1, %xmm1, %xmm3
; AVX512-NEXT: vfnmadd213ss %xmm2, %xmm3, %xmm0
; AVX512-NEXT: vfmadd132ss %xmm3, %xmm3, %xmm0
; AVX512-NEXT: vmulss {{.*}}(%rip), %xmm0, %xmm0 # sched: [9:0.50]
; AVX512-NEXT: retq # sched: [1:1.00]
; AVX512-NEXT: vfnmadd213ss %xmm2, %xmm0, %xmm3 # sched: [5:0.50]
; AVX512-NEXT: vfmadd132ss %xmm1, %xmm1, %xmm3 # sched: [5:0.50]
; AVX512-NEXT: vfnmadd213ss %xmm2, %xmm3, %xmm0 # sched: [5:0.50]
; AVX512-NEXT: vfmadd132ss %xmm3, %xmm3, %xmm0 # sched: [5:0.50]
; AVX512-NEXT: vmulss {{.*}}(%rip), %xmm0, %xmm0 # sched: [5:0.50]
; AVX512-NEXT: retq # sched: [2:1.00]
%div = fdiv fast float 6789.0, %x
ret float %div
}
@ -415,39 +415,39 @@ define <4 x float> @v4f32_one_step2(<4 x float> %x) #1 {
; HASWELL-LABEL: v4f32_one_step2:
; HASWELL: # BB#0:
; HASWELL-NEXT: vrcpps %xmm0, %xmm1 # sched: [5:1.00]
; HASWELL-NEXT: vbroadcastss {{.*#+}} xmm2 = [1,1,1,1] sched: [4:0.50]
; HASWELL-NEXT: vfnmadd213ps %xmm2, %xmm1, %xmm0
; HASWELL-NEXT: vfmadd132ps %xmm1, %xmm1, %xmm0
; HASWELL-NEXT: vmulps {{.*}}(%rip), %xmm0, %xmm0 # sched: [9:0.50]
; HASWELL-NEXT: retq # sched: [1:1.00]
; HASWELL-NEXT: vbroadcastss {{.*#+}} xmm2 = [1,1,1,1] sched: [1:0.50]
; HASWELL-NEXT: vfnmadd213ps %xmm2, %xmm1, %xmm0 # sched: [5:0.50]
; HASWELL-NEXT: vfmadd132ps %xmm1, %xmm1, %xmm0 # sched: [5:0.50]
; HASWELL-NEXT: vmulps {{.*}}(%rip), %xmm0, %xmm0 # sched: [5:0.50]
; HASWELL-NEXT: retq # sched: [2:1.00]
;
; HASWELL-NO-FMA-LABEL: v4f32_one_step2:
; HASWELL-NO-FMA: # BB#0:
; HASWELL-NO-FMA-NEXT: vrcpps %xmm0, %xmm1 # sched: [5:1.00]
; HASWELL-NO-FMA-NEXT: vmulps %xmm1, %xmm0, %xmm0 # sched: [5:0.50]
; HASWELL-NO-FMA-NEXT: vbroadcastss {{.*#+}} xmm2 = [1,1,1,1] sched: [4:0.50]
; HASWELL-NO-FMA-NEXT: vbroadcastss {{.*#+}} xmm2 = [1,1,1,1] sched: [1:0.50]
; HASWELL-NO-FMA-NEXT: vsubps %xmm0, %xmm2, %xmm0 # sched: [3:1.00]
; HASWELL-NO-FMA-NEXT: vmulps %xmm0, %xmm1, %xmm0 # sched: [5:0.50]
; HASWELL-NO-FMA-NEXT: vaddps %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
; HASWELL-NO-FMA-NEXT: vmulps {{.*}}(%rip), %xmm0, %xmm0 # sched: [9:0.50]
; HASWELL-NO-FMA-NEXT: retq # sched: [1:1.00]
; HASWELL-NO-FMA-NEXT: vmulps {{.*}}(%rip), %xmm0, %xmm0 # sched: [5:0.50]
; HASWELL-NO-FMA-NEXT: retq # sched: [2:1.00]
;
; KNL-LABEL: v4f32_one_step2:
; KNL: # BB#0:
; KNL-NEXT: vrcpps %xmm0, %xmm1 # sched: [5:1.00]
; KNL-NEXT: vbroadcastss {{.*#+}} xmm2 = [1,1,1,1] sched: [4:0.50]
; KNL-NEXT: vfnmadd213ps %xmm2, %xmm1, %xmm0
; KNL-NEXT: vfmadd132ps %xmm1, %xmm1, %xmm0
; KNL-NEXT: vmulps {{.*}}(%rip), %xmm0, %xmm0 # sched: [9:0.50]
; KNL-NEXT: retq # sched: [1:1.00]
; KNL-NEXT: vbroadcastss {{.*#+}} xmm2 = [1,1,1,1] sched: [1:0.50]
; KNL-NEXT: vfnmadd213ps %xmm2, %xmm1, %xmm0 # sched: [5:0.50]
; KNL-NEXT: vfmadd132ps %xmm1, %xmm1, %xmm0 # sched: [5:0.50]
; KNL-NEXT: vmulps {{.*}}(%rip), %xmm0, %xmm0 # sched: [5:0.50]
; KNL-NEXT: retq # sched: [2:1.00]
;
; SKX-LABEL: v4f32_one_step2:
; SKX: # BB#0:
; SKX-NEXT: vrcp14ps %xmm0, %xmm1
; SKX-NEXT: vfnmadd213ps {{.*}}(%rip){1to4}, %xmm1, %xmm0
; SKX-NEXT: vfmadd132ps %xmm1, %xmm1, %xmm0
; SKX-NEXT: vmulps {{.*}}(%rip), %xmm0, %xmm0 # sched: [9:0.50]
; SKX-NEXT: retq # sched: [1:1.00]
; SKX-NEXT: vfmadd132ps %xmm1, %xmm1, %xmm0 # sched: [5:0.50]
; SKX-NEXT: vmulps {{.*}}(%rip), %xmm0, %xmm0 # sched: [5:0.50]
; SKX-NEXT: retq # sched: [2:1.00]
%div = fdiv fast <4 x float> <float 1.0, float 2.0, float 3.0, float 4.0>, %x
ret <4 x float> %div
}
@ -514,43 +514,43 @@ define <4 x float> @v4f32_one_step_2_divs(<4 x float> %x) #1 {
; HASWELL-LABEL: v4f32_one_step_2_divs:
; HASWELL: # BB#0:
; HASWELL-NEXT: vrcpps %xmm0, %xmm1 # sched: [5:1.00]
; HASWELL-NEXT: vbroadcastss {{.*#+}} xmm2 = [1,1,1,1] sched: [4:0.50]
; HASWELL-NEXT: vfnmadd213ps %xmm2, %xmm1, %xmm0
; HASWELL-NEXT: vfmadd132ps %xmm1, %xmm1, %xmm0
; HASWELL-NEXT: vmulps {{.*}}(%rip), %xmm0, %xmm1 # sched: [9:0.50]
; HASWELL-NEXT: vbroadcastss {{.*#+}} xmm2 = [1,1,1,1] sched: [1:0.50]
; HASWELL-NEXT: vfnmadd213ps %xmm2, %xmm1, %xmm0 # sched: [5:0.50]
; HASWELL-NEXT: vfmadd132ps %xmm1, %xmm1, %xmm0 # sched: [5:0.50]
; HASWELL-NEXT: vmulps {{.*}}(%rip), %xmm0, %xmm1 # sched: [5:0.50]
; HASWELL-NEXT: vmulps %xmm0, %xmm1, %xmm0 # sched: [5:0.50]
; HASWELL-NEXT: retq # sched: [1:1.00]
; HASWELL-NEXT: retq # sched: [2:1.00]
;
; HASWELL-NO-FMA-LABEL: v4f32_one_step_2_divs:
; HASWELL-NO-FMA: # BB#0:
; HASWELL-NO-FMA-NEXT: vrcpps %xmm0, %xmm1 # sched: [5:1.00]
; HASWELL-NO-FMA-NEXT: vmulps %xmm1, %xmm0, %xmm0 # sched: [5:0.50]
; HASWELL-NO-FMA-NEXT: vbroadcastss {{.*#+}} xmm2 = [1,1,1,1] sched: [4:0.50]
; HASWELL-NO-FMA-NEXT: vbroadcastss {{.*#+}} xmm2 = [1,1,1,1] sched: [1:0.50]
; HASWELL-NO-FMA-NEXT: vsubps %xmm0, %xmm2, %xmm0 # sched: [3:1.00]
; HASWELL-NO-FMA-NEXT: vmulps %xmm0, %xmm1, %xmm0 # sched: [5:0.50]
; HASWELL-NO-FMA-NEXT: vaddps %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
; HASWELL-NO-FMA-NEXT: vmulps {{.*}}(%rip), %xmm0, %xmm1 # sched: [9:0.50]
; HASWELL-NO-FMA-NEXT: vmulps {{.*}}(%rip), %xmm0, %xmm1 # sched: [5:0.50]
; HASWELL-NO-FMA-NEXT: vmulps %xmm0, %xmm1, %xmm0 # sched: [5:0.50]
; HASWELL-NO-FMA-NEXT: retq # sched: [1:1.00]
; HASWELL-NO-FMA-NEXT: retq # sched: [2:1.00]
;
; KNL-LABEL: v4f32_one_step_2_divs:
; KNL: # BB#0:
; KNL-NEXT: vrcpps %xmm0, %xmm1 # sched: [5:1.00]
; KNL-NEXT: vbroadcastss {{.*#+}} xmm2 = [1,1,1,1] sched: [4:0.50]
; KNL-NEXT: vfnmadd213ps %xmm2, %xmm1, %xmm0
; KNL-NEXT: vfmadd132ps %xmm1, %xmm1, %xmm0
; KNL-NEXT: vmulps {{.*}}(%rip), %xmm0, %xmm1 # sched: [9:0.50]
; KNL-NEXT: vbroadcastss {{.*#+}} xmm2 = [1,1,1,1] sched: [1:0.50]
; KNL-NEXT: vfnmadd213ps %xmm2, %xmm1, %xmm0 # sched: [5:0.50]
; KNL-NEXT: vfmadd132ps %xmm1, %xmm1, %xmm0 # sched: [5:0.50]
; KNL-NEXT: vmulps {{.*}}(%rip), %xmm0, %xmm1 # sched: [5:0.50]
; KNL-NEXT: vmulps %xmm0, %xmm1, %xmm0 # sched: [5:0.50]
; KNL-NEXT: retq # sched: [1:1.00]
; KNL-NEXT: retq # sched: [2:1.00]
;
; SKX-LABEL: v4f32_one_step_2_divs:
; SKX: # BB#0:
; SKX-NEXT: vrcp14ps %xmm0, %xmm1
; SKX-NEXT: vfnmadd213ps {{.*}}(%rip){1to4}, %xmm1, %xmm0
; SKX-NEXT: vfmadd132ps %xmm1, %xmm1, %xmm0
; SKX-NEXT: vmulps {{.*}}(%rip), %xmm0, %xmm1 # sched: [9:0.50]
; SKX-NEXT: vfmadd132ps %xmm1, %xmm1, %xmm0 # sched: [5:0.50]
; SKX-NEXT: vmulps {{.*}}(%rip), %xmm0, %xmm1 # sched: [5:0.50]
; SKX-NEXT: vmulps %xmm0, %xmm1, %xmm0 # sched: [5:0.50]
; SKX-NEXT: retq # sched: [1:1.00]
; SKX-NEXT: retq # sched: [2:1.00]
%div = fdiv fast <4 x float> <float 1.0, float 2.0, float 3.0, float 4.0>, %x
%div2 = fdiv fast <4 x float> %div, %x
ret <4 x float> %div2
@ -635,20 +635,20 @@ define <4 x float> @v4f32_two_step2(<4 x float> %x) #2 {
; HASWELL-LABEL: v4f32_two_step2:
; HASWELL: # BB#0:
; HASWELL-NEXT: vrcpps %xmm0, %xmm1 # sched: [5:1.00]
; HASWELL-NEXT: vbroadcastss {{.*#+}} xmm2 = [1,1,1,1] sched: [4:0.50]
; HASWELL-NEXT: vbroadcastss {{.*#+}} xmm2 = [1,1,1,1] sched: [1:0.50]
; HASWELL-NEXT: vmovaps %xmm1, %xmm3 # sched: [1:1.00]
; HASWELL-NEXT: vfnmadd213ps %xmm2, %xmm0, %xmm3
; HASWELL-NEXT: vfmadd132ps %xmm1, %xmm1, %xmm3
; HASWELL-NEXT: vfnmadd213ps %xmm2, %xmm3, %xmm0
; HASWELL-NEXT: vfmadd132ps %xmm3, %xmm3, %xmm0
; HASWELL-NEXT: vmulps {{.*}}(%rip), %xmm0, %xmm0 # sched: [9:0.50]
; HASWELL-NEXT: retq # sched: [1:1.00]
; HASWELL-NEXT: vfnmadd213ps %xmm2, %xmm0, %xmm3 # sched: [5:0.50]
; HASWELL-NEXT: vfmadd132ps %xmm1, %xmm1, %xmm3 # sched: [5:0.50]
; HASWELL-NEXT: vfnmadd213ps %xmm2, %xmm3, %xmm0 # sched: [5:0.50]
; HASWELL-NEXT: vfmadd132ps %xmm3, %xmm3, %xmm0 # sched: [5:0.50]
; HASWELL-NEXT: vmulps {{.*}}(%rip), %xmm0, %xmm0 # sched: [5:0.50]
; HASWELL-NEXT: retq # sched: [2:1.00]
;
; HASWELL-NO-FMA-LABEL: v4f32_two_step2:
; HASWELL-NO-FMA: # BB#0:
; HASWELL-NO-FMA-NEXT: vrcpps %xmm0, %xmm1 # sched: [5:1.00]
; HASWELL-NO-FMA-NEXT: vmulps %xmm1, %xmm0, %xmm2 # sched: [5:0.50]
; HASWELL-NO-FMA-NEXT: vbroadcastss {{.*#+}} xmm3 = [1,1,1,1] sched: [4:0.50]
; HASWELL-NO-FMA-NEXT: vbroadcastss {{.*#+}} xmm3 = [1,1,1,1] sched: [1:0.50]
; HASWELL-NO-FMA-NEXT: vsubps %xmm2, %xmm3, %xmm2 # sched: [3:1.00]
; HASWELL-NO-FMA-NEXT: vmulps %xmm2, %xmm1, %xmm2 # sched: [5:0.50]
; HASWELL-NO-FMA-NEXT: vaddps %xmm2, %xmm1, %xmm1 # sched: [3:1.00]
@ -656,32 +656,32 @@ define <4 x float> @v4f32_two_step2(<4 x float> %x) #2 {
; HASWELL-NO-FMA-NEXT: vsubps %xmm0, %xmm3, %xmm0 # sched: [3:1.00]
; HASWELL-NO-FMA-NEXT: vmulps %xmm0, %xmm1, %xmm0 # sched: [5:0.50]
; HASWELL-NO-FMA-NEXT: vaddps %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
; HASWELL-NO-FMA-NEXT: vmulps {{.*}}(%rip), %xmm0, %xmm0 # sched: [9:0.50]
; HASWELL-NO-FMA-NEXT: retq # sched: [1:1.00]
; HASWELL-NO-FMA-NEXT: vmulps {{.*}}(%rip), %xmm0, %xmm0 # sched: [5:0.50]
; HASWELL-NO-FMA-NEXT: retq # sched: [2:1.00]
;
; KNL-LABEL: v4f32_two_step2:
; KNL: # BB#0:
; KNL-NEXT: vrcpps %xmm0, %xmm1 # sched: [5:1.00]
; KNL-NEXT: vbroadcastss {{.*#+}} xmm2 = [1,1,1,1] sched: [4:0.50]
; KNL-NEXT: vbroadcastss {{.*#+}} xmm2 = [1,1,1,1] sched: [1:0.50]
; KNL-NEXT: vmovaps %xmm1, %xmm3 # sched: [1:1.00]
; KNL-NEXT: vfnmadd213ps %xmm2, %xmm0, %xmm3
; KNL-NEXT: vfmadd132ps %xmm1, %xmm1, %xmm3
; KNL-NEXT: vfnmadd213ps %xmm2, %xmm3, %xmm0
; KNL-NEXT: vfmadd132ps %xmm3, %xmm3, %xmm0
; KNL-NEXT: vmulps {{.*}}(%rip), %xmm0, %xmm0 # sched: [9:0.50]
; KNL-NEXT: retq # sched: [1:1.00]
; KNL-NEXT: vfnmadd213ps %xmm2, %xmm0, %xmm3 # sched: [5:0.50]
; KNL-NEXT: vfmadd132ps %xmm1, %xmm1, %xmm3 # sched: [5:0.50]
; KNL-NEXT: vfnmadd213ps %xmm2, %xmm3, %xmm0 # sched: [5:0.50]
; KNL-NEXT: vfmadd132ps %xmm3, %xmm3, %xmm0 # sched: [5:0.50]
; KNL-NEXT: vmulps {{.*}}(%rip), %xmm0, %xmm0 # sched: [5:0.50]
; KNL-NEXT: retq # sched: [2:1.00]
;
; SKX-LABEL: v4f32_two_step2:
; SKX: # BB#0:
; SKX-NEXT: vrcp14ps %xmm0, %xmm1
; SKX-NEXT: vbroadcastss {{.*#+}} xmm2 = [1,1,1,1] sched: [4:0.50]
; SKX-NEXT: vbroadcastss {{.*#+}} xmm2 = [1,1,1,1] sched: [1:0.50]
; SKX-NEXT: vmovaps %xmm1, %xmm3 # sched: [1:1.00]
; SKX-NEXT: vfnmadd213ps %xmm2, %xmm0, %xmm3
; SKX-NEXT: vfmadd132ps %xmm1, %xmm1, %xmm3
; SKX-NEXT: vfnmadd213ps %xmm2, %xmm3, %xmm0
; SKX-NEXT: vfmadd132ps %xmm3, %xmm3, %xmm0
; SKX-NEXT: vmulps {{.*}}(%rip), %xmm0, %xmm0 # sched: [9:0.50]
; SKX-NEXT: retq # sched: [1:1.00]
; SKX-NEXT: vfnmadd213ps %xmm2, %xmm0, %xmm3 # sched: [5:0.50]
; SKX-NEXT: vfmadd132ps %xmm1, %xmm1, %xmm3 # sched: [5:0.50]
; SKX-NEXT: vfnmadd213ps %xmm2, %xmm3, %xmm0 # sched: [5:0.50]
; SKX-NEXT: vfmadd132ps %xmm3, %xmm3, %xmm0 # sched: [5:0.50]
; SKX-NEXT: vmulps {{.*}}(%rip), %xmm0, %xmm0 # sched: [5:0.50]
; SKX-NEXT: retq # sched: [2:1.00]
%div = fdiv fast <4 x float> <float 1.0, float 2.0, float 3.0, float 4.0>, %x
ret <4 x float> %div
}
@ -750,40 +750,40 @@ define <8 x float> @v8f32_one_step2(<8 x float> %x) #1 {
;
; HASWELL-LABEL: v8f32_one_step2:
; HASWELL: # BB#0:
; HASWELL-NEXT: vrcpps %ymm0, %ymm1 # sched: [7:2.00]
; HASWELL-NEXT: vbroadcastss {{.*#+}} ymm2 = [1,1,1,1,1,1,1,1] sched: [5:1.00]
; HASWELL-NEXT: vfnmadd213ps %ymm2, %ymm1, %ymm0
; HASWELL-NEXT: vfmadd132ps %ymm1, %ymm1, %ymm0
; HASWELL-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm0 # sched: [9:1.00]
; HASWELL-NEXT: retq # sched: [1:1.00]
; HASWELL-NEXT: vrcpps %ymm0, %ymm1 # sched: [11:2.00]
; HASWELL-NEXT: vbroadcastss {{.*#+}} ymm2 = [1,1,1,1,1,1,1,1] sched: [1:0.50]
; HASWELL-NEXT: vfnmadd213ps %ymm2, %ymm1, %ymm0 # sched: [5:0.50]
; HASWELL-NEXT: vfmadd132ps %ymm1, %ymm1, %ymm0 # sched: [5:0.50]
; HASWELL-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm0 # sched: [5:0.50]
; HASWELL-NEXT: retq # sched: [2:1.00]
;
; HASWELL-NO-FMA-LABEL: v8f32_one_step2:
; HASWELL-NO-FMA: # BB#0:
; HASWELL-NO-FMA-NEXT: vrcpps %ymm0, %ymm1 # sched: [7:2.00]
; HASWELL-NO-FMA-NEXT: vmulps %ymm1, %ymm0, %ymm0 # sched: [5:1.00]
; HASWELL-NO-FMA-NEXT: vbroadcastss {{.*#+}} ymm2 = [1,1,1,1,1,1,1,1] sched: [5:1.00]
; HASWELL-NO-FMA-NEXT: vrcpps %ymm0, %ymm1 # sched: [11:2.00]
; HASWELL-NO-FMA-NEXT: vmulps %ymm1, %ymm0, %ymm0 # sched: [5:0.50]
; HASWELL-NO-FMA-NEXT: vbroadcastss {{.*#+}} ymm2 = [1,1,1,1,1,1,1,1] sched: [1:0.50]
; HASWELL-NO-FMA-NEXT: vsubps %ymm0, %ymm2, %ymm0 # sched: [3:1.00]
; HASWELL-NO-FMA-NEXT: vmulps %ymm0, %ymm1, %ymm0 # sched: [5:1.00]
; HASWELL-NO-FMA-NEXT: vmulps %ymm0, %ymm1, %ymm0 # sched: [5:0.50]
; HASWELL-NO-FMA-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [3:1.00]
; HASWELL-NO-FMA-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm0 # sched: [9:1.00]
; HASWELL-NO-FMA-NEXT: retq # sched: [1:1.00]
; HASWELL-NO-FMA-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm0 # sched: [5:0.50]
; HASWELL-NO-FMA-NEXT: retq # sched: [2:1.00]
;
; KNL-LABEL: v8f32_one_step2:
; KNL: # BB#0:
; KNL-NEXT: vrcpps %ymm0, %ymm1 # sched: [7:2.00]
; KNL-NEXT: vbroadcastss {{.*#+}} ymm2 = [1,1,1,1,1,1,1,1] sched: [5:1.00]
; KNL-NEXT: vfnmadd213ps %ymm2, %ymm1, %ymm0
; KNL-NEXT: vfmadd132ps %ymm1, %ymm1, %ymm0
; KNL-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm0 # sched: [9:1.00]
; KNL-NEXT: retq # sched: [1:1.00]
; KNL-NEXT: vrcpps %ymm0, %ymm1 # sched: [11:2.00]
; KNL-NEXT: vbroadcastss {{.*#+}} ymm2 = [1,1,1,1,1,1,1,1] sched: [1:0.50]
; KNL-NEXT: vfnmadd213ps %ymm2, %ymm1, %ymm0 # sched: [5:0.50]
; KNL-NEXT: vfmadd132ps %ymm1, %ymm1, %ymm0 # sched: [5:0.50]
; KNL-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm0 # sched: [5:0.50]
; KNL-NEXT: retq # sched: [2:1.00]
;
; SKX-LABEL: v8f32_one_step2:
; SKX: # BB#0:
; SKX-NEXT: vrcp14ps %ymm0, %ymm1
; SKX-NEXT: vfnmadd213ps {{.*}}(%rip){1to8}, %ymm1, %ymm0
; SKX-NEXT: vfmadd132ps %ymm1, %ymm1, %ymm0
; SKX-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm0 # sched: [9:1.00]
; SKX-NEXT: retq # sched: [1:1.00]
; SKX-NEXT: vfmadd132ps %ymm1, %ymm1, %ymm0 # sched: [5:0.50]
; SKX-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm0 # sched: [5:0.50]
; SKX-NEXT: retq # sched: [2:1.00]
%div = fdiv fast <8 x float> <float 1.0, float 2.0, float 3.0, float 4.0, float 5.0, float 6.0, float 7.0, float 8.0>, %x
ret <8 x float> %div
}
@ -858,44 +858,44 @@ define <8 x float> @v8f32_one_step_2_divs(<8 x float> %x) #1 {
;
; HASWELL-LABEL: v8f32_one_step_2_divs:
; HASWELL: # BB#0:
; HASWELL-NEXT: vrcpps %ymm0, %ymm1 # sched: [7:2.00]
; HASWELL-NEXT: vbroadcastss {{.*#+}} ymm2 = [1,1,1,1,1,1,1,1] sched: [5:1.00]
; HASWELL-NEXT: vfnmadd213ps %ymm2, %ymm1, %ymm0
; HASWELL-NEXT: vfmadd132ps %ymm1, %ymm1, %ymm0
; HASWELL-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm1 # sched: [9:1.00]
; HASWELL-NEXT: vmulps %ymm0, %ymm1, %ymm0 # sched: [5:1.00]
; HASWELL-NEXT: retq # sched: [1:1.00]
; HASWELL-NEXT: vrcpps %ymm0, %ymm1 # sched: [11:2.00]
; HASWELL-NEXT: vbroadcastss {{.*#+}} ymm2 = [1,1,1,1,1,1,1,1] sched: [1:0.50]
; HASWELL-NEXT: vfnmadd213ps %ymm2, %ymm1, %ymm0 # sched: [5:0.50]
; HASWELL-NEXT: vfmadd132ps %ymm1, %ymm1, %ymm0 # sched: [5:0.50]
; HASWELL-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm1 # sched: [5:0.50]
; HASWELL-NEXT: vmulps %ymm0, %ymm1, %ymm0 # sched: [5:0.50]
; HASWELL-NEXT: retq # sched: [2:1.00]
;
; HASWELL-NO-FMA-LABEL: v8f32_one_step_2_divs:
; HASWELL-NO-FMA: # BB#0:
; HASWELL-NO-FMA-NEXT: vrcpps %ymm0, %ymm1 # sched: [7:2.00]
; HASWELL-NO-FMA-NEXT: vmulps %ymm1, %ymm0, %ymm0 # sched: [5:1.00]
; HASWELL-NO-FMA-NEXT: vbroadcastss {{.*#+}} ymm2 = [1,1,1,1,1,1,1,1] sched: [5:1.00]
; HASWELL-NO-FMA-NEXT: vrcpps %ymm0, %ymm1 # sched: [11:2.00]
; HASWELL-NO-FMA-NEXT: vmulps %ymm1, %ymm0, %ymm0 # sched: [5:0.50]
; HASWELL-NO-FMA-NEXT: vbroadcastss {{.*#+}} ymm2 = [1,1,1,1,1,1,1,1] sched: [1:0.50]
; HASWELL-NO-FMA-NEXT: vsubps %ymm0, %ymm2, %ymm0 # sched: [3:1.00]
; HASWELL-NO-FMA-NEXT: vmulps %ymm0, %ymm1, %ymm0 # sched: [5:1.00]
; HASWELL-NO-FMA-NEXT: vmulps %ymm0, %ymm1, %ymm0 # sched: [5:0.50]
; HASWELL-NO-FMA-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [3:1.00]
; HASWELL-NO-FMA-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm1 # sched: [9:1.00]
; HASWELL-NO-FMA-NEXT: vmulps %ymm0, %ymm1, %ymm0 # sched: [5:1.00]
; HASWELL-NO-FMA-NEXT: retq # sched: [1:1.00]
; HASWELL-NO-FMA-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm1 # sched: [5:0.50]
; HASWELL-NO-FMA-NEXT: vmulps %ymm0, %ymm1, %ymm0 # sched: [5:0.50]
; HASWELL-NO-FMA-NEXT: retq # sched: [2:1.00]
;
; KNL-LABEL: v8f32_one_step_2_divs:
; KNL: # BB#0:
; KNL-NEXT: vrcpps %ymm0, %ymm1 # sched: [7:2.00]
; KNL-NEXT: vbroadcastss {{.*#+}} ymm2 = [1,1,1,1,1,1,1,1] sched: [5:1.00]
; KNL-NEXT: vfnmadd213ps %ymm2, %ymm1, %ymm0
; KNL-NEXT: vfmadd132ps %ymm1, %ymm1, %ymm0
; KNL-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm1 # sched: [9:1.00]
; KNL-NEXT: vmulps %ymm0, %ymm1, %ymm0 # sched: [5:1.00]
; KNL-NEXT: retq # sched: [1:1.00]
; KNL-NEXT: vrcpps %ymm0, %ymm1 # sched: [11:2.00]
; KNL-NEXT: vbroadcastss {{.*#+}} ymm2 = [1,1,1,1,1,1,1,1] sched: [1:0.50]
; KNL-NEXT: vfnmadd213ps %ymm2, %ymm1, %ymm0 # sched: [5:0.50]
; KNL-NEXT: vfmadd132ps %ymm1, %ymm1, %ymm0 # sched: [5:0.50]
; KNL-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm1 # sched: [5:0.50]
; KNL-NEXT: vmulps %ymm0, %ymm1, %ymm0 # sched: [5:0.50]
; KNL-NEXT: retq # sched: [2:1.00]
;
; SKX-LABEL: v8f32_one_step_2_divs:
; SKX: # BB#0:
; SKX-NEXT: vrcp14ps %ymm0, %ymm1
; SKX-NEXT: vfnmadd213ps {{.*}}(%rip){1to8}, %ymm1, %ymm0
; SKX-NEXT: vfmadd132ps %ymm1, %ymm1, %ymm0
; SKX-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm1 # sched: [9:1.00]
; SKX-NEXT: vmulps %ymm0, %ymm1, %ymm0 # sched: [5:1.00]
; SKX-NEXT: retq # sched: [1:1.00]
; SKX-NEXT: vfmadd132ps %ymm1, %ymm1, %ymm0 # sched: [5:0.50]
; SKX-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm1 # sched: [5:0.50]
; SKX-NEXT: vmulps %ymm0, %ymm1, %ymm0 # sched: [5:0.50]
; SKX-NEXT: retq # sched: [2:1.00]
%div = fdiv fast <8 x float> <float 1.0, float 2.0, float 3.0, float 4.0, float 5.0, float 6.0, float 7.0, float 8.0>, %x
%div2 = fdiv fast <8 x float> %div, %x
ret <8 x float> %div2
@ -993,54 +993,54 @@ define <8 x float> @v8f32_two_step2(<8 x float> %x) #2 {
;
; HASWELL-LABEL: v8f32_two_step2:
; HASWELL: # BB#0:
; HASWELL-NEXT: vrcpps %ymm0, %ymm1 # sched: [7:2.00]
; HASWELL-NEXT: vbroadcastss {{.*#+}} ymm2 = [1,1,1,1,1,1,1,1] sched: [5:1.00]
; HASWELL-NEXT: vrcpps %ymm0, %ymm1 # sched: [11:2.00]
; HASWELL-NEXT: vbroadcastss {{.*#+}} ymm2 = [1,1,1,1,1,1,1,1] sched: [1:0.50]
; HASWELL-NEXT: vmovaps %ymm1, %ymm3 # sched: [1:1.00]
; HASWELL-NEXT: vfnmadd213ps %ymm2, %ymm0, %ymm3
; HASWELL-NEXT: vfmadd132ps %ymm1, %ymm1, %ymm3
; HASWELL-NEXT: vfnmadd213ps %ymm2, %ymm3, %ymm0
; HASWELL-NEXT: vfmadd132ps %ymm3, %ymm3, %ymm0
; HASWELL-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm0 # sched: [9:1.00]
; HASWELL-NEXT: retq # sched: [1:1.00]
; HASWELL-NEXT: vfnmadd213ps %ymm2, %ymm0, %ymm3 # sched: [5:0.50]
; HASWELL-NEXT: vfmadd132ps %ymm1, %ymm1, %ymm3 # sched: [5:0.50]
; HASWELL-NEXT: vfnmadd213ps %ymm2, %ymm3, %ymm0 # sched: [5:0.50]
; HASWELL-NEXT: vfmadd132ps %ymm3, %ymm3, %ymm0 # sched: [5:0.50]
; HASWELL-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm0 # sched: [5:0.50]
; HASWELL-NEXT: retq # sched: [2:1.00]
;
; HASWELL-NO-FMA-LABEL: v8f32_two_step2:
; HASWELL-NO-FMA: # BB#0:
; HASWELL-NO-FMA-NEXT: vrcpps %ymm0, %ymm1 # sched: [7:2.00]
; HASWELL-NO-FMA-NEXT: vmulps %ymm1, %ymm0, %ymm2 # sched: [5:1.00]
; HASWELL-NO-FMA-NEXT: vbroadcastss {{.*#+}} ymm3 = [1,1,1,1,1,1,1,1] sched: [5:1.00]
; HASWELL-NO-FMA-NEXT: vrcpps %ymm0, %ymm1 # sched: [11:2.00]
; HASWELL-NO-FMA-NEXT: vmulps %ymm1, %ymm0, %ymm2 # sched: [5:0.50]
; HASWELL-NO-FMA-NEXT: vbroadcastss {{.*#+}} ymm3 = [1,1,1,1,1,1,1,1] sched: [1:0.50]
; HASWELL-NO-FMA-NEXT: vsubps %ymm2, %ymm3, %ymm2 # sched: [3:1.00]
; HASWELL-NO-FMA-NEXT: vmulps %ymm2, %ymm1, %ymm2 # sched: [5:1.00]
; HASWELL-NO-FMA-NEXT: vmulps %ymm2, %ymm1, %ymm2 # sched: [5:0.50]
; HASWELL-NO-FMA-NEXT: vaddps %ymm2, %ymm1, %ymm1 # sched: [3:1.00]
; HASWELL-NO-FMA-NEXT: vmulps %ymm1, %ymm0, %ymm0 # sched: [5:1.00]
; HASWELL-NO-FMA-NEXT: vmulps %ymm1, %ymm0, %ymm0 # sched: [5:0.50]
; HASWELL-NO-FMA-NEXT: vsubps %ymm0, %ymm3, %ymm0 # sched: [3:1.00]
; HASWELL-NO-FMA-NEXT: vmulps %ymm0, %ymm1, %ymm0 # sched: [5:1.00]
; HASWELL-NO-FMA-NEXT: vmulps %ymm0, %ymm1, %ymm0 # sched: [5:0.50]
; HASWELL-NO-FMA-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [3:1.00]
; HASWELL-NO-FMA-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm0 # sched: [9:1.00]
; HASWELL-NO-FMA-NEXT: retq # sched: [1:1.00]
; HASWELL-NO-FMA-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm0 # sched: [5:0.50]
; HASWELL-NO-FMA-NEXT: retq # sched: [2:1.00]
;
; KNL-LABEL: v8f32_two_step2:
; KNL: # BB#0:
; KNL-NEXT: vrcpps %ymm0, %ymm1 # sched: [7:2.00]
; KNL-NEXT: vbroadcastss {{.*#+}} ymm2 = [1,1,1,1,1,1,1,1] sched: [5:1.00]
; KNL-NEXT: vrcpps %ymm0, %ymm1 # sched: [11:2.00]
; KNL-NEXT: vbroadcastss {{.*#+}} ymm2 = [1,1,1,1,1,1,1,1] sched: [1:0.50]
; KNL-NEXT: vmovaps %ymm1, %ymm3 # sched: [1:1.00]
; KNL-NEXT: vfnmadd213ps %ymm2, %ymm0, %ymm3
; KNL-NEXT: vfmadd132ps %ymm1, %ymm1, %ymm3
; KNL-NEXT: vfnmadd213ps %ymm2, %ymm3, %ymm0
; KNL-NEXT: vfmadd132ps %ymm3, %ymm3, %ymm0
; KNL-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm0 # sched: [9:1.00]
; KNL-NEXT: retq # sched: [1:1.00]
; KNL-NEXT: vfnmadd213ps %ymm2, %ymm0, %ymm3 # sched: [5:0.50]
; KNL-NEXT: vfmadd132ps %ymm1, %ymm1, %ymm3 # sched: [5:0.50]
; KNL-NEXT: vfnmadd213ps %ymm2, %ymm3, %ymm0 # sched: [5:0.50]
; KNL-NEXT: vfmadd132ps %ymm3, %ymm3, %ymm0 # sched: [5:0.50]
; KNL-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm0 # sched: [5:0.50]
; KNL-NEXT: retq # sched: [2:1.00]
;
; SKX-LABEL: v8f32_two_step2:
; SKX: # BB#0:
; SKX-NEXT: vrcp14ps %ymm0, %ymm1
; SKX-NEXT: vbroadcastss {{.*#+}} ymm2 = [1,1,1,1,1,1,1,1] sched: [5:1.00]
; SKX-NEXT: vbroadcastss {{.*#+}} ymm2 = [1,1,1,1,1,1,1,1] sched: [1:0.50]
; SKX-NEXT: vmovaps %ymm1, %ymm3 # sched: [1:1.00]
; SKX-NEXT: vfnmadd213ps %ymm2, %ymm0, %ymm3
; SKX-NEXT: vfmadd132ps %ymm1, %ymm1, %ymm3
; SKX-NEXT: vfnmadd213ps %ymm2, %ymm3, %ymm0
; SKX-NEXT: vfmadd132ps %ymm3, %ymm3, %ymm0
; SKX-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm0 # sched: [9:1.00]
; SKX-NEXT: retq # sched: [1:1.00]
; SKX-NEXT: vfnmadd213ps %ymm2, %ymm0, %ymm3 # sched: [5:0.50]
; SKX-NEXT: vfmadd132ps %ymm1, %ymm1, %ymm3 # sched: [5:0.50]
; SKX-NEXT: vfnmadd213ps %ymm2, %ymm3, %ymm0 # sched: [5:0.50]
; SKX-NEXT: vfmadd132ps %ymm3, %ymm3, %ymm0 # sched: [5:0.50]
; SKX-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm0 # sched: [5:0.50]
; SKX-NEXT: retq # sched: [2:1.00]
%div = fdiv fast <8 x float> <float 1.0, float 2.0, float 3.0, float 4.0, float 5.0, float 6.0, float 7.0, float 8.0>, %x
ret <8 x float> %div
}
@ -1074,23 +1074,23 @@ define <8 x float> @v8f32_no_step(<8 x float> %x) #3 {
;
; HASWELL-LABEL: v8f32_no_step:
; HASWELL: # BB#0:
; HASWELL-NEXT: vrcpps %ymm0, %ymm0 # sched: [7:2.00]
; HASWELL-NEXT: retq # sched: [1:1.00]
; HASWELL-NEXT: vrcpps %ymm0, %ymm0 # sched: [11:2.00]
; HASWELL-NEXT: retq # sched: [2:1.00]
;
; HASWELL-NO-FMA-LABEL: v8f32_no_step:
; HASWELL-NO-FMA: # BB#0:
; HASWELL-NO-FMA-NEXT: vrcpps %ymm0, %ymm0 # sched: [7:2.00]
; HASWELL-NO-FMA-NEXT: retq # sched: [1:1.00]
; HASWELL-NO-FMA-NEXT: vrcpps %ymm0, %ymm0 # sched: [11:2.00]
; HASWELL-NO-FMA-NEXT: retq # sched: [2:1.00]
;
; KNL-LABEL: v8f32_no_step:
; KNL: # BB#0:
; KNL-NEXT: vrcpps %ymm0, %ymm0 # sched: [7:2.00]
; KNL-NEXT: retq # sched: [1:1.00]
; KNL-NEXT: vrcpps %ymm0, %ymm0 # sched: [11:2.00]
; KNL-NEXT: retq # sched: [2:1.00]
;
; SKX-LABEL: v8f32_no_step:
; SKX: # BB#0:
; SKX-NEXT: vrcp14ps %ymm0, %ymm0
; SKX-NEXT: retq # sched: [1:1.00]
; SKX-NEXT: retq # sched: [2:1.00]
%div = fdiv fast <8 x float> <float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0>, %x
ret <8 x float> %div
}
@ -1130,27 +1130,27 @@ define <8 x float> @v8f32_no_step2(<8 x float> %x) #3 {
;
; HASWELL-LABEL: v8f32_no_step2:
; HASWELL: # BB#0:
; HASWELL-NEXT: vrcpps %ymm0, %ymm0 # sched: [7:2.00]
; HASWELL-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm0 # sched: [9:1.00]
; HASWELL-NEXT: retq # sched: [1:1.00]
; HASWELL-NEXT: vrcpps %ymm0, %ymm0 # sched: [11:2.00]
; HASWELL-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm0 # sched: [5:0.50]
; HASWELL-NEXT: retq # sched: [2:1.00]
;
; HASWELL-NO-FMA-LABEL: v8f32_no_step2:
; HASWELL-NO-FMA: # BB#0:
; HASWELL-NO-FMA-NEXT: vrcpps %ymm0, %ymm0 # sched: [7:2.00]
; HASWELL-NO-FMA-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm0 # sched: [9:1.00]
; HASWELL-NO-FMA-NEXT: retq # sched: [1:1.00]
; HASWELL-NO-FMA-NEXT: vrcpps %ymm0, %ymm0 # sched: [11:2.00]
; HASWELL-NO-FMA-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm0 # sched: [5:0.50]
; HASWELL-NO-FMA-NEXT: retq # sched: [2:1.00]
;
; KNL-LABEL: v8f32_no_step2:
; KNL: # BB#0:
; KNL-NEXT: vrcpps %ymm0, %ymm0 # sched: [7:2.00]
; KNL-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm0 # sched: [9:1.00]
; KNL-NEXT: retq # sched: [1:1.00]
; KNL-NEXT: vrcpps %ymm0, %ymm0 # sched: [11:2.00]
; KNL-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm0 # sched: [5:0.50]
; KNL-NEXT: retq # sched: [2:1.00]
;
; SKX-LABEL: v8f32_no_step2:
; SKX: # BB#0:
; SKX-NEXT: vrcp14ps %ymm0, %ymm0
; SKX-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm0 # sched: [9:1.00]
; SKX-NEXT: retq # sched: [1:1.00]
; SKX-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm0 # sched: [5:0.50]
; SKX-NEXT: retq # sched: [2:1.00]
%div = fdiv fast <8 x float> <float 1.0, float 2.0, float 3.0, float 4.0, float 5.0, float 6.0, float 7.0, float 8.0>, %x
ret <8 x float> %div
}

View File

@ -25,7 +25,7 @@ define <4 x i32> @test_sha1msg1(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) {
; CANNONLAKE: # BB#0:
; CANNONLAKE-NEXT: sha1msg1 %xmm1, %xmm0
; CANNONLAKE-NEXT: sha1msg1 (%rdi), %xmm0
; CANNONLAKE-NEXT: retq # sched: [1:1.00]
; CANNONLAKE-NEXT: retq # sched: [2:1.00]
;
; ZNVER1-LABEL: test_sha1msg1:
; ZNVER1: # BB#0:
@ -56,7 +56,7 @@ define <4 x i32> @test_sha1msg2(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) {
; CANNONLAKE: # BB#0:
; CANNONLAKE-NEXT: sha1msg2 %xmm1, %xmm0
; CANNONLAKE-NEXT: sha1msg2 (%rdi), %xmm0
; CANNONLAKE-NEXT: retq # sched: [1:1.00]
; CANNONLAKE-NEXT: retq # sched: [2:1.00]
;
; ZNVER1-LABEL: test_sha1msg2:
; ZNVER1: # BB#0:
@ -87,7 +87,7 @@ define <4 x i32> @test_sha1nexte(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) {
; CANNONLAKE: # BB#0:
; CANNONLAKE-NEXT: sha1nexte %xmm1, %xmm0
; CANNONLAKE-NEXT: sha1nexte (%rdi), %xmm0
; CANNONLAKE-NEXT: retq # sched: [1:1.00]
; CANNONLAKE-NEXT: retq # sched: [2:1.00]
;
; ZNVER1-LABEL: test_sha1nexte:
; ZNVER1: # BB#0:
@ -118,7 +118,7 @@ define <4 x i32> @test_sha1rnds4(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) {
; CANNONLAKE: # BB#0:
; CANNONLAKE-NEXT: sha1rnds4 $3, %xmm1, %xmm0
; CANNONLAKE-NEXT: sha1rnds4 $3, (%rdi), %xmm0
; CANNONLAKE-NEXT: retq # sched: [1:1.00]
; CANNONLAKE-NEXT: retq # sched: [2:1.00]
;
; ZNVER1-LABEL: test_sha1rnds4:
; ZNVER1: # BB#0:
@ -153,7 +153,7 @@ define <4 x i32> @test_sha256msg1(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2)
; CANNONLAKE: # BB#0:
; CANNONLAKE-NEXT: sha256msg1 %xmm1, %xmm0
; CANNONLAKE-NEXT: sha256msg1 (%rdi), %xmm0
; CANNONLAKE-NEXT: retq # sched: [1:1.00]
; CANNONLAKE-NEXT: retq # sched: [2:1.00]
;
; ZNVER1-LABEL: test_sha256msg1:
; ZNVER1: # BB#0:
@ -184,7 +184,7 @@ define <4 x i32> @test_sha256msg2(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2)
; CANNONLAKE: # BB#0:
; CANNONLAKE-NEXT: sha256msg2 %xmm1, %xmm0
; CANNONLAKE-NEXT: sha256msg2 (%rdi), %xmm0
; CANNONLAKE-NEXT: retq # sched: [1:1.00]
; CANNONLAKE-NEXT: retq # sched: [2:1.00]
;
; ZNVER1-LABEL: test_sha256msg2:
; ZNVER1: # BB#0:
@ -224,7 +224,7 @@ define <4 x i32> @test_sha256rnds2(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> %a2,
; CANNONLAKE-NEXT: sha256rnds2 %xmm0, %xmm1, %xmm3
; CANNONLAKE-NEXT: sha256rnds2 %xmm0, (%rdi), %xmm3
; CANNONLAKE-NEXT: vmovaps %xmm3, %xmm0 # sched: [1:1.00]
; CANNONLAKE-NEXT: retq # sched: [1:1.00]
; CANNONLAKE-NEXT: retq # sched: [2:1.00]
;
; ZNVER1-LABEL: test_sha256rnds2:
; ZNVER1: # BB#0:

View File

@ -37,8 +37,8 @@ define <4 x float> @test_addps(<4 x float> %a0, <4 x float> %a1, <4 x float> *%a
; HASWELL-LABEL: test_addps:
; HASWELL: # BB#0:
; HASWELL-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; HASWELL-NEXT: vaddps (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
; HASWELL-NEXT: retq # sched: [1:1.00]
; HASWELL-NEXT: vaddps (%rdi), %xmm0, %xmm0 # sched: [3:1.00]
; HASWELL-NEXT: retq # sched: [2:1.00]
;
; BTVER2-LABEL: test_addps:
; BTVER2: # BB#0:
@ -85,8 +85,8 @@ define float @test_addss(float %a0, float %a1, float *%a2) {
; HASWELL-LABEL: test_addss:
; HASWELL: # BB#0:
; HASWELL-NEXT: vaddss %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; HASWELL-NEXT: vaddss (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
; HASWELL-NEXT: retq # sched: [1:1.00]
; HASWELL-NEXT: vaddss (%rdi), %xmm0, %xmm0 # sched: [3:1.00]
; HASWELL-NEXT: retq # sched: [2:1.00]
;
; BTVER2-LABEL: test_addss:
; BTVER2: # BB#0:
@ -137,8 +137,8 @@ define <4 x float> @test_andps(<4 x float> %a0, <4 x float> %a1, <4 x float> *%a
; HASWELL-LABEL: test_andps:
; HASWELL: # BB#0:
; HASWELL-NEXT: vandps %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
; HASWELL-NEXT: vandps (%rdi), %xmm0, %xmm0 # sched: [5:1.00]
; HASWELL-NEXT: retq # sched: [1:1.00]
; HASWELL-NEXT: vandps (%rdi), %xmm0, %xmm0 # sched: [1:1.00]
; HASWELL-NEXT: retq # sched: [2:1.00]
;
; BTVER2-LABEL: test_andps:
; BTVER2: # BB#0:
@ -193,8 +193,8 @@ define <4 x float> @test_andnotps(<4 x float> %a0, <4 x float> %a1, <4 x float>
; HASWELL-LABEL: test_andnotps:
; HASWELL: # BB#0:
; HASWELL-NEXT: vandnps %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
; HASWELL-NEXT: vandnps (%rdi), %xmm0, %xmm0 # sched: [5:1.00]
; HASWELL-NEXT: retq # sched: [1:1.00]
; HASWELL-NEXT: vandnps (%rdi), %xmm0, %xmm0 # sched: [1:1.00]
; HASWELL-NEXT: retq # sched: [2:1.00]
;
; BTVER2-LABEL: test_andnotps:
; BTVER2: # BB#0:
@ -251,9 +251,9 @@ define <4 x float> @test_cmpps(<4 x float> %a0, <4 x float> %a1, <4 x float> *%a
; HASWELL-LABEL: test_cmpps:
; HASWELL: # BB#0:
; HASWELL-NEXT: vcmpeqps %xmm1, %xmm0, %xmm1 # sched: [3:1.00]
; HASWELL-NEXT: vcmpeqps (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
; HASWELL-NEXT: vcmpeqps (%rdi), %xmm0, %xmm0 # sched: [3:1.00]
; HASWELL-NEXT: vorps %xmm0, %xmm1, %xmm0 # sched: [1:1.00]
; HASWELL-NEXT: retq # sched: [1:1.00]
; HASWELL-NEXT: retq # sched: [2:1.00]
;
; BTVER2-LABEL: test_cmpps:
; BTVER2: # BB#0:
@ -306,7 +306,7 @@ define float @test_cmpss(float %a0, float %a1, float *%a2) {
; HASWELL: # BB#0:
; HASWELL-NEXT: vcmpeqss %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; HASWELL-NEXT: vcmpeqss (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
; HASWELL-NEXT: retq # sched: [1:1.00]
; HASWELL-NEXT: retq # sched: [2:1.00]
;
; BTVER2-LABEL: test_cmpss:
; BTVER2: # BB#0:
@ -399,7 +399,7 @@ define i32 @test_comiss(<4 x float> %a0, <4 x float> %a1, <4 x float> *%a2) {
; HASWELL-NEXT: andb %al, %dl # sched: [1:0.25]
; HASWELL-NEXT: orb %cl, %dl # sched: [1:0.25]
; HASWELL-NEXT: movzbl %dl, %eax # sched: [1:0.25]
; HASWELL-NEXT: retq # sched: [1:1.00]
; HASWELL-NEXT: retq # sched: [2:1.00]
;
; BTVER2-LABEL: test_comiss:
; BTVER2: # BB#0:
@ -470,7 +470,7 @@ define float @test_cvtsi2ss(i32 %a0, i32 *%a1) {
; HASWELL-NEXT: vcvtsi2ssl %edi, %xmm0, %xmm0 # sched: [4:1.00]
; HASWELL-NEXT: vcvtsi2ssl (%rsi), %xmm1, %xmm1 # sched: [8:1.00]
; HASWELL-NEXT: vaddss %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; HASWELL-NEXT: retq # sched: [1:1.00]
; HASWELL-NEXT: retq # sched: [2:1.00]
;
; BTVER2-LABEL: test_cvtsi2ss:
; BTVER2: # BB#0:
@ -523,10 +523,10 @@ define float @test_cvtsi2ssq(i64 %a0, i64 *%a1) {
;
; HASWELL-LABEL: test_cvtsi2ssq:
; HASWELL: # BB#0:
; HASWELL-NEXT: vcvtsi2ssq %rdi, %xmm0, %xmm0 # sched: [4:1.00]
; HASWELL-NEXT: vcvtsi2ssq %rdi, %xmm0, %xmm0 # sched: [5:2.00]
; HASWELL-NEXT: vcvtsi2ssq (%rsi), %xmm1, %xmm1 # sched: [8:1.00]
; HASWELL-NEXT: vaddss %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; HASWELL-NEXT: retq # sched: [1:1.00]
; HASWELL-NEXT: retq # sched: [2:1.00]
;
; BTVER2-LABEL: test_cvtsi2ssq:
; BTVER2: # BB#0:
@ -580,9 +580,9 @@ define i32 @test_cvtss2si(float %a0, float *%a1) {
; HASWELL-LABEL: test_cvtss2si:
; HASWELL: # BB#0:
; HASWELL-NEXT: vcvtss2si %xmm0, %ecx # sched: [4:1.00]
; HASWELL-NEXT: vcvtss2si (%rdi), %eax # sched: [8:1.00]
; HASWELL-NEXT: vcvtss2si (%rdi), %eax # sched: [4:1.00]
; HASWELL-NEXT: addl %ecx, %eax # sched: [1:0.25]
; HASWELL-NEXT: retq # sched: [1:1.00]
; HASWELL-NEXT: retq # sched: [2:1.00]
;
; BTVER2-LABEL: test_cvtss2si:
; BTVER2: # BB#0:
@ -639,9 +639,9 @@ define i64 @test_cvtss2siq(float %a0, float *%a1) {
; HASWELL-LABEL: test_cvtss2siq:
; HASWELL: # BB#0:
; HASWELL-NEXT: vcvtss2si %xmm0, %rcx # sched: [4:1.00]
; HASWELL-NEXT: vcvtss2si (%rdi), %rax # sched: [8:1.00]
; HASWELL-NEXT: vcvtss2si (%rdi), %rax # sched: [4:1.00]
; HASWELL-NEXT: addq %rcx, %rax # sched: [1:0.25]
; HASWELL-NEXT: retq # sched: [1:1.00]
; HASWELL-NEXT: retq # sched: [2:1.00]
;
; BTVER2-LABEL: test_cvtss2siq:
; BTVER2: # BB#0:
@ -698,9 +698,9 @@ define i32 @test_cvttss2si(float %a0, float *%a1) {
; HASWELL-LABEL: test_cvttss2si:
; HASWELL: # BB#0:
; HASWELL-NEXT: vcvttss2si %xmm0, %ecx # sched: [4:1.00]
; HASWELL-NEXT: vcvttss2si (%rdi), %eax # sched: [8:1.00]
; HASWELL-NEXT: vcvttss2si (%rdi), %eax # sched: [4:1.00]
; HASWELL-NEXT: addl %ecx, %eax # sched: [1:0.25]
; HASWELL-NEXT: retq # sched: [1:1.00]
; HASWELL-NEXT: retq # sched: [2:1.00]
;
; BTVER2-LABEL: test_cvttss2si:
; BTVER2: # BB#0:
@ -754,9 +754,9 @@ define i64 @test_cvttss2siq(float %a0, float *%a1) {
; HASWELL-LABEL: test_cvttss2siq:
; HASWELL: # BB#0:
; HASWELL-NEXT: vcvttss2si %xmm0, %rcx # sched: [4:1.00]
; HASWELL-NEXT: vcvttss2si (%rdi), %rax # sched: [8:1.00]
; HASWELL-NEXT: vcvttss2si (%rdi), %rax # sched: [4:1.00]
; HASWELL-NEXT: addq %rcx, %rax # sched: [1:0.25]
; HASWELL-NEXT: retq # sched: [1:1.00]
; HASWELL-NEXT: retq # sched: [2:1.00]
;
; BTVER2-LABEL: test_cvttss2siq:
; BTVER2: # BB#0:
@ -805,9 +805,9 @@ define <4 x float> @test_divps(<4 x float> %a0, <4 x float> %a1, <4 x float> *%a
;
; HASWELL-LABEL: test_divps:
; HASWELL: # BB#0:
; HASWELL-NEXT: vdivps %xmm1, %xmm0, %xmm0 # sched: [12:1.00]
; HASWELL-NEXT: vdivps (%rdi), %xmm0, %xmm0 # sched: [16:1.00]
; HASWELL-NEXT: retq # sched: [1:1.00]
; HASWELL-NEXT: vdivps %xmm1, %xmm0, %xmm0 # sched: [13:1.00]
; HASWELL-NEXT: vdivps (%rdi), %xmm0, %xmm0 # sched: [13:1.00]
; HASWELL-NEXT: retq # sched: [2:1.00]
;
; BTVER2-LABEL: test_divps:
; BTVER2: # BB#0:
@ -853,9 +853,9 @@ define float @test_divss(float %a0, float %a1, float *%a2) {
;
; HASWELL-LABEL: test_divss:
; HASWELL: # BB#0:
; HASWELL-NEXT: vdivss %xmm1, %xmm0, %xmm0 # sched: [12:1.00]
; HASWELL-NEXT: vdivss (%rdi), %xmm0, %xmm0 # sched: [16:1.00]
; HASWELL-NEXT: retq # sched: [1:1.00]
; HASWELL-NEXT: vdivss %xmm1, %xmm0, %xmm0 # sched: [13:1.00]
; HASWELL-NEXT: vdivss (%rdi), %xmm0, %xmm0 # sched: [13:1.00]
; HASWELL-NEXT: retq # sched: [2:1.00]
;
; BTVER2-LABEL: test_divss:
; BTVER2: # BB#0:
@ -902,8 +902,8 @@ define void @test_ldmxcsr(i32 %a0) {
; HASWELL-LABEL: test_ldmxcsr:
; HASWELL: # BB#0:
; HASWELL-NEXT: movl %edi, -{{[0-9]+}}(%rsp) # sched: [1:1.00]
; HASWELL-NEXT: vldmxcsr -{{[0-9]+}}(%rsp) # sched: [6:1.00]
; HASWELL-NEXT: retq # sched: [1:1.00]
; HASWELL-NEXT: vldmxcsr -{{[0-9]+}}(%rsp) # sched: [2:1.00]
; HASWELL-NEXT: retq # sched: [2:1.00]
;
; BTVER2-LABEL: test_ldmxcsr:
; BTVER2: # BB#0:
@ -952,8 +952,8 @@ define <4 x float> @test_maxps(<4 x float> %a0, <4 x float> %a1, <4 x float> *%a
; HASWELL-LABEL: test_maxps:
; HASWELL: # BB#0:
; HASWELL-NEXT: vmaxps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; HASWELL-NEXT: vmaxps (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
; HASWELL-NEXT: retq # sched: [1:1.00]
; HASWELL-NEXT: vmaxps (%rdi), %xmm0, %xmm0 # sched: [3:1.00]
; HASWELL-NEXT: retq # sched: [2:1.00]
;
; BTVER2-LABEL: test_maxps:
; BTVER2: # BB#0:
@ -1001,8 +1001,8 @@ define <4 x float> @test_maxss(<4 x float> %a0, <4 x float> %a1, <4 x float> *%a
; HASWELL-LABEL: test_maxss:
; HASWELL: # BB#0:
; HASWELL-NEXT: vmaxss %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; HASWELL-NEXT: vmaxss (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
; HASWELL-NEXT: retq # sched: [1:1.00]
; HASWELL-NEXT: vmaxss (%rdi), %xmm0, %xmm0 # sched: [3:1.00]
; HASWELL-NEXT: retq # sched: [2:1.00]
;
; BTVER2-LABEL: test_maxss:
; BTVER2: # BB#0:
@ -1050,8 +1050,8 @@ define <4 x float> @test_minps(<4 x float> %a0, <4 x float> %a1, <4 x float> *%a
; HASWELL-LABEL: test_minps:
; HASWELL: # BB#0:
; HASWELL-NEXT: vminps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; HASWELL-NEXT: vminps (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
; HASWELL-NEXT: retq # sched: [1:1.00]
; HASWELL-NEXT: vminps (%rdi), %xmm0, %xmm0 # sched: [3:1.00]
; HASWELL-NEXT: retq # sched: [2:1.00]
;
; BTVER2-LABEL: test_minps:
; BTVER2: # BB#0:
@ -1099,8 +1099,8 @@ define <4 x float> @test_minss(<4 x float> %a0, <4 x float> %a1, <4 x float> *%a
; HASWELL-LABEL: test_minss:
; HASWELL: # BB#0:
; HASWELL-NEXT: vminss %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; HASWELL-NEXT: vminss (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
; HASWELL-NEXT: retq # sched: [1:1.00]
; HASWELL-NEXT: vminss (%rdi), %xmm0, %xmm0 # sched: [3:1.00]
; HASWELL-NEXT: retq # sched: [2:1.00]
;
; BTVER2-LABEL: test_minss:
; BTVER2: # BB#0:
@ -1151,10 +1151,10 @@ define void @test_movaps(<4 x float> *%a0, <4 x float> *%a1) {
;
; HASWELL-LABEL: test_movaps:
; HASWELL: # BB#0:
; HASWELL-NEXT: vmovaps (%rdi), %xmm0 # sched: [4:0.50]
; HASWELL-NEXT: vmovaps (%rdi), %xmm0 # sched: [1:0.50]
; HASWELL-NEXT: vaddps %xmm0, %xmm0, %xmm0 # sched: [3:1.00]
; HASWELL-NEXT: vmovaps %xmm0, (%rsi) # sched: [1:1.00]
; HASWELL-NEXT: retq # sched: [1:1.00]
; HASWELL-NEXT: retq # sched: [2:1.00]
;
; BTVER2-LABEL: test_movaps:
; BTVER2: # BB#0:
@ -1207,7 +1207,7 @@ define <4 x float> @test_movhlps(<4 x float> %a0, <4 x float> %a1) {
; HASWELL-LABEL: test_movhlps:
; HASWELL: # BB#0:
; HASWELL-NEXT: vunpckhpd {{.*#+}} xmm0 = xmm1[1],xmm0[1] sched: [1:1.00]
; HASWELL-NEXT: retq # sched: [1:1.00]
; HASWELL-NEXT: retq # sched: [2:1.00]
;
; BTVER2-LABEL: test_movhlps:
; BTVER2: # BB#0:
@ -1257,10 +1257,10 @@ define void @test_movhps(<4 x float> %a0, <4 x float> %a1, x86_mmx *%a2) {
;
; HASWELL-LABEL: test_movhps:
; HASWELL: # BB#0:
; HASWELL-NEXT: vmovhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [5:1.00]
; HASWELL-NEXT: vmovhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [1:1.00]
; HASWELL-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; HASWELL-NEXT: vpextrq $1, %xmm0, (%rdi) # sched: [5:1.00]
; HASWELL-NEXT: retq # sched: [1:1.00]
; HASWELL-NEXT: vpextrq $1, %xmm0, (%rdi) # sched: [1:1.00]
; HASWELL-NEXT: retq # sched: [2:1.00]
;
; BTVER2-LABEL: test_movhps:
; BTVER2: # BB#0:
@ -1316,7 +1316,7 @@ define <4 x float> @test_movlhps(<4 x float> %a0, <4 x float> %a1) {
; HASWELL: # BB#0:
; HASWELL-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:1.00]
; HASWELL-NEXT: vaddps %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
; HASWELL-NEXT: retq # sched: [1:1.00]
; HASWELL-NEXT: retq # sched: [2:1.00]
;
; BTVER2-LABEL: test_movlhps:
; BTVER2: # BB#0:
@ -1365,10 +1365,10 @@ define void @test_movlps(<4 x float> %a0, <4 x float> %a1, x86_mmx *%a2) {
;
; HASWELL-LABEL: test_movlps:
; HASWELL: # BB#0:
; HASWELL-NEXT: vmovlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [5:1.00]
; HASWELL-NEXT: vmovlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [1:1.00]
; HASWELL-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; HASWELL-NEXT: vmovlps %xmm0, (%rdi) # sched: [1:1.00]
; HASWELL-NEXT: retq # sched: [1:1.00]
; HASWELL-NEXT: retq # sched: [2:1.00]
;
; BTVER2-LABEL: test_movlps:
; BTVER2: # BB#0:
@ -1419,7 +1419,7 @@ define i32 @test_movmskps(<4 x float> %a0) {
; HASWELL-LABEL: test_movmskps:
; HASWELL: # BB#0:
; HASWELL-NEXT: vmovmskps %xmm0, %eax # sched: [3:1.00]
; HASWELL-NEXT: retq # sched: [1:1.00]
; HASWELL-NEXT: retq # sched: [2:1.00]
;
; BTVER2-LABEL: test_movmskps:
; BTVER2: # BB#0:
@ -1465,7 +1465,7 @@ define void @test_movntps(<4 x float> %a0, <4 x float> *%a1) {
; HASWELL-LABEL: test_movntps:
; HASWELL: # BB#0:
; HASWELL-NEXT: vmovntps %xmm0, (%rdi) # sched: [1:1.00]
; HASWELL-NEXT: retq # sched: [1:1.00]
; HASWELL-NEXT: retq # sched: [2:1.00]
;
; BTVER2-LABEL: test_movntps:
; BTVER2: # BB#0:
@ -1511,10 +1511,10 @@ define void @test_movss_mem(float* %a0, float* %a1) {
;
; HASWELL-LABEL: test_movss_mem:
; HASWELL: # BB#0:
; HASWELL-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero sched: [4:0.50]
; HASWELL-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero sched: [1:0.50]
; HASWELL-NEXT: vaddss %xmm0, %xmm0, %xmm0 # sched: [3:1.00]
; HASWELL-NEXT: vmovss %xmm0, (%rsi) # sched: [1:1.00]
; HASWELL-NEXT: retq # sched: [1:1.00]
; HASWELL-NEXT: retq # sched: [2:1.00]
;
; BTVER2-LABEL: test_movss_mem:
; BTVER2: # BB#0:
@ -1565,7 +1565,7 @@ define <4 x float> @test_movss_reg(<4 x float> %a0, <4 x float> %a1) {
; HASWELL-LABEL: test_movss_reg:
; HASWELL: # BB#0:
; HASWELL-NEXT: vblendps {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3] sched: [1:0.33]
; HASWELL-NEXT: retq # sched: [1:1.00]
; HASWELL-NEXT: retq # sched: [2:1.00]
;
; BTVER2-LABEL: test_movss_reg:
; BTVER2: # BB#0:
@ -1611,10 +1611,10 @@ define void @test_movups(<4 x float> *%a0, <4 x float> *%a1) {
;
; HASWELL-LABEL: test_movups:
; HASWELL: # BB#0:
; HASWELL-NEXT: vmovups (%rdi), %xmm0 # sched: [4:0.50]
; HASWELL-NEXT: vmovups (%rdi), %xmm0 # sched: [1:0.50]
; HASWELL-NEXT: vaddps %xmm0, %xmm0, %xmm0 # sched: [3:1.00]
; HASWELL-NEXT: vmovups %xmm0, (%rsi) # sched: [1:1.00]
; HASWELL-NEXT: retq # sched: [1:1.00]
; HASWELL-NEXT: retq # sched: [2:1.00]
;
; BTVER2-LABEL: test_movups:
; BTVER2: # BB#0:
@ -1663,8 +1663,8 @@ define <4 x float> @test_mulps(<4 x float> %a0, <4 x float> %a1, <4 x float> *%a
; HASWELL-LABEL: test_mulps:
; HASWELL: # BB#0:
; HASWELL-NEXT: vmulps %xmm1, %xmm0, %xmm0 # sched: [5:0.50]
; HASWELL-NEXT: vmulps (%rdi), %xmm0, %xmm0 # sched: [9:0.50]
; HASWELL-NEXT: retq # sched: [1:1.00]
; HASWELL-NEXT: vmulps (%rdi), %xmm0, %xmm0 # sched: [5:0.50]
; HASWELL-NEXT: retq # sched: [2:1.00]
;
; BTVER2-LABEL: test_mulps:
; BTVER2: # BB#0:
@ -1711,8 +1711,8 @@ define float @test_mulss(float %a0, float %a1, float *%a2) {
; HASWELL-LABEL: test_mulss:
; HASWELL: # BB#0:
; HASWELL-NEXT: vmulss %xmm1, %xmm0, %xmm0 # sched: [5:0.50]
; HASWELL-NEXT: vmulss (%rdi), %xmm0, %xmm0 # sched: [9:0.50]
; HASWELL-NEXT: retq # sched: [1:1.00]
; HASWELL-NEXT: vmulss (%rdi), %xmm0, %xmm0 # sched: [5:0.50]
; HASWELL-NEXT: retq # sched: [2:1.00]
;
; BTVER2-LABEL: test_mulss:
; BTVER2: # BB#0:
@ -1763,8 +1763,8 @@ define <4 x float> @test_orps(<4 x float> %a0, <4 x float> %a1, <4 x float> *%a2
; HASWELL-LABEL: test_orps:
; HASWELL: # BB#0:
; HASWELL-NEXT: vorps %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
; HASWELL-NEXT: vorps (%rdi), %xmm0, %xmm0 # sched: [5:1.00]
; HASWELL-NEXT: retq # sched: [1:1.00]
; HASWELL-NEXT: vorps (%rdi), %xmm0, %xmm0 # sched: [1:1.00]
; HASWELL-NEXT: retq # sched: [2:1.00]
;
; BTVER2-LABEL: test_orps:
; BTVER2: # BB#0:
@ -1816,8 +1816,8 @@ define void @test_prefetchnta(i8* %a0) {
;
; HASWELL-LABEL: test_prefetchnta:
; HASWELL: # BB#0:
; HASWELL-NEXT: prefetchnta (%rdi) # sched: [4:0.50]
; HASWELL-NEXT: retq # sched: [1:1.00]
; HASWELL-NEXT: prefetchnta (%rdi) # sched: [1:0.50]
; HASWELL-NEXT: retq # sched: [2:1.00]
;
; BTVER2-LABEL: test_prefetchnta:
; BTVER2: # BB#0:
@ -1867,9 +1867,9 @@ define <4 x float> @test_rcpps(<4 x float> %a0, <4 x float> *%a1) {
; HASWELL-LABEL: test_rcpps:
; HASWELL: # BB#0:
; HASWELL-NEXT: vrcpps %xmm0, %xmm0 # sched: [5:1.00]
; HASWELL-NEXT: vrcpps (%rdi), %xmm1 # sched: [9:1.00]
; HASWELL-NEXT: vrcpps (%rdi), %xmm1 # sched: [5:1.00]
; HASWELL-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; HASWELL-NEXT: retq # sched: [1:1.00]
; HASWELL-NEXT: retq # sched: [2:1.00]
;
; BTVER2-LABEL: test_rcpps:
; BTVER2: # BB#0:
@ -1929,11 +1929,11 @@ define <4 x float> @test_rcpss(float %a0, float *%a1) {
;
; HASWELL-LABEL: test_rcpss:
; HASWELL: # BB#0:
; HASWELL-NEXT: vrcpss %xmm0, %xmm0, %xmm0 # sched: [9:1.00]
; HASWELL-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [4:0.50]
; HASWELL-NEXT: vrcpss %xmm1, %xmm1, %xmm1 # sched: [9:1.00]
; HASWELL-NEXT: vrcpss %xmm0, %xmm0, %xmm0 # sched: [5:1.00]
; HASWELL-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [1:0.50]
; HASWELL-NEXT: vrcpss %xmm1, %xmm1, %xmm1 # sched: [5:1.00]
; HASWELL-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; HASWELL-NEXT: retq # sched: [1:1.00]
; HASWELL-NEXT: retq # sched: [2:1.00]
;
; BTVER2-LABEL: test_rcpss:
; BTVER2: # BB#0:
@ -1994,9 +1994,9 @@ define <4 x float> @test_rsqrtps(<4 x float> %a0, <4 x float> *%a1) {
; HASWELL-LABEL: test_rsqrtps:
; HASWELL: # BB#0:
; HASWELL-NEXT: vrsqrtps %xmm0, %xmm0 # sched: [5:1.00]
; HASWELL-NEXT: vrsqrtps (%rdi), %xmm1 # sched: [9:1.00]
; HASWELL-NEXT: vrsqrtps (%rdi), %xmm1 # sched: [5:1.00]
; HASWELL-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; HASWELL-NEXT: retq # sched: [1:1.00]
; HASWELL-NEXT: retq # sched: [2:1.00]
;
; BTVER2-LABEL: test_rsqrtps:
; BTVER2: # BB#0:
@ -2057,10 +2057,10 @@ define <4 x float> @test_rsqrtss(float %a0, float *%a1) {
; HASWELL-LABEL: test_rsqrtss:
; HASWELL: # BB#0:
; HASWELL-NEXT: vrsqrtss %xmm0, %xmm0, %xmm0 # sched: [5:1.00]
; HASWELL-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [4:0.50]
; HASWELL-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [1:0.50]
; HASWELL-NEXT: vrsqrtss %xmm1, %xmm1, %xmm1 # sched: [5:1.00]
; HASWELL-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; HASWELL-NEXT: retq # sched: [1:1.00]
; HASWELL-NEXT: retq # sched: [2:1.00]
;
; BTVER2-LABEL: test_rsqrtss:
; BTVER2: # BB#0:
@ -2116,8 +2116,8 @@ define void @test_sfence() {
;
; HASWELL-LABEL: test_sfence:
; HASWELL: # BB#0:
; HASWELL-NEXT: sfence # sched: [1:1.00]
; HASWELL-NEXT: retq # sched: [1:1.00]
; HASWELL-NEXT: sfence # sched: [1:0.33]
; HASWELL-NEXT: retq # sched: [2:1.00]
;
; BTVER2-LABEL: test_sfence:
; BTVER2: # BB#0:
@ -2165,8 +2165,8 @@ define <4 x float> @test_shufps(<4 x float> %a0, <4 x float> %a1, <4 x float> *%
; HASWELL-LABEL: test_shufps:
; HASWELL: # BB#0:
; HASWELL-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,0],xmm1[0,0] sched: [1:1.00]
; HASWELL-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,3],mem[0,0] sched: [5:1.00]
; HASWELL-NEXT: retq # sched: [1:1.00]
; HASWELL-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,3],mem[0,0] sched: [1:1.00]
; HASWELL-NEXT: retq # sched: [2:1.00]
;
; BTVER2-LABEL: test_shufps:
; BTVER2: # BB#0:
@ -2217,10 +2217,10 @@ define <4 x float> @test_sqrtps(<4 x float> %a0, <4 x float> *%a1) {
;
; HASWELL-LABEL: test_sqrtps:
; HASWELL: # BB#0:
; HASWELL-NEXT: vsqrtps %xmm0, %xmm0 # sched: [15:1.00]
; HASWELL-NEXT: vsqrtps (%rdi), %xmm1 # sched: [19:1.00]
; HASWELL-NEXT: vsqrtps %xmm0, %xmm0 # sched: [14:1.00]
; HASWELL-NEXT: vsqrtps (%rdi), %xmm1 # sched: [14:1.00]
; HASWELL-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; HASWELL-NEXT: retq # sched: [1:1.00]
; HASWELL-NEXT: retq # sched: [2:1.00]
;
; BTVER2-LABEL: test_sqrtps:
; BTVER2: # BB#0:
@ -2280,11 +2280,11 @@ define <4 x float> @test_sqrtss(<4 x float> %a0, <4 x float> *%a1) {
;
; HASWELL-LABEL: test_sqrtss:
; HASWELL: # BB#0:
; HASWELL-NEXT: vsqrtss %xmm0, %xmm0, %xmm0 # sched: [19:1.00]
; HASWELL-NEXT: vmovaps (%rdi), %xmm1 # sched: [4:0.50]
; HASWELL-NEXT: vsqrtss %xmm1, %xmm1, %xmm1 # sched: [19:1.00]
; HASWELL-NEXT: vsqrtss %xmm0, %xmm0, %xmm0 # sched: [14:1.00]
; HASWELL-NEXT: vmovaps (%rdi), %xmm1 # sched: [1:0.50]
; HASWELL-NEXT: vsqrtss %xmm1, %xmm1, %xmm1 # sched: [14:1.00]
; HASWELL-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; HASWELL-NEXT: retq # sched: [1:1.00]
; HASWELL-NEXT: retq # sched: [2:1.00]
;
; BTVER2-LABEL: test_sqrtss:
; BTVER2: # BB#0:
@ -2336,9 +2336,9 @@ define i32 @test_stmxcsr() {
;
; HASWELL-LABEL: test_stmxcsr:
; HASWELL: # BB#0:
; HASWELL-NEXT: vstmxcsr -{{[0-9]+}}(%rsp) # sched: [7:1.00]
; HASWELL-NEXT: movl -{{[0-9]+}}(%rsp), %eax # sched: [4:0.50]
; HASWELL-NEXT: retq # sched: [1:1.00]
; HASWELL-NEXT: vstmxcsr -{{[0-9]+}}(%rsp) # sched: [1:1.00]
; HASWELL-NEXT: movl -{{[0-9]+}}(%rsp), %eax # sched: [1:0.50]
; HASWELL-NEXT: retq # sched: [2:1.00]
;
; BTVER2-LABEL: test_stmxcsr:
; BTVER2: # BB#0:
@ -2387,8 +2387,8 @@ define <4 x float> @test_subps(<4 x float> %a0, <4 x float> %a1, <4 x float> *%a
; HASWELL-LABEL: test_subps:
; HASWELL: # BB#0:
; HASWELL-NEXT: vsubps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; HASWELL-NEXT: vsubps (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
; HASWELL-NEXT: retq # sched: [1:1.00]
; HASWELL-NEXT: vsubps (%rdi), %xmm0, %xmm0 # sched: [3:1.00]
; HASWELL-NEXT: retq # sched: [2:1.00]
;
; BTVER2-LABEL: test_subps:
; BTVER2: # BB#0:
@ -2435,8 +2435,8 @@ define float @test_subss(float %a0, float %a1, float *%a2) {
; HASWELL-LABEL: test_subss:
; HASWELL: # BB#0:
; HASWELL-NEXT: vsubss %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; HASWELL-NEXT: vsubss (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
; HASWELL-NEXT: retq # sched: [1:1.00]
; HASWELL-NEXT: vsubss (%rdi), %xmm0, %xmm0 # sched: [3:1.00]
; HASWELL-NEXT: retq # sched: [2:1.00]
;
; BTVER2-LABEL: test_subss:
; BTVER2: # BB#0:
@ -2524,7 +2524,7 @@ define i32 @test_ucomiss(<4 x float> %a0, <4 x float> %a1, <4 x float> *%a2) {
; HASWELL-NEXT: andb %al, %dl # sched: [1:0.25]
; HASWELL-NEXT: orb %cl, %dl # sched: [1:0.25]
; HASWELL-NEXT: movzbl %dl, %eax # sched: [1:0.25]
; HASWELL-NEXT: retq # sched: [1:1.00]
; HASWELL-NEXT: retq # sched: [2:1.00]
;
; BTVER2-LABEL: test_ucomiss:
; BTVER2: # BB#0:
@ -2593,8 +2593,8 @@ define <4 x float> @test_unpckhps(<4 x float> %a0, <4 x float> %a1, <4 x float>
; HASWELL-LABEL: test_unpckhps:
; HASWELL: # BB#0:
; HASWELL-NEXT: vunpckhps {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:1.00]
; HASWELL-NEXT: vunpckhps {{.*#+}} xmm0 = xmm0[2],mem[2],xmm0[3],mem[3] sched: [5:1.00]
; HASWELL-NEXT: retq # sched: [1:1.00]
; HASWELL-NEXT: vunpckhps {{.*#+}} xmm0 = xmm0[2],mem[2],xmm0[3],mem[3] sched: [1:1.00]
; HASWELL-NEXT: retq # sched: [2:1.00]
;
; BTVER2-LABEL: test_unpckhps:
; BTVER2: # BB#0:
@ -2645,8 +2645,8 @@ define <4 x float> @test_unpcklps(<4 x float> %a0, <4 x float> %a1, <4 x float>
; HASWELL-LABEL: test_unpcklps:
; HASWELL: # BB#0:
; HASWELL-NEXT: vunpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] sched: [1:1.00]
; HASWELL-NEXT: vunpcklps {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1] sched: [5:1.00]
; HASWELL-NEXT: retq # sched: [1:1.00]
; HASWELL-NEXT: vunpcklps {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1] sched: [1:1.00]
; HASWELL-NEXT: retq # sched: [2:1.00]
;
; BTVER2-LABEL: test_unpcklps:
; BTVER2: # BB#0:
@ -2697,8 +2697,8 @@ define <4 x float> @test_xorps(<4 x float> %a0, <4 x float> %a1, <4 x float> *%a
; HASWELL-LABEL: test_xorps:
; HASWELL: # BB#0:
; HASWELL-NEXT: vxorps %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
; HASWELL-NEXT: vxorps (%rdi), %xmm0, %xmm0 # sched: [5:1.00]
; HASWELL-NEXT: retq # sched: [1:1.00]
; HASWELL-NEXT: vxorps (%rdi), %xmm0, %xmm0 # sched: [1:1.00]
; HASWELL-NEXT: retq # sched: [2:1.00]
;
; BTVER2-LABEL: test_xorps:
; BTVER2: # BB#0:

File diff suppressed because it is too large Load Diff

View File

@ -37,8 +37,8 @@ define <2 x double> @test_addsubpd(<2 x double> %a0, <2 x double> %a1, <2 x doub
; HASWELL-LABEL: test_addsubpd:
; HASWELL: # BB#0:
; HASWELL-NEXT: vaddsubpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; HASWELL-NEXT: vaddsubpd (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
; HASWELL-NEXT: retq # sched: [1:1.00]
; HASWELL-NEXT: vaddsubpd (%rdi), %xmm0, %xmm0 # sched: [3:1.00]
; HASWELL-NEXT: retq # sched: [2:1.00]
;
; BTVER2-LABEL: test_addsubpd:
; BTVER2: # BB#0:
@ -86,8 +86,8 @@ define <4 x float> @test_addsubps(<4 x float> %a0, <4 x float> %a1, <4 x float>
; HASWELL-LABEL: test_addsubps:
; HASWELL: # BB#0:
; HASWELL-NEXT: vaddsubps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; HASWELL-NEXT: vaddsubps (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
; HASWELL-NEXT: retq # sched: [1:1.00]
; HASWELL-NEXT: vaddsubps (%rdi), %xmm0, %xmm0 # sched: [3:1.00]
; HASWELL-NEXT: retq # sched: [2:1.00]
;
; BTVER2-LABEL: test_addsubps:
; BTVER2: # BB#0:
@ -135,8 +135,8 @@ define <2 x double> @test_haddpd(<2 x double> %a0, <2 x double> %a1, <2 x double
; HASWELL-LABEL: test_haddpd:
; HASWELL: # BB#0:
; HASWELL-NEXT: vhaddpd %xmm1, %xmm0, %xmm0 # sched: [5:2.00]
; HASWELL-NEXT: vhaddpd (%rdi), %xmm0, %xmm0 # sched: [9:2.00]
; HASWELL-NEXT: retq # sched: [1:1.00]
; HASWELL-NEXT: vhaddpd (%rdi), %xmm0, %xmm0 # sched: [5:2.00]
; HASWELL-NEXT: retq # sched: [2:1.00]
;
; BTVER2-LABEL: test_haddpd:
; BTVER2: # BB#0:
@ -184,8 +184,8 @@ define <4 x float> @test_haddps(<4 x float> %a0, <4 x float> %a1, <4 x float> *%
; HASWELL-LABEL: test_haddps:
; HASWELL: # BB#0:
; HASWELL-NEXT: vhaddps %xmm1, %xmm0, %xmm0 # sched: [5:2.00]
; HASWELL-NEXT: vhaddps (%rdi), %xmm0, %xmm0 # sched: [9:2.00]
; HASWELL-NEXT: retq # sched: [1:1.00]
; HASWELL-NEXT: vhaddps (%rdi), %xmm0, %xmm0 # sched: [5:2.00]
; HASWELL-NEXT: retq # sched: [2:1.00]
;
; BTVER2-LABEL: test_haddps:
; BTVER2: # BB#0:
@ -233,8 +233,8 @@ define <2 x double> @test_hsubpd(<2 x double> %a0, <2 x double> %a1, <2 x double
; HASWELL-LABEL: test_hsubpd:
; HASWELL: # BB#0:
; HASWELL-NEXT: vhsubpd %xmm1, %xmm0, %xmm0 # sched: [5:2.00]
; HASWELL-NEXT: vhsubpd (%rdi), %xmm0, %xmm0 # sched: [9:2.00]
; HASWELL-NEXT: retq # sched: [1:1.00]
; HASWELL-NEXT: vhsubpd (%rdi), %xmm0, %xmm0 # sched: [5:2.00]
; HASWELL-NEXT: retq # sched: [2:1.00]
;
; BTVER2-LABEL: test_hsubpd:
; BTVER2: # BB#0:
@ -282,8 +282,8 @@ define <4 x float> @test_hsubps(<4 x float> %a0, <4 x float> %a1, <4 x float> *%
; HASWELL-LABEL: test_hsubps:
; HASWELL: # BB#0:
; HASWELL-NEXT: vhsubps %xmm1, %xmm0, %xmm0 # sched: [5:2.00]
; HASWELL-NEXT: vhsubps (%rdi), %xmm0, %xmm0 # sched: [9:2.00]
; HASWELL-NEXT: retq # sched: [1:1.00]
; HASWELL-NEXT: vhsubps (%rdi), %xmm0, %xmm0 # sched: [5:2.00]
; HASWELL-NEXT: retq # sched: [2:1.00]
;
; BTVER2-LABEL: test_hsubps:
; BTVER2: # BB#0:
@ -328,8 +328,8 @@ define <16 x i8> @test_lddqu(i8* %a0) {
;
; HASWELL-LABEL: test_lddqu:
; HASWELL: # BB#0:
; HASWELL-NEXT: vlddqu (%rdi), %xmm0 # sched: [4:0.50]
; HASWELL-NEXT: retq # sched: [1:1.00]
; HASWELL-NEXT: vlddqu (%rdi), %xmm0 # sched: [1:0.50]
; HASWELL-NEXT: retq # sched: [2:1.00]
;
; BTVER2-LABEL: test_lddqu:
; BTVER2: # BB#0:
@ -379,7 +379,7 @@ define void @test_monitor(i8* %a0, i32 %a1, i32 %a2) {
; HASWELL-NEXT: leaq (%rdi), %rax # sched: [1:0.50]
; HASWELL-NEXT: movl %esi, %ecx # sched: [1:0.25]
; HASWELL-NEXT: monitor # sched: [100:0.25]
; HASWELL-NEXT: retq # sched: [1:1.00]
; HASWELL-NEXT: retq # sched: [2:1.00]
;
; BTVER2-LABEL: test_monitor:
; BTVER2: # BB#0:
@ -432,9 +432,9 @@ define <2 x double> @test_movddup(<2 x double> %a0, <2 x double> *%a1) {
; HASWELL-LABEL: test_movddup:
; HASWELL: # BB#0:
; HASWELL-NEXT: vmovddup {{.*#+}} xmm0 = xmm0[0,0] sched: [1:1.00]
; HASWELL-NEXT: vmovddup {{.*#+}} xmm1 = mem[0,0] sched: [4:0.50]
; HASWELL-NEXT: vmovddup {{.*#+}} xmm1 = mem[0,0] sched: [1:0.50]
; HASWELL-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; HASWELL-NEXT: retq # sched: [1:1.00]
; HASWELL-NEXT: retq # sched: [2:1.00]
;
; BTVER2-LABEL: test_movddup:
; BTVER2: # BB#0:
@ -489,9 +489,9 @@ define <4 x float> @test_movshdup(<4 x float> %a0, <4 x float> *%a1) {
; HASWELL-LABEL: test_movshdup:
; HASWELL: # BB#0:
; HASWELL-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3] sched: [1:1.00]
; HASWELL-NEXT: vmovshdup {{.*#+}} xmm1 = mem[1,1,3,3] sched: [4:0.50]
; HASWELL-NEXT: vmovshdup {{.*#+}} xmm1 = mem[1,1,3,3] sched: [1:0.50]
; HASWELL-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; HASWELL-NEXT: retq # sched: [1:1.00]
; HASWELL-NEXT: retq # sched: [2:1.00]
;
; BTVER2-LABEL: test_movshdup:
; BTVER2: # BB#0:
@ -546,9 +546,9 @@ define <4 x float> @test_movsldup(<4 x float> %a0, <4 x float> *%a1) {
; HASWELL-LABEL: test_movsldup:
; HASWELL: # BB#0:
; HASWELL-NEXT: vmovsldup {{.*#+}} xmm0 = xmm0[0,0,2,2] sched: [1:1.00]
; HASWELL-NEXT: vmovsldup {{.*#+}} xmm1 = mem[0,0,2,2] sched: [4:0.50]
; HASWELL-NEXT: vmovsldup {{.*#+}} xmm1 = mem[0,0,2,2] sched: [1:0.50]
; HASWELL-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; HASWELL-NEXT: retq # sched: [1:1.00]
; HASWELL-NEXT: retq # sched: [2:1.00]
;
; BTVER2-LABEL: test_movsldup:
; BTVER2: # BB#0:
@ -603,8 +603,8 @@ define void @test_mwait(i32 %a0, i32 %a1) {
; HASWELL: # BB#0:
; HASWELL-NEXT: movl %edi, %ecx # sched: [1:0.25]
; HASWELL-NEXT: movl %esi, %eax # sched: [1:0.25]
; HASWELL-NEXT: mwait # sched: [100:0.25]
; HASWELL-NEXT: retq # sched: [1:1.00]
; HASWELL-NEXT: mwait # sched: [20:2.50]
; HASWELL-NEXT: retq # sched: [2:1.00]
;
; BTVER2-LABEL: test_mwait:
; BTVER2: # BB#0:

View File

@ -34,8 +34,8 @@ define <2 x double> @test_blendpd(<2 x double> %a0, <2 x double> %a1, <2 x doubl
; HASWELL: # BB#0:
; HASWELL-NEXT: vblendpd {{.*#+}} xmm0 = xmm0[0],xmm1[1] sched: [1:0.33]
; HASWELL-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
; HASWELL-NEXT: vblendpd {{.*#+}} xmm0 = xmm0[0],mem[1] sched: [5:0.50]
; HASWELL-NEXT: retq # sched: [1:1.00]
; HASWELL-NEXT: vblendpd {{.*#+}} xmm0 = xmm0[0],mem[1] sched: [1:0.50]
; HASWELL-NEXT: retq # sched: [2:1.00]
;
; BTVER2-LABEL: test_blendpd:
; BTVER2: # BB#0:
@ -79,8 +79,8 @@ define <4 x float> @test_blendps(<4 x float> %a0, <4 x float> %a1, <4 x float> *
; HASWELL-LABEL: test_blendps:
; HASWELL: # BB#0:
; HASWELL-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2],xmm0[3] sched: [1:0.33]
; HASWELL-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],mem[1],xmm0[2,3] sched: [5:0.50]
; HASWELL-NEXT: retq # sched: [1:1.00]
; HASWELL-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],mem[1],xmm0[2,3] sched: [1:0.50]
; HASWELL-NEXT: retq # sched: [2:1.00]
;
; BTVER2-LABEL: test_blendps:
; BTVER2: # BB#0:
@ -127,8 +127,8 @@ define <2 x double> @test_blendvpd(<2 x double> %a0, <2 x double> %a1, <2 x doub
; HASWELL-LABEL: test_blendvpd:
; HASWELL: # BB#0:
; HASWELL-NEXT: vblendvpd %xmm2, %xmm1, %xmm0, %xmm0 # sched: [2:2.00]
; HASWELL-NEXT: vblendvpd %xmm2, (%rdi), %xmm0, %xmm0 # sched: [6:2.00]
; HASWELL-NEXT: retq # sched: [1:1.00]
; HASWELL-NEXT: vblendvpd %xmm2, (%rdi), %xmm0, %xmm0 # sched: [2:2.00]
; HASWELL-NEXT: retq # sched: [2:1.00]
;
; BTVER2-LABEL: test_blendvpd:
; BTVER2: # BB#0:
@ -176,8 +176,8 @@ define <4 x float> @test_blendvps(<4 x float> %a0, <4 x float> %a1, <4 x float>
; HASWELL-LABEL: test_blendvps:
; HASWELL: # BB#0:
; HASWELL-NEXT: vblendvps %xmm2, %xmm1, %xmm0, %xmm0 # sched: [2:2.00]
; HASWELL-NEXT: vblendvps %xmm2, (%rdi), %xmm0, %xmm0 # sched: [6:2.00]
; HASWELL-NEXT: retq # sched: [1:1.00]
; HASWELL-NEXT: vblendvps %xmm2, (%rdi), %xmm0, %xmm0 # sched: [2:2.00]
; HASWELL-NEXT: retq # sched: [2:1.00]
;
; BTVER2-LABEL: test_blendvps:
; BTVER2: # BB#0:
@ -219,8 +219,8 @@ define <2 x double> @test_dppd(<2 x double> %a0, <2 x double> %a1, <2 x double>
; HASWELL-LABEL: test_dppd:
; HASWELL: # BB#0:
; HASWELL-NEXT: vdppd $7, %xmm1, %xmm0, %xmm0 # sched: [9:1.00]
; HASWELL-NEXT: vdppd $7, (%rdi), %xmm0, %xmm0 # sched: [13:1.00]
; HASWELL-NEXT: retq # sched: [1:1.00]
; HASWELL-NEXT: vdppd $7, (%rdi), %xmm0, %xmm0 # sched: [9:1.00]
; HASWELL-NEXT: retq # sched: [2:1.00]
;
; BTVER2-LABEL: test_dppd:
; BTVER2: # BB#0:
@ -262,8 +262,8 @@ define <4 x float> @test_dpps(<4 x float> %a0, <4 x float> %a1, <4 x float> *%a2
; HASWELL-LABEL: test_dpps:
; HASWELL: # BB#0:
; HASWELL-NEXT: vdpps $7, %xmm1, %xmm0, %xmm0 # sched: [14:2.00]
; HASWELL-NEXT: vdpps $7, (%rdi), %xmm0, %xmm0 # sched: [18:2.00]
; HASWELL-NEXT: retq # sched: [1:1.00]
; HASWELL-NEXT: vdpps $7, (%rdi), %xmm0, %xmm0 # sched: [14:2.00]
; HASWELL-NEXT: retq # sched: [2:1.00]
;
; BTVER2-LABEL: test_dpps:
; BTVER2: # BB#0:
@ -305,8 +305,8 @@ define <4 x float> @test_insertps(<4 x float> %a0, <4 x float> %a1, float *%a2)
; HASWELL-LABEL: test_insertps:
; HASWELL: # BB#0:
; HASWELL-NEXT: vinsertps {{.*#+}} xmm0 = zero,xmm1[0],xmm0[2,3] sched: [1:1.00]
; HASWELL-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1,2],mem[0] sched: [5:1.00]
; HASWELL-NEXT: retq # sched: [1:1.00]
; HASWELL-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1,2],mem[0] sched: [1:1.00]
; HASWELL-NEXT: retq # sched: [2:1.00]
;
; BTVER2-LABEL: test_insertps:
; BTVER2: # BB#0:
@ -344,8 +344,8 @@ define <2 x i64> @test_movntdqa(i8* %a0) {
;
; HASWELL-LABEL: test_movntdqa:
; HASWELL: # BB#0:
; HASWELL-NEXT: vmovntdqa (%rdi), %xmm0 # sched: [4:0.50]
; HASWELL-NEXT: retq # sched: [1:1.00]
; HASWELL-NEXT: vmovntdqa (%rdi), %xmm0 # sched: [1:0.50]
; HASWELL-NEXT: retq # sched: [2:1.00]
;
; BTVER2-LABEL: test_movntdqa:
; BTVER2: # BB#0:
@ -382,9 +382,9 @@ define <8 x i16> @test_mpsadbw(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> *%a2) {
;
; HASWELL-LABEL: test_mpsadbw:
; HASWELL: # BB#0:
; HASWELL-NEXT: vmpsadbw $7, %xmm1, %xmm0, %xmm0 # sched: [6:2.00]
; HASWELL-NEXT: vmpsadbw $7, (%rdi), %xmm0, %xmm0 # sched: [6:2.00]
; HASWELL-NEXT: retq # sched: [1:1.00]
; HASWELL-NEXT: vmpsadbw $7, %xmm1, %xmm0, %xmm0 # sched: [7:2.00]
; HASWELL-NEXT: vmpsadbw $7, (%rdi), %xmm0, %xmm0 # sched: [7:2.00]
; HASWELL-NEXT: retq # sched: [2:1.00]
;
; BTVER2-LABEL: test_mpsadbw:
; BTVER2: # BB#0:
@ -427,8 +427,8 @@ define <8 x i16> @test_packusdw(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) {
; HASWELL-LABEL: test_packusdw:
; HASWELL: # BB#0:
; HASWELL-NEXT: vpackusdw %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
; HASWELL-NEXT: vpackusdw (%rdi), %xmm0, %xmm0 # sched: [5:1.00]
; HASWELL-NEXT: retq # sched: [1:1.00]
; HASWELL-NEXT: vpackusdw (%rdi), %xmm0, %xmm0 # sched: [1:1.00]
; HASWELL-NEXT: retq # sched: [2:1.00]
;
; BTVER2-LABEL: test_packusdw:
; BTVER2: # BB#0:
@ -477,8 +477,8 @@ define <16 x i8> @test_pblendvb(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> %a2, <16
; HASWELL-LABEL: test_pblendvb:
; HASWELL: # BB#0:
; HASWELL-NEXT: vpblendvb %xmm2, %xmm1, %xmm0, %xmm0 # sched: [2:2.00]
; HASWELL-NEXT: vpblendvb %xmm2, (%rdi), %xmm0, %xmm0 # sched: [6:2.00]
; HASWELL-NEXT: retq # sched: [1:1.00]
; HASWELL-NEXT: vpblendvb %xmm2, (%rdi), %xmm0, %xmm0 # sched: [2:2.00]
; HASWELL-NEXT: retq # sched: [2:1.00]
;
; BTVER2-LABEL: test_pblendvb:
; BTVER2: # BB#0:
@ -521,7 +521,7 @@ define <8 x i16> @test_pblendw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) {
; HASWELL: # BB#0:
; HASWELL-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3],xmm0[4],xmm1[5],xmm0[6],xmm1[7] sched: [1:1.00]
; HASWELL-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1],mem[2,3],xmm0[4,5,6],mem[7] sched: [4:1.00]
; HASWELL-NEXT: retq # sched: [1:1.00]
; HASWELL-NEXT: retq # sched: [2:1.00]
;
; BTVER2-LABEL: test_pblendw:
; BTVER2: # BB#0:
@ -562,8 +562,8 @@ define <2 x i64> @test_pcmpeqq(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> *%a2) {
; HASWELL-LABEL: test_pcmpeqq:
; HASWELL: # BB#0:
; HASWELL-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; HASWELL-NEXT: vpcmpeqq (%rdi), %xmm0, %xmm0 # sched: [5:0.50]
; HASWELL-NEXT: retq # sched: [1:1.00]
; HASWELL-NEXT: vpcmpeqq (%rdi), %xmm0, %xmm0 # sched: [1:0.50]
; HASWELL-NEXT: retq # sched: [2:1.00]
;
; BTVER2-LABEL: test_pcmpeqq:
; BTVER2: # BB#0:
@ -605,9 +605,9 @@ define i32 @test_pextrb(<16 x i8> %a0, i8 *%a1) {
;
; HASWELL-LABEL: test_pextrb:
; HASWELL: # BB#0:
; HASWELL-NEXT: vpextrb $3, %xmm0, %eax # sched: [1:1.00]
; HASWELL-NEXT: vpextrb $1, %xmm0, (%rdi) # sched: [5:1.00]
; HASWELL-NEXT: retq # sched: [1:1.00]
; HASWELL-NEXT: vpextrb $3, %xmm0, %eax # sched: [2:1.00]
; HASWELL-NEXT: vpextrb $1, %xmm0, (%rdi) # sched: [1:1.00]
; HASWELL-NEXT: retq # sched: [2:1.00]
;
; BTVER2-LABEL: test_pextrb:
; BTVER2: # BB#0:
@ -648,9 +648,9 @@ define i32 @test_pextrd(<4 x i32> %a0, i32 *%a1) {
;
; HASWELL-LABEL: test_pextrd:
; HASWELL: # BB#0:
; HASWELL-NEXT: vpextrd $3, %xmm0, %eax # sched: [1:1.00]
; HASWELL-NEXT: vpextrd $1, %xmm0, (%rdi) # sched: [5:1.00]
; HASWELL-NEXT: retq # sched: [1:1.00]
; HASWELL-NEXT: vpextrd $3, %xmm0, %eax # sched: [2:1.00]
; HASWELL-NEXT: vpextrd $1, %xmm0, (%rdi) # sched: [1:1.00]
; HASWELL-NEXT: retq # sched: [2:1.00]
;
; BTVER2-LABEL: test_pextrd:
; BTVER2: # BB#0:
@ -690,9 +690,9 @@ define i64 @test_pextrq(<2 x i64> %a0, <2 x i64> %a1, i64 *%a2) {
;
; HASWELL-LABEL: test_pextrq:
; HASWELL: # BB#0:
; HASWELL-NEXT: vpextrq $1, %xmm0, %rax # sched: [1:1.00]
; HASWELL-NEXT: vpextrq $1, %xmm0, (%rdi) # sched: [5:1.00]
; HASWELL-NEXT: retq # sched: [1:1.00]
; HASWELL-NEXT: vpextrq $1, %xmm0, %rax # sched: [2:1.00]
; HASWELL-NEXT: vpextrq $1, %xmm0, (%rdi) # sched: [1:1.00]
; HASWELL-NEXT: retq # sched: [2:1.00]
;
; BTVER2-LABEL: test_pextrq:
; BTVER2: # BB#0:
@ -732,9 +732,9 @@ define i32 @test_pextrw(<8 x i16> %a0, i16 *%a1) {
;
; HASWELL-LABEL: test_pextrw:
; HASWELL: # BB#0:
; HASWELL-NEXT: vpextrw $3, %xmm0, %eax # sched: [1:1.00]
; HASWELL-NEXT: vpextrw $1, %xmm0, (%rdi) # sched: [5:1.00]
; HASWELL-NEXT: retq # sched: [1:1.00]
; HASWELL-NEXT: vpextrw $3, %xmm0, %eax # sched: [2:1.00]
; HASWELL-NEXT: vpextrw $1, %xmm0, (%rdi) # sched: [1:1.00]
; HASWELL-NEXT: retq # sched: [2:1.00]
;
; BTVER2-LABEL: test_pextrw:
; BTVER2: # BB#0:
@ -775,9 +775,9 @@ define <8 x i16> @test_phminposuw(<8 x i16> *%a0) {
;
; HASWELL-LABEL: test_phminposuw:
; HASWELL: # BB#0:
; HASWELL-NEXT: vphminposuw (%rdi), %xmm0 # sched: [9:1.00]
; HASWELL-NEXT: vphminposuw (%rdi), %xmm0 # sched: [5:1.00]
; HASWELL-NEXT: vphminposuw %xmm0, %xmm0 # sched: [5:1.00]
; HASWELL-NEXT: retq # sched: [1:1.00]
; HASWELL-NEXT: retq # sched: [2:1.00]
;
; BTVER2-LABEL: test_phminposuw:
; BTVER2: # BB#0:
@ -818,9 +818,9 @@ define <16 x i8> @test_pinsrb(<16 x i8> %a0, i8 %a1, i8 *%a2) {
;
; HASWELL-LABEL: test_pinsrb:
; HASWELL: # BB#0:
; HASWELL-NEXT: vpinsrb $1, %edi, %xmm0, %xmm0 # sched: [1:1.00]
; HASWELL-NEXT: vpinsrb $3, (%rsi), %xmm0, %xmm0 # sched: [5:1.00]
; HASWELL-NEXT: retq # sched: [1:1.00]
; HASWELL-NEXT: vpinsrb $1, %edi, %xmm0, %xmm0 # sched: [2:2.00]
; HASWELL-NEXT: vpinsrb $3, (%rsi), %xmm0, %xmm0 # sched: [1:1.00]
; HASWELL-NEXT: retq # sched: [2:1.00]
;
; BTVER2-LABEL: test_pinsrb:
; BTVER2: # BB#0:
@ -860,9 +860,9 @@ define <4 x i32> @test_pinsrd(<4 x i32> %a0, i32 %a1, i32 *%a2) {
;
; HASWELL-LABEL: test_pinsrd:
; HASWELL: # BB#0:
; HASWELL-NEXT: vpinsrd $1, %edi, %xmm0, %xmm0 # sched: [1:1.00]
; HASWELL-NEXT: vpinsrd $3, (%rsi), %xmm0, %xmm0 # sched: [5:1.00]
; HASWELL-NEXT: retq # sched: [1:1.00]
; HASWELL-NEXT: vpinsrd $1, %edi, %xmm0, %xmm0 # sched: [2:2.00]
; HASWELL-NEXT: vpinsrd $3, (%rsi), %xmm0, %xmm0 # sched: [1:1.00]
; HASWELL-NEXT: retq # sched: [2:1.00]
;
; BTVER2-LABEL: test_pinsrd:
; BTVER2: # BB#0:
@ -905,10 +905,10 @@ define <2 x i64> @test_pinsrq(<2 x i64> %a0, <2 x i64> %a1, i64 %a2, i64 *%a3) {
;
; HASWELL-LABEL: test_pinsrq:
; HASWELL: # BB#0:
; HASWELL-NEXT: vpinsrq $1, %rdi, %xmm0, %xmm0 # sched: [1:1.00]
; HASWELL-NEXT: vpinsrq $1, (%rsi), %xmm1, %xmm1 # sched: [5:1.00]
; HASWELL-NEXT: vpinsrq $1, %rdi, %xmm0, %xmm0 # sched: [2:2.00]
; HASWELL-NEXT: vpinsrq $1, (%rsi), %xmm1, %xmm1 # sched: [1:1.00]
; HASWELL-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; HASWELL-NEXT: retq # sched: [1:1.00]
; HASWELL-NEXT: retq # sched: [2:1.00]
;
; BTVER2-LABEL: test_pinsrq:
; BTVER2: # BB#0:
@ -952,8 +952,8 @@ define <16 x i8> @test_pmaxsb(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> *%a2) {
; HASWELL-LABEL: test_pmaxsb:
; HASWELL: # BB#0:
; HASWELL-NEXT: vpmaxsb %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; HASWELL-NEXT: vpmaxsb (%rdi), %xmm0, %xmm0 # sched: [5:0.50]
; HASWELL-NEXT: retq # sched: [1:1.00]
; HASWELL-NEXT: vpmaxsb (%rdi), %xmm0, %xmm0 # sched: [1:0.50]
; HASWELL-NEXT: retq # sched: [2:1.00]
;
; BTVER2-LABEL: test_pmaxsb:
; BTVER2: # BB#0:
@ -995,8 +995,8 @@ define <4 x i32> @test_pmaxsd(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) {
; HASWELL-LABEL: test_pmaxsd:
; HASWELL: # BB#0:
; HASWELL-NEXT: vpmaxsd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; HASWELL-NEXT: vpmaxsd (%rdi), %xmm0, %xmm0 # sched: [5:0.50]
; HASWELL-NEXT: retq # sched: [1:1.00]
; HASWELL-NEXT: vpmaxsd (%rdi), %xmm0, %xmm0 # sched: [1:0.50]
; HASWELL-NEXT: retq # sched: [2:1.00]
;
; BTVER2-LABEL: test_pmaxsd:
; BTVER2: # BB#0:
@ -1038,8 +1038,8 @@ define <4 x i32> @test_pmaxud(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) {
; HASWELL-LABEL: test_pmaxud:
; HASWELL: # BB#0:
; HASWELL-NEXT: vpmaxud %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; HASWELL-NEXT: vpmaxud (%rdi), %xmm0, %xmm0 # sched: [5:0.50]
; HASWELL-NEXT: retq # sched: [1:1.00]
; HASWELL-NEXT: vpmaxud (%rdi), %xmm0, %xmm0 # sched: [1:0.50]
; HASWELL-NEXT: retq # sched: [2:1.00]
;
; BTVER2-LABEL: test_pmaxud:
; BTVER2: # BB#0:
@ -1081,8 +1081,8 @@ define <8 x i16> @test_pmaxuw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) {
; HASWELL-LABEL: test_pmaxuw:
; HASWELL: # BB#0:
; HASWELL-NEXT: vpmaxuw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; HASWELL-NEXT: vpmaxuw (%rdi), %xmm0, %xmm0 # sched: [5:0.50]
; HASWELL-NEXT: retq # sched: [1:1.00]
; HASWELL-NEXT: vpmaxuw (%rdi), %xmm0, %xmm0 # sched: [1:0.50]
; HASWELL-NEXT: retq # sched: [2:1.00]
;
; BTVER2-LABEL: test_pmaxuw:
; BTVER2: # BB#0:
@ -1124,8 +1124,8 @@ define <16 x i8> @test_pminsb(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> *%a2) {
; HASWELL-LABEL: test_pminsb:
; HASWELL: # BB#0:
; HASWELL-NEXT: vpminsb %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; HASWELL-NEXT: vpminsb (%rdi), %xmm0, %xmm0 # sched: [5:0.50]
; HASWELL-NEXT: retq # sched: [1:1.00]
; HASWELL-NEXT: vpminsb (%rdi), %xmm0, %xmm0 # sched: [1:0.50]
; HASWELL-NEXT: retq # sched: [2:1.00]
;
; BTVER2-LABEL: test_pminsb:
; BTVER2: # BB#0:
@ -1167,8 +1167,8 @@ define <4 x i32> @test_pminsd(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) {
; HASWELL-LABEL: test_pminsd:
; HASWELL: # BB#0:
; HASWELL-NEXT: vpminsd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; HASWELL-NEXT: vpminsd (%rdi), %xmm0, %xmm0 # sched: [5:0.50]
; HASWELL-NEXT: retq # sched: [1:1.00]
; HASWELL-NEXT: vpminsd (%rdi), %xmm0, %xmm0 # sched: [1:0.50]
; HASWELL-NEXT: retq # sched: [2:1.00]
;
; BTVER2-LABEL: test_pminsd:
; BTVER2: # BB#0:
@ -1210,8 +1210,8 @@ define <4 x i32> @test_pminud(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) {
; HASWELL-LABEL: test_pminud:
; HASWELL: # BB#0:
; HASWELL-NEXT: vpminud %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; HASWELL-NEXT: vpminud (%rdi), %xmm0, %xmm0 # sched: [5:0.50]
; HASWELL-NEXT: retq # sched: [1:1.00]
; HASWELL-NEXT: vpminud (%rdi), %xmm0, %xmm0 # sched: [1:0.50]
; HASWELL-NEXT: retq # sched: [2:1.00]
;
; BTVER2-LABEL: test_pminud:
; BTVER2: # BB#0:
@ -1253,8 +1253,8 @@ define <8 x i16> @test_pminuw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) {
; HASWELL-LABEL: test_pminuw:
; HASWELL: # BB#0:
; HASWELL-NEXT: vpminuw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; HASWELL-NEXT: vpminuw (%rdi), %xmm0, %xmm0 # sched: [5:0.50]
; HASWELL-NEXT: retq # sched: [1:1.00]
; HASWELL-NEXT: vpminuw (%rdi), %xmm0, %xmm0 # sched: [1:0.50]
; HASWELL-NEXT: retq # sched: [2:1.00]
;
; BTVER2-LABEL: test_pminuw:
; BTVER2: # BB#0:
@ -1300,9 +1300,9 @@ define <8 x i16> @test_pmovsxbw(<16 x i8> %a0, <8 x i8> *%a1) {
; HASWELL-LABEL: test_pmovsxbw:
; HASWELL: # BB#0:
; HASWELL-NEXT: vpmovsxbw %xmm0, %xmm0 # sched: [1:1.00]
; HASWELL-NEXT: vpmovsxbw (%rdi), %xmm1 # sched: [5:1.00]
; HASWELL-NEXT: vpmovsxbw (%rdi), %xmm1 # sched: [1:1.00]
; HASWELL-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; HASWELL-NEXT: retq # sched: [1:1.00]
; HASWELL-NEXT: retq # sched: [2:1.00]
;
; BTVER2-LABEL: test_pmovsxbw:
; BTVER2: # BB#0:
@ -1351,9 +1351,9 @@ define <4 x i32> @test_pmovsxbd(<16 x i8> %a0, <4 x i8> *%a1) {
; HASWELL-LABEL: test_pmovsxbd:
; HASWELL: # BB#0:
; HASWELL-NEXT: vpmovsxbd %xmm0, %xmm0 # sched: [1:1.00]
; HASWELL-NEXT: vpmovsxbd (%rdi), %xmm1 # sched: [5:1.00]
; HASWELL-NEXT: vpmovsxbd (%rdi), %xmm1 # sched: [1:1.00]
; HASWELL-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; HASWELL-NEXT: retq # sched: [1:1.00]
; HASWELL-NEXT: retq # sched: [2:1.00]
;
; BTVER2-LABEL: test_pmovsxbd:
; BTVER2: # BB#0:
@ -1402,9 +1402,9 @@ define <2 x i64> @test_pmovsxbq(<16 x i8> %a0, <2 x i8> *%a1) {
; HASWELL-LABEL: test_pmovsxbq:
; HASWELL: # BB#0:
; HASWELL-NEXT: vpmovsxbq %xmm0, %xmm0 # sched: [1:1.00]
; HASWELL-NEXT: vpmovsxbq (%rdi), %xmm1 # sched: [5:1.00]
; HASWELL-NEXT: vpmovsxbq (%rdi), %xmm1 # sched: [1:1.00]
; HASWELL-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; HASWELL-NEXT: retq # sched: [1:1.00]
; HASWELL-NEXT: retq # sched: [2:1.00]
;
; BTVER2-LABEL: test_pmovsxbq:
; BTVER2: # BB#0:
@ -1453,9 +1453,9 @@ define <2 x i64> @test_pmovsxdq(<4 x i32> %a0, <2 x i32> *%a1) {
; HASWELL-LABEL: test_pmovsxdq:
; HASWELL: # BB#0:
; HASWELL-NEXT: vpmovsxdq %xmm0, %xmm0 # sched: [1:1.00]
; HASWELL-NEXT: vpmovsxdq (%rdi), %xmm1 # sched: [5:1.00]
; HASWELL-NEXT: vpmovsxdq (%rdi), %xmm1 # sched: [1:1.00]
; HASWELL-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; HASWELL-NEXT: retq # sched: [1:1.00]
; HASWELL-NEXT: retq # sched: [2:1.00]
;
; BTVER2-LABEL: test_pmovsxdq:
; BTVER2: # BB#0:
@ -1504,9 +1504,9 @@ define <4 x i32> @test_pmovsxwd(<8 x i16> %a0, <4 x i16> *%a1) {
; HASWELL-LABEL: test_pmovsxwd:
; HASWELL: # BB#0:
; HASWELL-NEXT: vpmovsxwd %xmm0, %xmm0 # sched: [1:1.00]
; HASWELL-NEXT: vpmovsxwd (%rdi), %xmm1 # sched: [5:1.00]
; HASWELL-NEXT: vpmovsxwd (%rdi), %xmm1 # sched: [1:1.00]
; HASWELL-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; HASWELL-NEXT: retq # sched: [1:1.00]
; HASWELL-NEXT: retq # sched: [2:1.00]
;
; BTVER2-LABEL: test_pmovsxwd:
; BTVER2: # BB#0:
@ -1555,9 +1555,9 @@ define <2 x i64> @test_pmovsxwq(<8 x i16> %a0, <2 x i16> *%a1) {
; HASWELL-LABEL: test_pmovsxwq:
; HASWELL: # BB#0:
; HASWELL-NEXT: vpmovsxwq %xmm0, %xmm0 # sched: [1:1.00]
; HASWELL-NEXT: vpmovsxwq (%rdi), %xmm1 # sched: [5:1.00]
; HASWELL-NEXT: vpmovsxwq (%rdi), %xmm1 # sched: [1:1.00]
; HASWELL-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; HASWELL-NEXT: retq # sched: [1:1.00]
; HASWELL-NEXT: retq # sched: [2:1.00]
;
; BTVER2-LABEL: test_pmovsxwq:
; BTVER2: # BB#0:
@ -1606,9 +1606,9 @@ define <8 x i16> @test_pmovzxbw(<16 x i8> %a0, <8 x i8> *%a1) {
; HASWELL-LABEL: test_pmovzxbw:
; HASWELL: # BB#0:
; HASWELL-NEXT: vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero sched: [1:1.00]
; HASWELL-NEXT: vpmovzxbw {{.*#+}} xmm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero sched: [5:1.00]
; HASWELL-NEXT: vpmovzxbw {{.*#+}} xmm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero sched: [1:1.00]
; HASWELL-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; HASWELL-NEXT: retq # sched: [1:1.00]
; HASWELL-NEXT: retq # sched: [2:1.00]
;
; BTVER2-LABEL: test_pmovzxbw:
; BTVER2: # BB#0:
@ -1657,9 +1657,9 @@ define <4 x i32> @test_pmovzxbd(<16 x i8> %a0, <4 x i8> *%a1) {
; HASWELL-LABEL: test_pmovzxbd:
; HASWELL: # BB#0:
; HASWELL-NEXT: vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero sched: [1:1.00]
; HASWELL-NEXT: vpmovzxbd {{.*#+}} xmm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero sched: [5:1.00]
; HASWELL-NEXT: vpmovzxbd {{.*#+}} xmm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero sched: [1:1.00]
; HASWELL-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; HASWELL-NEXT: retq # sched: [1:1.00]
; HASWELL-NEXT: retq # sched: [2:1.00]
;
; BTVER2-LABEL: test_pmovzxbd:
; BTVER2: # BB#0:
@ -1708,9 +1708,9 @@ define <2 x i64> @test_pmovzxbq(<16 x i8> %a0, <2 x i8> *%a1) {
; HASWELL-LABEL: test_pmovzxbq:
; HASWELL: # BB#0:
; HASWELL-NEXT: vpmovzxbq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero sched: [1:1.00]
; HASWELL-NEXT: vpmovzxbq {{.*#+}} xmm1 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero sched: [5:1.00]
; HASWELL-NEXT: vpmovzxbq {{.*#+}} xmm1 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero sched: [1:1.00]
; HASWELL-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; HASWELL-NEXT: retq # sched: [1:1.00]
; HASWELL-NEXT: retq # sched: [2:1.00]
;
; BTVER2-LABEL: test_pmovzxbq:
; BTVER2: # BB#0:
@ -1759,9 +1759,9 @@ define <2 x i64> @test_pmovzxdq(<4 x i32> %a0, <2 x i32> *%a1) {
; HASWELL-LABEL: test_pmovzxdq:
; HASWELL: # BB#0:
; HASWELL-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero sched: [1:1.00]
; HASWELL-NEXT: vpmovzxdq {{.*#+}} xmm1 = mem[0],zero,mem[1],zero sched: [5:1.00]
; HASWELL-NEXT: vpmovzxdq {{.*#+}} xmm1 = mem[0],zero,mem[1],zero sched: [1:1.00]
; HASWELL-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; HASWELL-NEXT: retq # sched: [1:1.00]
; HASWELL-NEXT: retq # sched: [2:1.00]
;
; BTVER2-LABEL: test_pmovzxdq:
; BTVER2: # BB#0:
@ -1810,9 +1810,9 @@ define <4 x i32> @test_pmovzxwd(<8 x i16> %a0, <4 x i16> *%a1) {
; HASWELL-LABEL: test_pmovzxwd:
; HASWELL: # BB#0:
; HASWELL-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero sched: [1:1.00]
; HASWELL-NEXT: vpmovzxwd {{.*#+}} xmm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero sched: [5:1.00]
; HASWELL-NEXT: vpmovzxwd {{.*#+}} xmm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero sched: [1:1.00]
; HASWELL-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; HASWELL-NEXT: retq # sched: [1:1.00]
; HASWELL-NEXT: retq # sched: [2:1.00]
;
; BTVER2-LABEL: test_pmovzxwd:
; BTVER2: # BB#0:
@ -1861,9 +1861,9 @@ define <2 x i64> @test_pmovzxwq(<8 x i16> %a0, <2 x i16> *%a1) {
; HASWELL-LABEL: test_pmovzxwq:
; HASWELL: # BB#0:
; HASWELL-NEXT: vpmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero sched: [1:1.00]
; HASWELL-NEXT: vpmovzxwq {{.*#+}} xmm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero sched: [5:1.00]
; HASWELL-NEXT: vpmovzxwq {{.*#+}} xmm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero sched: [1:1.00]
; HASWELL-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; HASWELL-NEXT: retq # sched: [1:1.00]
; HASWELL-NEXT: retq # sched: [2:1.00]
;
; BTVER2-LABEL: test_pmovzxwq:
; BTVER2: # BB#0:
@ -1908,8 +1908,8 @@ define <2 x i64> @test_pmuldq(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) {
; HASWELL-LABEL: test_pmuldq:
; HASWELL: # BB#0:
; HASWELL-NEXT: vpmuldq %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
; HASWELL-NEXT: vpmuldq (%rdi), %xmm0, %xmm0 # sched: [9:1.00]
; HASWELL-NEXT: retq # sched: [1:1.00]
; HASWELL-NEXT: vpmuldq (%rdi), %xmm0, %xmm0 # sched: [5:1.00]
; HASWELL-NEXT: retq # sched: [2:1.00]
;
; BTVER2-LABEL: test_pmuldq:
; BTVER2: # BB#0:
@ -1953,7 +1953,7 @@ define <4 x i32> @test_pmulld(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) {
; HASWELL: # BB#0:
; HASWELL-NEXT: vpmulld %xmm1, %xmm0, %xmm0 # sched: [10:2.00]
; HASWELL-NEXT: vpmulld (%rdi), %xmm0, %xmm0 # sched: [10:2.00]
; HASWELL-NEXT: retq # sched: [1:1.00]
; HASWELL-NEXT: retq # sched: [2:1.00]
;
; BTVER2-LABEL: test_pmulld:
; BTVER2: # BB#0:
@ -2011,7 +2011,7 @@ define i32 @test_ptest(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> *%a2) {
; HASWELL-NEXT: setb %cl # sched: [1:0.50]
; HASWELL-NEXT: andb %al, %cl # sched: [1:0.25]
; HASWELL-NEXT: movzbl %cl, %eax # sched: [1:0.25]
; HASWELL-NEXT: retq # sched: [1:1.00]
; HASWELL-NEXT: retq # sched: [2:1.00]
;
; BTVER2-LABEL: test_ptest:
; BTVER2: # BB#0:
@ -2065,10 +2065,10 @@ define <2 x double> @test_roundpd(<2 x double> %a0, <2 x double> *%a1) {
;
; HASWELL-LABEL: test_roundpd:
; HASWELL: # BB#0:
; HASWELL-NEXT: vroundpd $7, %xmm0, %xmm0 # sched: [6:2.00]
; HASWELL-NEXT: vroundpd $7, (%rdi), %xmm1 # sched: [10:2.00]
; HASWELL-NEXT: vroundpd $7, %xmm0, %xmm0 # sched: [5:1.25]
; HASWELL-NEXT: vroundpd $7, (%rdi), %xmm1 # sched: [6:2.00]
; HASWELL-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; HASWELL-NEXT: retq # sched: [1:1.00]
; HASWELL-NEXT: retq # sched: [2:1.00]
;
; BTVER2-LABEL: test_roundpd:
; BTVER2: # BB#0:
@ -2116,10 +2116,10 @@ define <4 x float> @test_roundps(<4 x float> %a0, <4 x float> *%a1) {
;
; HASWELL-LABEL: test_roundps:
; HASWELL: # BB#0:
; HASWELL-NEXT: vroundps $7, %xmm0, %xmm0 # sched: [6:2.00]
; HASWELL-NEXT: vroundps $7, (%rdi), %xmm1 # sched: [10:2.00]
; HASWELL-NEXT: vroundps $7, %xmm0, %xmm0 # sched: [5:1.25]
; HASWELL-NEXT: vroundps $7, (%rdi), %xmm1 # sched: [6:2.00]
; HASWELL-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; HASWELL-NEXT: retq # sched: [1:1.00]
; HASWELL-NEXT: retq # sched: [2:1.00]
;
; BTVER2-LABEL: test_roundps:
; BTVER2: # BB#0:
@ -2168,10 +2168,10 @@ define <2 x double> @test_roundsd(<2 x double> %a0, <2 x double> %a1, <2 x doubl
;
; HASWELL-LABEL: test_roundsd:
; HASWELL: # BB#0:
; HASWELL-NEXT: vroundsd $7, %xmm1, %xmm0, %xmm1 # sched: [6:2.00]
; HASWELL-NEXT: vroundsd $7, (%rdi), %xmm0, %xmm0 # sched: [10:2.00]
; HASWELL-NEXT: vroundsd $7, %xmm1, %xmm0, %xmm1 # sched: [5:1.25]
; HASWELL-NEXT: vroundsd $7, (%rdi), %xmm0, %xmm0 # sched: [6:2.00]
; HASWELL-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
; HASWELL-NEXT: retq # sched: [1:1.00]
; HASWELL-NEXT: retq # sched: [2:1.00]
;
; BTVER2-LABEL: test_roundsd:
; BTVER2: # BB#0:
@ -2220,10 +2220,10 @@ define <4 x float> @test_roundss(<4 x float> %a0, <4 x float> %a1, <4 x float> *
;
; HASWELL-LABEL: test_roundss:
; HASWELL: # BB#0:
; HASWELL-NEXT: vroundss $7, %xmm1, %xmm0, %xmm1 # sched: [6:2.00]
; HASWELL-NEXT: vroundss $7, (%rdi), %xmm0, %xmm0 # sched: [10:2.00]
; HASWELL-NEXT: vroundss $7, %xmm1, %xmm0, %xmm1 # sched: [5:1.25]
; HASWELL-NEXT: vroundss $7, (%rdi), %xmm0, %xmm0 # sched: [6:2.00]
; HASWELL-NEXT: vaddps %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
; HASWELL-NEXT: retq # sched: [1:1.00]
; HASWELL-NEXT: retq # sched: [2:1.00]
;
; BTVER2-LABEL: test_roundss:
; BTVER2: # BB#0:

View File

@ -35,7 +35,7 @@ define i32 @crc32_32_8(i32 %a0, i8 %a1, i8 *%a2) {
; HASWELL-NEXT: crc32b %sil, %edi # sched: [3:1.00]
; HASWELL-NEXT: crc32b (%rdx), %edi # sched: [7:1.00]
; HASWELL-NEXT: movl %edi, %eax # sched: [1:0.25]
; HASWELL-NEXT: retq # sched: [1:1.00]
; HASWELL-NEXT: retq # sched: [2:1.00]
;
; BTVER2-LABEL: crc32_32_8:
; BTVER2: # BB#0:
@ -84,7 +84,7 @@ define i32 @crc32_32_16(i32 %a0, i16 %a1, i16 *%a2) {
; HASWELL-NEXT: crc32w %si, %edi # sched: [3:1.00]
; HASWELL-NEXT: crc32w (%rdx), %edi # sched: [7:1.00]
; HASWELL-NEXT: movl %edi, %eax # sched: [1:0.25]
; HASWELL-NEXT: retq # sched: [1:1.00]
; HASWELL-NEXT: retq # sched: [2:1.00]
;
; BTVER2-LABEL: crc32_32_16:
; BTVER2: # BB#0:
@ -133,7 +133,7 @@ define i32 @crc32_32_32(i32 %a0, i32 %a1, i32 *%a2) {
; HASWELL-NEXT: crc32l %esi, %edi # sched: [3:1.00]
; HASWELL-NEXT: crc32l (%rdx), %edi # sched: [7:1.00]
; HASWELL-NEXT: movl %edi, %eax # sched: [1:0.25]
; HASWELL-NEXT: retq # sched: [1:1.00]
; HASWELL-NEXT: retq # sched: [2:1.00]
;
; BTVER2-LABEL: crc32_32_32:
; BTVER2: # BB#0:
@ -182,7 +182,7 @@ define i64 @crc32_64_8(i64 %a0, i8 %a1, i8 *%a2) nounwind {
; HASWELL-NEXT: crc32b %sil, %edi # sched: [3:1.00]
; HASWELL-NEXT: crc32b (%rdx), %edi # sched: [7:1.00]
; HASWELL-NEXT: movq %rdi, %rax # sched: [1:0.25]
; HASWELL-NEXT: retq # sched: [1:1.00]
; HASWELL-NEXT: retq # sched: [2:1.00]
;
; BTVER2-LABEL: crc32_64_8:
; BTVER2: # BB#0:
@ -231,7 +231,7 @@ define i64 @crc32_64_64(i64 %a0, i64 %a1, i64 *%a2) {
; HASWELL-NEXT: crc32q %rsi, %rdi # sched: [3:1.00]
; HASWELL-NEXT: crc32q (%rdx), %rdi # sched: [7:1.00]
; HASWELL-NEXT: movq %rdi, %rax # sched: [1:0.25]
; HASWELL-NEXT: retq # sched: [1:1.00]
; HASWELL-NEXT: retq # sched: [2:1.00]
;
; BTVER2-LABEL: crc32_64_64:
; BTVER2: # BB#0:
@ -297,14 +297,14 @@ define i32 @test_pcmpestri(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> *%a2) {
; HASWELL: # BB#0:
; HASWELL-NEXT: movl $7, %eax # sched: [1:0.25]
; HASWELL-NEXT: movl $7, %edx # sched: [1:0.25]
; HASWELL-NEXT: vpcmpestri $7, %xmm1, %xmm0 # sched: [11:3.00]
; HASWELL-NEXT: vpcmpestri $7, %xmm1, %xmm0 # sched: [18:4.00]
; HASWELL-NEXT: movl %ecx, %esi # sched: [1:0.25]
; HASWELL-NEXT: movl $7, %eax # sched: [1:0.25]
; HASWELL-NEXT: movl $7, %edx # sched: [1:0.25]
; HASWELL-NEXT: vpcmpestri $7, (%rdi), %xmm0 # sched: [11:3.00]
; HASWELL-NEXT: vpcmpestri $7, (%rdi), %xmm0 # sched: [18:4.00]
; HASWELL-NEXT: # kill: %ECX<def> %ECX<kill> %RCX<def>
; HASWELL-NEXT: leal (%rcx,%rsi), %eax # sched: [1:0.50]
; HASWELL-NEXT: retq # sched: [1:1.00]
; HASWELL-NEXT: retq # sched: [2:1.00]
;
; BTVER2-LABEL: test_pcmpestri:
; BTVER2: # BB#0:
@ -374,11 +374,11 @@ define <16 x i8> @test_pcmpestrm(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> *%a2) {
; HASWELL: # BB#0:
; HASWELL-NEXT: movl $7, %eax # sched: [1:0.25]
; HASWELL-NEXT: movl $7, %edx # sched: [1:0.25]
; HASWELL-NEXT: vpcmpestrm $7, %xmm1, %xmm0 # sched: [10:4.00]
; HASWELL-NEXT: vpcmpestrm $7, %xmm1, %xmm0 # sched: [19:4.00]
; HASWELL-NEXT: movl $7, %eax # sched: [1:0.25]
; HASWELL-NEXT: movl $7, %edx # sched: [1:0.25]
; HASWELL-NEXT: vpcmpestrm $7, (%rdi), %xmm0 # sched: [10:3.00]
; HASWELL-NEXT: retq # sched: [1:1.00]
; HASWELL-NEXT: vpcmpestrm $7, (%rdi), %xmm0 # sched: [19:4.00]
; HASWELL-NEXT: retq # sched: [2:1.00]
;
; BTVER2-LABEL: test_pcmpestrm:
; BTVER2: # BB#0:
@ -441,7 +441,7 @@ define i32 @test_pcmpistri(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> *%a2) {
; HASWELL-NEXT: vpcmpistri $7, (%rdi), %xmm0 # sched: [11:3.00]
; HASWELL-NEXT: # kill: %ECX<def> %ECX<kill> %RCX<def>
; HASWELL-NEXT: leal (%rcx,%rax), %eax # sched: [1:0.50]
; HASWELL-NEXT: retq # sched: [1:1.00]
; HASWELL-NEXT: retq # sched: [2:1.00]
;
; BTVER2-LABEL: test_pcmpistri:
; BTVER2: # BB#0:
@ -489,9 +489,9 @@ define <16 x i8> @test_pcmpistrm(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> *%a2) {
;
; HASWELL-LABEL: test_pcmpistrm:
; HASWELL: # BB#0:
; HASWELL-NEXT: vpcmpistrm $7, %xmm1, %xmm0 # sched: [10:3.00]
; HASWELL-NEXT: vpcmpistrm $7, (%rdi), %xmm0 # sched: [10:3.00]
; HASWELL-NEXT: retq # sched: [1:1.00]
; HASWELL-NEXT: vpcmpistrm $7, %xmm1, %xmm0 # sched: [11:3.00]
; HASWELL-NEXT: vpcmpistrm $7, (%rdi), %xmm0 # sched: [11:3.00]
; HASWELL-NEXT: retq # sched: [2:1.00]
;
; BTVER2-LABEL: test_pcmpistrm:
; BTVER2: # BB#0:
@ -534,7 +534,7 @@ define <2 x i64> @test_pcmpgtq(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> *%a2) {
; HASWELL: # BB#0:
; HASWELL-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
; HASWELL-NEXT: vpcmpgtq (%rdi), %xmm0, %xmm0 # sched: [5:1.00]
; HASWELL-NEXT: retq # sched: [1:1.00]
; HASWELL-NEXT: retq # sched: [2:1.00]
;
; BTVER2-LABEL: test_pcmpgtq:
; BTVER2: # BB#0:
@ -576,9 +576,9 @@ define <2 x i64> @test_pclmulqdq(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> *%a2) {
;
; HASWELL-LABEL: test_pclmulqdq:
; HASWELL: # BB#0:
; HASWELL-NEXT: vpclmulqdq $0, %xmm1, %xmm0, %xmm0 # sched: [7:2.00]
; HASWELL-NEXT: vpclmulqdq $0, (%rdi), %xmm0, %xmm0 # sched: [7:2.00]
; HASWELL-NEXT: retq # sched: [1:1.00]
; HASWELL-NEXT: vpclmulqdq $0, %xmm1, %xmm0, %xmm0 # sched: [11:2.00]
; HASWELL-NEXT: vpclmulqdq $0, (%rdi), %xmm0, %xmm0 # sched: [11:2.00]
; HASWELL-NEXT: retq # sched: [2:1.00]
;
; BTVER2-LABEL: test_pclmulqdq:
; BTVER2: # BB#0:

View File

@ -42,9 +42,9 @@ define <16 x i8> @test_pabsb(<16 x i8> %a0, <16 x i8> *%a1) {
; HASWELL-LABEL: test_pabsb:
; HASWELL: # BB#0:
; HASWELL-NEXT: vpabsb %xmm0, %xmm0 # sched: [1:0.50]
; HASWELL-NEXT: vpabsb (%rdi), %xmm1 # sched: [5:0.50]
; HASWELL-NEXT: vpabsb (%rdi), %xmm1 # sched: [1:0.50]
; HASWELL-NEXT: vpor %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
; HASWELL-NEXT: retq # sched: [1:1.00]
; HASWELL-NEXT: retq # sched: [2:1.00]
;
; BTVER2-LABEL: test_pabsb:
; BTVER2: # BB#0:
@ -100,9 +100,9 @@ define <4 x i32> @test_pabsd(<4 x i32> %a0, <4 x i32> *%a1) {
; HASWELL-LABEL: test_pabsd:
; HASWELL: # BB#0:
; HASWELL-NEXT: vpabsd %xmm0, %xmm0 # sched: [1:0.50]
; HASWELL-NEXT: vpabsd (%rdi), %xmm1 # sched: [5:0.50]
; HASWELL-NEXT: vpabsd (%rdi), %xmm1 # sched: [1:0.50]
; HASWELL-NEXT: vpor %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
; HASWELL-NEXT: retq # sched: [1:1.00]
; HASWELL-NEXT: retq # sched: [2:1.00]
;
; BTVER2-LABEL: test_pabsd:
; BTVER2: # BB#0:
@ -158,9 +158,9 @@ define <8 x i16> @test_pabsw(<8 x i16> %a0, <8 x i16> *%a1) {
; HASWELL-LABEL: test_pabsw:
; HASWELL: # BB#0:
; HASWELL-NEXT: vpabsw %xmm0, %xmm0 # sched: [1:0.50]
; HASWELL-NEXT: vpabsw (%rdi), %xmm1 # sched: [5:0.50]
; HASWELL-NEXT: vpabsw (%rdi), %xmm1 # sched: [1:0.50]
; HASWELL-NEXT: vpor %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
; HASWELL-NEXT: retq # sched: [1:1.00]
; HASWELL-NEXT: retq # sched: [2:1.00]
;
; BTVER2-LABEL: test_pabsw:
; BTVER2: # BB#0:
@ -216,8 +216,8 @@ define <8 x i16> @test_palignr(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) {
; HASWELL-LABEL: test_palignr:
; HASWELL: # BB#0:
; HASWELL-NEXT: vpalignr {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5] sched: [1:1.00]
; HASWELL-NEXT: vpalignr {{.*#+}} xmm0 = mem[14,15],xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13] sched: [5:1.00]
; HASWELL-NEXT: retq # sched: [1:1.00]
; HASWELL-NEXT: vpalignr {{.*#+}} xmm0 = mem[14,15],xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13] sched: [1:1.00]
; HASWELL-NEXT: retq # sched: [2:1.00]
;
; BTVER2-LABEL: test_palignr:
; BTVER2: # BB#0:
@ -264,8 +264,8 @@ define <4 x i32> @test_phaddd(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) {
; HASWELL-LABEL: test_phaddd:
; HASWELL: # BB#0:
; HASWELL-NEXT: vphaddd %xmm1, %xmm0, %xmm0 # sched: [3:2.00]
; HASWELL-NEXT: vphaddd (%rdi), %xmm0, %xmm0 # sched: [6:2.00]
; HASWELL-NEXT: retq # sched: [1:1.00]
; HASWELL-NEXT: vphaddd (%rdi), %xmm0, %xmm0 # sched: [3:2.00]
; HASWELL-NEXT: retq # sched: [2:1.00]
;
; BTVER2-LABEL: test_phaddd:
; BTVER2: # BB#0:
@ -313,8 +313,8 @@ define <8 x i16> @test_phaddsw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) {
; HASWELL-LABEL: test_phaddsw:
; HASWELL: # BB#0:
; HASWELL-NEXT: vphaddsw %xmm1, %xmm0, %xmm0 # sched: [3:2.00]
; HASWELL-NEXT: vphaddsw (%rdi), %xmm0, %xmm0 # sched: [6:2.00]
; HASWELL-NEXT: retq # sched: [1:1.00]
; HASWELL-NEXT: vphaddsw (%rdi), %xmm0, %xmm0 # sched: [3:2.00]
; HASWELL-NEXT: retq # sched: [2:1.00]
;
; BTVER2-LABEL: test_phaddsw:
; BTVER2: # BB#0:
@ -362,8 +362,8 @@ define <8 x i16> @test_phaddw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) {
; HASWELL-LABEL: test_phaddw:
; HASWELL: # BB#0:
; HASWELL-NEXT: vphaddw %xmm1, %xmm0, %xmm0 # sched: [3:2.00]
; HASWELL-NEXT: vphaddw (%rdi), %xmm0, %xmm0 # sched: [6:2.00]
; HASWELL-NEXT: retq # sched: [1:1.00]
; HASWELL-NEXT: vphaddw (%rdi), %xmm0, %xmm0 # sched: [3:2.00]
; HASWELL-NEXT: retq # sched: [2:1.00]
;
; BTVER2-LABEL: test_phaddw:
; BTVER2: # BB#0:
@ -411,8 +411,8 @@ define <4 x i32> @test_phsubd(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) {
; HASWELL-LABEL: test_phsubd:
; HASWELL: # BB#0:
; HASWELL-NEXT: vphsubd %xmm1, %xmm0, %xmm0 # sched: [3:2.00]
; HASWELL-NEXT: vphsubd (%rdi), %xmm0, %xmm0 # sched: [6:2.00]
; HASWELL-NEXT: retq # sched: [1:1.00]
; HASWELL-NEXT: vphsubd (%rdi), %xmm0, %xmm0 # sched: [3:2.00]
; HASWELL-NEXT: retq # sched: [2:1.00]
;
; BTVER2-LABEL: test_phsubd:
; BTVER2: # BB#0:
@ -460,8 +460,8 @@ define <8 x i16> @test_phsubsw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) {
; HASWELL-LABEL: test_phsubsw:
; HASWELL: # BB#0:
; HASWELL-NEXT: vphsubsw %xmm1, %xmm0, %xmm0 # sched: [3:2.00]
; HASWELL-NEXT: vphsubsw (%rdi), %xmm0, %xmm0 # sched: [6:2.00]
; HASWELL-NEXT: retq # sched: [1:1.00]
; HASWELL-NEXT: vphsubsw (%rdi), %xmm0, %xmm0 # sched: [3:2.00]
; HASWELL-NEXT: retq # sched: [2:1.00]
;
; BTVER2-LABEL: test_phsubsw:
; BTVER2: # BB#0:
@ -509,8 +509,8 @@ define <8 x i16> @test_phsubw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) {
; HASWELL-LABEL: test_phsubw:
; HASWELL: # BB#0:
; HASWELL-NEXT: vphsubw %xmm1, %xmm0, %xmm0 # sched: [3:2.00]
; HASWELL-NEXT: vphsubw (%rdi), %xmm0, %xmm0 # sched: [6:2.00]
; HASWELL-NEXT: retq # sched: [1:1.00]
; HASWELL-NEXT: vphsubw (%rdi), %xmm0, %xmm0 # sched: [3:2.00]
; HASWELL-NEXT: retq # sched: [2:1.00]
;
; BTVER2-LABEL: test_phsubw:
; BTVER2: # BB#0:
@ -558,8 +558,8 @@ define <8 x i16> @test_pmaddubsw(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> *%a2) {
; HASWELL-LABEL: test_pmaddubsw:
; HASWELL: # BB#0:
; HASWELL-NEXT: vpmaddubsw %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
; HASWELL-NEXT: vpmaddubsw (%rdi), %xmm0, %xmm0 # sched: [9:1.00]
; HASWELL-NEXT: retq # sched: [1:1.00]
; HASWELL-NEXT: vpmaddubsw (%rdi), %xmm0, %xmm0 # sched: [5:1.00]
; HASWELL-NEXT: retq # sched: [2:1.00]
;
; BTVER2-LABEL: test_pmaddubsw:
; BTVER2: # BB#0:
@ -608,8 +608,8 @@ define <8 x i16> @test_pmulhrsw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) {
; HASWELL-LABEL: test_pmulhrsw:
; HASWELL: # BB#0:
; HASWELL-NEXT: vpmulhrsw %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
; HASWELL-NEXT: vpmulhrsw (%rdi), %xmm0, %xmm0 # sched: [9:1.00]
; HASWELL-NEXT: retq # sched: [1:1.00]
; HASWELL-NEXT: vpmulhrsw (%rdi), %xmm0, %xmm0 # sched: [5:1.00]
; HASWELL-NEXT: retq # sched: [2:1.00]
;
; BTVER2-LABEL: test_pmulhrsw:
; BTVER2: # BB#0:
@ -657,8 +657,8 @@ define <16 x i8> @test_pshufb(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> *%a2) {
; HASWELL-LABEL: test_pshufb:
; HASWELL: # BB#0:
; HASWELL-NEXT: vpshufb %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
; HASWELL-NEXT: vpshufb (%rdi), %xmm0, %xmm0 # sched: [5:1.00]
; HASWELL-NEXT: retq # sched: [1:1.00]
; HASWELL-NEXT: vpshufb (%rdi), %xmm0, %xmm0 # sched: [1:1.00]
; HASWELL-NEXT: retq # sched: [2:1.00]
;
; BTVER2-LABEL: test_pshufb:
; BTVER2: # BB#0:
@ -710,8 +710,8 @@ define <16 x i8> @test_psignb(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> *%a2) {
; HASWELL-LABEL: test_psignb:
; HASWELL: # BB#0:
; HASWELL-NEXT: vpsignb %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; HASWELL-NEXT: vpsignb (%rdi), %xmm0, %xmm0 # sched: [5:0.50]
; HASWELL-NEXT: retq # sched: [1:1.00]
; HASWELL-NEXT: vpsignb (%rdi), %xmm0, %xmm0 # sched: [1:0.50]
; HASWELL-NEXT: retq # sched: [2:1.00]
;
; BTVER2-LABEL: test_psignb:
; BTVER2: # BB#0:
@ -763,8 +763,8 @@ define <4 x i32> @test_psignd(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) {
; HASWELL-LABEL: test_psignd:
; HASWELL: # BB#0:
; HASWELL-NEXT: vpsignd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; HASWELL-NEXT: vpsignd (%rdi), %xmm0, %xmm0 # sched: [5:0.50]
; HASWELL-NEXT: retq # sched: [1:1.00]
; HASWELL-NEXT: vpsignd (%rdi), %xmm0, %xmm0 # sched: [1:0.50]
; HASWELL-NEXT: retq # sched: [2:1.00]
;
; BTVER2-LABEL: test_psignd:
; BTVER2: # BB#0:
@ -816,8 +816,8 @@ define <8 x i16> @test_psignw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) {
; HASWELL-LABEL: test_psignw:
; HASWELL: # BB#0:
; HASWELL-NEXT: vpsignw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; HASWELL-NEXT: vpsignw (%rdi), %xmm0, %xmm0 # sched: [5:0.50]
; HASWELL-NEXT: retq # sched: [1:1.00]
; HASWELL-NEXT: vpsignw (%rdi), %xmm0, %xmm0 # sched: [1:0.50]
; HASWELL-NEXT: retq # sched: [2:1.00]
;
; BTVER2-LABEL: test_psignw:
; BTVER2: # BB#0:

View File

@ -201,14 +201,14 @@ define <64 x i8> @splatvar_shift_v64i8(<64 x i8> %a, <64 x i8> %b) nounwind {
; AVX512DQ-NEXT: vpsraw $2, %ymm0, %ymm5
; AVX512DQ-NEXT: vpaddw %ymm2, %ymm2, %ymm8
; AVX512DQ-NEXT: vpblendvb %ymm8, %ymm5, %ymm0, %ymm0
; AVX512DQ-NEXT: vpunpckhbw {{.*#+}} ymm5 = ymm0[8],ymm1[8],ymm0[9],ymm1[9],ymm0[10],ymm1[10],ymm0[11],ymm1[11],ymm0[12],ymm1[12],ymm0[13],ymm1[13],ymm0[14],ymm1[14],ymm0[15],ymm1[15],ymm0[24],ymm1[24],ymm0[25],ymm1[25],ymm0[26],ymm1[26],ymm0[27],ymm1[27],ymm0[28],ymm1[28],ymm0[29],ymm1[29],ymm0[30],ymm1[30],ymm0[31],ymm1[31]
; AVX512DQ-NEXT: vpsraw $4, %ymm5, %ymm9
; AVX512DQ-NEXT: vpblendvb %ymm3, %ymm9, %ymm5, %ymm3
; AVX512DQ-NEXT: vpsraw $1, %ymm0, %ymm5
; AVX512DQ-NEXT: vpaddw %ymm8, %ymm8, %ymm9
; AVX512DQ-NEXT: vpblendvb %ymm9, %ymm5, %ymm0, %ymm0
; AVX512DQ-NEXT: vpsrlw $8, %ymm0, %ymm0
; AVX512DQ-NEXT: vpackuswb %ymm4, %ymm0, %ymm0
; AVX512DQ-NEXT: vpunpckhbw {{.*#+}} ymm4 = ymm0[8],ymm1[8],ymm0[9],ymm1[9],ymm0[10],ymm1[10],ymm0[11],ymm1[11],ymm0[12],ymm1[12],ymm0[13],ymm1[13],ymm0[14],ymm1[14],ymm0[15],ymm1[15],ymm0[24],ymm1[24],ymm0[25],ymm1[25],ymm0[26],ymm1[26],ymm0[27],ymm1[27],ymm0[28],ymm1[28],ymm0[29],ymm1[29],ymm0[30],ymm1[30],ymm0[31],ymm1[31]
; AVX512DQ-NEXT: vpsraw $4, %ymm4, %ymm5
; AVX512DQ-NEXT: vpblendvb %ymm3, %ymm5, %ymm4, %ymm3
; AVX512DQ-NEXT: vpsraw $2, %ymm3, %ymm4
; AVX512DQ-NEXT: vpblendvb %ymm6, %ymm4, %ymm3, %ymm3
; AVX512DQ-NEXT: vpsraw $1, %ymm3, %ymm4
@ -328,14 +328,14 @@ define <64 x i8> @constant_shift_v64i8(<64 x i8> %a) nounwind {
; AVX512DQ-NEXT: vpsraw $2, %ymm0, %ymm5
; AVX512DQ-NEXT: vpaddw %ymm2, %ymm2, %ymm8
; AVX512DQ-NEXT: vpblendvb %ymm8, %ymm5, %ymm0, %ymm0
; AVX512DQ-NEXT: vpunpckhbw {{.*#+}} ymm5 = ymm0[8],ymm1[8],ymm0[9],ymm1[9],ymm0[10],ymm1[10],ymm0[11],ymm1[11],ymm0[12],ymm1[12],ymm0[13],ymm1[13],ymm0[14],ymm1[14],ymm0[15],ymm1[15],ymm0[24],ymm1[24],ymm0[25],ymm1[25],ymm0[26],ymm1[26],ymm0[27],ymm1[27],ymm0[28],ymm1[28],ymm0[29],ymm1[29],ymm0[30],ymm1[30],ymm0[31],ymm1[31]
; AVX512DQ-NEXT: vpsraw $4, %ymm5, %ymm9
; AVX512DQ-NEXT: vpblendvb %ymm3, %ymm9, %ymm5, %ymm3
; AVX512DQ-NEXT: vpsraw $1, %ymm0, %ymm5
; AVX512DQ-NEXT: vpaddw %ymm8, %ymm8, %ymm9
; AVX512DQ-NEXT: vpblendvb %ymm9, %ymm5, %ymm0, %ymm0
; AVX512DQ-NEXT: vpsrlw $8, %ymm0, %ymm0
; AVX512DQ-NEXT: vpackuswb %ymm4, %ymm0, %ymm0
; AVX512DQ-NEXT: vpunpckhbw {{.*#+}} ymm4 = ymm0[8],ymm1[8],ymm0[9],ymm1[9],ymm0[10],ymm1[10],ymm0[11],ymm1[11],ymm0[12],ymm1[12],ymm0[13],ymm1[13],ymm0[14],ymm1[14],ymm0[15],ymm1[15],ymm0[24],ymm1[24],ymm0[25],ymm1[25],ymm0[26],ymm1[26],ymm0[27],ymm1[27],ymm0[28],ymm1[28],ymm0[29],ymm1[29],ymm0[30],ymm1[30],ymm0[31],ymm1[31]
; AVX512DQ-NEXT: vpsraw $4, %ymm4, %ymm5
; AVX512DQ-NEXT: vpblendvb %ymm3, %ymm5, %ymm4, %ymm3
; AVX512DQ-NEXT: vpsraw $2, %ymm3, %ymm4
; AVX512DQ-NEXT: vpblendvb %ymm6, %ymm4, %ymm3, %ymm3
; AVX512DQ-NEXT: vpsraw $1, %ymm3, %ymm4

View File

@ -777,9 +777,9 @@ define <32 x i8> @splatvar_shift_v32i8(<32 x i8> %a, <32 x i8> %b) nounwind {
;
; AVX512DQ-LABEL: splatvar_shift_v32i8:
; AVX512DQ: # BB#0:
; AVX512DQ-NEXT: vpbroadcastb %xmm1, %ymm1
; AVX512DQ-NEXT: vpsrlw $4, %ymm0, %ymm2
; AVX512DQ-NEXT: vpand {{.*}}(%rip), %ymm2, %ymm2
; AVX512DQ-NEXT: vpbroadcastb %xmm1, %ymm1
; AVX512DQ-NEXT: vpsllw $5, %ymm1, %ymm1
; AVX512DQ-NEXT: vpblendvb %ymm1, %ymm2, %ymm0, %ymm0
; AVX512DQ-NEXT: vpsrlw $2, %ymm0, %ymm2

View File

@ -713,9 +713,9 @@ define <32 x i8> @splatvar_shift_v32i8(<32 x i8> %a, <32 x i8> %b) nounwind {
;
; AVX512DQ-LABEL: splatvar_shift_v32i8:
; AVX512DQ: # BB#0:
; AVX512DQ-NEXT: vpbroadcastb %xmm1, %ymm1
; AVX512DQ-NEXT: vpsllw $4, %ymm0, %ymm2
; AVX512DQ-NEXT: vpand {{.*}}(%rip), %ymm2, %ymm2
; AVX512DQ-NEXT: vpbroadcastb %xmm1, %ymm1
; AVX512DQ-NEXT: vpsllw $5, %ymm1, %ymm1
; AVX512DQ-NEXT: vpblendvb %ymm1, %ymm2, %ymm0, %ymm0
; AVX512DQ-NEXT: vpsllw $2, %ymm0, %ymm2

View File

@ -68,13 +68,13 @@ define <32 x i16> @shuffle_v32i16_0f_1f_0e_16_0d_1d_04_1e_0b_1b_0a_1a_09_19_08_1
; KNL-NEXT: vpshufb {{.*#+}} xmm4 = xmm1[8,9,12,13,12,13,10,11,0,1,4,5,4,5,0,1]
; KNL-NEXT: vpshufd {{.*#+}} xmm1 = xmm2[0,1,0,3]
; KNL-NEXT: vpshuflw {{.*#+}} xmm2 = xmm1[0,3,2,2,4,5,6,7]
; KNL-NEXT: vinserti128 $1, %xmm2, %ymm4, %ymm1
; KNL-NEXT: vextracti128 $1, %ymm0, %xmm5
; KNL-NEXT: vpunpckhwd {{.*#+}} xmm0 = xmm5[4],xmm0[4],xmm5[5],xmm0[5],xmm5[6],xmm0[6],xmm5[7],xmm0[7]
; KNL-NEXT: vextracti128 $1, %ymm0, %xmm1
; KNL-NEXT: vpunpckhwd {{.*#+}} xmm0 = xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
; KNL-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[12,13,10,11,8,9,14,15,4,5,2,3,2,3,6,7]
; KNL-NEXT: vpshufb {{.*#+}} xmm5 = xmm5[6,7,2,3,4,5,6,7,2,3,2,3,0,1,14,15]
; KNL-NEXT: vinserti128 $1, %xmm5, %ymm0, %ymm0
; KNL-NEXT: vpblendw {{.*#+}} ymm1 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7],ymm0[8],ymm1[9],ymm0[10],ymm1[11],ymm0[12],ymm1[13],ymm0[14],ymm1[15]
; KNL-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[6,7,2,3,4,5,6,7,2,3,2,3,0,1,14,15]
; KNL-NEXT: vinserti128 $1, %xmm2, %ymm4, %ymm5
; KNL-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
; KNL-NEXT: vpblendw {{.*#+}} ymm1 = ymm0[0],ymm5[1],ymm0[2],ymm5[3],ymm0[4],ymm5[5],ymm0[6],ymm5[7],ymm0[8],ymm5[9],ymm0[10],ymm5[11],ymm0[12],ymm5[13],ymm0[14],ymm5[15]
; KNL-NEXT: vextracti128 $1, %ymm3, %xmm3
; KNL-NEXT: vpbroadcastw %xmm3, %ymm3
; KNL-NEXT: vmovdqa {{.*#+}} ymm5 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,0]