[AVX-512] Change another pattern that was using BLENDM to use masked moves. A future patch will convert it back to BLENDM if it's beneficial to register allocation.
llvm-svn: 291419
parent 177399e227
commit 96ab6fd2eb
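For context, here is a minimal sketch (not taken from this commit; the function name and llc flags are assumptions) of the kind of 256-bit select the patterns below handle when compiling for AVX-512 without VLX, e.g. with llc -mattr=+avx512f:

; Hypothetical example: a v8f32 select on a target with AVX-512F but no VLX.
; The vselect is widened to 512 bits, lowered with a masked operation on
; zmm registers, and the low 256 bits are extracted (sub_ymm).
define <8 x float> @select_v8f32_example(<8 x float> %x, <8 x float> %y) nounwind {
  %mask = fcmp olt <8 x float> %x, %y
  %sel = select <8 x i1> %mask, <8 x float> %x, <8 x float> %y
  ret <8 x float> %sel
}

Before this change such a widened select was matched to VBLENDMPSZrrk; with this change it is matched to a masked VMOVAPSZrrk instead, as the pattern diff and test updates below show.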
@@ -1588,21 +1588,6 @@ defm VPBLENDMB : blendmask_bw <0x66, "vpblendmb", avx512vl_i8_info>;
 defm VPBLENDMW : blendmask_bw <0x66, "vpblendmw", avx512vl_i16_info>, VEX_W;
 
 
-let Predicates = [HasAVX512, NoVLX] in {
-def : Pat<(v8f32 (vselect (v8i1 VK8WM:$mask), (v8f32 VR256X:$src1),
-                            (v8f32 VR256X:$src2))),
-            (EXTRACT_SUBREG
-              (v16f32 (VBLENDMPSZrrk (COPY_TO_REGCLASS VK8WM:$mask, VK16WM),
-            (v16f32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src2, sub_ymm)),
-            (v16f32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)))), sub_ymm)>;
-
-def : Pat<(v8i32 (vselect (v8i1 VK8WM:$mask), (v8i32 VR256X:$src1),
-                            (v8i32 VR256X:$src2))),
-            (EXTRACT_SUBREG
-                (v16i32 (VPBLENDMDZrrk (COPY_TO_REGCLASS VK8WM:$mask, VK16WM),
-            (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src2, sub_ymm)),
-            (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)))), sub_ymm)>;
-}
 //===----------------------------------------------------------------------===//
 // Compare Instructions
 //===----------------------------------------------------------------------===//
@@ -2978,6 +2963,30 @@ def : Pat<(v16i32 (vselect (xor VK16:$mask, (v16i1 immAllOnesV)),
                            (v16i32 VR512:$src))),
                   (VMOVDQA32Zrrkz VK16WM:$mask, VR512:$src)>;
 
+// Patterns for handling v8i1 selects of 256-bit vectors when VLX isn't
+// available. Use a 512-bit operation and extract.
+let Predicates = [HasAVX512, NoVLX] in {
+def : Pat<(v8f32 (vselect (v8i1 VK8WM:$mask), (v8f32 VR256X:$src1),
+                          (v8f32 VR256X:$src0))),
+          (EXTRACT_SUBREG
+           (v16f32
+            (VMOVAPSZrrk
+             (v16f32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src0, sub_ymm)),
+             (COPY_TO_REGCLASS VK8WM:$mask, VK16WM),
+             (v16f32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)))),
+           sub_ymm)>;
+
+def : Pat<(v8i32 (vselect (v8i1 VK8WM:$mask), (v8i32 VR256X:$src1),
+                          (v8i32 VR256X:$src0))),
+          (EXTRACT_SUBREG
+           (v16i32
+            (VMOVDQA32Zrrk
+             (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src0, sub_ymm)),
+             (COPY_TO_REGCLASS VK8WM:$mask, VK16WM),
+             (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)))),
+           sub_ymm)>;
+}
+
 let Predicates = [HasVLX, NoBWI] in {
   // 128-bit load/store without BWI.
   def : Pat<(alignedstore (v8i16 VR128X:$src), addr:$dst),
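The test updates that follow illustrate the resulting change in KNL (AVX-512 without VLX) code generation. As a rough sketch distilled from the FileCheck lines below (the test9 hunk of the compare tests), the widened select now uses a masked register move plus a YMM copy instead of a blend:

; before (KNL):
;   vpcmpeqd  %zmm1, %zmm0, %k1
;   vpblendmd %zmm0, %zmm1, %zmm0 {%k1}
; after (KNL):
;   vpcmpeqd  %zmm1, %zmm0, %k1
;   vmovdqa32 %zmm0, %zmm1 {%k1}
;   vmovdqa   %ymm1, %ymm0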
@@ -345,9 +345,9 @@ define <8 x i32> @zext_8x8mem_to_8x32(<8 x i8> *%i , <8 x i1> %mask) nounwind re
 ; KNL-NEXT:    vpmovsxwq %xmm0, %zmm0
 ; KNL-NEXT:    vpsllq $63, %zmm0, %zmm0
 ; KNL-NEXT:    vptestmq %zmm0, %zmm0, %k1
-; KNL-NEXT:    vpmovzxbd {{.*#+}} ymm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero,mem[4],zero,zero,zero,mem[5],zero,zero,zero,mem[6],zero,zero,zero,mem[7],zero,zero,zero
-; KNL-NEXT:    vpxor %ymm1, %ymm1, %ymm1
-; KNL-NEXT:    vpblendmd %zmm0, %zmm1, %zmm0 {%k1}
+; KNL-NEXT:    vpmovzxbd {{.*#+}} ymm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero,mem[4],zero,zero,zero,mem[5],zero,zero,zero,mem[6],zero,zero,zero,mem[7],zero,zero,zero
+; KNL-NEXT:    vpxor %ymm0, %ymm0, %ymm0
+; KNL-NEXT:    vmovdqa32 %zmm1, %zmm0 {%k1}
 ; KNL-NEXT:    ## kill: %YMM0<def> %YMM0<kill> %ZMM0<kill>
 ; KNL-NEXT:    retq
 ;
@@ -369,9 +369,9 @@ define <8 x i32> @sext_8x8mem_to_8x32(<8 x i8> *%i , <8 x i1> %mask) nounwind re
 ; KNL-NEXT:    vpmovsxwq %xmm0, %zmm0
 ; KNL-NEXT:    vpsllq $63, %zmm0, %zmm0
 ; KNL-NEXT:    vptestmq %zmm0, %zmm0, %k1
-; KNL-NEXT:    vpmovsxbd (%rdi), %ymm0
-; KNL-NEXT:    vpxor %ymm1, %ymm1, %ymm1
-; KNL-NEXT:    vpblendmd %zmm0, %zmm1, %zmm0 {%k1}
+; KNL-NEXT:    vpmovsxbd (%rdi), %ymm1
+; KNL-NEXT:    vpxor %ymm0, %ymm0, %ymm0
+; KNL-NEXT:    vmovdqa32 %zmm1, %zmm0 {%k1}
 ; KNL-NEXT:    ## kill: %YMM0<def> %YMM0<kill> %ZMM0<kill>
 ; KNL-NEXT:    retq
 ;
@@ -704,9 +704,9 @@ define <8 x i32> @zext_8x16mem_to_8x32(<8 x i16> *%i , <8 x i1> %mask) nounwind
 ; KNL-NEXT:    vpmovsxwq %xmm0, %zmm0
 ; KNL-NEXT:    vpsllq $63, %zmm0, %zmm0
 ; KNL-NEXT:    vptestmq %zmm0, %zmm0, %k1
-; KNL-NEXT:    vpmovzxwd {{.*#+}} ymm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero
-; KNL-NEXT:    vpxor %ymm1, %ymm1, %ymm1
-; KNL-NEXT:    vpblendmd %zmm0, %zmm1, %zmm0 {%k1}
+; KNL-NEXT:    vpmovzxwd {{.*#+}} ymm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero
+; KNL-NEXT:    vpxor %ymm0, %ymm0, %ymm0
+; KNL-NEXT:    vmovdqa32 %zmm1, %zmm0 {%k1}
 ; KNL-NEXT:    ## kill: %YMM0<def> %YMM0<kill> %ZMM0<kill>
 ; KNL-NEXT:    retq
 ;
@@ -728,9 +728,9 @@ define <8 x i32> @sext_8x16mem_to_8x32mask(<8 x i16> *%i , <8 x i1> %mask) nounw
 ; KNL-NEXT:    vpmovsxwq %xmm0, %zmm0
 ; KNL-NEXT:    vpsllq $63, %zmm0, %zmm0
 ; KNL-NEXT:    vptestmq %zmm0, %zmm0, %k1
-; KNL-NEXT:    vpmovsxwd (%rdi), %ymm0
-; KNL-NEXT:    vpxor %ymm1, %ymm1, %ymm1
-; KNL-NEXT:    vpblendmd %zmm0, %zmm1, %zmm0 {%k1}
+; KNL-NEXT:    vpmovsxwd (%rdi), %ymm1
+; KNL-NEXT:    vpxor %ymm0, %ymm0, %ymm0
+; KNL-NEXT:    vmovdqa32 %zmm1, %zmm0 {%k1}
 ; KNL-NEXT:    ## kill: %YMM0<def> %YMM0<kill> %ZMM0<kill>
 ; KNL-NEXT:    retq
 ;
@@ -762,9 +762,9 @@ define <8 x i32> @zext_8x16_to_8x32mask(<8 x i16> %a , <8 x i1> %mask) nounwind
 ; KNL-NEXT:    vpmovsxwq %xmm1, %zmm1
 ; KNL-NEXT:    vpsllq $63, %zmm1, %zmm1
 ; KNL-NEXT:    vptestmq %zmm1, %zmm1, %k1
-; KNL-NEXT:    vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
-; KNL-NEXT:    vpxor %ymm1, %ymm1, %ymm1
-; KNL-NEXT:    vpblendmd %zmm0, %zmm1, %zmm0 {%k1}
+; KNL-NEXT:    vpmovzxwd {{.*#+}} ymm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
+; KNL-NEXT:    vpxor %ymm0, %ymm0, %ymm0
+; KNL-NEXT:    vmovdqa32 %zmm1, %zmm0 {%k1}
 ; KNL-NEXT:    ## kill: %YMM0<def> %YMM0<kill> %ZMM0<kill>
 ; KNL-NEXT:    retq
 ;
@@ -122,8 +122,8 @@ define <8 x i32> @test9(<8 x i32> %x, <8 x i32> %y) nounwind {
 ; KNL-NEXT:    ## kill: %YMM1<def> %YMM1<kill> %ZMM1<def>
 ; KNL-NEXT:    ## kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
 ; KNL-NEXT:    vpcmpeqd %zmm1, %zmm0, %k1
-; KNL-NEXT:    vpblendmd %zmm0, %zmm1, %zmm0 {%k1}
-; KNL-NEXT:    ## kill: %YMM0<def> %YMM0<kill> %ZMM0<kill>
+; KNL-NEXT:    vmovdqa32 %zmm0, %zmm1 {%k1}
+; KNL-NEXT:    vmovdqa %ymm1, %ymm0
 ; KNL-NEXT:    retq
 ;
 ; SKX-LABEL: test9:
@@ -143,8 +143,8 @@ define <8 x float> @test10(<8 x float> %x, <8 x float> %y) nounwind {
 ; KNL-NEXT:    ## kill: %YMM1<def> %YMM1<kill> %ZMM1<def>
 ; KNL-NEXT:    ## kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
 ; KNL-NEXT:    vcmpeqps %zmm1, %zmm0, %k1
-; KNL-NEXT:    vblendmps %zmm0, %zmm1, %zmm0 {%k1}
-; KNL-NEXT:    ## kill: %YMM0<def> %YMM0<kill> %ZMM0<kill>
+; KNL-NEXT:    vmovaps %zmm0, %zmm1 {%k1}
+; KNL-NEXT:    vmovaps %ymm1, %ymm0
 ; KNL-NEXT:    retq
 ;
 ; SKX-LABEL: test10:
@@ -1020,8 +1020,8 @@ define <8 x float> @test35(<8 x float> %x, <8 x float> %x1, <8 x float>* %yp) no
 ; KNL-NEXT:    ## kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
 ; KNL-NEXT:    vmovups (%rdi), %ymm2
 ; KNL-NEXT:    vcmpltps %zmm2, %zmm0, %k1
-; KNL-NEXT:    vblendmps %zmm0, %zmm1, %zmm0 {%k1}
-; KNL-NEXT:    ## kill: %YMM0<def> %YMM0<kill> %ZMM0<kill>
+; KNL-NEXT:    vmovaps %zmm0, %zmm1 {%k1}
+; KNL-NEXT:    vmovaps %ymm1, %ymm0
 ; KNL-NEXT:    retq
 ;
 ; SKX-LABEL: test35:
@@ -1140,8 +1140,8 @@ define <8  x float> @test41(<8  x float> %x, <8  x float> %x1, float* %ptr) noun
 ; KNL-NEXT:    ## kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
 ; KNL-NEXT:    vbroadcastss (%rdi), %ymm2
 ; KNL-NEXT:    vcmpltps %zmm2, %zmm0, %k1
-; KNL-NEXT:    vblendmps %zmm0, %zmm1, %zmm0 {%k1}
-; KNL-NEXT:    ## kill: %YMM0<def> %YMM0<kill> %ZMM0<kill>
+; KNL-NEXT:    vmovaps %zmm0, %zmm1 {%k1}
+; KNL-NEXT:    vmovaps %ymm1, %ymm0
 ; KNL-NEXT:    retq
 ;
 ; SKX-LABEL: test41: