[AVX-512] Use scalar vfmsub/vfnmsub mask3 intrinsics instead of inverting the mask argument of a vfmadd intrinsic.
Summary: Inverting the mask argument does not reflect the intended semantics of the intrinsic.
Reviewers: igorb, delena
Subscribers: cfe-commits
Differential Revision: https://reviews.llvm.org/D26019
llvm-svn: 286733
This commit is contained in:
		
							parent
							
								
									79c6f7a0c5
								
							
						
					
					
						commit
						2c8f49e67b
					
				| 
						 | 
					@ -1743,6 +1743,10 @@ TARGET_BUILTIN(__builtin_ia32_vfmaddss3_mask3, "V4fV4fV4fV4fUcIi", "", "avx512f"
 | 
				
			||||||
TARGET_BUILTIN(__builtin_ia32_vfmaddsd3_mask,  "V2dV2dV2dV2dUcIi", "", "avx512f")
 | 
					TARGET_BUILTIN(__builtin_ia32_vfmaddsd3_mask,  "V2dV2dV2dV2dUcIi", "", "avx512f")
 | 
				
			||||||
TARGET_BUILTIN(__builtin_ia32_vfmaddsd3_maskz, "V2dV2dV2dV2dUcIi", "", "avx512f")
 | 
					TARGET_BUILTIN(__builtin_ia32_vfmaddsd3_maskz, "V2dV2dV2dV2dUcIi", "", "avx512f")
 | 
				
			||||||
TARGET_BUILTIN(__builtin_ia32_vfmaddsd3_mask3, "V2dV2dV2dV2dUcIi", "", "avx512f")
 | 
					TARGET_BUILTIN(__builtin_ia32_vfmaddsd3_mask3, "V2dV2dV2dV2dUcIi", "", "avx512f")
 | 
				
			||||||
 | 
					TARGET_BUILTIN(__builtin_ia32_vfmsubsd3_mask3, "V2dV2dV2dV2dUcIi", "", "avx512f")
 | 
				
			||||||
 | 
					TARGET_BUILTIN(__builtin_ia32_vfmsubss3_mask3, "V4fV4fV4fV4fUcIi", "", "avx512f")
 | 
				
			||||||
 | 
					TARGET_BUILTIN(__builtin_ia32_vfnmsubsd3_mask3, "V2dV2dV2dV2dUcIi", "", "avx512f")
 | 
				
			||||||
 | 
					TARGET_BUILTIN(__builtin_ia32_vfnmsubss3_mask3, "V4fV4fV4fV4fUcIi", "", "avx512f")
 | 
				
			||||||
TARGET_BUILTIN(__builtin_ia32_permvarhi512_mask, "V32sV32sV32sV32sUi","","avx512bw")
 | 
					TARGET_BUILTIN(__builtin_ia32_permvarhi512_mask, "V32sV32sV32sV32sUi","","avx512bw")
 | 
				
			||||||
TARGET_BUILTIN(__builtin_ia32_permvardf512_mask, "V8dV8dV8LLiV8dUc","","avx512f")
 | 
					TARGET_BUILTIN(__builtin_ia32_permvardf512_mask, "V8dV8dV8LLiV8dUc","","avx512f")
 | 
				
			||||||
TARGET_BUILTIN(__builtin_ia32_permvardi512_mask, "V8LLiV8LLiV8LLiV8LLiUc","","avx512f")
 | 
					TARGET_BUILTIN(__builtin_ia32_permvardi512_mask, "V8LLiV8LLiV8LLiV8LLiUc","","avx512f")
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -8470,17 +8470,17 @@ _mm_maskz_fmsub_ss (__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
 | 
				
			||||||
static __inline__ __m128 __DEFAULT_FN_ATTRS
 | 
					static __inline__ __m128 __DEFAULT_FN_ATTRS
 | 
				
			||||||
_mm_mask3_fmsub_ss (__m128 __W, __m128 __X, __m128 __Y, __mmask8 __U)
 | 
					_mm_mask3_fmsub_ss (__m128 __W, __m128 __X, __m128 __Y, __mmask8 __U)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
 return (__m128) __builtin_ia32_vfmaddss3_mask3 ((__v4sf) __W,
 | 
					 return (__m128) __builtin_ia32_vfmsubss3_mask3 ((__v4sf) __W,
 | 
				
			||||||
          (__v4sf) __X,
 | 
					          (__v4sf) __X,
 | 
				
			||||||
          -(__v4sf) __Y,
 | 
					          (__v4sf) __Y,
 | 
				
			||||||
          (__mmask8) __U,
 | 
					          (__mmask8) __U,
 | 
				
			||||||
          _MM_FROUND_CUR_DIRECTION);
 | 
					          _MM_FROUND_CUR_DIRECTION);
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
#define _mm_mask3_fmsub_round_ss(W, X, Y, U, R) __extension__ ({\
 | 
					#define _mm_mask3_fmsub_round_ss(W, X, Y, U, R) __extension__ ({\
 | 
				
			||||||
  (__m128)__builtin_ia32_vfmaddss3_mask3((__v4sf)(__m128)(W), \
 | 
					  (__m128)__builtin_ia32_vfmsubss3_mask3((__v4sf)(__m128)(W), \
 | 
				
			||||||
                                         (__v4sf)(__m128)(X), \
 | 
					                                         (__v4sf)(__m128)(X), \
 | 
				
			||||||
                                         -(__v4sf)(__m128)(Y), (__mmask8)(U), \
 | 
					                                         (__v4sf)(__m128)(Y), (__mmask8)(U), \
 | 
				
			||||||
                                         (int)(R)); })
 | 
					                                         (int)(R)); })
 | 
				
			||||||
 | 
					
 | 
				
			||||||
static __inline__ __m128 __DEFAULT_FN_ATTRS
 | 
					static __inline__ __m128 __DEFAULT_FN_ATTRS
 | 
				
			||||||
| 
						 | 
					@ -8566,17 +8566,17 @@ _mm_maskz_fnmsub_ss (__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
 | 
				
			||||||
static __inline__ __m128 __DEFAULT_FN_ATTRS
 | 
					static __inline__ __m128 __DEFAULT_FN_ATTRS
 | 
				
			||||||
_mm_mask3_fnmsub_ss (__m128 __W, __m128 __X, __m128 __Y, __mmask8 __U)
 | 
					_mm_mask3_fnmsub_ss (__m128 __W, __m128 __X, __m128 __Y, __mmask8 __U)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
 return (__m128) __builtin_ia32_vfmaddss3_mask3 (-(__v4sf) __W,
 | 
					 return (__m128) __builtin_ia32_vfnmsubss3_mask3 ((__v4sf) __W,
 | 
				
			||||||
          (__v4sf) __X,
 | 
					          (__v4sf) __X,
 | 
				
			||||||
          -(__v4sf) __Y,
 | 
					          (__v4sf) __Y,
 | 
				
			||||||
          (__mmask8) __U,
 | 
					          (__mmask8) __U,
 | 
				
			||||||
          _MM_FROUND_CUR_DIRECTION);
 | 
					          _MM_FROUND_CUR_DIRECTION);
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
#define _mm_mask3_fnmsub_round_ss(W, X, Y, U, R) __extension__({\
 | 
					#define _mm_mask3_fnmsub_round_ss(W, X, Y, U, R) __extension__({\
 | 
				
			||||||
  (__m128)__builtin_ia32_vfmaddss3_mask3(-(__v4sf)(__m128)(W), \
 | 
					  (__m128)__builtin_ia32_vfnmsubss3_mask3((__v4sf)(__m128)(W), \
 | 
				
			||||||
                                         (__v4sf)(__m128)(X), \
 | 
					                                         (__v4sf)(__m128)(X), \
 | 
				
			||||||
                                         -(__v4sf)(__m128)(Y), (__mmask8)(U), \
 | 
					                                         (__v4sf)(__m128)(Y), (__mmask8)(U), \
 | 
				
			||||||
                                         (int)(R)); })
 | 
					                                         (int)(R)); })
 | 
				
			||||||
 | 
					
 | 
				
			||||||
static __inline__ __m128d __DEFAULT_FN_ATTRS
 | 
					static __inline__ __m128d __DEFAULT_FN_ATTRS
 | 
				
			||||||
| 
						 | 
					@ -8662,17 +8662,17 @@ _mm_maskz_fmsub_sd (__mmask8 __U, __m128d __A, __m128d __B, __m128d __C)
 | 
				
			||||||
static __inline__ __m128d __DEFAULT_FN_ATTRS
 | 
					static __inline__ __m128d __DEFAULT_FN_ATTRS
 | 
				
			||||||
_mm_mask3_fmsub_sd (__m128d __W, __m128d __X, __m128d __Y, __mmask8 __U)
 | 
					_mm_mask3_fmsub_sd (__m128d __W, __m128d __X, __m128d __Y, __mmask8 __U)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
 return (__m128d) __builtin_ia32_vfmaddsd3_mask3 ((__v2df) __W,
 | 
					 return (__m128d) __builtin_ia32_vfmsubsd3_mask3 ((__v2df) __W,
 | 
				
			||||||
          (__v2df) __X,
 | 
					          (__v2df) __X,
 | 
				
			||||||
          -(__v2df) __Y,
 | 
					          (__v2df) __Y,
 | 
				
			||||||
          (__mmask8) __U,
 | 
					          (__mmask8) __U,
 | 
				
			||||||
          _MM_FROUND_CUR_DIRECTION);
 | 
					          _MM_FROUND_CUR_DIRECTION);
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
#define _mm_mask3_fmsub_round_sd(W, X, Y, U, R) __extension__ ({\
 | 
					#define _mm_mask3_fmsub_round_sd(W, X, Y, U, R) __extension__ ({\
 | 
				
			||||||
  (__m128d)__builtin_ia32_vfmaddsd3_mask3((__v2df)(__m128d)(W), \
 | 
					  (__m128d)__builtin_ia32_vfmsubsd3_mask3((__v2df)(__m128d)(W), \
 | 
				
			||||||
                                          (__v2df)(__m128d)(X), \
 | 
					                                          (__v2df)(__m128d)(X), \
 | 
				
			||||||
                                          -(__v2df)(__m128d)(Y), \
 | 
					                                          (__v2df)(__m128d)(Y), \
 | 
				
			||||||
                                          (__mmask8)(U), (int)(R)); })
 | 
					                                          (__mmask8)(U), (int)(R)); })
 | 
				
			||||||
 | 
					
 | 
				
			||||||
static __inline__ __m128d __DEFAULT_FN_ATTRS
 | 
					static __inline__ __m128d __DEFAULT_FN_ATTRS
 | 
				
			||||||
| 
						 | 
					@ -8759,17 +8759,17 @@ _mm_maskz_fnmsub_sd (__mmask8 __U, __m128d __A, __m128d __B, __m128d __C)
 | 
				
			||||||
static __inline__ __m128d __DEFAULT_FN_ATTRS
 | 
					static __inline__ __m128d __DEFAULT_FN_ATTRS
 | 
				
			||||||
_mm_mask3_fnmsub_sd (__m128d __W, __m128d __X, __m128d __Y, __mmask8 __U)
 | 
					_mm_mask3_fnmsub_sd (__m128d __W, __m128d __X, __m128d __Y, __mmask8 __U)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
 return (__m128d) __builtin_ia32_vfmaddsd3_mask3 (-(__v2df) (__W),
 | 
					 return (__m128d) __builtin_ia32_vfnmsubsd3_mask3 ((__v2df) (__W),
 | 
				
			||||||
          (__v2df) __X,
 | 
					          (__v2df) __X,
 | 
				
			||||||
          -(__v2df) (__Y),
 | 
					          (__v2df) (__Y),
 | 
				
			||||||
          (__mmask8) __U,
 | 
					          (__mmask8) __U,
 | 
				
			||||||
          _MM_FROUND_CUR_DIRECTION);
 | 
					          _MM_FROUND_CUR_DIRECTION);
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
#define _mm_mask3_fnmsub_round_sd(W, X, Y, U, R) __extension__({\
 | 
					#define _mm_mask3_fnmsub_round_sd(W, X, Y, U, R) __extension__({\
 | 
				
			||||||
  (__m128d)__builtin_ia32_vfmaddsd3_mask3(-(__v2df)(__m128d)(W), \
 | 
					  (__m128d)__builtin_ia32_vfnmsubsd3_mask3((__v2df)(__m128d)(W), \
 | 
				
			||||||
                                          (__v2df)(__m128d)(X), \
 | 
					                                          (__v2df)(__m128d)(X), \
 | 
				
			||||||
                                          -(__v2df)(__m128d)(Y), \
 | 
					                                          (__v2df)(__m128d)(Y), \
 | 
				
			||||||
                                          (__mmask8)(U), (int)(R)); })
 | 
					                                          (__mmask8)(U), (int)(R)); })
 | 
				
			||||||
 | 
					
 | 
				
			||||||
#define _mm512_permutex_pd(X, C) __extension__ ({ \
 | 
					#define _mm512_permutex_pd(X, C) __extension__ ({ \
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -5813,13 +5813,13 @@ __m128 test_mm_maskz_fmsub_round_ss(__mmask8 __U, __m128 __A, __m128 __B, __m128
 | 
				
			||||||
 | 
					
 | 
				
			||||||
__m128 test_mm_mask3_fmsub_ss(__m128 __W, __m128 __X, __m128 __Y, __mmask8 __U){
 | 
					__m128 test_mm_mask3_fmsub_ss(__m128 __W, __m128 __X, __m128 __Y, __mmask8 __U){
 | 
				
			||||||
  // CHECK-LABEL: @test_mm_mask3_fmsub_ss
 | 
					  // CHECK-LABEL: @test_mm_mask3_fmsub_ss
 | 
				
			||||||
  // CHECK: @llvm.x86.avx512.mask3.vfmadd.ss
 | 
					  // CHECK: @llvm.x86.avx512.mask3.vfmsub.ss
 | 
				
			||||||
  return _mm_mask3_fmsub_ss(__W, __X, __Y, __U);
 | 
					  return _mm_mask3_fmsub_ss(__W, __X, __Y, __U);
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
__m128 test_mm_mask3_fmsub_round_ss(__m128 __W, __m128 __X, __m128 __Y, __mmask8 __U){
 | 
					__m128 test_mm_mask3_fmsub_round_ss(__m128 __W, __m128 __X, __m128 __Y, __mmask8 __U){
 | 
				
			||||||
  // CHECK-LABEL: @test_mm_mask3_fmsub_round_ss
 | 
					  // CHECK-LABEL: @test_mm_mask3_fmsub_round_ss
 | 
				
			||||||
  // CHECK: @llvm.x86.avx512.mask3.vfmadd.ss
 | 
					  // CHECK: @llvm.x86.avx512.mask3.vfmsub.ss
 | 
				
			||||||
  return _mm_mask3_fmsub_round_ss(__W, __X, __Y, __U, _MM_FROUND_CUR_DIRECTION);
 | 
					  return _mm_mask3_fmsub_round_ss(__W, __X, __Y, __U, _MM_FROUND_CUR_DIRECTION);
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
| 
						 | 
					@ -5885,13 +5885,13 @@ __m128 test_mm_maskz_fnmsub_round_ss(__mmask8 __U, __m128 __A, __m128 __B, __m12
 | 
				
			||||||
 | 
					
 | 
				
			||||||
__m128 test_mm_mask3_fnmsub_ss(__m128 __W, __m128 __X, __m128 __Y, __mmask8 __U){
 | 
					__m128 test_mm_mask3_fnmsub_ss(__m128 __W, __m128 __X, __m128 __Y, __mmask8 __U){
 | 
				
			||||||
  // CHECK-LABEL: @test_mm_mask3_fnmsub_ss
 | 
					  // CHECK-LABEL: @test_mm_mask3_fnmsub_ss
 | 
				
			||||||
  // CHECK: @llvm.x86.avx512.mask3.vfmadd.ss
 | 
					  // CHECK: @llvm.x86.avx512.mask3.vfnmsub.ss
 | 
				
			||||||
  return _mm_mask3_fnmsub_ss(__W, __X, __Y, __U);
 | 
					  return _mm_mask3_fnmsub_ss(__W, __X, __Y, __U);
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
__m128 test_mm_mask3_fnmsub_round_ss(__m128 __W, __m128 __X, __m128 __Y, __mmask8 __U){
 | 
					__m128 test_mm_mask3_fnmsub_round_ss(__m128 __W, __m128 __X, __m128 __Y, __mmask8 __U){
 | 
				
			||||||
  // CHECK-LABEL: @test_mm_mask3_fnmsub_round_ss
 | 
					  // CHECK-LABEL: @test_mm_mask3_fnmsub_round_ss
 | 
				
			||||||
  // CHECK: @llvm.x86.avx512.mask3.vfmadd.ss
 | 
					  // CHECK: @llvm.x86.avx512.mask3.vfnmsub.ss
 | 
				
			||||||
  return _mm_mask3_fnmsub_round_ss(__W, __X, __Y, __U, _MM_FROUND_CUR_DIRECTION);
 | 
					  return _mm_mask3_fnmsub_round_ss(__W, __X, __Y, __U, _MM_FROUND_CUR_DIRECTION);
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
| 
						 | 
					@ -5957,13 +5957,13 @@ __m128d test_mm_maskz_fmsub_round_sd(__mmask8 __U, __m128d __A, __m128d __B, __m
 | 
				
			||||||
 | 
					
 | 
				
			||||||
__m128d test_mm_mask3_fmsub_sd(__m128d __W, __m128d __X, __m128d __Y, __mmask8 __U){
 | 
					__m128d test_mm_mask3_fmsub_sd(__m128d __W, __m128d __X, __m128d __Y, __mmask8 __U){
 | 
				
			||||||
  // CHECK-LABEL: @test_mm_mask3_fmsub_sd
 | 
					  // CHECK-LABEL: @test_mm_mask3_fmsub_sd
 | 
				
			||||||
  // CHECK: @llvm.x86.avx512.mask3.vfmadd.sd
 | 
					  // CHECK: @llvm.x86.avx512.mask3.vfmsub.sd
 | 
				
			||||||
  return _mm_mask3_fmsub_sd(__W, __X, __Y, __U);
 | 
					  return _mm_mask3_fmsub_sd(__W, __X, __Y, __U);
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
__m128d test_mm_mask3_fmsub_round_sd(__m128d __W, __m128d __X, __m128d __Y, __mmask8 __U){
 | 
					__m128d test_mm_mask3_fmsub_round_sd(__m128d __W, __m128d __X, __m128d __Y, __mmask8 __U){
 | 
				
			||||||
  // CHECK-LABEL: @test_mm_mask3_fmsub_round_sd
 | 
					  // CHECK-LABEL: @test_mm_mask3_fmsub_round_sd
 | 
				
			||||||
  // CHECK: @llvm.x86.avx512.mask3.vfmadd.sd
 | 
					  // CHECK: @llvm.x86.avx512.mask3.vfmsub.sd
 | 
				
			||||||
  return _mm_mask3_fmsub_round_sd(__W, __X, __Y, __U, _MM_FROUND_CUR_DIRECTION);
 | 
					  return _mm_mask3_fmsub_round_sd(__W, __X, __Y, __U, _MM_FROUND_CUR_DIRECTION);
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
| 
						 | 
					@ -6029,13 +6029,13 @@ __m128d test_mm_maskz_fnmsub_round_sd(__mmask8 __U, __m128d __A, __m128d __B, __
 | 
				
			||||||
 | 
					
 | 
				
			||||||
__m128d test_mm_mask3_fnmsub_sd(__m128d __W, __m128d __X, __m128d __Y, __mmask8 __U){
 | 
					__m128d test_mm_mask3_fnmsub_sd(__m128d __W, __m128d __X, __m128d __Y, __mmask8 __U){
 | 
				
			||||||
  // CHECK-LABEL: @test_mm_mask3_fnmsub_sd
 | 
					  // CHECK-LABEL: @test_mm_mask3_fnmsub_sd
 | 
				
			||||||
  // CHECK: @llvm.x86.avx512.mask3.vfmadd.sd
 | 
					  // CHECK: @llvm.x86.avx512.mask3.vfnmsub.sd
 | 
				
			||||||
  return _mm_mask3_fnmsub_sd(__W, __X, __Y, __U);
 | 
					  return _mm_mask3_fnmsub_sd(__W, __X, __Y, __U);
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
__m128d test_mm_mask3_fnmsub_round_sd(__m128d __W, __m128d __X, __m128d __Y, __mmask8 __U){
 | 
					__m128d test_mm_mask3_fnmsub_round_sd(__m128d __W, __m128d __X, __m128d __Y, __mmask8 __U){
 | 
				
			||||||
  // CHECK-LABEL: @test_mm_mask3_fnmsub_round_sd
 | 
					  // CHECK-LABEL: @test_mm_mask3_fnmsub_round_sd
 | 
				
			||||||
  // CHECK: @llvm.x86.avx512.mask3.vfmadd.sd
 | 
					  // CHECK: @llvm.x86.avx512.mask3.vfnmsub.sd
 | 
				
			||||||
  return _mm_mask3_fnmsub_round_sd(__W, __X, __Y, __U, _MM_FROUND_CUR_DIRECTION);
 | 
					  return _mm_mask3_fnmsub_round_sd(__W, __X, __Y, __U, _MM_FROUND_CUR_DIRECTION);
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
		Loading…
	
		Reference in New Issue