[X86] Use zero vector to extend to 512-bits for strict_fp_to_uint v2f64->v2i1 on targets with AVX512F, but not AVX512VL.
In the worst case, this requires a 128-bit move instruction to implicitly zero the upper bits. In the common case, we should recognize that the producing instruction already zeroed the upper bits.
This commit is contained in:
parent
4af5b23db3
commit
c5b4a2386b
|
@ -19776,9 +19776,13 @@ SDValue X86TargetLowering::LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) const {
|
||||||
Opc = IsSigned ? ISD::STRICT_FP_TO_SINT : ISD::STRICT_FP_TO_UINT;
|
Opc = IsSigned ? ISD::STRICT_FP_TO_SINT : ISD::STRICT_FP_TO_UINT;
|
||||||
else
|
else
|
||||||
Opc = IsSigned ? ISD::FP_TO_SINT : ISD::FP_TO_UINT;
|
Opc = IsSigned ? ISD::FP_TO_SINT : ISD::FP_TO_UINT;
|
||||||
Src = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, MVT::v8f64,
|
// Need to concat with zero vector for strict fp to avoid spurious
|
||||||
DAG.getUNDEF(MVT::v8f64),
|
// exceptions.
|
||||||
Src, DAG.getIntPtrConstant(0, dl));
|
// TODO: Should we just do this for non-strict as well?
|
||||||
|
SDValue Tmp = IsStrict ? DAG.getConstantFP(0.0, dl, MVT::v8f64)
|
||||||
|
: DAG.getUNDEF(MVT::v8f64);
|
||||||
|
Src = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, MVT::v8f64, Tmp, Src,
|
||||||
|
DAG.getIntPtrConstant(0, dl));
|
||||||
}
|
}
|
||||||
SDValue Res, Chain;
|
SDValue Res, Chain;
|
||||||
if (IsStrict) {
|
if (IsStrict) {
|
||||||
|
|
|
@ -1754,10 +1754,9 @@ define <2 x i1> @strict_vector_fptoui_v2f64_to_v2i1(<2 x double> %a) #0 {
|
||||||
; AVX512VL-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z}
|
; AVX512VL-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z}
|
||||||
; AVX512VL-NEXT: ret{{[l|q]}}
|
; AVX512VL-NEXT: ret{{[l|q]}}
|
||||||
;
|
;
|
||||||
; FIXME: This is an unsafe behavior for strict FP
|
|
||||||
; AVX512DQ-LABEL: strict_vector_fptoui_v2f64_to_v2i1:
|
; AVX512DQ-LABEL: strict_vector_fptoui_v2f64_to_v2i1:
|
||||||
; AVX512DQ: # %bb.0:
|
; AVX512DQ: # %bb.0:
|
||||||
; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
|
; AVX512DQ-NEXT: vmovaps %xmm0, %xmm0
|
||||||
; AVX512DQ-NEXT: vcvttpd2udq %zmm0, %ymm0
|
; AVX512DQ-NEXT: vcvttpd2udq %zmm0, %ymm0
|
||||||
; AVX512DQ-NEXT: vpslld $31, %ymm0, %ymm0
|
; AVX512DQ-NEXT: vpslld $31, %ymm0, %ymm0
|
||||||
; AVX512DQ-NEXT: vpmovd2m %zmm0, %k0
|
; AVX512DQ-NEXT: vpmovd2m %zmm0, %k0
|
||||||
|
|
Loading…
Reference in New Issue