diff --git a/llvm/lib/Target/X86/X86InstrAVX512.td b/llvm/lib/Target/X86/X86InstrAVX512.td index dac3d490fedf..ea1e4e25fc78 100644 --- a/llvm/lib/Target/X86/X86InstrAVX512.td +++ b/llvm/lib/Target/X86/X86InstrAVX512.td @@ -8092,34 +8092,38 @@ multiclass avx512_sqrt_packed opc, string OpcodeStr, } multiclass avx512_sqrt_packed_all opc, string OpcodeStr, - X86SchedWriteWidths sched> { - defm PSZ : avx512_sqrt_packed, + X86SchedWriteSizes sched> { + defm PSZ : avx512_sqrt_packed, EVEX_V512, PS, EVEX_CD8<32, CD8VF>; - defm PDZ : avx512_sqrt_packed, + defm PDZ : avx512_sqrt_packed, EVEX_V512, VEX_W, PD, EVEX_CD8<64, CD8VF>; // Define only if AVX512VL feature is present. let Predicates = [HasVLX] in { defm PSZ128 : avx512_sqrt_packed, + sched.PS.XMM, v4f32x_info>, EVEX_V128, PS, EVEX_CD8<32, CD8VF>; defm PSZ256 : avx512_sqrt_packed, + sched.PS.YMM, v8f32x_info>, EVEX_V256, PS, EVEX_CD8<32, CD8VF>; defm PDZ128 : avx512_sqrt_packed, + sched.PD.XMM, v2f64x_info>, EVEX_V128, VEX_W, PD, EVEX_CD8<64, CD8VF>; defm PDZ256 : avx512_sqrt_packed, + sched.PD.YMM, v4f64x_info>, EVEX_V256, VEX_W, PD, EVEX_CD8<64, CD8VF>; } } multiclass avx512_sqrt_packed_all_round opc, string OpcodeStr, - X86SchedWriteWidths sched> { - defm PSZ : avx512_sqrt_packed_round, EVEX_V512, PS, EVEX_CD8<32, CD8VF>; - defm PDZ : avx512_sqrt_packed_round, EVEX_V512, VEX_W, PD, EVEX_CD8<64, CD8VF>; + X86SchedWriteSizes sched> { + defm PSZ : avx512_sqrt_packed_round, + EVEX_V512, PS, EVEX_CD8<32, CD8VF>; + defm PDZ : avx512_sqrt_packed_round, + EVEX_V512, VEX_W, PD, EVEX_CD8<64, CD8VF>; } multiclass avx512_sqrt_scalar opc, string OpcodeStr, X86FoldableSchedWrite sched, @@ -8182,20 +8186,20 @@ multiclass avx512_sqrt_scalar opc, string OpcodeStr, X86FoldableSchedWri } multiclass avx512_sqrt_scalar_all opc, string OpcodeStr, - X86SchedWriteWidths sched> { - defm SSZ : avx512_sqrt_scalar { + defm SSZ : avx512_sqrt_scalar, EVEX_CD8<32, CD8VT1>, EVEX_4V, XS, NotMemoryFoldable; - defm SDZ : avx512_sqrt_scalar, EVEX_CD8<64, CD8VT1>, EVEX_4V, XD, VEX_W, NotMemoryFoldable; } -defm VSQRT : avx512_sqrt_packed_all<0x51, "vsqrt", SchedWriteFSqrt>, - avx512_sqrt_packed_all_round<0x51, "vsqrt", SchedWriteFSqrt>; +defm VSQRT : avx512_sqrt_packed_all<0x51, "vsqrt", SchedWriteFSqrtSizes>, + avx512_sqrt_packed_all_round<0x51, "vsqrt", SchedWriteFSqrtSizes>; -defm VSQRT : avx512_sqrt_scalar_all<0x51, "vsqrt", SchedWriteFSqrt>, VEX_LIG; +defm VSQRT : avx512_sqrt_scalar_all<0x51, "vsqrt", SchedWriteFSqrtSizes>, VEX_LIG; multiclass avx512_rndscale_scalar opc, string OpcodeStr, X86FoldableSchedWrite sched, X86VectorVTInfo _> { diff --git a/llvm/lib/Target/X86/X86InstrFPStack.td b/llvm/lib/Target/X86/X86InstrFPStack.td index 57493408fef1..aedd445b714b 100644 --- a/llvm/lib/Target/X86/X86InstrFPStack.td +++ b/llvm/lib/Target/X86/X86InstrFPStack.td @@ -314,7 +314,7 @@ defm CHS : FPUnary; defm ABS : FPUnary; } -let SchedRW = [WriteFSqrt] in +let SchedRW = [WriteFSqrt80] in defm SQRT: FPUnary; let SchedRW = [WriteMicrocoded] in { diff --git a/llvm/lib/Target/X86/X86InstrSSE.td b/llvm/lib/Target/X86/X86InstrSSE.td index 10b8cac81f84..1ad7d4cdc816 100644 --- a/llvm/lib/Target/X86/X86InstrSSE.td +++ b/llvm/lib/Target/X86/X86InstrSSE.td @@ -2935,8 +2935,8 @@ multiclass sse2_fp_unop_s opc, string OpcodeStr, SDNode OpNode, // Square root. defm SQRT : sse1_fp_unop_s<0x51, "sqrt", fsqrt, SchedWriteFSqrt, UseAVX>, sse1_fp_unop_p<0x51, "sqrt", fsqrt, SchedWriteFSqrt, [HasAVX, NoVLX]>, - sse2_fp_unop_s<0x51, "sqrt", fsqrt, SchedWriteFSqrt, UseAVX>, - sse2_fp_unop_p<0x51, "sqrt", fsqrt, SchedWriteFSqrt>; + sse2_fp_unop_s<0x51, "sqrt", fsqrt, SchedWriteFSqrt64, UseAVX>, + sse2_fp_unop_p<0x51, "sqrt", fsqrt, SchedWriteFSqrt64>; // Reciprocal approximations. Note that these typically require refinement // in order to obtain suitable precision. diff --git a/llvm/lib/Target/X86/X86SchedBroadwell.td b/llvm/lib/Target/X86/X86SchedBroadwell.td index edd81bed65fe..93adb100b76e 100755 --- a/llvm/lib/Target/X86/X86SchedBroadwell.td +++ b/llvm/lib/Target/X86/X86SchedBroadwell.td @@ -164,12 +164,27 @@ defm : BWWriteResPair; // Floating point defm : BWWriteResPair; // Floating point multiplication (YMM/ZMM). defm : BWWriteResPair; // 10-14 cycles. // Floating point division. defm : BWWriteResPair; // 10-14 cycles. // Floating point division (YMM/ZMM). -defm : BWWriteResPair; // Floating point square root. -defm : BWWriteResPair; // Floating point square root (YMM/ZMM). + +defm : X86WriteRes; // Floating point square root. +defm : X86WriteRes; +defm : BWWriteResPair; // Floating point square root (XMM). +defm : BWWriteResPair; // Floating point square root (YMM). +defm : BWWriteResPair; // Floating point square root (ZMM). +defm : X86WriteRes; // Floating point double square root. +defm : X86WriteRes; +defm : BWWriteResPair; // Floating point double square root (XMM). +defm : BWWriteResPair; // Floating point double square root (YMM). +defm : BWWriteResPair; // Floating point double square root (ZMM). +defm : BWWriteResPair; // Floating point long double square root. + defm : BWWriteResPair; // Floating point reciprocal estimate. -defm : BWWriteResPair; // Floating point reciprocal estimate (YMM/ZMM). +defm : BWWriteResPair; // Floating point reciprocal estimate (XMM). +defm : BWWriteResPair; // Floating point reciprocal estimate (YMM/ZMM). + defm : BWWriteResPair; // Floating point reciprocal square root estimate. -defm : BWWriteResPair; // Floating point reciprocal square root estimate (YMM/ZMM). +defm : BWWriteResPair; // Floating point reciprocal square root estimate (XMM). +defm : BWWriteResPair; // Floating point reciprocal square root estimate (YMM/ZMM). + defm : BWWriteResPair; // Fused Multiply Add. defm : BWWriteResPair; // Fused Multiply Add (XMM). defm : BWWriteResPair; // Fused Multiply Add (YMM/ZMM). @@ -1401,14 +1416,6 @@ def BWWriteResGroup123 : SchedWriteRes<[BWPort0,BWPort23]> { def: InstRW<[BWWriteResGroup123], (instregex "MUL_F(32|64)m", "VPCMPGTQYrm")>; -def BWWriteResGroup126 : SchedWriteRes<[BWPort0,BWPort015]> { - let Latency = 11; - let NumMicroOps = 3; - let ResourceCycles = [2,1]; -} -def: InstRW<[BWWriteResGroup126], (instregex "VRCPPSYr", - "VRSQRTPSYr")>; - def BWWriteResGroup128 : SchedWriteRes<[BWPort1,BWPort5,BWPort23]> { let Latency = 11; let NumMicroOps = 3; @@ -1454,20 +1461,6 @@ def BWWriteResGroup135 : SchedWriteRes<[BWPort1,BWPort23]> { } def: InstRW<[BWWriteResGroup135], (instregex "(ADD|SUB|SUBR)_FI(16|32)m")>; -def BWWriteResGroup137 : SchedWriteRes<[BWPort0,BWFPDivider]> { - let Latency = 11; - let NumMicroOps = 1; - let ResourceCycles = [1,7]; -} -def: InstRW<[BWWriteResGroup137], (instregex "(V?)SQRTPSr")>; - -def BWWriteResGroup137_1 : SchedWriteRes<[BWPort0,BWFPDivider]> { - let Latency = 11; - let NumMicroOps = 1; - let ResourceCycles = [1,4]; -} -def: InstRW<[BWWriteResGroup137_1], (instregex "(V?)SQRTSSr")>; - def BWWriteResGroup139 : SchedWriteRes<[BWPort0,BWFPDivider]> { let Latency = 14; let NumMicroOps = 1; @@ -1555,22 +1548,6 @@ def BWWriteResGroup155 : SchedWriteRes<[BWPort0,BWPort015,BWFPDivider]> { } def: InstRW<[BWWriteResGroup155], (instregex "VDIVPSYrr")>; -def BWWriteResGroup156 : SchedWriteRes<[BWPort0,BWPort23,BWPort015]> { - let Latency = 17; - let NumMicroOps = 4; - let ResourceCycles = [2,1,1]; -} -def: InstRW<[BWWriteResGroup156], (instregex "VRCPPSYm", - "VRSQRTPSYm")>; - -def BWWriteResGroup157 : SchedWriteRes<[BWPort0,BWPort23,BWFPDivider]> { - let Latency = 16; - let NumMicroOps = 2; - let ResourceCycles = [1,1,7]; -} -def: InstRW<[BWWriteResGroup157], (instregex "(V?)SQRTPSm", - "(V?)SQRTSSm")>; - def BWWriteResGroup159 : SchedWriteRes<[BWPort5,BWPort6,BWPort06,BWPort0156]> { let Latency = 18; let NumMicroOps = 8; @@ -1610,20 +1587,6 @@ def BWWriteResGroup167 : SchedWriteRes<[BWPort4,BWPort5,BWPort6,BWPort23,BWPort2 } def: InstRW<[BWWriteResGroup167], (instrs INSB, INSL, INSW)>; -def BWWriteResGroup168 : SchedWriteRes<[BWPort0,BWFPDivider]> { - let Latency = 16; - let NumMicroOps = 1; - let ResourceCycles = [1,14]; -} -def: InstRW<[BWWriteResGroup168], (instregex "(V?)SQRTPDr")>; - -def BWWriteResGroup168_1 : SchedWriteRes<[BWPort0,BWFPDivider]> { - let Latency = 16; - let NumMicroOps = 1; - let ResourceCycles = [1,8]; -} -def: InstRW<[BWWriteResGroup168_1], (instregex "(V?)SQRTSDr")>; - def BWWriteResGroup169 : SchedWriteRes<[BWPort0,BWPort23]> { let Latency = 21; let NumMicroOps = 2; @@ -1631,13 +1594,6 @@ def BWWriteResGroup169 : SchedWriteRes<[BWPort0,BWPort23]> { } def: InstRW<[BWWriteResGroup169], (instregex "DIV_F(32|64)m")>; -def BWWriteResGroup170 : SchedWriteRes<[BWPort0,BWPort015,BWFPDivider]> { - let Latency = 21; - let NumMicroOps = 3; - let ResourceCycles = [2,1,14]; -} -def: InstRW<[BWWriteResGroup170], (instregex "VSQRTPSYr")>; - def BWWriteResGroup171 : SchedWriteRes<[BWPort0,BWPort4,BWPort5,BWPort23,BWPort237,BWPort06,BWPort0156]> { let Latency = 21; let NumMicroOps = 19; @@ -1680,14 +1636,6 @@ def BWWriteResGroup177 : SchedWriteRes<[BWPort0,BWPort1,BWPort23]> { } def: InstRW<[BWWriteResGroup177], (instregex "DIV_FI(16|32)m")>; -def BWWriteResGroup179 : SchedWriteRes<[BWPort0,BWPort23,BWFPDivider]> { - let Latency = 21; - let NumMicroOps = 2; - let ResourceCycles = [1,1,14]; -} -def: InstRW<[BWWriteResGroup179], (instregex "(V?)SQRTPDm", - "(V?)SQRTSDm")>; - def BWWriteResGroup180 : SchedWriteRes<[BWPort0,BWPort23]> { let Latency = 26; let NumMicroOps = 2; @@ -1695,13 +1643,6 @@ def BWWriteResGroup180 : SchedWriteRes<[BWPort0,BWPort23]> { } def: InstRW<[BWWriteResGroup180], (instregex "DIVR_F(32|64)m")>; -def BWWriteResGroup181 : SchedWriteRes<[BWPort0,BWPort23,BWPort015,BWFPDivider]> { - let Latency = 27; - let NumMicroOps = 4; - let ResourceCycles = [2,1,1,14]; -} -def: InstRW<[BWWriteResGroup181], (instregex "VSQRTPSYm")>; - def BWWriteResGroup182 : SchedWriteRes<[BWPort0,BWPort1,BWPort23]> { let Latency = 29; let NumMicroOps = 3; @@ -1780,13 +1721,6 @@ def BWWriteResGroup186 : SchedWriteRes<[BWPort4,BWPort6,BWPort23,BWPort237,BWPor } def: InstRW<[BWWriteResGroup186], (instregex "^XSAVE$", "XSAVEC", "XSAVES", "XSAVEOPT")>; -def BWWriteResGroup189 : SchedWriteRes<[BWPort0,BWPort015,BWFPDivider]> { - let Latency = 29; - let NumMicroOps = 3; - let ResourceCycles = [2,1,28]; -} -def: InstRW<[BWWriteResGroup189], (instregex "VSQRTPDYr")>; - def BWWriteResGroup190 : SchedWriteRes<[BWPort0,BWPort1,BWPort5,BWPort23,BWPort0156]> { let Latency = 34; let NumMicroOps = 8; @@ -1817,13 +1751,6 @@ def BWWriteResGroup194 : SchedWriteRes<[BWPort5,BWPort6,BWPort23,BWPort237,BWPor def: InstRW<[BWWriteResGroup194], (instregex "OUT(8|16|32)ir", "OUT(8|16|32)rr")>; -def BWWriteResGroup195 : SchedWriteRes<[BWPort0,BWPort23,BWPort015,BWFPDivider]> { - let Latency = 35; - let NumMicroOps = 4; - let ResourceCycles = [2,1,1,28]; -} -def: InstRW<[BWWriteResGroup195], (instregex "VSQRTPDYm")>; - def BWWriteResGroup196 : SchedWriteRes<[BWPort5,BWPort0156]> { let Latency = 42; let NumMicroOps = 22; diff --git a/llvm/lib/Target/X86/X86SchedHaswell.td b/llvm/lib/Target/X86/X86SchedHaswell.td index 79a9e7a847e0..2e2535eda309 100644 --- a/llvm/lib/Target/X86/X86SchedHaswell.td +++ b/llvm/lib/Target/X86/X86SchedHaswell.td @@ -158,12 +158,25 @@ defm : HWWriteResPair; defm : HWWriteResPair; defm : HWWriteResPair; // 10-14 cycles. defm : HWWriteResPair; // 10-14 cycles. + defm : HWWriteResPair; -defm : HWWriteResPair; +defm : HWWriteResPair; +defm : HWWriteResPair; + defm : HWWriteResPair; -defm : HWWriteResPair; -defm : HWWriteResPair; -defm : HWWriteResPair; +defm : HWWriteResPair; +defm : HWWriteResPair; + +defm : HWWriteResPair; +defm : HWWriteResPair; +defm : HWWriteResPair; +defm : HWWriteResPair; +defm : HWWriteResPair; +defm : HWWriteResPair; +defm : HWWriteResPair; +defm : HWWriteResPair; +defm : HWWriteResPair; + defm : HWWriteResPair; defm : HWWriteResPair; defm : HWWriteResPair; @@ -1639,13 +1652,6 @@ def: InstRW<[HWWriteResGroup89], (instregex "(V?)PCMPGTQ(Y?)rr", "MUL_FST0r", "MUL_FrST0")>; -def HWWriteResGroup91_1 : SchedWriteRes<[HWPort0,HWPort23,HWFPDivider]> { - let Latency = 16; - let NumMicroOps = 2; - let ResourceCycles = [1,1,7]; -} -def: InstRW<[HWWriteResGroup91_1], (instregex "(V?)SQRTSSm")>; - def HWWriteResGroup91_4 : SchedWriteRes<[HWPort0,HWPort23,HWFPDivider]> { let Latency = 18; let NumMicroOps = 2; @@ -1658,9 +1664,7 @@ def HWWriteResGroup91_2 : SchedWriteRes<[HWPort0,HWPort23]> { let NumMicroOps = 2; let ResourceCycles = [1,1]; } -def: InstRW<[HWWriteResGroup91_2], (instregex "(V?)PCMPGTQrm", - "(V?)RCPPSm", - "(V?)RSQRTPSm")>; +def: InstRW<[HWWriteResGroup91_2], (instregex "(V?)PCMPGTQrm")>; def HWWriteResGroup91_3 : SchedWriteRes<[HWPort0,HWPort23]> { let Latency = 12; @@ -1832,22 +1836,6 @@ def HWWriteResGroup121 : SchedWriteRes<[HWPort0,HWFPDivider]> { def: InstRW<[HWWriteResGroup121], (instregex "(V?)DIVPSrr", "(V?)DIVSSrr")>; -def HWWriteResGroup125 : SchedWriteRes<[HWPort0,HWPort015]> { - let Latency = 11; - let NumMicroOps = 3; - let ResourceCycles = [2,1]; -} -def: InstRW<[HWWriteResGroup125], (instregex "VRCPPSYr", - "VRSQRTPSYr")>; - -def HWWriteResGroup128 : SchedWriteRes<[HWPort0,HWPort23,HWPort015]> { - let Latency = 18; - let NumMicroOps = 4; - let ResourceCycles = [2,1,1]; -} -def: InstRW<[HWWriteResGroup128], (instregex "VRCPPSYm", - "VRSQRTPSYm")>; - def HWWriteResGroup129 : SchedWriteRes<[HWPort1,HWPort06,HWPort0156]> { let Latency = 11; let NumMicroOps = 7; @@ -1877,14 +1865,6 @@ def HWWriteResGroup132 : SchedWriteRes<[HWPort4,HWPort23,HWPort237,HWPort06,HWPo } def: InstRW<[HWWriteResGroup132], (instregex "CMPXCHG8B")>; -def HWWriteResGroup133 : SchedWriteRes<[HWPort0,HWFPDivider]> { - let Latency = 11; - let NumMicroOps = 1; - let ResourceCycles = [1,7]; -} -def: InstRW<[HWWriteResGroup133], (instregex "(V?)SQRTPSr", - "(V?)SQRTSSr")>; - def HWWriteResGroup134 : SchedWriteRes<[HWPort0,HWPort23,HWFPDivider]> { let Latency = 19; let NumMicroOps = 2; @@ -1899,13 +1879,6 @@ def HWWriteResGroup135 : SchedWriteRes<[HWPort1,HWPort23,HWPort237,HWPort06,HWPo } def: InstRW<[HWWriteResGroup135], (instregex "RCR(8|16|32|64)mCL")>; -def HWWriteResGroup138 : SchedWriteRes<[HWPort0,HWPort23,HWFPDivider]> { - let Latency = 17; - let NumMicroOps = 2; - let ResourceCycles = [1,1,7]; -} -def: InstRW<[HWWriteResGroup138], (instregex "(V?)SQRTPSm")>; - def HWWriteResGroup142 : SchedWriteRes<[HWPort1,HWPort06,HWPort15,HWPort0156]> { let Latency = 14; let NumMicroOps = 10; @@ -1994,20 +1967,6 @@ def HWWriteResGroup155_1 : SchedWriteRes<[HWPort0,HWPort23,HWFPDivider]> { } def: InstRW<[HWWriteResGroup155_1], (instregex "(V?)DIVPDrm")>; -def HWWriteResGroup155_2 : SchedWriteRes<[HWPort0,HWPort23,HWFPDivider]> { - let Latency = 21; - let NumMicroOps = 2; - let ResourceCycles = [1,1,14]; -} -def: InstRW<[HWWriteResGroup155_2], (instregex "(V?)SQRTSDm")>; - -def HWWriteResGroup155_3 : SchedWriteRes<[HWPort0,HWPort23,HWFPDivider]> { - let Latency = 22; - let NumMicroOps = 2; - let ResourceCycles = [1,1,14]; -} -def: InstRW<[HWWriteResGroup155_3], (instregex "(V?)SQRTPDm")>; - def HWWriteResGroup155_4 : SchedWriteRes<[HWPort0,HWPort23,HWFPDivider]> { let Latency = 25; let NumMicroOps = 2; @@ -2022,29 +1981,19 @@ def HWWriteResGroup156 : SchedWriteRes<[HWPort5,HWPort6,HWPort0156]> { } def: InstRW<[HWWriteResGroup156], (instregex "MWAITrr")>; -def HWWriteResGroup157 : SchedWriteRes<[HWPort0,HWFPDivider]> { - let Latency = 16; - let NumMicroOps = 1; - let ResourceCycles = [1,14]; -} -def: InstRW<[HWWriteResGroup157], (instregex "(V?)SQRTPDr", - "(V?)SQRTSDr")>; - def HWWriteResGroup159 : SchedWriteRes<[HWPort0,HWPort15,HWFPDivider]> { let Latency = 21; let NumMicroOps = 3; let ResourceCycles = [2,1,14]; } -def: InstRW<[HWWriteResGroup159], (instregex "VDIVPSYrr", - "VSQRTPSYr")>; +def: InstRW<[HWWriteResGroup159], (instregex "VDIVPSYrr")>; def HWWriteResGroup160 : SchedWriteRes<[HWPort0,HWPort23,HWPort15,HWFPDivider]> { let Latency = 28; let NumMicroOps = 4; let ResourceCycles = [2,1,1,14]; } -def: InstRW<[HWWriteResGroup160], (instregex "VDIVPSYrm", - "VSQRTPSYm")>; +def: InstRW<[HWWriteResGroup160], (instregex "VDIVPSYrm")>; def HWWriteResGroup161 : SchedWriteRes<[HWPort0,HWPort1,HWPort23]> { let Latency = 30; @@ -2111,16 +2060,14 @@ def HWWriteResGroup173 : SchedWriteRes<[HWPort0,HWPort15,HWFPDivider]> { let NumMicroOps = 3; let ResourceCycles = [2,1,28]; } -def: InstRW<[HWWriteResGroup173], (instregex "VDIVPDYrr", - "VSQRTPDYr")>; +def: InstRW<[HWWriteResGroup173], (instregex "VDIVPDYrr")>; def HWWriteResGroup174 : SchedWriteRes<[HWPort0,HWPort23,HWPort15,HWFPDivider]> { let Latency = 42; let NumMicroOps = 4; let ResourceCycles = [2,1,1,28]; } -def: InstRW<[HWWriteResGroup174], (instregex "VDIVPDYrm", - "VSQRTPDYm")>; +def: InstRW<[HWWriteResGroup174], (instregex "VDIVPDYrm")>; def HWWriteResGroup175 : SchedWriteRes<[HWPort1,HWPort4,HWPort5,HWPort6,HWPort23,HWPort237,HWPort15,HWPort0156]> { let Latency = 41; diff --git a/llvm/lib/Target/X86/X86SchedSandyBridge.td b/llvm/lib/Target/X86/X86SchedSandyBridge.td index a39e5b2bf28e..c3ef44b4ee85 100644 --- a/llvm/lib/Target/X86/X86SchedSandyBridge.td +++ b/llvm/lib/Target/X86/X86SchedSandyBridge.td @@ -148,12 +148,25 @@ defm : SBWriteResPair; defm : SBWriteResPair; defm : SBWriteResPair; defm : SBWriteResPair; + defm : SBWriteResPair; -defm : SBWriteResPair; +defm : SBWriteResPair; +defm : SBWriteResPair; + defm : SBWriteResPair; -defm : SBWriteResPair; -defm : SBWriteResPair; -defm : SBWriteResPair; +defm : SBWriteResPair; +defm : SBWriteResPair; + +defm : SBWriteResPair; +defm : SBWriteResPair; +defm : SBWriteResPair; +defm : SBWriteResPair; +defm : SBWriteResPair; +defm : SBWriteResPair; +defm : SBWriteResPair; +defm : SBWriteResPair; +defm : SBWriteResPair; + defm : SBWriteResPair; defm : SBWriteResPair; defm : SBWriteResPair; @@ -951,14 +964,6 @@ def: InstRW<[SBWriteResGroup59a], (instregex "MMX_PADD(B|D|W)irm", "MMX_P(MAX|MIN)(SW|UB)irm", "MMX_PSUB(B|D|Q|W)irm")>; -def SBWriteResGroup61 : SchedWriteRes<[SBPort0,SBPort05]> { - let Latency = 7; - let NumMicroOps = 3; - let ResourceCycles = [2,1]; -} -def: InstRW<[SBWriteResGroup61], (instregex "VRCPPSYr", - "VRSQRTPSYr")>; - def SBWriteResGroup62 : SchedWriteRes<[SBPort5,SBPort23]> { let Latency = 7; let NumMicroOps = 3; @@ -1361,18 +1366,8 @@ def SBWriteResGroup116 : SchedWriteRes<[SBPort0,SBFPDivider]> { let NumMicroOps = 1; let ResourceCycles = [1,14]; } -def: InstRW<[SBWriteResGroup116], (instregex "(V?)SQRTSSr", - "(V?)DIVPSrr", - "(V?)DIVSSrr", - "(V?)SQRTPSr")>; - -def SBWriteResGroup118 : SchedWriteRes<[SBPort0,SBPort23,SBPort05]> { - let Latency = 14; - let NumMicroOps = 4; - let ResourceCycles = [2,1,1]; -} -def: InstRW<[SBWriteResGroup118], (instregex "VRCPPSYm", - "VRSQRTPSYm")>; +def: InstRW<[SBWriteResGroup116], (instregex "(V?)DIVPSrr", + "(V?)DIVSSrr")>; def SBWriteResGroup119 : SchedWriteRes<[SBPort0,SBPort1,SBPort23]> { let Latency = 15; @@ -1386,26 +1381,8 @@ def SBWriteResGroup123 : SchedWriteRes<[SBPort0,SBPort23,SBFPDivider]> { let NumMicroOps = 2; let ResourceCycles = [1,1,14]; } -def: InstRW<[SBWriteResGroup123], (instregex "(V?)SQRTSSm", - "(V?)DIVPSrm", - "(V?)DIVSSrm", - "(V?)SQRTPSm")>; - -def SBWriteResGroup124 : SchedWriteRes<[SBPort0,SBFPDivider]> { - let Latency = 21; - let NumMicroOps = 1; - let ResourceCycles = [1,21]; -} -def: InstRW<[SBWriteResGroup124], (instregex "(V?)SQRTPDr", - "(V?)SQRTSDr")>; - -def SBWriteResGroup125 : SchedWriteRes<[SBPort0,SBPort23,SBFPDivider]> { - let Latency = 27; - let NumMicroOps = 2; - let ResourceCycles = [1,1,21]; -} -def: InstRW<[SBWriteResGroup125], (instregex "(V?)SQRTPDm", - "(V?)SQRTSDm")>; +def: InstRW<[SBWriteResGroup123], (instregex "(V?)DIVPSrm", + "(V?)DIVSSrm")>; def SBWriteResGroup126 : SchedWriteRes<[SBPort0,SBFPDivider]> { let Latency = 22; @@ -1428,8 +1405,7 @@ def SBWriteResGroup129 : SchedWriteRes<[SBPort0,SBPort05,SBFPDivider]> { let NumMicroOps = 3; let ResourceCycles = [2,1,28]; } -def: InstRW<[SBWriteResGroup129], (instregex "VDIVPSYrr", - "VSQRTPSYr")>; +def: InstRW<[SBWriteResGroup129], (instregex "VDIVPSYrr")>; def SBWriteResGroup130 : SchedWriteRes<[SBPort0,SBPort23]> { let Latency = 31; @@ -1450,23 +1426,20 @@ def SBWriteResGroup132 : SchedWriteRes<[SBPort0,SBPort23,SBPort05,SBFPDivider]> let NumMicroOps = 4; let ResourceCycles = [2,1,1,28]; } -def: InstRW<[SBWriteResGroup132], (instregex "VDIVPSYrm", - "VSQRTPSYm")>; +def: InstRW<[SBWriteResGroup132], (instregex "VDIVPSYrm")>; def SBWriteResGroup133 : SchedWriteRes<[SBPort0,SBPort05,SBFPDivider]> { let Latency = 45; let NumMicroOps = 3; let ResourceCycles = [2,1,44]; } -def: InstRW<[SBWriteResGroup133], (instregex "VDIVPDYrr", - "VSQRTPDYr")>; +def: InstRW<[SBWriteResGroup133], (instregex "VDIVPDYrr")>; def SBWriteResGroup134 : SchedWriteRes<[SBPort0,SBPort23,SBPort05,SBFPDivider]> { let Latency = 52; let NumMicroOps = 4; let ResourceCycles = [2,1,1,44]; } -def: InstRW<[SBWriteResGroup134], (instregex "VDIVPDYrm", - "VSQRTPDYm")>; +def: InstRW<[SBWriteResGroup134], (instregex "VDIVPDYrm")>; } // SchedModel diff --git a/llvm/lib/Target/X86/X86SchedSkylakeClient.td b/llvm/lib/Target/X86/X86SchedSkylakeClient.td index 8cb6b14239da..077eeed90d4b 100644 --- a/llvm/lib/Target/X86/X86SchedSkylakeClient.td +++ b/llvm/lib/Target/X86/X86SchedSkylakeClient.td @@ -161,12 +161,25 @@ defm : SKLWriteResPair; // Floating poin defm : SKLWriteResPair; // Floating point multiplication (YMM/ZMM). defm : SKLWriteResPair; // 10-14 cycles. // Floating point division. defm : SKLWriteResPair; // 10-14 cycles. // Floating point division (YMM/ZMM). -defm : SKLWriteResPair; // Floating point square root. -defm : SKLWriteResPair; // Floating point square root (YMM/ZMM). + +defm : SKLWriteResPair; // Floating point square root. +defm : SKLWriteResPair; // Floating point square root (XMM). +defm : SKLWriteResPair; // Floating point square root (YMM). +defm : SKLWriteResPair; // Floating point square root (ZMM). +defm : SKLWriteResPair; // Floating point double square root. +defm : SKLWriteResPair; // Floating point double square root (XMM). +defm : SKLWriteResPair; // Floating point double square root (YMM). +defm : SKLWriteResPair; // Floating point double square root (ZMM). +defm : SKLWriteResPair; // Floating point long double square root. + defm : SKLWriteResPair; // Floating point reciprocal estimate. -defm : SKLWriteResPair; // Floating point reciprocal estimate (YMM/ZMM). +defm : SKLWriteResPair; // Floating point reciprocal estimate (XMM). +defm : SKLWriteResPair; // Floating point reciprocal estimate (YMM/ZMM). + defm : SKLWriteResPair; // Floating point reciprocal square root estimate. -defm : SKLWriteResPair; // Floating point reciprocal square root estimate (YMM/ZMM). +defm : SKLWriteResPair; // Floating point reciprocal square root estimate (XMM). +defm : SKLWriteResPair; // Floating point reciprocal square root estimate (YMM/ZMM). + defm : SKLWriteResPair; // Fused Multiply Add. defm : SKLWriteResPair; // Fused Multiply Add (XMM). defm : SKLWriteResPair; // Fused Multiply Add (YMM/ZMM). @@ -1531,14 +1544,6 @@ def SKLWriteResGroup131 : SchedWriteRes<[SKLPort1,SKLPort6,SKLPort23,SKLPort0156 def: InstRW<[SKLWriteResGroup131], (instregex "LAR(16|32|64)rm", "LSL(16|32|64)rm")>; -def SKLWriteResGroup132 : SchedWriteRes<[SKLPort0,SKLPort23]> { - let Latency = 10; - let NumMicroOps = 2; - let ResourceCycles = [1,1]; -} -def: InstRW<[SKLWriteResGroup132], (instregex "(V?)RCPPSm", - "(V?)RSQRTPSm")>; - def SKLWriteResGroup133 : SchedWriteRes<[SKLPort5,SKLPort23]> { let Latency = 10; let NumMicroOps = 2; @@ -1621,9 +1626,7 @@ def SKLWriteResGroup146 : SchedWriteRes<[SKLPort0,SKLPort23]> { let NumMicroOps = 2; let ResourceCycles = [1,1]; } -def: InstRW<[SKLWriteResGroup146], (instregex "MUL_F(32|64)m", - "VRCPPSYm", - "VRSQRTPSYm")>; +def: InstRW<[SKLWriteResGroup146], (instregex "MUL_F(32|64)m")>; def SKLWriteResGroup147 : SchedWriteRes<[SKLPort01,SKLPort23]> { let Latency = 11; @@ -1707,21 +1710,6 @@ def SKLWriteResGroup156 : SchedWriteRes<[SKLPort06,SKLPort0156]> { } def: InstRW<[SKLWriteResGroup156], (instrs LOOPE, LOOPNE)>; -def SKLWriteResGroup157 : SchedWriteRes<[SKLPort0,SKLFPDivider]> { - let Latency = 12; - let NumMicroOps = 1; - let ResourceCycles = [1,3]; -} -def: InstRW<[SKLWriteResGroup157], (instregex "(V?)SQRTPSr", - "(V?)SQRTSSr")>; - -def SKLWriteResGroup158 : SchedWriteRes<[SKLPort0,SKLFPDivider]> { - let Latency = 12; - let NumMicroOps = 1; - let ResourceCycles = [1,6]; -} -def: InstRW<[SKLWriteResGroup158], (instregex "VSQRTPSYr")>; - def SKLWriteResGroup160 : SchedWriteRes<[SKLPort0,SKLPort5,SKLPort23,SKLPort01]> { let Latency = 12; let NumMicroOps = 4; @@ -1816,13 +1804,6 @@ def SKLWriteResGroup179 : SchedWriteRes<[SKLPort0,SKLPort23,SKLFPDivider]> { } def: InstRW<[SKLWriteResGroup179], (instregex "(V?)DIVPSrm")>; -def SKLWriteResGroup179_1 : SchedWriteRes<[SKLPort0,SKLPort23,SKLFPDivider]> { - let Latency = 17; - let NumMicroOps = 2; - let ResourceCycles = [1,1,3]; -} -def: InstRW<[SKLWriteResGroup179_1], (instregex "(V?)SQRTSSm")>; - def SKLWriteResGroup180 : SchedWriteRes<[SKLPort0,SKLPort1,SKLPort5,SKLPort6,SKLPort05,SKLPort0156]> { let Latency = 17; let NumMicroOps = 15; @@ -1830,21 +1811,6 @@ def SKLWriteResGroup180 : SchedWriteRes<[SKLPort0,SKLPort1,SKLPort5,SKLPort6,SKL } def: InstRW<[SKLWriteResGroup180], (instrs XCH_F)>; -def SKLWriteResGroup181 : SchedWriteRes<[SKLPort0,SKLFPDivider]> { - let Latency = 18; - let NumMicroOps = 1; - let ResourceCycles = [1,6]; -} -def: InstRW<[SKLWriteResGroup181], (instregex "(V?)SQRTPDr", - "(V?)SQRTSDr")>; - -def SKLWriteResGroup181_1 : SchedWriteRes<[SKLPort0,SKLFPDivider]> { - let Latency = 18; - let NumMicroOps = 1; - let ResourceCycles = [1,12]; -} -def: InstRW<[SKLWriteResGroup181_1], (instregex "VSQRTPDYr")>; - def SKLWriteResGroup182 : SchedWriteRes<[SKLPort0,SKLPort23,SKLFPDivider]> { let Latency = 18; let NumMicroOps = 2; @@ -1852,13 +1818,6 @@ def SKLWriteResGroup182 : SchedWriteRes<[SKLPort0,SKLPort23,SKLFPDivider]> { } def: InstRW<[SKLWriteResGroup182], (instregex "VDIVPSYrm")>; -def SKLWriteResGroup183 : SchedWriteRes<[SKLPort0,SKLPort23,SKLFPDivider]> { - let Latency = 18; - let NumMicroOps = 2; - let ResourceCycles = [1,1,3]; -} -def: InstRW<[SKLWriteResGroup183], (instregex "(V?)SQRTPSm")>; - def SKLWriteResGroup184 : SchedWriteRes<[SKLPort5,SKLPort6,SKLPort06,SKLPort0156]> { let Latency = 18; let NumMicroOps = 8; @@ -1880,13 +1839,6 @@ def SKLWriteResGroup186 : SchedWriteRes<[SKLPort0,SKLPort23,SKLFPDivider]> { } def: InstRW<[SKLWriteResGroup186], (instregex "(V?)DIVSDrm")>; -def SKLWriteResGroup186_1 : SchedWriteRes<[SKLPort0,SKLPort23,SKLFPDivider]> { - let Latency = 19; - let NumMicroOps = 2; - let ResourceCycles = [1,1,6]; -} -def: InstRW<[SKLWriteResGroup186_1], (instregex "VSQRTPSYm")>; - def SKLWriteResGroup189 : SchedWriteRes<[SKLPort0]> { let Latency = 20; let NumMicroOps = 1; @@ -1959,13 +1911,6 @@ def: InstRW<[SKLWriteResGroup196_2], (instrs VGATHERDPSYrm, VPGATHERQQYrm, VGATHERDPDYrm)>; -def SKLWriteResGroup197 : SchedWriteRes<[SKLPort0,SKLPort23,SKLFPDivider]> { - let Latency = 23; - let NumMicroOps = 2; - let ResourceCycles = [1,1,6]; -} -def: InstRW<[SKLWriteResGroup197], (instregex "(V?)SQRTSDm")>; - def SKLWriteResGroup198 : SchedWriteRes<[SKLPort0,SKLPort4,SKLPort5,SKLPort23,SKLPort237,SKLPort06,SKLPort0156]> { let Latency = 23; let NumMicroOps = 19; @@ -1973,20 +1918,6 @@ def SKLWriteResGroup198 : SchedWriteRes<[SKLPort0,SKLPort4,SKLPort5,SKLPort23,SK } def: InstRW<[SKLWriteResGroup198], (instregex "CMPXCHG16B")>; -def SKLWriteResGroup199 : SchedWriteRes<[SKLPort0,SKLPort23,SKLFPDivider]> { - let Latency = 24; - let NumMicroOps = 2; - let ResourceCycles = [1,1,6]; -} -def: InstRW<[SKLWriteResGroup199], (instregex "(V?)SQRTPDm")>; - -def SKLWriteResGroup201 : SchedWriteRes<[SKLPort0,SKLPort23,SKLFPDivider]> { - let Latency = 25; - let NumMicroOps = 2; - let ResourceCycles = [1,1,12]; -} -def: InstRW<[SKLWriteResGroup201], (instregex "VSQRTPDYm")>; - def SKLWriteResGroup202 : SchedWriteRes<[SKLPort0,SKLPort5,SKLPort23]> { let Latency = 25; let NumMicroOps = 3; diff --git a/llvm/lib/Target/X86/X86SchedSkylakeServer.td b/llvm/lib/Target/X86/X86SchedSkylakeServer.td index c945fae34d5c..bd58687884ec 100755 --- a/llvm/lib/Target/X86/X86SchedSkylakeServer.td +++ b/llvm/lib/Target/X86/X86SchedSkylakeServer.td @@ -161,12 +161,25 @@ defm : SKXWriteResPair; // Floating poin defm : SKXWriteResPair; // Floating point multiplication (YMM/ZMM). defm : SKXWriteResPair; // 10-14 cycles. // Floating point division. defm : SKXWriteResPair; // 10-14 cycles. // Floating point division (YMM/ZMM). -defm : SKXWriteResPair; // Floating point square root. -defm : SKXWriteResPair; // Floating point square root (YMM/ZMM). -defm : SKXWriteResPair; // Floating point reciprocal estimate. + +defm : SKXWriteResPair; // Floating point square root. +defm : SKXWriteResPair; // Floating point square root (XMM). +defm : SKXWriteResPair; // Floating point square root (YMM). +defm : SKXWriteResPair; // Floating point square root (ZMM). +defm : SKXWriteResPair; // Floating point double square root. +defm : SKXWriteResPair; // Floating point double square root (XMM). +defm : SKXWriteResPair; // Floating point double square root (YMM). +defm : SKXWriteResPair; // Floating point double square root (ZMM). +defm : SKXWriteResPair; // Floating point long double square root. + +defm : SKXWriteResPair; // Floating point reciprocal estimate. +defm : SKXWriteResPair; // Floating point reciprocal estimate (XMM). defm : SKXWriteResPair; // Floating point reciprocal estimate (YMM/ZMM). -defm : SKXWriteResPair; // Floating point reciprocal square root estimate. + +defm : SKXWriteResPair; // Floating point reciprocal square root estimate. +defm : SKXWriteResPair; // Floating point reciprocal square root estimate (XMM). defm : SKXWriteResPair; // Floating point reciprocal square root estimate (YMM/ZMM). + defm : SKXWriteResPair; // Fused Multiply Add. defm : SKXWriteResPair; // Fused Multiply Add (XMM). defm : SKXWriteResPair; // Fused Multiply Add (YMM/ZMM). @@ -2388,10 +2401,6 @@ def SKXWriteResGroup135 : SchedWriteRes<[SKXPort0,SKXPort23]> { let ResourceCycles = [1,1]; } def: InstRW<[SKXWriteResGroup135], (instregex "MMX_CVTPI2PSirm", - "RCPSSm", - "RSQRTSSm", - "VRCPSSm", - "VRSQRTSSm", "VTESTPDYrm", "VTESTPSYrm")>; @@ -2877,21 +2886,6 @@ def SKXWriteResGroup171 : SchedWriteRes<[SKXPort06,SKXPort0156]> { } def: InstRW<[SKXWriteResGroup171], (instrs LOOPE, LOOPNE)>; -def SKXWriteResGroup172 : SchedWriteRes<[SKXPort0,SKXFPDivider]> { - let Latency = 12; - let NumMicroOps = 1; - let ResourceCycles = [1,3]; -} -def: InstRW<[SKXWriteResGroup172], (instregex "(V?)SQRTPS(Z128)?r", - "(V?)SQRTSS(Z?)r")>; - -def SKXWriteResGroup173 : SchedWriteRes<[SKXPort0,SKXFPDivider]> { - let Latency = 12; - let NumMicroOps = 1; - let ResourceCycles = [1,6]; -} -def: InstRW<[SKXWriteResGroup173], (instregex "VSQRTPS(Y|Z256)r")>; - def SKXWriteResGroup174 : SchedWriteRes<[SKXPort015]> { let Latency = 12; let NumMicroOps = 3; @@ -3072,13 +3066,6 @@ def SKXWriteResGroup201 : SchedWriteRes<[SKXPort0,SKXPort23,SKXFPDivider]> { } def: InstRW<[SKXWriteResGroup201], (instregex "(V?)DIVPS(Z128)?rm")>; -def SKXWriteResGroup201_1 : SchedWriteRes<[SKXPort0,SKXPort23,SKXFPDivider]> { - let Latency = 17; - let NumMicroOps = 2; - let ResourceCycles = [1,1,3]; -} -def: InstRW<[SKXWriteResGroup201_1], (instregex "(V?)SQRTSS(Z?)m")>; - def SKXWriteResGroup202 : SchedWriteRes<[SKXPort0,SKXPort1,SKXPort5,SKXPort6,SKXPort05,SKXPort0156]> { let Latency = 17; let NumMicroOps = 15; @@ -3086,21 +3073,6 @@ def SKXWriteResGroup202 : SchedWriteRes<[SKXPort0,SKXPort1,SKXPort5,SKXPort6,SKX } def: InstRW<[SKXWriteResGroup202], (instrs XCH_F)>; -def SKXWriteResGroup203 : SchedWriteRes<[SKXPort0,SKXFPDivider]> { - let Latency = 18; - let NumMicroOps = 1; - let ResourceCycles = [1,6]; -} -def: InstRW<[SKXWriteResGroup203], (instregex "(V?)SQRTPD(Z128)?r", - "(V?)SQRTSD(Z?)r")>; - -def SKXWriteResGroup203_1 : SchedWriteRes<[SKXPort0,SKXFPDivider]> { - let Latency = 18; - let NumMicroOps = 1; - let ResourceCycles = [1,12]; -} -def: InstRW<[SKXWriteResGroup203_1], (instregex "VSQRTPD(Y|Z256)r")>; - def SKXWriteResGroup204 : SchedWriteRes<[SKXPort0,SKXPort23,SKXFPDivider]> { let Latency = 18; let NumMicroOps = 2; @@ -3108,13 +3080,6 @@ def SKXWriteResGroup204 : SchedWriteRes<[SKXPort0,SKXPort23,SKXFPDivider]> { } def: InstRW<[SKXWriteResGroup204], (instregex "VDIVPS(Y|Z256)rm")>; -def SKXWriteResGroup204_1 : SchedWriteRes<[SKXPort0,SKXPort23,SKXFPDivider]> { - let Latency = 18; - let NumMicroOps = 2; - let ResourceCycles = [1,1,3]; -} -def: InstRW<[SKXWriteResGroup204_1], (instregex "(V?)SQRTPS(Z128)?m")>; - def SKXWriteResGroup205 : SchedWriteRes<[SKXPort23,SKXPort015]> { let Latency = 18; let NumMicroOps = 4; @@ -3143,20 +3108,6 @@ def SKXWriteResGroup209 : SchedWriteRes<[SKXPort0,SKXPort23,SKXFPDivider]> { } def: InstRW<[SKXWriteResGroup209], (instregex "(V?)DIVSD(Z?)rm")>; -def SKXWriteResGroup209_1 : SchedWriteRes<[SKXPort0,SKXPort23,SKXFPDivider]> { - let Latency = 19; - let NumMicroOps = 2; - let ResourceCycles = [1,1,6]; -} -def: InstRW<[SKXWriteResGroup209_1], (instregex "VSQRTPS(Y|Z256)m")>; - -def SKXWriteResGroup210 : SchedWriteRes<[SKXPort0,SKXPort5,SKXFPDivider]> { - let Latency = 20; - let NumMicroOps = 3; - let ResourceCycles = [2,1,12]; -} -def: InstRW<[SKXWriteResGroup210], (instregex "VSQRTPSZr")>; - def SKXWriteResGroup211 : SchedWriteRes<[SKXPort23,SKXPort015]> { let Latency = 19; let NumMicroOps = 4; @@ -3287,13 +3238,6 @@ def SKXWriteResGroup225 : SchedWriteRes<[SKXPort5,SKXPort01,SKXPort015]> { def: InstRW<[SKXWriteResGroup225], (instregex "VPCONFLICTDZ128rr", "VPCONFLICTQZ256rr")>; -def SKXWriteResGroup226 : SchedWriteRes<[SKXPort0,SKXPort23,SKXFPDivider]> { - let Latency = 23; - let NumMicroOps = 2; - let ResourceCycles = [1,1,6]; -} -def: InstRW<[SKXWriteResGroup226], (instregex "(V?)SQRTSD(Z?)m")>; - def SKXWriteResGroup227 : SchedWriteRes<[SKXPort0,SKXPort5,SKXFPDivider]> { let Latency = 23; let NumMicroOps = 3; @@ -3315,13 +3259,6 @@ def SKXWriteResGroup228 : SchedWriteRes<[SKXPort0,SKXPort4,SKXPort5,SKXPort23,SK } def: InstRW<[SKXWriteResGroup228], (instregex "CMPXCHG16B")>; -def SKXWriteResGroup229 : SchedWriteRes<[SKXPort0,SKXPort23,SKXFPDivider]> { - let Latency = 24; - let NumMicroOps = 2; - let ResourceCycles = [1,1,6]; -} -def: InstRW<[SKXWriteResGroup229], (instregex "(V?)SQRTPD(Z128)?m")>; - def SKXWriteResGroup230 : SchedWriteRes<[SKXPort0,SKXPort23,SKXPort5,SKXFPDivider]> { let Latency = 25; let NumMicroOps = 4; @@ -3329,13 +3266,6 @@ def SKXWriteResGroup230 : SchedWriteRes<[SKXPort0,SKXPort23,SKXPort5,SKXFPDivide } def: InstRW<[SKXWriteResGroup230], (instregex "VDIVPSZrm(b?)")>; -def SKXWriteResGroup232 : SchedWriteRes<[SKXPort0,SKXPort23,SKXFPDivider]> { - let Latency = 25; - let NumMicroOps = 2; - let ResourceCycles = [1,1,12]; -} -def: InstRW<[SKXWriteResGroup232], (instregex "VSQRTPD(Y|Z256)m")>; - def SKXWriteResGroup233 : SchedWriteRes<[SKXPort0,SKXPort5,SKXPort23]> { let Latency = 25; let NumMicroOps = 3; @@ -3354,13 +3284,6 @@ def: InstRW<[SKXWriteResGroup234], (instrs VGATHERDPDZ256rm, VPGATHERQDZrm, VPGATHERQQZ256rm)>; -def SKXWriteResGroup237 : SchedWriteRes<[SKXPort0,SKXPort23,SKXPort5,SKXFPDivider]> { - let Latency = 27; - let NumMicroOps = 4; - let ResourceCycles = [2,1,1,12]; -} -def: InstRW<[SKXWriteResGroup237], (instregex "VSQRTPSZm(b?)")>; - def SKXWriteResGroup238 : SchedWriteRes<[SKXPort0,SKXPort23,SKXPort015,SKXPort0156]> { let Latency = 26; let NumMicroOps = 5; @@ -3422,13 +3345,6 @@ def SKXWriteResGroup245 : SchedWriteRes<[SKXPort0,SKXPort23,SKXPort015,SKXPort01 def: InstRW<[SKXWriteResGroup245], (instrs VGATHERDPSZrm, VPGATHERDDZrm)>; -def SKXWriteResGroup246 : SchedWriteRes<[SKXPort0,SKXPort5,SKXFPDivider]> { - let Latency = 32; - let NumMicroOps = 3; - let ResourceCycles = [2,1,24]; -} -def: InstRW<[SKXWriteResGroup246], (instregex "VSQRTPDZr")>; - def SKXWriteResGroup247 : SchedWriteRes<[SKXPort5,SKXPort6,SKXPort23,SKXPort06,SKXPort0156]> { let Latency = 35; let NumMicroOps = 23; @@ -3460,13 +3376,6 @@ def SKXWriteResGroup250 : SchedWriteRes<[SKXPort1,SKXPort6,SKXPort23,SKXPort0156 } def: InstRW<[SKXWriteResGroup250], (instregex "XRSTOR(64)?")>; -def SKXWriteResGroup251 : SchedWriteRes<[SKXPort0,SKXPort23,SKXPort5,SKXFPDivider]> { - let Latency = 39; - let NumMicroOps = 4; - let ResourceCycles = [2,1,1,24]; -} -def: InstRW<[SKXWriteResGroup251], (instregex "VSQRTPDZm(b?)")>; - def SKXWriteResGroup252 : SchedWriteRes<[SKXPort1,SKXPort4,SKXPort5,SKXPort6,SKXPort23,SKXPort237,SKXPort15,SKXPort0156]> { let Latency = 40; let NumMicroOps = 18; diff --git a/llvm/lib/Target/X86/X86Schedule.td b/llvm/lib/Target/X86/X86Schedule.td index 768e5e15d6a2..cc933c80eefc 100644 --- a/llvm/lib/Target/X86/X86Schedule.td +++ b/llvm/lib/Target/X86/X86Schedule.td @@ -61,6 +61,13 @@ class X86SchedWriteWidths { + X86SchedWriteWidths PS = sPS; + X86SchedWriteWidths PD = sPD; +} + // Loads, stores, and moves, not folded with other operations. def WriteLoad : SchedWrite; def WriteStore : SchedWrite; @@ -111,10 +118,19 @@ defm WriteFMulY : X86SchedWritePair; // Floating point multiplication (YMM/ZMM) defm WriteFDiv : X86SchedWritePair; // Floating point division. defm WriteFDivY : X86SchedWritePair; // Floating point division (YMM/ZMM). defm WriteFSqrt : X86SchedWritePair; // Floating point square root. -defm WriteFSqrtY : X86SchedWritePair; // Floating point square root (YMM/ZMM). +defm WriteFSqrtX : X86SchedWritePair; // Floating point square root (XMM). +defm WriteFSqrtY : X86SchedWritePair; // Floating point square root (YMM). +defm WriteFSqrtZ : X86SchedWritePair; // Floating point square root (ZMM). +defm WriteFSqrt64 : X86SchedWritePair; // Floating point double square root. +defm WriteFSqrt64X : X86SchedWritePair; // Floating point double square root (XMM). +defm WriteFSqrt64Y : X86SchedWritePair; // Floating point double square root (YMM). +defm WriteFSqrt64Z : X86SchedWritePair; // Floating point double square root (ZMM). +defm WriteFSqrt80 : X86SchedWritePair; // Floating point long double square root. defm WriteFRcp : X86SchedWritePair; // Floating point reciprocal estimate. +defm WriteFRcpX : X86SchedWritePair; // Floating point reciprocal estimate (XMM). defm WriteFRcpY : X86SchedWritePair; // Floating point reciprocal estimate (YMM/ZMM). defm WriteFRsqrt : X86SchedWritePair; // Floating point reciprocal square root estimate. +defm WriteFRsqrtX: X86SchedWritePair; // Floating point reciprocal square root estimate (XMM). defm WriteFRsqrtY: X86SchedWritePair; // Floating point reciprocal square root estimate (YMM/ZMM). defm WriteFMA : X86SchedWritePair; // Fused Multiply Add. defm WriteFMAX : X86SchedWritePair; // Fused Multiply Add (XMM). @@ -261,11 +277,15 @@ def SchedWriteDPPS def SchedWriteFDiv : X86SchedWriteWidths; def SchedWriteFSqrt - : X86SchedWriteWidths; + : X86SchedWriteWidths; +def SchedWriteFSqrt64 + : X86SchedWriteWidths; def SchedWriteFRcp - : X86SchedWriteWidths; + : X86SchedWriteWidths; def SchedWriteFRsqrt - : X86SchedWriteWidths; + : X86SchedWriteWidths; def SchedWriteFRnd : X86SchedWriteWidths; def SchedWriteFLogic @@ -324,6 +344,16 @@ def SchedWriteVarBlend : X86SchedWriteWidths; +// Vector size wrappers. +def SchedWriteFAddSizes + : X86SchedWriteSizes; +def SchedWriteFMulSizes + : X86SchedWriteSizes; +def SchedWriteFDivSizes + : X86SchedWriteSizes; +def SchedWriteFSqrtSizes + : X86SchedWriteSizes; + //===----------------------------------------------------------------------===// // Generic Processor Scheduler Models. diff --git a/llvm/lib/Target/X86/X86ScheduleAtom.td b/llvm/lib/Target/X86/X86ScheduleAtom.td index 1b3337180424..8ffa9e67400d 100644 --- a/llvm/lib/Target/X86/X86ScheduleAtom.td +++ b/llvm/lib/Target/X86/X86ScheduleAtom.td @@ -211,13 +211,22 @@ defm : AtomWriteResPair; defm : AtomWriteResPair; defm : AtomWriteResPair; -defm : AtomWriteResPair; +defm : AtomWriteResPair; +defm : AtomWriteResPair; defm : AtomWriteResPair; -defm : AtomWriteResPair; +defm : AtomWriteResPair; +defm : AtomWriteResPair; defm : AtomWriteResPair; defm : AtomWriteResPair; defm : AtomWriteResPair; -defm : AtomWriteResPair; +defm : AtomWriteResPair; +defm : AtomWriteResPair; +defm : AtomWriteResPair; +defm : AtomWriteResPair; +defm : AtomWriteResPair; +defm : AtomWriteResPair; +defm : AtomWriteResPair; +defm : AtomWriteResPair; defm : AtomWriteResPair; defm : AtomWriteResPair; defm : AtomWriteResPair; @@ -557,7 +566,7 @@ def : InstRW<[AtomWrite01_9], (instrs BT16mr, BT32mr, BT64mr, SHLD64mri8, SHRD64mri8, SHLD64rri8, SHRD64rri8, CMPXCHG8rr, - MULPDrr, RCPPSr, RSQRTPSr)>; + MULPDrr)>; def : InstRW<[AtomWrite01_9], (instregex "CMOV(B|BE|E|P|NB|NBE|NE|NP)_F", "(U)?COM_FI", "TST_F", "(U)?COMIS(D|S)rr", @@ -568,7 +577,7 @@ def AtomWrite01_10 : SchedWriteRes<[AtomPort01]> { let ResourceCycles = [10]; } def : InstRW<[AtomWrite01_10], (instrs FLDL2E, FLDL2T, FLDLG2, FLDLN2, FLDPI, - MULPDrm, RCPPSm, RSQRTPSm)>; + MULPDrm)>; def : InstRW<[AtomWrite01_10], (instregex "(U)?COMIS(D|S)rm", "CVT(T)?SS2SI64rm(_Int)?")>; @@ -697,8 +706,7 @@ def AtomWrite01_62 : SchedWriteRes<[AtomPort01]> { let Latency = 62; let ResourceCycles = [62]; } -def : InstRW<[AtomWrite01_62], (instregex "DIVSD(r|m)(_Int)?", - "SQRTSD(r|m)(_Int)?")>; +def : InstRW<[AtomWrite01_62], (instregex "DIVSD(r|m)(_Int)?")>; def AtomWrite01_63 : SchedWriteRes<[AtomPort01]> { let Latency = 63; @@ -716,7 +724,7 @@ def AtomWrite01_70 : SchedWriteRes<[AtomPort01]> { let Latency = 70; let ResourceCycles = [70]; } -def : InstRW<[AtomWrite01_70], (instrs DIVPSrr, DIVPSrm, SQRTPSr, SQRTPSm)>; +def : InstRW<[AtomWrite01_70], (instrs DIVPSrr, DIVPSrm)>; def AtomWrite01_71 : SchedWriteRes<[AtomPort01]> { let Latency = 71; @@ -724,7 +732,6 @@ def AtomWrite01_71 : SchedWriteRes<[AtomPort01]> { } def : InstRW<[AtomWrite01_71], (instrs FPREM1, INVLPG, INVLPGA32, INVLPGA64)>; -def : InstRW<[AtomWrite01_71], (instregex "SQRT_F")>; def AtomWrite01_72 : SchedWriteRes<[AtomPort01]> { let Latency = 72; @@ -785,7 +792,7 @@ def AtomWrite01_125 : SchedWriteRes<[AtomPort01]> { let Latency = 125; let ResourceCycles = [125]; } -def : InstRW<[AtomWrite01_125], (instrs DIVPDrr, DIVPDrm, SQRTPDr, SQRTPDm)>; +def : InstRW<[AtomWrite01_125], (instrs DIVPDrr, DIVPDrm)>; def AtomWrite01_127 : SchedWriteRes<[AtomPort01]> { let Latency = 127; diff --git a/llvm/lib/Target/X86/X86ScheduleBtVer2.td b/llvm/lib/Target/X86/X86ScheduleBtVer2.td index 8521ed3881d4..31e26b4579b5 100644 --- a/llvm/lib/Target/X86/X86ScheduleBtVer2.td +++ b/llvm/lib/Target/X86/X86ScheduleBtVer2.td @@ -330,13 +330,22 @@ defm : JWriteResFpuPair; defm : JWriteResFpuPair; defm : JWriteResYMMPair; defm : JWriteResFpuPair; +defm : JWriteResFpuPair; defm : JWriteResYMMPair; defm : JWriteResFpuPair; +defm : JWriteResFpuPair; defm : JWriteResYMMPair; defm : JWriteResFpuPair; defm : JWriteResYMMPair; defm : JWriteResFpuPair; +defm : JWriteResFpuPair; defm : JWriteResYMMPair; +defm : JWriteResYMMPair; +defm : JWriteResFpuPair; +defm : JWriteResFpuPair; +defm : JWriteResYMMPair; +defm : JWriteResYMMPair; +defm : JWriteResFpuPair; defm : JWriteResFpuPair; defm : JWriteResFpuPair; defm : JWriteResYMMPair; @@ -667,36 +676,6 @@ def JWriteVTESTLd: SchedWriteRes<[JLAGU, JFPU0, JFPA, JALU0]> { } def : InstRW<[JWriteVTESTLd], (instrs PTESTrm, VPTESTrm, VTESTPDrm, VTESTPSrm)>; -def JWriteVSQRTPD: SchedWriteRes<[JFPU1, JFPM]> { - let Latency = 27; - let ResourceCycles = [1, 27]; -} -def : InstRW<[JWriteVSQRTPD], (instrs SQRTPDr, VSQRTPDr, - SQRTSDr, VSQRTSDr, - SQRTSDr_Int, VSQRTSDr_Int)>; - -def JWriteVSQRTPDLd: SchedWriteRes<[JLAGU, JFPU1, JFPM]> { - let Latency = 32; - let ResourceCycles = [1, 1, 27]; -} -def : InstRW<[JWriteVSQRTPDLd], (instrs SQRTPDm, VSQRTPDm, - SQRTSDm, VSQRTSDm, - SQRTSDm_Int, VSQRTSDm_Int)>; - -def JWriteVSQRTYPD: SchedWriteRes<[JFPU1, JFPM]> { - let Latency = 54; // each uOp is 27cy. - let ResourceCycles = [2, 54]; - let NumMicroOps = 2; -} -def : InstRW<[JWriteVSQRTYPD], (instrs VSQRTPDYr)>; - -def JWriteVSQRTYPDLd: SchedWriteRes<[JLAGU, JFPU1, JFPM]> { - let Latency = 59; // each uOp is 27cy (+5cy of memory load). - let ResourceCycles = [2, 2, 54]; - let NumMicroOps = 2; -} -def : InstRW<[JWriteVSQRTYPDLd], (instrs VSQRTPDYm)>; - def JWriteJVZEROALL: SchedWriteRes<[]> { let Latency = 90; let NumMicroOps = 73; diff --git a/llvm/lib/Target/X86/X86ScheduleSLM.td b/llvm/lib/Target/X86/X86ScheduleSLM.td index dec522ea97d9..9d1787fec6f3 100644 --- a/llvm/lib/Target/X86/X86ScheduleSLM.td +++ b/llvm/lib/Target/X86/X86ScheduleSLM.td @@ -139,12 +139,21 @@ defm : SLMWriteResPair; defm : SLMWriteResPair; defm : SLMWriteResPair; defm : SLMWriteResPair; -defm : SLMWriteResPair; -defm : SLMWriteResPair; -defm : SLMWriteResPair; -defm : SLMWriteResPair; -defm : SLMWriteResPair; -defm : SLMWriteResPair; +defm : SLMWriteResPair; +defm : SLMWriteResPair; +defm : SLMWriteResPair; +defm : SLMWriteResPair; +defm : SLMWriteResPair; +defm : SLMWriteResPair; +defm : SLMWriteResPair; +defm : SLMWriteResPair; +defm : SLMWriteResPair; +defm : SLMWriteResPair; +defm : SLMWriteResPair; +defm : SLMWriteResPair; +defm : SLMWriteResPair; +defm : SLMWriteResPair; +defm : SLMWriteResPair; defm : SLMWriteResPair; defm : SLMWriteResPair; defm : SLMWriteResPair; @@ -382,60 +391,4 @@ def SLMriteResGroup8 : SchedWriteRes<[SLM_MEC_RSV,SLM_FPC_RSV0,SLMFPDivider]> { } def: InstRW<[SLMriteResGroup8], (instregex "(V?)DIVSSrm")>; -def SLMriteResGroup9 : SchedWriteRes<[SLM_FPC_RSV0,SLMFPDivider]> { - let Latency = 71; - let NumMicroOps = 1; - let ResourceCycles = [1,70]; -} -def: InstRW<[SLMriteResGroup9], (instregex "(V?)SQRTPDr")>; - -def SLMriteResGroup10 : SchedWriteRes<[SLM_FPC_RSV0,SLMFPDivider]> { - let Latency = 41; - let NumMicroOps = 1; - let ResourceCycles = [1,40]; -} -def: InstRW<[SLMriteResGroup10], (instregex "(V?)SQRTPSr")>; - -def SLMriteResGroup11 : SchedWriteRes<[SLM_FPC_RSV0,SLMFPDivider]> { - let Latency = 35; - let NumMicroOps = 1; - let ResourceCycles = [1,35]; -} -def: InstRW<[SLMriteResGroup11], (instregex "(V?)SQRTSDr")>; - -def SLMriteResGroup12 : SchedWriteRes<[SLM_FPC_RSV0,SLMFPDivider]> { - let Latency = 20; - let NumMicroOps = 1; - let ResourceCycles = [1,20]; -} -def: InstRW<[SLMriteResGroup12], (instregex "(V?)SQRTSSr")>; - -def SLMriteResGroup13 : SchedWriteRes<[SLM_MEC_RSV,SLM_FPC_RSV0,SLMFPDivider]> { - let Latency = 74; - let NumMicroOps = 1; - let ResourceCycles = [1,1,70]; -} -def: InstRW<[SLMriteResGroup13], (instregex "(V?)SQRTPDm")>; - -def SLMriteResGroup14 : SchedWriteRes<[SLM_MEC_RSV,SLM_FPC_RSV0,SLMFPDivider]> { - let Latency = 44; - let NumMicroOps = 1; - let ResourceCycles = [1,1,40]; -} -def: InstRW<[SLMriteResGroup14], (instregex "(V?)SQRTPSm")>; - -def SLMriteResGroup15 : SchedWriteRes<[SLM_MEC_RSV,SLM_FPC_RSV0,SLMFPDivider]> { - let Latency = 38; - let NumMicroOps = 1; - let ResourceCycles = [1,1,35]; -} -def: InstRW<[SLMriteResGroup15], (instregex "(V?)SQRTSDm")>; - -def SLMriteResGroup16 : SchedWriteRes<[SLM_MEC_RSV,SLM_FPC_RSV0,SLMFPDivider]> { - let Latency = 23; - let NumMicroOps = 1; - let ResourceCycles = [1,1,20]; -} -def: InstRW<[SLMriteResGroup16], (instregex "(V?)SQRTSSm")>; - } // SchedModel diff --git a/llvm/lib/Target/X86/X86ScheduleZnver1.td b/llvm/lib/Target/X86/X86ScheduleZnver1.td index cf95ac1fa0b0..f5a0e9c950b2 100644 --- a/llvm/lib/Target/X86/X86ScheduleZnver1.td +++ b/llvm/lib/Target/X86/X86ScheduleZnver1.td @@ -223,11 +223,20 @@ defm : ZnWriteResFpuPair; defm : ZnWriteResFpuPair; defm : ZnWriteResFpuPair; defm : ZnWriteResFpuPair; -defm : ZnWriteResFpuPair; -defm : ZnWriteResFpuPair; -defm : ZnWriteResFpuPair; -defm : ZnWriteResFpuPair; -defm : ZnWriteResFpuPair; +defm : ZnWriteResFpuPair; +//defm : ZnWriteResFpuPair; +//defm : ZnWriteResFpuPair; +defm : ZnWriteResFpuPair; +//defm : ZnWriteResFpuPair; +defm : ZnWriteResFpuPair; +defm : ZnWriteResFpuPair; +defm : ZnWriteResFpuPair; +defm : ZnWriteResFpuPair; +defm : ZnWriteResFpuPair; +defm : ZnWriteResFpuPair; +defm : ZnWriteResFpuPair; +defm : ZnWriteResFpuPair; +defm : ZnWriteResFpuPair; def : WriteRes; // Vector integer operations which uses FPU units @@ -1504,18 +1513,19 @@ def ZnWriteVDIVPDYLd : SchedWriteRes<[ZnAGU, ZnFPU3]> { def : InstRW<[ZnWriteVDIVPDYLd], (instregex "VDIVPDYrm")>; // VRCPPS. +// TODO - convert to ZnWriteResFpuPair // y,y. -def ZnWriteVRCPPSr : SchedWriteRes<[ZnFPU01]> { +def ZnWriteVRCPPSYr : SchedWriteRes<[ZnFPU01]> { let Latency = 5; } -def : InstRW<[ZnWriteVRCPPSr], (instregex "VRCPPSYr")>; +def : SchedAlias; // y,m256. -def ZnWriteVRCPPSLd : SchedWriteRes<[ZnAGU, ZnFPU01]> { +def ZnWriteVRCPPSYLd : SchedWriteRes<[ZnAGU, ZnFPU01]> { let Latency = 12; let NumMicroOps = 3; } -def : InstRW<[ZnWriteVRCPPSLd], (instregex "VRCPPSYm")>; +def : SchedAlias; // DPPS. // x,x,i / v,v,v,i. @@ -1533,83 +1543,38 @@ def : SchedAlias; // x,m,i. def : SchedAlias; -// VSQRTPS. -// y,y. -def ZnWriteVSQRTPSYr : SchedWriteRes<[ZnFPU3]> { - let Latency = 28; - let ResourceCycles = [28]; -} -def : InstRW<[ZnWriteVSQRTPSYr], (instregex "VSQRTPSYr")>; - -// y,m256. -def ZnWriteVSQRTPSYLd : SchedWriteRes<[ZnAGU, ZnFPU3]> { - let Latency = 35; - let ResourceCycles = [1,35]; - let NumMicroOps = 2; -} -def : InstRW<[ZnWriteVSQRTPSYLd], (instregex "VSQRTPSYm")>; - -// VSQRTPD. -// y,y. -def ZnWriteVSQRTPDYr : SchedWriteRes<[ZnFPU3]> { - let Latency = 40; - let ResourceCycles = [40]; -} -def : InstRW<[ZnWriteVSQRTPDYr], (instregex "VSQRTPDYr")>; - -// y,m256. -def ZnWriteVSQRTPDYLd : SchedWriteRes<[ZnAGU, ZnFPU3]> { - let Latency = 47; - let NumMicroOps = 2; - let ResourceCycles = [1,47]; -} -def : InstRW<[ZnWriteVSQRTPDYLd], (instregex "VSQRTPDYm")>; - // RSQRTSS +// TODO - convert to ZnWriteResFpuPair // x,x. def ZnWriteRSQRTSSr : SchedWriteRes<[ZnFPU02]> { let Latency = 5; } -def : InstRW<[ZnWriteRSQRTSSr], (instregex "(V?)RSQRTSS(Y?)r")>; +def : SchedAlias; -// RSQRTPS -// x,x. -def ZnWriteRSQRTPSr : SchedWriteRes<[ZnFPU01]> { - let Latency = 5; -} -def : InstRW<[ZnWriteRSQRTPSr], (instregex "(V?)RSQRTPSr")>; - -// RSQRTSSm // x,m128. def ZnWriteRSQRTSSLd: SchedWriteRes<[ZnAGU, ZnFPU02]> { let Latency = 12; let NumMicroOps = 2; - let ResourceCycles = [1,2]; + let ResourceCycles = [1,2]; // FIXME: Is this right? } -def : InstRW<[ZnWriteRSQRTSSLd], (instregex "(V?)RSQRTSSm")>; +def : SchedAlias; -// RSQRTPSm -def ZnWriteRSQRTPSLd : SchedWriteRes<[ZnAGU, ZnFPU01]> { - let Latency = 12; - let NumMicroOps = 2; -} -def : InstRW<[ZnWriteRSQRTPSLd], (instregex "(V?)RSQRTPSm")>; - -// RSQRTPS 256. +// RSQRTPS +// TODO - convert to ZnWriteResFpuPair // y,y. def ZnWriteRSQRTPSYr : SchedWriteRes<[ZnFPU01]> { let Latency = 5; let NumMicroOps = 2; let ResourceCycles = [2]; } -def : InstRW<[ZnWriteRSQRTPSYr], (instregex "VRSQRTPSYr")>; +def : SchedAlias; // y,m256. def ZnWriteRSQRTPSYLd : SchedWriteRes<[ZnAGU, ZnFPU01]> { let Latency = 12; let NumMicroOps = 2; } -def : InstRW<[ZnWriteRSQRTPSYLd], (instregex "VRSQRTPSYm")>; +def : SchedAlias; //-- Other instructions --// diff --git a/llvm/test/CodeGen/X86/avx-schedule.ll b/llvm/test/CodeGen/X86/avx-schedule.ll index 3ff64e036218..54ed9a0a1718 100644 --- a/llvm/test/CodeGen/X86/avx-schedule.ll +++ b/llvm/test/CodeGen/X86/avx-schedule.ll @@ -4450,7 +4450,7 @@ define <4 x double> @test_sqrtpd(<4 x double> %a0, <4 x double> *%a1) { ; ; ZNVER1-LABEL: test_sqrtpd: ; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vsqrtpd (%rdi), %ymm1 # sched: [47:47.00] +; ZNVER1-NEXT: vsqrtpd (%rdi), %ymm1 # sched: [47:40.00] ; ZNVER1-NEXT: vsqrtpd %ymm0, %ymm0 # sched: [40:40.00] ; ZNVER1-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00] ; ZNVER1-NEXT: retq # sched: [1:0.50] @@ -4514,7 +4514,7 @@ define <8 x float> @test_sqrtps(<8 x float> %a0, <8 x float> *%a1) { ; ; ZNVER1-LABEL: test_sqrtps: ; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vsqrtps (%rdi), %ymm1 # sched: [35:35.00] +; ZNVER1-NEXT: vsqrtps (%rdi), %ymm1 # sched: [35:28.00] ; ZNVER1-NEXT: vsqrtps %ymm0, %ymm0 # sched: [28:28.00] ; ZNVER1-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [3:1.00] ; ZNVER1-NEXT: retq # sched: [1:0.50] diff --git a/llvm/test/CodeGen/X86/avx512-schedule.ll b/llvm/test/CodeGen/X86/avx512-schedule.ll index 2d21bfde1203..78439fc2622d 100755 --- a/llvm/test/CodeGen/X86/avx512-schedule.ll +++ b/llvm/test/CodeGen/X86/avx512-schedule.ll @@ -600,7 +600,7 @@ declare <16 x float> @llvm.sqrt.v16f32(<16 x float>) define <16 x float> @sqrtD(<16 x float> %a) nounwind { ; GENERIC-LABEL: sqrtD: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vsqrtps %zmm0, %zmm0 # sched: [14:1.00] +; GENERIC-NEXT: vsqrtps %zmm0, %zmm0 # sched: [29:28.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: sqrtD: @@ -615,7 +615,7 @@ declare <8 x double> @llvm.sqrt.v8f64(<8 x double>) define <8 x double> @sqrtE(<8 x double> %a) nounwind { ; GENERIC-LABEL: sqrtE: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vsqrtpd %zmm0, %zmm0 # sched: [14:1.00] +; GENERIC-NEXT: vsqrtpd %zmm0, %zmm0 # sched: [45:44.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: sqrtE: diff --git a/llvm/test/CodeGen/X86/recip-fastmath.ll b/llvm/test/CodeGen/X86/recip-fastmath.ll index 1d041c3f2597..2e6c99a52139 100644 --- a/llvm/test/CodeGen/X86/recip-fastmath.ll +++ b/llvm/test/CodeGen/X86/recip-fastmath.ll @@ -1024,7 +1024,7 @@ define <16 x float> @v16f32_one_step(<16 x float> %x) #1 { ; ; KNL-LABEL: v16f32_one_step: ; KNL: # %bb.0: -; KNL-NEXT: vrcp14ps %zmm0, %zmm1 # sched: [5:1.00] +; KNL-NEXT: vrcp14ps %zmm0, %zmm1 # sched: [11:2.00] ; KNL-NEXT: vfnmadd213ps {{.*#+}} zmm0 = -(zmm1 * zmm0) + mem sched: [12:0.50] ; KNL-NEXT: vfmadd132ps {{.*#+}} zmm0 = (zmm0 * zmm1) + zmm1 sched: [5:0.50] ; KNL-NEXT: retq # sched: [7:1.00] @@ -1224,7 +1224,7 @@ define <16 x float> @v16f32_two_step(<16 x float> %x) #2 { ; ; KNL-LABEL: v16f32_two_step: ; KNL: # %bb.0: -; KNL-NEXT: vrcp14ps %zmm0, %zmm1 # sched: [5:1.00] +; KNL-NEXT: vrcp14ps %zmm0, %zmm1 # sched: [11:2.00] ; KNL-NEXT: vbroadcastss {{.*#+}} zmm2 = [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1] sched: [10:1.00] ; KNL-NEXT: vmovaps %zmm1, %zmm3 # sched: [1:1.00] ; KNL-NEXT: vfnmadd213ps {{.*#+}} zmm3 = -(zmm0 * zmm3) + zmm2 sched: [5:0.50] diff --git a/llvm/test/CodeGen/X86/recip-fastmath2.ll b/llvm/test/CodeGen/X86/recip-fastmath2.ll index 27a07f090952..aaaf0c6ab7e5 100644 --- a/llvm/test/CodeGen/X86/recip-fastmath2.ll +++ b/llvm/test/CodeGen/X86/recip-fastmath2.ll @@ -1323,7 +1323,7 @@ define <16 x float> @v16f32_one_step2(<16 x float> %x) #1 { ; ; KNL-LABEL: v16f32_one_step2: ; KNL: # %bb.0: -; KNL-NEXT: vrcp14ps %zmm0, %zmm1 # sched: [5:1.00] +; KNL-NEXT: vrcp14ps %zmm0, %zmm1 # sched: [11:2.00] ; KNL-NEXT: vfnmadd213ps {{.*#+}} zmm0 = -(zmm1 * zmm0) + mem sched: [12:0.50] ; KNL-NEXT: vfmadd132ps {{.*#+}} zmm0 = (zmm0 * zmm1) + zmm1 sched: [5:0.50] ; KNL-NEXT: vmulps {{.*}}(%rip), %zmm0, %zmm0 # sched: [12:0.50] @@ -1489,7 +1489,7 @@ define <16 x float> @v16f32_one_step_2_divs(<16 x float> %x) #1 { ; ; KNL-LABEL: v16f32_one_step_2_divs: ; KNL: # %bb.0: -; KNL-NEXT: vrcp14ps %zmm0, %zmm1 # sched: [5:1.00] +; KNL-NEXT: vrcp14ps %zmm0, %zmm1 # sched: [11:2.00] ; KNL-NEXT: vfnmadd213ps {{.*#+}} zmm0 = -(zmm1 * zmm0) + mem sched: [12:0.50] ; KNL-NEXT: vfmadd132ps {{.*#+}} zmm0 = (zmm0 * zmm1) + zmm1 sched: [5:0.50] ; KNL-NEXT: vmulps {{.*}}(%rip), %zmm0, %zmm1 # sched: [12:0.50] @@ -1709,7 +1709,7 @@ define <16 x float> @v16f32_two_step2(<16 x float> %x) #2 { ; ; KNL-LABEL: v16f32_two_step2: ; KNL: # %bb.0: -; KNL-NEXT: vrcp14ps %zmm0, %zmm1 # sched: [5:1.00] +; KNL-NEXT: vrcp14ps %zmm0, %zmm1 # sched: [11:2.00] ; KNL-NEXT: vbroadcastss {{.*#+}} zmm2 = [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1] sched: [10:1.00] ; KNL-NEXT: vmovaps %zmm1, %zmm3 # sched: [1:1.00] ; KNL-NEXT: vfnmadd213ps {{.*#+}} zmm3 = -(zmm0 * zmm3) + zmm2 sched: [5:0.50] @@ -1781,7 +1781,7 @@ define <16 x float> @v16f32_no_step(<16 x float> %x) #3 { ; ; KNL-LABEL: v16f32_no_step: ; KNL: # %bb.0: -; KNL-NEXT: vrcp14ps %zmm0, %zmm0 # sched: [5:1.00] +; KNL-NEXT: vrcp14ps %zmm0, %zmm0 # sched: [11:2.00] ; KNL-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: v16f32_no_step: @@ -1855,7 +1855,7 @@ define <16 x float> @v16f32_no_step2(<16 x float> %x) #3 { ; ; KNL-LABEL: v16f32_no_step2: ; KNL: # %bb.0: -; KNL-NEXT: vrcp14ps %zmm0, %zmm0 # sched: [5:1.00] +; KNL-NEXT: vrcp14ps %zmm0, %zmm0 # sched: [11:2.00] ; KNL-NEXT: vmulps {{.*}}(%rip), %zmm0, %zmm0 # sched: [12:0.50] ; KNL-NEXT: retq # sched: [7:1.00] ; diff --git a/llvm/test/CodeGen/X86/sse-schedule.ll b/llvm/test/CodeGen/X86/sse-schedule.ll index dcd664358639..5cde75919a59 100644 --- a/llvm/test/CodeGen/X86/sse-schedule.ll +++ b/llvm/test/CodeGen/X86/sse-schedule.ll @@ -5007,15 +5007,15 @@ define <4 x float> @test_sqrtps(<4 x float> %a0, <4 x float> *%a1) { ; ; ZNVER1-SSE-LABEL: test_sqrtps: ; ZNVER1-SSE: # %bb.0: -; ZNVER1-SSE-NEXT: sqrtps %xmm0, %xmm1 # sched: [20:1.00] -; ZNVER1-SSE-NEXT: sqrtps (%rdi), %xmm0 # sched: [27:1.00] +; ZNVER1-SSE-NEXT: sqrtps %xmm0, %xmm1 # sched: [20:20.00] +; ZNVER1-SSE-NEXT: sqrtps (%rdi), %xmm0 # sched: [27:20.00] ; ZNVER1-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00] ; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] ; ; ZNVER1-LABEL: test_sqrtps: ; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vsqrtps (%rdi), %xmm1 # sched: [27:1.00] -; ZNVER1-NEXT: vsqrtps %xmm0, %xmm0 # sched: [20:1.00] +; ZNVER1-NEXT: vsqrtps (%rdi), %xmm1 # sched: [27:20.00] +; ZNVER1-NEXT: vsqrtps %xmm0, %xmm0 # sched: [20:20.00] ; ZNVER1-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] ; ZNVER1-NEXT: retq # sched: [1:0.50] %1 = call <4 x float> @llvm.x86.sse.sqrt.ps(<4 x float> %a0) @@ -5152,16 +5152,16 @@ define <4 x float> @test_sqrtss(<4 x float> %a0, <4 x float> *%a1) { ; ZNVER1-SSE-LABEL: test_sqrtss: ; ZNVER1-SSE: # %bb.0: ; ZNVER1-SSE-NEXT: movaps (%rdi), %xmm1 # sched: [8:0.50] -; ZNVER1-SSE-NEXT: sqrtss %xmm0, %xmm0 # sched: [20:1.00] -; ZNVER1-SSE-NEXT: sqrtss %xmm1, %xmm1 # sched: [20:1.00] +; ZNVER1-SSE-NEXT: sqrtss %xmm0, %xmm0 # sched: [20:20.00] +; ZNVER1-SSE-NEXT: sqrtss %xmm1, %xmm1 # sched: [20:20.00] ; ZNVER1-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00] ; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] ; ; ZNVER1-LABEL: test_sqrtss: ; ZNVER1: # %bb.0: ; ZNVER1-NEXT: vmovaps (%rdi), %xmm1 # sched: [8:0.50] -; ZNVER1-NEXT: vsqrtss %xmm0, %xmm0, %xmm0 # sched: [20:1.00] -; ZNVER1-NEXT: vsqrtss %xmm1, %xmm1, %xmm1 # sched: [20:1.00] +; ZNVER1-NEXT: vsqrtss %xmm0, %xmm0, %xmm0 # sched: [20:20.00] +; ZNVER1-NEXT: vsqrtss %xmm1, %xmm1, %xmm1 # sched: [20:20.00] ; ZNVER1-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] ; ZNVER1-NEXT: retq # sched: [1:0.50] %1 = call <4 x float> @llvm.x86.sse.sqrt.ss(<4 x float> %a0) diff --git a/llvm/test/CodeGen/X86/sse2-schedule.ll b/llvm/test/CodeGen/X86/sse2-schedule.ll index e6e2cd94f30e..fa06624a2dce 100644 --- a/llvm/test/CodeGen/X86/sse2-schedule.ll +++ b/llvm/test/CodeGen/X86/sse2-schedule.ll @@ -14247,15 +14247,15 @@ define <2 x double> @test_sqrtpd(<2 x double> %a0, <2 x double> *%a1) { ; ; ZNVER1-SSE-LABEL: test_sqrtpd: ; ZNVER1-SSE: # %bb.0: -; ZNVER1-SSE-NEXT: sqrtpd %xmm0, %xmm1 # sched: [20:1.00] -; ZNVER1-SSE-NEXT: sqrtpd (%rdi), %xmm0 # sched: [27:1.00] +; ZNVER1-SSE-NEXT: sqrtpd %xmm0, %xmm1 # sched: [20:20.00] +; ZNVER1-SSE-NEXT: sqrtpd (%rdi), %xmm0 # sched: [27:20.00] ; ZNVER1-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00] ; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] ; ; ZNVER1-LABEL: test_sqrtpd: ; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vsqrtpd (%rdi), %xmm1 # sched: [27:1.00] -; ZNVER1-NEXT: vsqrtpd %xmm0, %xmm0 # sched: [20:1.00] +; ZNVER1-NEXT: vsqrtpd (%rdi), %xmm1 # sched: [27:20.00] +; ZNVER1-NEXT: vsqrtpd %xmm0, %xmm0 # sched: [20:20.00] ; ZNVER1-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] ; ZNVER1-NEXT: retq # sched: [1:0.50] %1 = call <2 x double> @llvm.x86.sse2.sqrt.pd(<2 x double> %a0) @@ -14392,16 +14392,16 @@ define <2 x double> @test_sqrtsd(<2 x double> %a0, <2 x double> *%a1) { ; ZNVER1-SSE-LABEL: test_sqrtsd: ; ZNVER1-SSE: # %bb.0: ; ZNVER1-SSE-NEXT: movapd (%rdi), %xmm1 # sched: [8:0.50] -; ZNVER1-SSE-NEXT: sqrtsd %xmm0, %xmm0 # sched: [20:1.00] -; ZNVER1-SSE-NEXT: sqrtsd %xmm1, %xmm1 # sched: [20:1.00] +; ZNVER1-SSE-NEXT: sqrtsd %xmm0, %xmm0 # sched: [20:20.00] +; ZNVER1-SSE-NEXT: sqrtsd %xmm1, %xmm1 # sched: [20:20.00] ; ZNVER1-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00] ; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] ; ; ZNVER1-LABEL: test_sqrtsd: ; ZNVER1: # %bb.0: ; ZNVER1-NEXT: vmovapd (%rdi), %xmm1 # sched: [8:0.50] -; ZNVER1-NEXT: vsqrtsd %xmm0, %xmm0, %xmm0 # sched: [20:1.00] -; ZNVER1-NEXT: vsqrtsd %xmm1, %xmm1, %xmm1 # sched: [20:1.00] +; ZNVER1-NEXT: vsqrtsd %xmm0, %xmm0, %xmm0 # sched: [20:20.00] +; ZNVER1-NEXT: vsqrtsd %xmm1, %xmm1, %xmm1 # sched: [20:20.00] ; ZNVER1-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] ; ZNVER1-NEXT: retq # sched: [1:0.50] %1 = call <2 x double> @llvm.x86.sse2.sqrt.sd(<2 x double> %a0) diff --git a/llvm/test/CodeGen/X86/x87-schedule.ll b/llvm/test/CodeGen/X86/x87-schedule.ll index ff82dcce0bf8..639ac510f66e 100644 --- a/llvm/test/CodeGen/X86/x87-schedule.ll +++ b/llvm/test/CodeGen/X86/x87-schedule.ll @@ -4083,56 +4083,56 @@ define void @test_fsqrt() optsize { ; SLM-LABEL: test_fsqrt: ; SLM: # %bb.0: ; SLM-NEXT: #APP -; SLM-NEXT: fsqrt # sched: [15:1.00] +; SLM-NEXT: fsqrt # sched: [40:40.00] ; SLM-NEXT: #NO_APP ; SLM-NEXT: retl # sched: [4:1.00] ; ; SANDY-LABEL: test_fsqrt: ; SANDY: # %bb.0: ; SANDY-NEXT: #APP -; SANDY-NEXT: fsqrt # sched: [14:1.00] +; SANDY-NEXT: fsqrt # sched: [24:24.00] ; SANDY-NEXT: #NO_APP ; SANDY-NEXT: retl # sched: [6:1.00] ; ; HASWELL-LABEL: test_fsqrt: ; HASWELL: # %bb.0: ; HASWELL-NEXT: #APP -; HASWELL-NEXT: fsqrt # sched: [15:1.00] +; HASWELL-NEXT: fsqrt # sched: [23:17.00] ; HASWELL-NEXT: #NO_APP ; HASWELL-NEXT: retl # sched: [7:1.00] ; ; BROADWELL-LABEL: test_fsqrt: ; BROADWELL: # %bb.0: ; BROADWELL-NEXT: #APP -; BROADWELL-NEXT: fsqrt # sched: [15:1.00] +; BROADWELL-NEXT: fsqrt # sched: [23:9.00] ; BROADWELL-NEXT: #NO_APP ; BROADWELL-NEXT: retl # sched: [6:0.50] ; ; SKYLAKE-LABEL: test_fsqrt: ; SKYLAKE: # %bb.0: ; SKYLAKE-NEXT: #APP -; SKYLAKE-NEXT: fsqrt # sched: [15:1.00] +; SKYLAKE-NEXT: fsqrt # sched: [21:7.00] ; SKYLAKE-NEXT: #NO_APP ; SKYLAKE-NEXT: retl # sched: [6:0.50] ; ; SKX-LABEL: test_fsqrt: ; SKX: # %bb.0: ; SKX-NEXT: #APP -; SKX-NEXT: fsqrt # sched: [15:1.00] +; SKX-NEXT: fsqrt # sched: [21:7.00] ; SKX-NEXT: #NO_APP ; SKX-NEXT: retl # sched: [6:0.50] ; ; BTVER2-LABEL: test_fsqrt: ; BTVER2: # %bb.0: ; BTVER2-NEXT: #APP -; BTVER2-NEXT: fsqrt # sched: [21:21.00] +; BTVER2-NEXT: fsqrt # sched: [35:35.00] ; BTVER2-NEXT: #NO_APP ; BTVER2-NEXT: retl # sched: [4:1.00] ; ; ZNVER1-LABEL: test_fsqrt: ; ZNVER1: # %bb.0: ; ZNVER1-NEXT: #APP -; ZNVER1-NEXT: fsqrt # sched: [20:1.00] +; ZNVER1-NEXT: fsqrt # sched: [20:20.00] ; ZNVER1-NEXT: #NO_APP ; ZNVER1-NEXT: retl # sched: [1:0.50] tail call void asm sideeffect "fsqrt", ""() nounwind diff --git a/llvm/test/tools/llvm-mca/X86/Broadwell/resources-x87.s b/llvm/test/tools/llvm-mca/X86/Broadwell/resources-x87.s index 315c6ef8da47..29fc7f38e3dc 100644 --- a/llvm/test/tools/llvm-mca/X86/Broadwell/resources-x87.s +++ b/llvm/test/tools/llvm-mca/X86/Broadwell/resources-x87.s @@ -307,7 +307,7 @@ fyl2xp1 # CHECK-NEXT: 1 100 0.25 * fscale # CHECK-NEXT: 1 100 0.25 * fsin # CHECK-NEXT: 1 100 0.25 * fsincos -# CHECK-NEXT: 1 15 1.00 * fsqrt +# CHECK-NEXT: 1 23 9.00 * fsqrt # CHECK-NEXT: 1 1 0.25 * fst %st(0) # CHECK-NEXT: 1 1 1.00 * * fsts (%edx) # CHECK-NEXT: 1 1 1.00 * * fstl (%ecx) @@ -369,7 +369,7 @@ fyl2xp1 # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] -# CHECK-NEXT: - - 111.67 140.67 49.00 49.00 27.00 60.67 71.00 9.00 +# CHECK-NEXT: - 9.00 111.67 140.67 49.00 49.00 27.00 60.67 71.00 9.00 # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] Instructions: @@ -474,7 +474,7 @@ fyl2xp1 # CHECK-NEXT: - - 0.25 0.25 - - - 0.25 0.25 - fscale # CHECK-NEXT: - - 0.25 0.25 - - - 0.25 0.25 - fsin # CHECK-NEXT: - - 0.25 0.25 - - - 0.25 0.25 - fsincos -# CHECK-NEXT: - - 1.00 - - - - - - - fsqrt +# CHECK-NEXT: - 9.00 1.00 - - - - - - - fsqrt # CHECK-NEXT: - - 0.25 0.25 - - - 0.25 0.25 - fst %st(0) # CHECK-NEXT: - - - - 0.33 0.33 1.00 - - 0.33 fsts (%edx) # CHECK-NEXT: - - - - 0.33 0.33 1.00 - - 0.33 fstl (%ecx) diff --git a/llvm/test/tools/llvm-mca/X86/BtVer2/resources-x87.s b/llvm/test/tools/llvm-mca/X86/BtVer2/resources-x87.s index b6bd0a7b3a94..e5bfaac3d944 100644 --- a/llvm/test/tools/llvm-mca/X86/BtVer2/resources-x87.s +++ b/llvm/test/tools/llvm-mca/X86/BtVer2/resources-x87.s @@ -307,7 +307,7 @@ fyl2xp1 # CHECK-NEXT: 1 100 0.50 * fscale # CHECK-NEXT: 1 100 0.50 * fsin # CHECK-NEXT: 1 100 0.50 * fsincos -# CHECK-NEXT: 1 21 21.00 * fsqrt +# CHECK-NEXT: 1 35 35.00 * fsqrt # CHECK-NEXT: 1 1 0.50 * fst %st(0) # CHECK-NEXT: 1 1 1.00 * * fsts (%edx) # CHECK-NEXT: 1 1 1.00 * * fstl (%ecx) @@ -373,7 +373,7 @@ fyl2xp1 # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] [13] -# CHECK-NEXT: 44.50 22.50 - 54.00 335.00 54.00 27.00 39.00 - 13.00 - - - - +# CHECK-NEXT: 44.50 22.50 - 54.00 349.00 54.00 27.00 39.00 - 13.00 - - - - # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] [13] Instructions: @@ -478,7 +478,7 @@ fyl2xp1 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - - fscale # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - - fsin # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - - fsincos -# CHECK-NEXT: - - - - 21.00 - 1.00 - - - - - - - fsqrt +# CHECK-NEXT: - - - - 35.00 - 1.00 - - - - - - - fsqrt # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - - fst %st(0) # CHECK-NEXT: - - - - - - - - - 1.00 - - - - fsts (%edx) # CHECK-NEXT: - - - - - - - - - 1.00 - - - - fstl (%ecx) diff --git a/llvm/test/tools/llvm-mca/X86/Haswell/resources-x87.s b/llvm/test/tools/llvm-mca/X86/Haswell/resources-x87.s index 505b3d1fe8ea..a1d53dac0705 100644 --- a/llvm/test/tools/llvm-mca/X86/Haswell/resources-x87.s +++ b/llvm/test/tools/llvm-mca/X86/Haswell/resources-x87.s @@ -307,7 +307,7 @@ fyl2xp1 # CHECK-NEXT: 50 75 - * fscale # CHECK-NEXT: 1 100 0.25 * fsin # CHECK-NEXT: 1 100 0.25 * fsincos -# CHECK-NEXT: 1 15 1.00 * fsqrt +# CHECK-NEXT: 1 23 17.00 * fsqrt # CHECK-NEXT: 1 1 0.50 * fst %st(0) # CHECK-NEXT: 1 1 1.00 * * fsts (%edx) # CHECK-NEXT: 1 1 1.00 * * fstl (%ecx) @@ -369,7 +369,7 @@ fyl2xp1 # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] -# CHECK-NEXT: - - 117.92 141.92 49.00 49.00 27.00 56.92 65.25 9.00 +# CHECK-NEXT: - 17.00 117.92 141.92 49.00 49.00 27.00 56.92 65.25 9.00 # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] Instructions: @@ -474,7 +474,7 @@ fyl2xp1 # CHECK-NEXT: - - - - - - - - - - fscale # CHECK-NEXT: - - 0.25 0.25 - - - 0.25 0.25 - fsin # CHECK-NEXT: - - 0.25 0.25 - - - 0.25 0.25 - fsincos -# CHECK-NEXT: - - 1.00 - - - - - - - fsqrt +# CHECK-NEXT: - 17.00 1.00 - - - - - - - fsqrt # CHECK-NEXT: - - 0.50 0.50 - - - - - - fst %st(0) # CHECK-NEXT: - - - - 0.33 0.33 1.00 - - 0.33 fsts (%edx) # CHECK-NEXT: - - - - 0.33 0.33 1.00 - - 0.33 fstl (%ecx) diff --git a/llvm/test/tools/llvm-mca/X86/SLM/resources-x87.s b/llvm/test/tools/llvm-mca/X86/SLM/resources-x87.s index 99e95dc4d48a..7baab7c4102f 100644 --- a/llvm/test/tools/llvm-mca/X86/SLM/resources-x87.s +++ b/llvm/test/tools/llvm-mca/X86/SLM/resources-x87.s @@ -307,7 +307,7 @@ fyl2xp1 # CHECK-NEXT: 1 100 1.00 * fscale # CHECK-NEXT: 1 100 1.00 * fsin # CHECK-NEXT: 1 100 1.00 * fsincos -# CHECK-NEXT: 1 15 1.00 * fsqrt +# CHECK-NEXT: 1 40 40.00 * fsqrt # CHECK-NEXT: 1 1 0.50 * fst %st(0) # CHECK-NEXT: 1 1 1.00 * * fsts (%edx) # CHECK-NEXT: 1 1 1.00 * * fstl (%ecx) @@ -367,7 +367,7 @@ fyl2xp1 # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] -# CHECK-NEXT: - 544.00 16.00 64.00 55.00 9.50 9.50 52.00 +# CHECK-NEXT: - 584.00 16.00 64.00 55.00 9.50 9.50 52.00 # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] Instructions: @@ -472,7 +472,7 @@ fyl2xp1 # CHECK-NEXT: - - - 1.00 - - - - fscale # CHECK-NEXT: - - - 1.00 - - - - fsin # CHECK-NEXT: - - - 1.00 - - - - fsincos -# CHECK-NEXT: - - - 1.00 - - - - fsqrt +# CHECK-NEXT: - 40.00 - 1.00 - - - - fsqrt # CHECK-NEXT: - - - - - 0.50 0.50 - fst %st(0) # CHECK-NEXT: - - - - - 0.50 0.50 1.00 fsts (%edx) # CHECK-NEXT: - - - - - 0.50 0.50 1.00 fstl (%ecx) diff --git a/llvm/test/tools/llvm-mca/X86/SandyBridge/resources-x87.s b/llvm/test/tools/llvm-mca/X86/SandyBridge/resources-x87.s index f8a99fabac60..8a2130dc2b92 100644 --- a/llvm/test/tools/llvm-mca/X86/SandyBridge/resources-x87.s +++ b/llvm/test/tools/llvm-mca/X86/SandyBridge/resources-x87.s @@ -307,7 +307,7 @@ fyl2xp1 # CHECK-NEXT: 1 100 0.33 * fscale # CHECK-NEXT: 1 100 0.33 * fsin # CHECK-NEXT: 1 100 0.33 * fsincos -# CHECK-NEXT: 1 14 1.00 * fsqrt +# CHECK-NEXT: 1 24 24.00 * fsqrt # CHECK-NEXT: 1 1 1.00 * fst %st(0) # CHECK-NEXT: 3 6 1.00 * * fsts (%edx) # CHECK-NEXT: 3 6 1.00 * * fstl (%ecx) @@ -367,7 +367,7 @@ fyl2xp1 # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1] -# CHECK-NEXT: - - 48.33 87.33 17.00 54.33 34.00 34.00 +# CHECK-NEXT: - 24.00 48.33 87.33 17.00 54.33 34.00 34.00 # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1] Instructions: @@ -472,7 +472,7 @@ fyl2xp1 # CHECK-NEXT: - - 0.33 0.33 - 0.33 - - fscale # CHECK-NEXT: - - 0.33 0.33 - 0.33 - - fsin # CHECK-NEXT: - - 0.33 0.33 - 0.33 - - fsincos -# CHECK-NEXT: - - 1.00 - - - - - fsqrt +# CHECK-NEXT: - 24.00 1.00 - - - - - fsqrt # CHECK-NEXT: - - - - - 1.00 - - fst %st(0) # CHECK-NEXT: - - - - 1.00 - 1.00 1.00 fsts (%edx) # CHECK-NEXT: - - - - 1.00 - 1.00 1.00 fstl (%ecx) diff --git a/llvm/test/tools/llvm-mca/X86/SkylakeClient/resources-x87.s b/llvm/test/tools/llvm-mca/X86/SkylakeClient/resources-x87.s index d3c2e133875a..0f033720d5b1 100644 --- a/llvm/test/tools/llvm-mca/X86/SkylakeClient/resources-x87.s +++ b/llvm/test/tools/llvm-mca/X86/SkylakeClient/resources-x87.s @@ -307,7 +307,7 @@ fyl2xp1 # CHECK-NEXT: 1 100 0.25 * fscale # CHECK-NEXT: 1 100 0.25 * fsin # CHECK-NEXT: 1 100 0.25 * fsincos -# CHECK-NEXT: 1 15 1.00 * fsqrt +# CHECK-NEXT: 1 21 7.00 * fsqrt # CHECK-NEXT: 1 1 0.25 * fst %st(0) # CHECK-NEXT: 1 1 1.00 * * fsts (%edx) # CHECK-NEXT: 1 1 1.00 * * fstl (%ecx) @@ -369,7 +369,7 @@ fyl2xp1 # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] -# CHECK-NEXT: - - 121.50 53.50 49.00 49.00 27.00 144.50 70.50 9.00 +# CHECK-NEXT: - 7.00 121.50 53.50 49.00 49.00 27.00 144.50 70.50 9.00 # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] Instructions: @@ -474,7 +474,7 @@ fyl2xp1 # CHECK-NEXT: - - 0.25 0.25 - - - 0.25 0.25 - fscale # CHECK-NEXT: - - 0.25 0.25 - - - 0.25 0.25 - fsin # CHECK-NEXT: - - 0.25 0.25 - - - 0.25 0.25 - fsincos -# CHECK-NEXT: - - 1.00 - - - - - - - fsqrt +# CHECK-NEXT: - 7.00 1.00 - - - - - - - fsqrt # CHECK-NEXT: - - 0.25 0.25 - - - 0.25 0.25 - fst %st(0) # CHECK-NEXT: - - - - 0.33 0.33 1.00 - - 0.33 fsts (%edx) # CHECK-NEXT: - - - - 0.33 0.33 1.00 - - 0.33 fstl (%ecx) diff --git a/llvm/test/tools/llvm-mca/X86/SkylakeServer/resources-x87.s b/llvm/test/tools/llvm-mca/X86/SkylakeServer/resources-x87.s index 458c5eb36b93..933abf56e458 100644 --- a/llvm/test/tools/llvm-mca/X86/SkylakeServer/resources-x87.s +++ b/llvm/test/tools/llvm-mca/X86/SkylakeServer/resources-x87.s @@ -307,7 +307,7 @@ fyl2xp1 # CHECK-NEXT: 1 100 0.25 * fscale # CHECK-NEXT: 1 100 0.25 * fsin # CHECK-NEXT: 1 100 0.25 * fsincos -# CHECK-NEXT: 1 15 1.00 * fsqrt +# CHECK-NEXT: 1 21 7.00 * fsqrt # CHECK-NEXT: 1 1 0.25 * fst %st(0) # CHECK-NEXT: 1 1 1.00 * * fsts (%edx) # CHECK-NEXT: 1 1 1.00 * * fstl (%ecx) @@ -369,7 +369,7 @@ fyl2xp1 # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] -# CHECK-NEXT: - - 121.50 53.50 49.00 49.00 27.00 144.50 70.50 9.00 +# CHECK-NEXT: - 7.00 121.50 53.50 49.00 49.00 27.00 144.50 70.50 9.00 # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] Instructions: @@ -474,7 +474,7 @@ fyl2xp1 # CHECK-NEXT: - - 0.25 0.25 - - - 0.25 0.25 - fscale # CHECK-NEXT: - - 0.25 0.25 - - - 0.25 0.25 - fsin # CHECK-NEXT: - - 0.25 0.25 - - - 0.25 0.25 - fsincos -# CHECK-NEXT: - - 1.00 - - - - - - - fsqrt +# CHECK-NEXT: - 7.00 1.00 - - - - - - - fsqrt # CHECK-NEXT: - - 0.25 0.25 - - - 0.25 0.25 - fst %st(0) # CHECK-NEXT: - - - - 0.33 0.33 1.00 - - 0.33 fsts (%edx) # CHECK-NEXT: - - - - 0.33 0.33 1.00 - - 0.33 fstl (%ecx) diff --git a/llvm/test/tools/llvm-mca/X86/Znver1/resources-avx1.s b/llvm/test/tools/llvm-mca/X86/Znver1/resources-avx1.s index eec6eb01975d..ed9dfaa30307 100644 --- a/llvm/test/tools/llvm-mca/X86/Znver1/resources-avx1.s +++ b/llvm/test/tools/llvm-mca/X86/Znver1/resources-avx1.s @@ -1638,18 +1638,18 @@ vzeroupper # CHECK-NEXT: 1 8 0.50 * vshufps $1, (%rax), %xmm1, %xmm2 # CHECK-NEXT: 1 1 0.50 vshufps $1, %ymm0, %ymm1, %ymm2 # CHECK-NEXT: 1 8 0.50 * vshufps $1, (%rax), %ymm1, %ymm2 -# CHECK-NEXT: 1 20 1.00 vsqrtpd %xmm0, %xmm2 -# CHECK-NEXT: 1 27 1.00 * vsqrtpd (%rax), %xmm2 +# CHECK-NEXT: 1 20 20.00 vsqrtpd %xmm0, %xmm2 +# CHECK-NEXT: 1 27 20.00 * vsqrtpd (%rax), %xmm2 # CHECK-NEXT: 1 40 40.00 vsqrtpd %ymm0, %ymm2 -# CHECK-NEXT: 2 47 47.00 * vsqrtpd (%rax), %ymm2 -# CHECK-NEXT: 1 20 1.00 vsqrtps %xmm0, %xmm2 -# CHECK-NEXT: 1 27 1.00 * vsqrtps (%rax), %xmm2 +# CHECK-NEXT: 2 47 40.00 * vsqrtpd (%rax), %ymm2 +# CHECK-NEXT: 1 20 20.00 vsqrtps %xmm0, %xmm2 +# CHECK-NEXT: 1 27 20.00 * vsqrtps (%rax), %xmm2 # CHECK-NEXT: 1 28 28.00 vsqrtps %ymm0, %ymm2 -# CHECK-NEXT: 2 35 35.00 * vsqrtps (%rax), %ymm2 -# CHECK-NEXT: 1 20 1.00 vsqrtsd %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 1 27 1.00 * vsqrtsd (%rax), %xmm1, %xmm2 -# CHECK-NEXT: 1 20 1.00 vsqrtss %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 1 27 1.00 * vsqrtss (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 2 35 28.00 * vsqrtps (%rax), %ymm2 +# CHECK-NEXT: 1 20 20.00 vsqrtsd %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 1 27 20.00 * vsqrtsd (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 1 20 20.00 vsqrtss %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 1 27 20.00 * vsqrtss (%rax), %xmm1, %xmm2 # CHECK-NEXT: 1 100 - * * * vstmxcsr (%rax) # CHECK-NEXT: 1 3 1.00 vsubpd %xmm0, %xmm1, %xmm2 # CHECK-NEXT: 1 10 1.00 * vsubpd (%rax), %xmm1, %xmm2 @@ -1718,7 +1718,7 @@ vzeroupper # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] -# CHECK-NEXT: 168.00 168.00 - - - - - 194.25 142.75 168.25 366.75 - +# CHECK-NEXT: 168.00 168.00 - - - - - 194.25 142.75 168.25 504.75 - # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] Instructions: @@ -2342,18 +2342,18 @@ vzeroupper # CHECK-NEXT: 0.50 0.50 - - - - - - 0.50 0.50 - - vshufps $1, (%rax), %xmm1, %xmm2 # CHECK-NEXT: - - - - - - - - 0.50 0.50 - - vshufps $1, %ymm0, %ymm1, %ymm2 # CHECK-NEXT: 0.50 0.50 - - - - - - 0.50 0.50 - - vshufps $1, (%rax), %ymm1, %ymm2 -# CHECK-NEXT: - - - - - - - - - - 1.00 - vsqrtpd %xmm0, %xmm2 -# CHECK-NEXT: 0.50 0.50 - - - - - - - - 1.00 - vsqrtpd (%rax), %xmm2 +# CHECK-NEXT: - - - - - - - - - - 20.00 - vsqrtpd %xmm0, %xmm2 +# CHECK-NEXT: 0.50 0.50 - - - - - - - - 20.00 - vsqrtpd (%rax), %xmm2 # CHECK-NEXT: - - - - - - - - - - 40.00 - vsqrtpd %ymm0, %ymm2 -# CHECK-NEXT: 0.50 0.50 - - - - - - - - 47.00 - vsqrtpd (%rax), %ymm2 -# CHECK-NEXT: - - - - - - - - - - 1.00 - vsqrtps %xmm0, %xmm2 -# CHECK-NEXT: 0.50 0.50 - - - - - - - - 1.00 - vsqrtps (%rax), %xmm2 +# CHECK-NEXT: 0.50 0.50 - - - - - - - - 40.00 - vsqrtpd (%rax), %ymm2 +# CHECK-NEXT: - - - - - - - - - - 20.00 - vsqrtps %xmm0, %xmm2 +# CHECK-NEXT: 0.50 0.50 - - - - - - - - 20.00 - vsqrtps (%rax), %xmm2 # CHECK-NEXT: - - - - - - - - - - 28.00 - vsqrtps %ymm0, %ymm2 -# CHECK-NEXT: 0.50 0.50 - - - - - - - - 35.00 - vsqrtps (%rax), %ymm2 -# CHECK-NEXT: - - - - - - - - - - 1.00 - vsqrtsd %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 - - - - - - - - 1.00 - vsqrtsd (%rax), %xmm1, %xmm2 -# CHECK-NEXT: - - - - - - - - - - 1.00 - vsqrtss %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 - - - - - - - - 1.00 - vsqrtss (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 - - - - - - - - 28.00 - vsqrtps (%rax), %ymm2 +# CHECK-NEXT: - - - - - - - - - - 20.00 - vsqrtsd %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 - - - - - - - - 20.00 - vsqrtsd (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - - - - - - - - - - 20.00 - vsqrtss %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 - - - - - - - - 20.00 - vsqrtss (%rax), %xmm1, %xmm2 # CHECK-NEXT: - - - - - - - - - - - - vstmxcsr (%rax) # CHECK-NEXT: - - - - - - - 1.00 - - - - vsubpd %xmm0, %xmm1, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - 1.00 - - - - vsubpd (%rax), %xmm1, %xmm2 diff --git a/llvm/test/tools/llvm-mca/X86/Znver1/resources-sse1.s b/llvm/test/tools/llvm-mca/X86/Znver1/resources-sse1.s index dc6b91d96a96..3ca2dcd3b689 100644 --- a/llvm/test/tools/llvm-mca/X86/Znver1/resources-sse1.s +++ b/llvm/test/tools/llvm-mca/X86/Znver1/resources-sse1.s @@ -301,10 +301,10 @@ xorps (%rax), %xmm2 # CHECK-NEXT: 1 1 0.50 * * * sfence # CHECK-NEXT: 1 1 0.50 shufps $1, %xmm0, %xmm2 # CHECK-NEXT: 1 8 0.50 * shufps $1, (%rax), %xmm2 -# CHECK-NEXT: 1 20 1.00 sqrtps %xmm0, %xmm2 -# CHECK-NEXT: 1 27 1.00 * sqrtps (%rax), %xmm2 -# CHECK-NEXT: 1 20 1.00 sqrtss %xmm0, %xmm2 -# CHECK-NEXT: 1 27 1.00 * sqrtss (%rax), %xmm2 +# CHECK-NEXT: 1 20 20.00 sqrtps %xmm0, %xmm2 +# CHECK-NEXT: 1 27 20.00 * sqrtps (%rax), %xmm2 +# CHECK-NEXT: 1 20 20.00 sqrtss %xmm0, %xmm2 +# CHECK-NEXT: 1 27 20.00 * sqrtss (%rax), %xmm2 # CHECK-NEXT: 1 100 - * * * stmxcsr (%rax) # CHECK-NEXT: 1 3 1.00 subps %xmm0, %xmm2 # CHECK-NEXT: 1 10 1.00 * subps (%rax), %xmm2 @@ -335,7 +335,7 @@ xorps (%rax), %xmm2 # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] -# CHECK-NEXT: 32.50 32.50 - - - - - 41.00 21.50 22.00 32.50 - +# CHECK-NEXT: 32.50 32.50 - - - - - 41.00 21.50 22.00 108.50 - # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] Instructions: @@ -446,10 +446,10 @@ xorps (%rax), %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - sfence # CHECK-NEXT: - - - - - - - - 0.50 0.50 - - shufps $1, %xmm0, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - 0.50 0.50 - - shufps $1, (%rax), %xmm2 -# CHECK-NEXT: - - - - - - - - - - 1.00 - sqrtps %xmm0, %xmm2 -# CHECK-NEXT: 0.50 0.50 - - - - - - - - 1.00 - sqrtps (%rax), %xmm2 -# CHECK-NEXT: - - - - - - - - - - 1.00 - sqrtss %xmm0, %xmm2 -# CHECK-NEXT: 0.50 0.50 - - - - - - - - 1.00 - sqrtss (%rax), %xmm2 +# CHECK-NEXT: - - - - - - - - - - 20.00 - sqrtps %xmm0, %xmm2 +# CHECK-NEXT: 0.50 0.50 - - - - - - - - 20.00 - sqrtps (%rax), %xmm2 +# CHECK-NEXT: - - - - - - - - - - 20.00 - sqrtss %xmm0, %xmm2 +# CHECK-NEXT: 0.50 0.50 - - - - - - - - 20.00 - sqrtss (%rax), %xmm2 # CHECK-NEXT: - - - - - - - - - - - - stmxcsr (%rax) # CHECK-NEXT: - - - - - - - 1.00 - - - - subps %xmm0, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - 1.00 - - - - subps (%rax), %xmm2 diff --git a/llvm/test/tools/llvm-mca/X86/Znver1/resources-sse2.s b/llvm/test/tools/llvm-mca/X86/Znver1/resources-sse2.s index b14b59a5ced6..bdbf2fde7815 100644 --- a/llvm/test/tools/llvm-mca/X86/Znver1/resources-sse2.s +++ b/llvm/test/tools/llvm-mca/X86/Znver1/resources-sse2.s @@ -650,10 +650,10 @@ xorpd (%rax), %xmm2 # CHECK-NEXT: 1 8 0.50 * pxor (%rax), %xmm2 # CHECK-NEXT: 1 1 0.50 shufpd $1, %xmm0, %xmm2 # CHECK-NEXT: 1 8 0.50 * shufpd $1, (%rax), %xmm2 -# CHECK-NEXT: 1 20 1.00 sqrtpd %xmm0, %xmm2 -# CHECK-NEXT: 1 27 1.00 * sqrtpd (%rax), %xmm2 -# CHECK-NEXT: 1 20 1.00 sqrtsd %xmm0, %xmm2 -# CHECK-NEXT: 1 27 1.00 * sqrtsd (%rax), %xmm2 +# CHECK-NEXT: 1 20 20.00 sqrtpd %xmm0, %xmm2 +# CHECK-NEXT: 1 27 20.00 * sqrtpd (%rax), %xmm2 +# CHECK-NEXT: 1 20 20.00 sqrtsd %xmm0, %xmm2 +# CHECK-NEXT: 1 27 20.00 * sqrtsd (%rax), %xmm2 # CHECK-NEXT: 1 3 1.00 subpd %xmm0, %xmm2 # CHECK-NEXT: 1 10 1.00 * subpd (%rax), %xmm2 # CHECK-NEXT: 1 3 1.00 subsd %xmm0, %xmm2 @@ -683,7 +683,7 @@ xorpd (%rax), %xmm2 # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] -# CHECK-NEXT: 65.00 65.00 0.50 0.50 0.50 0.50 - 74.08 39.58 69.25 77.08 - +# CHECK-NEXT: 65.00 65.00 0.50 0.50 0.50 0.50 - 74.08 39.58 69.25 153.08 - # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] Instructions: @@ -935,10 +935,10 @@ xorpd (%rax), %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - 0.25 0.25 0.25 0.25 - pxor (%rax), %xmm2 # CHECK-NEXT: - - - - - - - - 0.50 0.50 - - shufpd $1, %xmm0, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - 0.50 0.50 - - shufpd $1, (%rax), %xmm2 -# CHECK-NEXT: - - - - - - - - - - 1.00 - sqrtpd %xmm0, %xmm2 -# CHECK-NEXT: 0.50 0.50 - - - - - - - - 1.00 - sqrtpd (%rax), %xmm2 -# CHECK-NEXT: - - - - - - - - - - 1.00 - sqrtsd %xmm0, %xmm2 -# CHECK-NEXT: 0.50 0.50 - - - - - - - - 1.00 - sqrtsd (%rax), %xmm2 +# CHECK-NEXT: - - - - - - - - - - 20.00 - sqrtpd %xmm0, %xmm2 +# CHECK-NEXT: 0.50 0.50 - - - - - - - - 20.00 - sqrtpd (%rax), %xmm2 +# CHECK-NEXT: - - - - - - - - - - 20.00 - sqrtsd %xmm0, %xmm2 +# CHECK-NEXT: 0.50 0.50 - - - - - - - - 20.00 - sqrtsd (%rax), %xmm2 # CHECK-NEXT: - - - - - - - 1.00 - - - - subpd %xmm0, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - 1.00 - - - - subpd (%rax), %xmm2 # CHECK-NEXT: - - - - - - - 1.00 - - - - subsd %xmm0, %xmm2 diff --git a/llvm/test/tools/llvm-mca/X86/Znver1/resources-x87.s b/llvm/test/tools/llvm-mca/X86/Znver1/resources-x87.s index a10e6704f8af..9db502c6c4e1 100644 --- a/llvm/test/tools/llvm-mca/X86/Znver1/resources-x87.s +++ b/llvm/test/tools/llvm-mca/X86/Znver1/resources-x87.s @@ -307,7 +307,7 @@ fyl2xp1 # CHECK-NEXT: 1 100 - * fscale # CHECK-NEXT: 1 100 - * fsin # CHECK-NEXT: 1 100 - * fsincos -# CHECK-NEXT: 1 20 1.00 * fsqrt +# CHECK-NEXT: 1 20 20.00 * fsqrt # CHECK-NEXT: 2 5 0.50 * fst %st(0) # CHECK-NEXT: 1 1 0.50 * * fsts (%edx) # CHECK-NEXT: 1 1 0.50 * * fstl (%ecx) @@ -371,7 +371,7 @@ fyl2xp1 # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] -# CHECK-NEXT: 32.50 32.50 - - - - - 58.50 2.00 8.00 45.50 - +# CHECK-NEXT: 32.50 32.50 - - - - - 58.50 2.00 8.00 64.50 - # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] Instructions: @@ -476,7 +476,7 @@ fyl2xp1 # CHECK-NEXT: - - - - - - - - - - - - fscale # CHECK-NEXT: - - - - - - - - - - - - fsin # CHECK-NEXT: - - - - - - - - - - - - fsincos -# CHECK-NEXT: - - - - - - - - - - 1.00 - fsqrt +# CHECK-NEXT: - - - - - - - - - - 20.00 - fsqrt # CHECK-NEXT: - - - - - - - - - 0.50 0.50 - fst %st(0) # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - fsts (%edx) # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - fstl (%ecx)