[X86] Split WriteFRcp/WriteFRsqrt/WriteFSqrt schedule classes
WriteFRcp/WriteFRsqrt are split to support scalar, XMM and YMM/ZMM instructions. WriteFSqrt is split into single/double/long-double sizes and scalar, XMM, YMM and ZMM instructions. This removes all InstRW overrides for these instructions. NOTE: There were a couple of typos in the Znver1 model - notably a 1cy throughput for SQRT that is highly unlikely and doesn't tally with Agner Fog's instruction tables. NOTE: I had to add Agner's numbers for several targets for WriteFSqrt80. llvm-svn: 331629
This commit is contained in:
		
							parent
							
								
									3ae0c0e291
								
							
						
					
					
						commit
						f3ae50fca2
					
				| 
						 | 
				
			
			@ -8092,34 +8092,38 @@ multiclass avx512_sqrt_packed<bits<8> opc, string OpcodeStr,
 | 
			
		|||
}
 | 
			
		||||
 | 
			
		||||
multiclass avx512_sqrt_packed_all<bits<8> opc, string OpcodeStr,
 | 
			
		||||
                                  X86SchedWriteWidths sched> {
 | 
			
		||||
  defm PSZ : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "ps"), sched.ZMM, v16f32_info>,
 | 
			
		||||
                                  X86SchedWriteSizes sched> {
 | 
			
		||||
  defm PSZ : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "ps"),
 | 
			
		||||
                                sched.PS.ZMM, v16f32_info>,
 | 
			
		||||
                                EVEX_V512, PS, EVEX_CD8<32, CD8VF>;
 | 
			
		||||
  defm PDZ : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "pd"), sched.ZMM, v8f64_info>,
 | 
			
		||||
  defm PDZ : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "pd"),
 | 
			
		||||
                                sched.PD.ZMM, v8f64_info>,
 | 
			
		||||
                                EVEX_V512, VEX_W, PD, EVEX_CD8<64, CD8VF>;
 | 
			
		||||
  // Define only if AVX512VL feature is present.
 | 
			
		||||
  let Predicates = [HasVLX] in {
 | 
			
		||||
    defm PSZ128 : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "ps"),
 | 
			
		||||
                                     sched.XMM, v4f32x_info>,
 | 
			
		||||
                                     sched.PS.XMM, v4f32x_info>,
 | 
			
		||||
                                     EVEX_V128, PS, EVEX_CD8<32, CD8VF>;
 | 
			
		||||
    defm PSZ256 : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "ps"),
 | 
			
		||||
                                     sched.YMM, v8f32x_info>,
 | 
			
		||||
                                     sched.PS.YMM, v8f32x_info>,
 | 
			
		||||
                                     EVEX_V256, PS, EVEX_CD8<32, CD8VF>;
 | 
			
		||||
    defm PDZ128 : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "pd"),
 | 
			
		||||
                                     sched.XMM, v2f64x_info>,
 | 
			
		||||
                                     sched.PD.XMM, v2f64x_info>,
 | 
			
		||||
                                     EVEX_V128, VEX_W, PD, EVEX_CD8<64, CD8VF>;
 | 
			
		||||
    defm PDZ256 : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "pd"),
 | 
			
		||||
                                     sched.YMM, v4f64x_info>,
 | 
			
		||||
                                     sched.PD.YMM, v4f64x_info>,
 | 
			
		||||
                                     EVEX_V256, VEX_W, PD, EVEX_CD8<64, CD8VF>;
 | 
			
		||||
  }
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
multiclass avx512_sqrt_packed_all_round<bits<8> opc, string OpcodeStr,
 | 
			
		||||
                                        X86SchedWriteWidths sched> {
 | 
			
		||||
  defm PSZ : avx512_sqrt_packed_round<opc, !strconcat(OpcodeStr, "ps"), sched.ZMM,
 | 
			
		||||
                                v16f32_info>, EVEX_V512, PS, EVEX_CD8<32, CD8VF>;
 | 
			
		||||
  defm PDZ : avx512_sqrt_packed_round<opc, !strconcat(OpcodeStr, "pd"), sched.ZMM,
 | 
			
		||||
                                v8f64_info>, EVEX_V512, VEX_W, PD, EVEX_CD8<64, CD8VF>;
 | 
			
		||||
                                        X86SchedWriteSizes sched> {
 | 
			
		||||
  defm PSZ : avx512_sqrt_packed_round<opc, !strconcat(OpcodeStr, "ps"),
 | 
			
		||||
                                      sched.PS.ZMM, v16f32_info>,
 | 
			
		||||
                                      EVEX_V512, PS, EVEX_CD8<32, CD8VF>;
 | 
			
		||||
  defm PDZ : avx512_sqrt_packed_round<opc, !strconcat(OpcodeStr, "pd"),
 | 
			
		||||
                                      sched.PD.ZMM, v8f64_info>,
 | 
			
		||||
                                      EVEX_V512, VEX_W, PD, EVEX_CD8<64, CD8VF>;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
multiclass avx512_sqrt_scalar<bits<8> opc, string OpcodeStr, X86FoldableSchedWrite sched,
 | 
			
		||||
| 
						 | 
				
			
			@ -8182,20 +8186,20 @@ multiclass avx512_sqrt_scalar<bits<8> opc, string OpcodeStr, X86FoldableSchedWri
 | 
			
		|||
}
 | 
			
		||||
 | 
			
		||||
multiclass avx512_sqrt_scalar_all<bits<8> opc, string OpcodeStr,
 | 
			
		||||
                                  X86SchedWriteWidths sched> {
 | 
			
		||||
  defm SSZ : avx512_sqrt_scalar<opc, OpcodeStr#"ss", sched.Scl, f32x_info, "SS",
 | 
			
		||||
                                  X86SchedWriteSizes sched> {
 | 
			
		||||
  defm SSZ : avx512_sqrt_scalar<opc, OpcodeStr#"ss", sched.PS.Scl, f32x_info, "SS",
 | 
			
		||||
                        int_x86_sse_sqrt_ss>,
 | 
			
		||||
                        EVEX_CD8<32, CD8VT1>, EVEX_4V, XS, NotMemoryFoldable;
 | 
			
		||||
  defm SDZ : avx512_sqrt_scalar<opc, OpcodeStr#"sd", sched.Scl, f64x_info, "SD",
 | 
			
		||||
  defm SDZ : avx512_sqrt_scalar<opc, OpcodeStr#"sd", sched.PD.Scl, f64x_info, "SD",
 | 
			
		||||
                        int_x86_sse2_sqrt_sd>,
 | 
			
		||||
                        EVEX_CD8<64, CD8VT1>, EVEX_4V, XD, VEX_W,
 | 
			
		||||
                        NotMemoryFoldable;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
defm VSQRT   : avx512_sqrt_packed_all<0x51, "vsqrt", SchedWriteFSqrt>,
 | 
			
		||||
               avx512_sqrt_packed_all_round<0x51, "vsqrt", SchedWriteFSqrt>;
 | 
			
		||||
defm VSQRT : avx512_sqrt_packed_all<0x51, "vsqrt", SchedWriteFSqrtSizes>,
 | 
			
		||||
             avx512_sqrt_packed_all_round<0x51, "vsqrt", SchedWriteFSqrtSizes>;
 | 
			
		||||
 | 
			
		||||
defm VSQRT   : avx512_sqrt_scalar_all<0x51, "vsqrt", SchedWriteFSqrt>, VEX_LIG;
 | 
			
		||||
defm VSQRT : avx512_sqrt_scalar_all<0x51, "vsqrt", SchedWriteFSqrtSizes>, VEX_LIG;
 | 
			
		||||
 | 
			
		||||
multiclass avx512_rndscale_scalar<bits<8> opc, string OpcodeStr,
 | 
			
		||||
                                  X86FoldableSchedWrite sched, X86VectorVTInfo _> {
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -314,7 +314,7 @@ defm CHS : FPUnary<fneg, MRM_E0, "fchs">;
 | 
			
		|||
defm ABS : FPUnary<fabs, MRM_E1, "fabs">;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
let SchedRW = [WriteFSqrt] in
 | 
			
		||||
let SchedRW = [WriteFSqrt80] in
 | 
			
		||||
defm SQRT: FPUnary<fsqrt,MRM_FA, "fsqrt">;
 | 
			
		||||
 | 
			
		||||
let SchedRW = [WriteMicrocoded] in {
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -2935,8 +2935,8 @@ multiclass sse2_fp_unop_s<bits<8> opc, string OpcodeStr, SDNode OpNode,
 | 
			
		|||
// Square root.
 | 
			
		||||
defm SQRT  : sse1_fp_unop_s<0x51, "sqrt", fsqrt, SchedWriteFSqrt, UseAVX>,
 | 
			
		||||
             sse1_fp_unop_p<0x51, "sqrt", fsqrt, SchedWriteFSqrt, [HasAVX, NoVLX]>,
 | 
			
		||||
             sse2_fp_unop_s<0x51, "sqrt", fsqrt, SchedWriteFSqrt, UseAVX>,
 | 
			
		||||
             sse2_fp_unop_p<0x51, "sqrt", fsqrt, SchedWriteFSqrt>;
 | 
			
		||||
             sse2_fp_unop_s<0x51, "sqrt", fsqrt, SchedWriteFSqrt64, UseAVX>,
 | 
			
		||||
             sse2_fp_unop_p<0x51, "sqrt", fsqrt, SchedWriteFSqrt64>;
 | 
			
		||||
 | 
			
		||||
// Reciprocal approximations. Note that these typically require refinement
 | 
			
		||||
// in order to obtain suitable precision.
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -164,12 +164,27 @@ defm : BWWriteResPair<WriteFMul,   [BWPort01], 3, [1], 1, 5>; // Floating point
 | 
			
		|||
defm : BWWriteResPair<WriteFMulY,  [BWPort01], 3, [1], 1, 6>; // Floating point multiplication (YMM/ZMM).
 | 
			
		||||
defm : BWWriteResPair<WriteFDiv,   [BWPort0], 12, [1], 1, 5>; // 10-14 cycles. // Floating point division.
 | 
			
		||||
defm : BWWriteResPair<WriteFDivY,  [BWPort0], 12, [1], 1, 7>; // 10-14 cycles. // Floating point division (YMM/ZMM).
 | 
			
		||||
defm : BWWriteResPair<WriteFSqrt,  [BWPort0], 15, [1], 1, 5>; // Floating point square root.
 | 
			
		||||
defm : BWWriteResPair<WriteFSqrtY, [BWPort0], 15, [1], 1, 7>; // Floating point square root (YMM/ZMM).
 | 
			
		||||
 | 
			
		||||
defm : X86WriteRes<WriteFSqrt,       [BWPort0,BWFPDivider], 11, [1,4], 1>; // Floating point square root.
 | 
			
		||||
defm : X86WriteRes<WriteFSqrtLd,     [BWPort0,BWPort23,BWFPDivider], 16, [1,1,7], 2>;
 | 
			
		||||
defm : BWWriteResPair<WriteFSqrtX,   [BWPort0,BWFPDivider], 11, [1,7], 1, 5>; // Floating point square root (XMM).
 | 
			
		||||
defm : BWWriteResPair<WriteFSqrtY,   [BWPort0,BWPort015,BWFPDivider], 21, [2,1,14], 3, 6>; // Floating point square root (YMM).
 | 
			
		||||
defm : BWWriteResPair<WriteFSqrtZ,   [BWPort0,BWPort015,BWFPDivider], 21, [2,1,14], 3, 6>; // Floating point square root (ZMM).
 | 
			
		||||
defm : X86WriteRes<WriteFSqrt64,     [BWPort0,BWFPDivider], 16, [1,8], 1>; // Floating point double square root.
 | 
			
		||||
defm : X86WriteRes<WriteFSqrt64Ld,   [BWPort0,BWPort23,BWFPDivider], 21, [1,1,14], 2>;
 | 
			
		||||
defm : BWWriteResPair<WriteFSqrt64X, [BWPort0,BWFPDivider], 16, [1,14],1, 5>; // Floating point double square root (XMM).
 | 
			
		||||
defm : BWWriteResPair<WriteFSqrt64Y, [BWPort0,BWPort015,BWFPDivider], 29, [2,1,28], 3, 6>; // Floating point double square root (YMM).
 | 
			
		||||
defm : BWWriteResPair<WriteFSqrt64Z, [BWPort0,BWPort015,BWFPDivider], 29, [2,1,28], 3, 6>; // Floating point double square root (ZMM).
 | 
			
		||||
defm : BWWriteResPair<WriteFSqrt80,  [BWPort0,BWFPDivider], 23, [1,9]>; // Floating point long double square root.
 | 
			
		||||
 | 
			
		||||
defm : BWWriteResPair<WriteFRcp,   [BWPort0],  5, [1], 1, 5>; // Floating point reciprocal estimate.
 | 
			
		||||
defm : BWWriteResPair<WriteFRcpY,  [BWPort0],  5, [1], 1, 7>; // Floating point reciprocal estimate (YMM/ZMM).
 | 
			
		||||
defm : BWWriteResPair<WriteFRcpX,  [BWPort0],  5, [1], 1, 5>; // Floating point reciprocal estimate (XMM).
 | 
			
		||||
defm : BWWriteResPair<WriteFRcpY,  [BWPort0,BWPort015], 11, [2,1], 3, 6>; // Floating point reciprocal estimate (YMM/ZMM).
 | 
			
		||||
 | 
			
		||||
defm : BWWriteResPair<WriteFRsqrt, [BWPort0],  5, [1], 1, 5>; // Floating point reciprocal square root estimate.
 | 
			
		||||
defm : BWWriteResPair<WriteFRsqrtY,[BWPort0],  5, [1], 1, 7>; // Floating point reciprocal square root estimate (YMM/ZMM).
 | 
			
		||||
defm : BWWriteResPair<WriteFRsqrtX,[BWPort0],  5, [1], 1, 5>; // Floating point reciprocal square root estimate (XMM).
 | 
			
		||||
defm : BWWriteResPair<WriteFRsqrtY,[BWPort0,BWPort015], 11, [2,1], 3, 6>; // Floating point reciprocal square root estimate (YMM/ZMM).
 | 
			
		||||
 | 
			
		||||
defm : BWWriteResPair<WriteFMA,    [BWPort01], 5, [1], 1, 5>; // Fused Multiply Add.
 | 
			
		||||
defm : BWWriteResPair<WriteFMAX,   [BWPort01], 5, [1], 1, 5>; // Fused Multiply Add (XMM).
 | 
			
		||||
defm : BWWriteResPair<WriteFMAY,   [BWPort01], 5, [1], 1, 6>; // Fused Multiply Add (YMM/ZMM).
 | 
			
		||||
| 
						 | 
				
			
			@ -1401,14 +1416,6 @@ def BWWriteResGroup123 : SchedWriteRes<[BWPort0,BWPort23]> {
 | 
			
		|||
def: InstRW<[BWWriteResGroup123], (instregex "MUL_F(32|64)m",
 | 
			
		||||
                                             "VPCMPGTQYrm")>;
 | 
			
		||||
 | 
			
		||||
def BWWriteResGroup126 : SchedWriteRes<[BWPort0,BWPort015]> {
 | 
			
		||||
  let Latency = 11;
 | 
			
		||||
  let NumMicroOps = 3;
 | 
			
		||||
  let ResourceCycles = [2,1];
 | 
			
		||||
}
 | 
			
		||||
def: InstRW<[BWWriteResGroup126], (instregex "VRCPPSYr",
 | 
			
		||||
                                             "VRSQRTPSYr")>;
 | 
			
		||||
 | 
			
		||||
def BWWriteResGroup128 : SchedWriteRes<[BWPort1,BWPort5,BWPort23]> {
 | 
			
		||||
  let Latency = 11;
 | 
			
		||||
  let NumMicroOps = 3;
 | 
			
		||||
| 
						 | 
				
			
			@ -1454,20 +1461,6 @@ def BWWriteResGroup135 : SchedWriteRes<[BWPort1,BWPort23]> {
 | 
			
		|||
}
 | 
			
		||||
def: InstRW<[BWWriteResGroup135], (instregex "(ADD|SUB|SUBR)_FI(16|32)m")>;
 | 
			
		||||
 | 
			
		||||
def BWWriteResGroup137 : SchedWriteRes<[BWPort0,BWFPDivider]> {
 | 
			
		||||
  let Latency = 11;
 | 
			
		||||
  let NumMicroOps = 1;
 | 
			
		||||
  let ResourceCycles = [1,7];
 | 
			
		||||
}
 | 
			
		||||
def: InstRW<[BWWriteResGroup137], (instregex "(V?)SQRTPSr")>;
 | 
			
		||||
 | 
			
		||||
def BWWriteResGroup137_1 : SchedWriteRes<[BWPort0,BWFPDivider]> {
 | 
			
		||||
  let Latency = 11;
 | 
			
		||||
  let NumMicroOps = 1;
 | 
			
		||||
  let ResourceCycles = [1,4];
 | 
			
		||||
}
 | 
			
		||||
def: InstRW<[BWWriteResGroup137_1], (instregex "(V?)SQRTSSr")>;
 | 
			
		||||
 | 
			
		||||
def BWWriteResGroup139 : SchedWriteRes<[BWPort0,BWFPDivider]> {
 | 
			
		||||
  let Latency = 14;
 | 
			
		||||
  let NumMicroOps = 1;
 | 
			
		||||
| 
						 | 
				
			
			@ -1555,22 +1548,6 @@ def BWWriteResGroup155 : SchedWriteRes<[BWPort0,BWPort015,BWFPDivider]> {
 | 
			
		|||
}
 | 
			
		||||
def: InstRW<[BWWriteResGroup155], (instregex "VDIVPSYrr")>;
 | 
			
		||||
 | 
			
		||||
def BWWriteResGroup156 : SchedWriteRes<[BWPort0,BWPort23,BWPort015]> {
 | 
			
		||||
  let Latency = 17;
 | 
			
		||||
  let NumMicroOps = 4;
 | 
			
		||||
  let ResourceCycles = [2,1,1];
 | 
			
		||||
}
 | 
			
		||||
def: InstRW<[BWWriteResGroup156], (instregex "VRCPPSYm",
 | 
			
		||||
                                             "VRSQRTPSYm")>;
 | 
			
		||||
 | 
			
		||||
def BWWriteResGroup157 : SchedWriteRes<[BWPort0,BWPort23,BWFPDivider]> {
 | 
			
		||||
  let Latency = 16;
 | 
			
		||||
  let NumMicroOps = 2;
 | 
			
		||||
  let ResourceCycles = [1,1,7];
 | 
			
		||||
}
 | 
			
		||||
def: InstRW<[BWWriteResGroup157], (instregex "(V?)SQRTPSm",
 | 
			
		||||
                                             "(V?)SQRTSSm")>;
 | 
			
		||||
 | 
			
		||||
def BWWriteResGroup159 : SchedWriteRes<[BWPort5,BWPort6,BWPort06,BWPort0156]> {
 | 
			
		||||
  let Latency = 18;
 | 
			
		||||
  let NumMicroOps = 8;
 | 
			
		||||
| 
						 | 
				
			
			@ -1610,20 +1587,6 @@ def BWWriteResGroup167 : SchedWriteRes<[BWPort4,BWPort5,BWPort6,BWPort23,BWPort2
 | 
			
		|||
}
 | 
			
		||||
def: InstRW<[BWWriteResGroup167], (instrs INSB, INSL, INSW)>;
 | 
			
		||||
 | 
			
		||||
def BWWriteResGroup168 : SchedWriteRes<[BWPort0,BWFPDivider]> {
 | 
			
		||||
  let Latency = 16;
 | 
			
		||||
  let NumMicroOps = 1;
 | 
			
		||||
  let ResourceCycles = [1,14];
 | 
			
		||||
}
 | 
			
		||||
def: InstRW<[BWWriteResGroup168], (instregex "(V?)SQRTPDr")>;
 | 
			
		||||
 | 
			
		||||
def BWWriteResGroup168_1 : SchedWriteRes<[BWPort0,BWFPDivider]> {
 | 
			
		||||
  let Latency = 16;
 | 
			
		||||
  let NumMicroOps = 1;
 | 
			
		||||
  let ResourceCycles = [1,8];
 | 
			
		||||
}
 | 
			
		||||
def: InstRW<[BWWriteResGroup168_1], (instregex "(V?)SQRTSDr")>;
 | 
			
		||||
 | 
			
		||||
def BWWriteResGroup169 : SchedWriteRes<[BWPort0,BWPort23]> {
 | 
			
		||||
  let Latency = 21;
 | 
			
		||||
  let NumMicroOps = 2;
 | 
			
		||||
| 
						 | 
				
			
			@ -1631,13 +1594,6 @@ def BWWriteResGroup169 : SchedWriteRes<[BWPort0,BWPort23]> {
 | 
			
		|||
}
 | 
			
		||||
def: InstRW<[BWWriteResGroup169], (instregex "DIV_F(32|64)m")>;
 | 
			
		||||
 | 
			
		||||
def BWWriteResGroup170 : SchedWriteRes<[BWPort0,BWPort015,BWFPDivider]> {
 | 
			
		||||
  let Latency = 21;
 | 
			
		||||
  let NumMicroOps = 3;
 | 
			
		||||
  let ResourceCycles = [2,1,14];
 | 
			
		||||
}
 | 
			
		||||
def: InstRW<[BWWriteResGroup170], (instregex "VSQRTPSYr")>;
 | 
			
		||||
 | 
			
		||||
def BWWriteResGroup171 : SchedWriteRes<[BWPort0,BWPort4,BWPort5,BWPort23,BWPort237,BWPort06,BWPort0156]> {
 | 
			
		||||
  let Latency = 21;
 | 
			
		||||
  let NumMicroOps = 19;
 | 
			
		||||
| 
						 | 
				
			
			@ -1680,14 +1636,6 @@ def BWWriteResGroup177 : SchedWriteRes<[BWPort0,BWPort1,BWPort23]> {
 | 
			
		|||
}
 | 
			
		||||
def: InstRW<[BWWriteResGroup177], (instregex "DIV_FI(16|32)m")>;
 | 
			
		||||
 | 
			
		||||
def BWWriteResGroup179 : SchedWriteRes<[BWPort0,BWPort23,BWFPDivider]> {
 | 
			
		||||
  let Latency = 21;
 | 
			
		||||
  let NumMicroOps = 2;
 | 
			
		||||
  let ResourceCycles = [1,1,14];
 | 
			
		||||
}
 | 
			
		||||
def: InstRW<[BWWriteResGroup179], (instregex "(V?)SQRTPDm",
 | 
			
		||||
                                             "(V?)SQRTSDm")>;
 | 
			
		||||
 | 
			
		||||
def BWWriteResGroup180 : SchedWriteRes<[BWPort0,BWPort23]> {
 | 
			
		||||
  let Latency = 26;
 | 
			
		||||
  let NumMicroOps = 2;
 | 
			
		||||
| 
						 | 
				
			
			@ -1695,13 +1643,6 @@ def BWWriteResGroup180 : SchedWriteRes<[BWPort0,BWPort23]> {
 | 
			
		|||
}
 | 
			
		||||
def: InstRW<[BWWriteResGroup180], (instregex "DIVR_F(32|64)m")>;
 | 
			
		||||
 | 
			
		||||
def BWWriteResGroup181 : SchedWriteRes<[BWPort0,BWPort23,BWPort015,BWFPDivider]> {
 | 
			
		||||
  let Latency = 27;
 | 
			
		||||
  let NumMicroOps = 4;
 | 
			
		||||
  let ResourceCycles = [2,1,1,14];
 | 
			
		||||
}
 | 
			
		||||
def: InstRW<[BWWriteResGroup181], (instregex "VSQRTPSYm")>;
 | 
			
		||||
 | 
			
		||||
def BWWriteResGroup182 : SchedWriteRes<[BWPort0,BWPort1,BWPort23]> {
 | 
			
		||||
  let Latency = 29;
 | 
			
		||||
  let NumMicroOps = 3;
 | 
			
		||||
| 
						 | 
				
			
			@ -1780,13 +1721,6 @@ def BWWriteResGroup186 : SchedWriteRes<[BWPort4,BWPort6,BWPort23,BWPort237,BWPor
 | 
			
		|||
}
 | 
			
		||||
def: InstRW<[BWWriteResGroup186], (instregex "^XSAVE$", "XSAVEC", "XSAVES", "XSAVEOPT")>;
 | 
			
		||||
 | 
			
		||||
def BWWriteResGroup189 : SchedWriteRes<[BWPort0,BWPort015,BWFPDivider]> {
 | 
			
		||||
  let Latency = 29;
 | 
			
		||||
  let NumMicroOps = 3;
 | 
			
		||||
  let ResourceCycles = [2,1,28];
 | 
			
		||||
}
 | 
			
		||||
def: InstRW<[BWWriteResGroup189], (instregex "VSQRTPDYr")>;
 | 
			
		||||
 | 
			
		||||
def BWWriteResGroup190 : SchedWriteRes<[BWPort0,BWPort1,BWPort5,BWPort23,BWPort0156]> {
 | 
			
		||||
  let Latency = 34;
 | 
			
		||||
  let NumMicroOps = 8;
 | 
			
		||||
| 
						 | 
				
			
			@ -1817,13 +1751,6 @@ def BWWriteResGroup194 : SchedWriteRes<[BWPort5,BWPort6,BWPort23,BWPort237,BWPor
 | 
			
		|||
def: InstRW<[BWWriteResGroup194], (instregex "OUT(8|16|32)ir",
 | 
			
		||||
                                             "OUT(8|16|32)rr")>;
 | 
			
		||||
 | 
			
		||||
def BWWriteResGroup195 : SchedWriteRes<[BWPort0,BWPort23,BWPort015,BWFPDivider]> {
 | 
			
		||||
  let Latency = 35;
 | 
			
		||||
  let NumMicroOps = 4;
 | 
			
		||||
  let ResourceCycles = [2,1,1,28];
 | 
			
		||||
}
 | 
			
		||||
def: InstRW<[BWWriteResGroup195], (instregex "VSQRTPDYm")>;
 | 
			
		||||
 | 
			
		||||
def BWWriteResGroup196 : SchedWriteRes<[BWPort5,BWPort0156]> {
 | 
			
		||||
  let Latency = 42;
 | 
			
		||||
  let NumMicroOps = 22;
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -158,12 +158,25 @@ defm : HWWriteResPair<WriteFMul,  [HWPort01],  5, [1], 1, 6>;
 | 
			
		|||
defm : HWWriteResPair<WriteFMulY, [HWPort01],  5, [1], 1, 7>;
 | 
			
		||||
defm : HWWriteResPair<WriteFDiv,   [HWPort0], 12, [1], 1, 5>; // 10-14 cycles.
 | 
			
		||||
defm : HWWriteResPair<WriteFDivY,  [HWPort0], 12, [1], 1, 7>; // 10-14 cycles.
 | 
			
		||||
 | 
			
		||||
defm : HWWriteResPair<WriteFRcp,   [HWPort0],  5, [1], 1, 5>;
 | 
			
		||||
defm : HWWriteResPair<WriteFRcpY,  [HWPort0],  5, [1], 1, 7>;
 | 
			
		||||
defm : HWWriteResPair<WriteFRcpX,  [HWPort0],  5, [1], 1, 6>;
 | 
			
		||||
defm : HWWriteResPair<WriteFRcpY,  [HWPort0,HWPort015], 11, [2,1], 3, 7>;
 | 
			
		||||
 | 
			
		||||
defm : HWWriteResPair<WriteFRsqrt, [HWPort0],  5, [1], 1, 5>;
 | 
			
		||||
defm : HWWriteResPair<WriteFRsqrtY,[HWPort0],  5, [1], 1, 7>;
 | 
			
		||||
defm : HWWriteResPair<WriteFSqrt,  [HWPort0], 15, [1], 1, 5>;
 | 
			
		||||
defm : HWWriteResPair<WriteFSqrtY, [HWPort0], 15, [1], 1, 7>;
 | 
			
		||||
defm : HWWriteResPair<WriteFRsqrtX,[HWPort0],  5, [1], 1, 6>;
 | 
			
		||||
defm : HWWriteResPair<WriteFRsqrtY,[HWPort0,HWPort015], 11, [2,1], 3, 7>;
 | 
			
		||||
 | 
			
		||||
defm : HWWriteResPair<WriteFSqrt,    [HWPort0,HWFPDivider], 11, [1,7], 1, 5>;
 | 
			
		||||
defm : HWWriteResPair<WriteFSqrtX,   [HWPort0,HWFPDivider], 11, [1,7], 1, 6>;
 | 
			
		||||
defm : HWWriteResPair<WriteFSqrtY,   [HWPort0,HWPort15,HWFPDivider], 21, [2,1,14], 3, 7>;
 | 
			
		||||
defm : HWWriteResPair<WriteFSqrtZ,   [HWPort0,HWPort15,HWFPDivider], 21, [2,1,14], 3, 7>;
 | 
			
		||||
defm : HWWriteResPair<WriteFSqrt64,  [HWPort0,HWFPDivider], 16, [1,14], 1, 5>;
 | 
			
		||||
defm : HWWriteResPair<WriteFSqrt64X, [HWPort0,HWFPDivider], 16, [1,14], 1, 6>;
 | 
			
		||||
defm : HWWriteResPair<WriteFSqrt64Y, [HWPort0,HWPort15,HWFPDivider], 35, [2,1,28], 3, 7>;
 | 
			
		||||
defm : HWWriteResPair<WriteFSqrt64Z, [HWPort0,HWPort15,HWFPDivider], 35, [2,1,28], 3, 7>;
 | 
			
		||||
defm : HWWriteResPair<WriteFSqrt80,  [HWPort0,HWFPDivider], 23, [1,17]>;
 | 
			
		||||
 | 
			
		||||
defm : HWWriteResPair<WriteCvtF2I, [HWPort1], 3>;
 | 
			
		||||
defm : HWWriteResPair<WriteCvtI2F, [HWPort1], 4>;
 | 
			
		||||
defm : HWWriteResPair<WriteCvtF2F, [HWPort1], 3>;
 | 
			
		||||
| 
						 | 
				
			
			@ -1639,13 +1652,6 @@ def: InstRW<[HWWriteResGroup89], (instregex "(V?)PCMPGTQ(Y?)rr",
 | 
			
		|||
                                            "MUL_FST0r",
 | 
			
		||||
                                            "MUL_FrST0")>;
 | 
			
		||||
 | 
			
		||||
def HWWriteResGroup91_1 : SchedWriteRes<[HWPort0,HWPort23,HWFPDivider]> {
 | 
			
		||||
  let Latency = 16;
 | 
			
		||||
  let NumMicroOps = 2;
 | 
			
		||||
  let ResourceCycles = [1,1,7];
 | 
			
		||||
}
 | 
			
		||||
def: InstRW<[HWWriteResGroup91_1], (instregex "(V?)SQRTSSm")>;
 | 
			
		||||
 | 
			
		||||
def HWWriteResGroup91_4 : SchedWriteRes<[HWPort0,HWPort23,HWFPDivider]> {
 | 
			
		||||
  let Latency = 18;
 | 
			
		||||
  let NumMicroOps = 2;
 | 
			
		||||
| 
						 | 
				
			
			@ -1658,9 +1664,7 @@ def HWWriteResGroup91_2 : SchedWriteRes<[HWPort0,HWPort23]> {
 | 
			
		|||
  let NumMicroOps = 2;
 | 
			
		||||
  let ResourceCycles = [1,1];
 | 
			
		||||
}
 | 
			
		||||
def: InstRW<[HWWriteResGroup91_2], (instregex "(V?)PCMPGTQrm",
 | 
			
		||||
                                              "(V?)RCPPSm",
 | 
			
		||||
                                              "(V?)RSQRTPSm")>;
 | 
			
		||||
def: InstRW<[HWWriteResGroup91_2], (instregex "(V?)PCMPGTQrm")>;
 | 
			
		||||
 | 
			
		||||
def HWWriteResGroup91_3 : SchedWriteRes<[HWPort0,HWPort23]> {
 | 
			
		||||
  let Latency = 12;
 | 
			
		||||
| 
						 | 
				
			
			@ -1832,22 +1836,6 @@ def HWWriteResGroup121 : SchedWriteRes<[HWPort0,HWFPDivider]> {
 | 
			
		|||
def: InstRW<[HWWriteResGroup121], (instregex "(V?)DIVPSrr",
 | 
			
		||||
                                             "(V?)DIVSSrr")>;
 | 
			
		||||
 | 
			
		||||
def HWWriteResGroup125 : SchedWriteRes<[HWPort0,HWPort015]> {
 | 
			
		||||
  let Latency = 11;
 | 
			
		||||
  let NumMicroOps = 3;
 | 
			
		||||
  let ResourceCycles = [2,1];
 | 
			
		||||
}
 | 
			
		||||
def: InstRW<[HWWriteResGroup125], (instregex "VRCPPSYr",
 | 
			
		||||
                                             "VRSQRTPSYr")>;
 | 
			
		||||
 | 
			
		||||
def HWWriteResGroup128 : SchedWriteRes<[HWPort0,HWPort23,HWPort015]> {
 | 
			
		||||
  let Latency = 18;
 | 
			
		||||
  let NumMicroOps = 4;
 | 
			
		||||
  let ResourceCycles = [2,1,1];
 | 
			
		||||
}
 | 
			
		||||
def: InstRW<[HWWriteResGroup128], (instregex "VRCPPSYm",
 | 
			
		||||
                                             "VRSQRTPSYm")>;
 | 
			
		||||
 | 
			
		||||
def HWWriteResGroup129 : SchedWriteRes<[HWPort1,HWPort06,HWPort0156]> {
 | 
			
		||||
  let Latency = 11;
 | 
			
		||||
  let NumMicroOps = 7;
 | 
			
		||||
| 
						 | 
				
			
			@ -1877,14 +1865,6 @@ def HWWriteResGroup132 : SchedWriteRes<[HWPort4,HWPort23,HWPort237,HWPort06,HWPo
 | 
			
		|||
}
 | 
			
		||||
def: InstRW<[HWWriteResGroup132], (instregex "CMPXCHG8B")>;
 | 
			
		||||
 | 
			
		||||
def HWWriteResGroup133 : SchedWriteRes<[HWPort0,HWFPDivider]> {
 | 
			
		||||
  let Latency = 11;
 | 
			
		||||
  let NumMicroOps = 1;
 | 
			
		||||
  let ResourceCycles = [1,7];
 | 
			
		||||
}
 | 
			
		||||
def: InstRW<[HWWriteResGroup133], (instregex "(V?)SQRTPSr",
 | 
			
		||||
                                             "(V?)SQRTSSr")>;
 | 
			
		||||
 | 
			
		||||
def HWWriteResGroup134 : SchedWriteRes<[HWPort0,HWPort23,HWFPDivider]> {
 | 
			
		||||
  let Latency = 19;
 | 
			
		||||
  let NumMicroOps = 2;
 | 
			
		||||
| 
						 | 
				
			
			@ -1899,13 +1879,6 @@ def HWWriteResGroup135 : SchedWriteRes<[HWPort1,HWPort23,HWPort237,HWPort06,HWPo
 | 
			
		|||
}
 | 
			
		||||
def: InstRW<[HWWriteResGroup135], (instregex "RCR(8|16|32|64)mCL")>;
 | 
			
		||||
 | 
			
		||||
def HWWriteResGroup138 : SchedWriteRes<[HWPort0,HWPort23,HWFPDivider]> {
 | 
			
		||||
  let Latency = 17;
 | 
			
		||||
  let NumMicroOps = 2;
 | 
			
		||||
  let ResourceCycles = [1,1,7];
 | 
			
		||||
}
 | 
			
		||||
def: InstRW<[HWWriteResGroup138], (instregex "(V?)SQRTPSm")>;
 | 
			
		||||
 | 
			
		||||
def HWWriteResGroup142 : SchedWriteRes<[HWPort1,HWPort06,HWPort15,HWPort0156]> {
 | 
			
		||||
  let Latency = 14;
 | 
			
		||||
  let NumMicroOps = 10;
 | 
			
		||||
| 
						 | 
				
			
			@ -1994,20 +1967,6 @@ def HWWriteResGroup155_1 : SchedWriteRes<[HWPort0,HWPort23,HWFPDivider]> {
 | 
			
		|||
}
 | 
			
		||||
def: InstRW<[HWWriteResGroup155_1], (instregex "(V?)DIVPDrm")>;
 | 
			
		||||
 | 
			
		||||
def HWWriteResGroup155_2 : SchedWriteRes<[HWPort0,HWPort23,HWFPDivider]> {
 | 
			
		||||
  let Latency = 21;
 | 
			
		||||
  let NumMicroOps = 2;
 | 
			
		||||
  let ResourceCycles = [1,1,14];
 | 
			
		||||
}
 | 
			
		||||
def: InstRW<[HWWriteResGroup155_2], (instregex "(V?)SQRTSDm")>;
 | 
			
		||||
 | 
			
		||||
def HWWriteResGroup155_3 : SchedWriteRes<[HWPort0,HWPort23,HWFPDivider]> {
 | 
			
		||||
  let Latency = 22;
 | 
			
		||||
  let NumMicroOps = 2;
 | 
			
		||||
  let ResourceCycles = [1,1,14];
 | 
			
		||||
}
 | 
			
		||||
def: InstRW<[HWWriteResGroup155_3], (instregex "(V?)SQRTPDm")>;
 | 
			
		||||
 | 
			
		||||
def HWWriteResGroup155_4 : SchedWriteRes<[HWPort0,HWPort23,HWFPDivider]> {
 | 
			
		||||
  let Latency = 25;
 | 
			
		||||
  let NumMicroOps = 2;
 | 
			
		||||
| 
						 | 
				
			
			@ -2022,29 +1981,19 @@ def HWWriteResGroup156 : SchedWriteRes<[HWPort5,HWPort6,HWPort0156]> {
 | 
			
		|||
}
 | 
			
		||||
def: InstRW<[HWWriteResGroup156], (instregex "MWAITrr")>;
 | 
			
		||||
 | 
			
		||||
def HWWriteResGroup157 : SchedWriteRes<[HWPort0,HWFPDivider]> {
 | 
			
		||||
  let Latency = 16;
 | 
			
		||||
  let NumMicroOps = 1;
 | 
			
		||||
  let ResourceCycles = [1,14];
 | 
			
		||||
}
 | 
			
		||||
def: InstRW<[HWWriteResGroup157], (instregex "(V?)SQRTPDr",
 | 
			
		||||
                                             "(V?)SQRTSDr")>;
 | 
			
		||||
 | 
			
		||||
def HWWriteResGroup159 : SchedWriteRes<[HWPort0,HWPort15,HWFPDivider]> {
 | 
			
		||||
  let Latency = 21;
 | 
			
		||||
  let NumMicroOps = 3;
 | 
			
		||||
  let ResourceCycles = [2,1,14];
 | 
			
		||||
}
 | 
			
		||||
def: InstRW<[HWWriteResGroup159], (instregex "VDIVPSYrr",
 | 
			
		||||
                                             "VSQRTPSYr")>;
 | 
			
		||||
def: InstRW<[HWWriteResGroup159], (instregex "VDIVPSYrr")>;
 | 
			
		||||
 | 
			
		||||
def HWWriteResGroup160 : SchedWriteRes<[HWPort0,HWPort23,HWPort15,HWFPDivider]> {
 | 
			
		||||
  let Latency = 28;
 | 
			
		||||
  let NumMicroOps = 4;
 | 
			
		||||
  let ResourceCycles = [2,1,1,14];
 | 
			
		||||
}
 | 
			
		||||
def: InstRW<[HWWriteResGroup160], (instregex "VDIVPSYrm",
 | 
			
		||||
                                             "VSQRTPSYm")>;
 | 
			
		||||
def: InstRW<[HWWriteResGroup160], (instregex "VDIVPSYrm")>;
 | 
			
		||||
 | 
			
		||||
def HWWriteResGroup161 : SchedWriteRes<[HWPort0,HWPort1,HWPort23]> {
 | 
			
		||||
  let Latency = 30;
 | 
			
		||||
| 
						 | 
				
			
			@ -2111,16 +2060,14 @@ def HWWriteResGroup173 : SchedWriteRes<[HWPort0,HWPort15,HWFPDivider]> {
 | 
			
		|||
  let NumMicroOps = 3;
 | 
			
		||||
  let ResourceCycles = [2,1,28];
 | 
			
		||||
}
 | 
			
		||||
def: InstRW<[HWWriteResGroup173], (instregex "VDIVPDYrr",
 | 
			
		||||
                                             "VSQRTPDYr")>;
 | 
			
		||||
def: InstRW<[HWWriteResGroup173], (instregex "VDIVPDYrr")>;
 | 
			
		||||
 | 
			
		||||
def HWWriteResGroup174 : SchedWriteRes<[HWPort0,HWPort23,HWPort15,HWFPDivider]> {
 | 
			
		||||
  let Latency = 42;
 | 
			
		||||
  let NumMicroOps = 4;
 | 
			
		||||
  let ResourceCycles = [2,1,1,28];
 | 
			
		||||
}
 | 
			
		||||
def: InstRW<[HWWriteResGroup174], (instregex "VDIVPDYrm",
 | 
			
		||||
                                             "VSQRTPDYm")>;
 | 
			
		||||
def: InstRW<[HWWriteResGroup174], (instregex "VDIVPDYrm")>;
 | 
			
		||||
 | 
			
		||||
def HWWriteResGroup175 : SchedWriteRes<[HWPort1,HWPort4,HWPort5,HWPort6,HWPort23,HWPort237,HWPort15,HWPort0156]> {
 | 
			
		||||
  let Latency = 41;
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -148,12 +148,25 @@ defm : SBWriteResPair<WriteFMul,   [SBPort0],  5, [1], 1, 6>;
 | 
			
		|||
defm : SBWriteResPair<WriteFMulY,  [SBPort0],  5, [1], 1, 7>;
 | 
			
		||||
defm : SBWriteResPair<WriteFDiv,   [SBPort0], 24, [1], 1, 5>;
 | 
			
		||||
defm : SBWriteResPair<WriteFDivY,  [SBPort0], 24, [1], 1, 7>;
 | 
			
		||||
 | 
			
		||||
defm : SBWriteResPair<WriteFRcp,   [SBPort0],  5, [1], 1, 6>;
 | 
			
		||||
defm : SBWriteResPair<WriteFRcpY,  [SBPort0],  5, [1], 1, 7>;
 | 
			
		||||
defm : SBWriteResPair<WriteFRcpX,  [SBPort0],  5, [1], 1, 6>;
 | 
			
		||||
defm : SBWriteResPair<WriteFRcpY,  [SBPort0,SBPort05],  7, [2,1], 3, 7>;
 | 
			
		||||
 | 
			
		||||
defm : SBWriteResPair<WriteFRsqrt, [SBPort0],  5, [1], 1, 6>;
 | 
			
		||||
defm : SBWriteResPair<WriteFRsqrtY,[SBPort0],  5, [1], 1, 7>;
 | 
			
		||||
defm : SBWriteResPair<WriteFSqrt,  [SBPort0], 14, [1], 1, 5>;
 | 
			
		||||
defm : SBWriteResPair<WriteFSqrtY, [SBPort0], 14, [1], 1, 7>;
 | 
			
		||||
defm : SBWriteResPair<WriteFRsqrtX,[SBPort0],  5, [1], 1, 6>;
 | 
			
		||||
defm : SBWriteResPair<WriteFRsqrtY,[SBPort0,SBPort05],  7, [2,1], 3, 7>;
 | 
			
		||||
 | 
			
		||||
defm : SBWriteResPair<WriteFSqrt,    [SBPort0,SBFPDivider], 14, [1,14], 1, 6>;
 | 
			
		||||
defm : SBWriteResPair<WriteFSqrtX,   [SBPort0,SBFPDivider], 14, [1,14], 1, 6>;
 | 
			
		||||
defm : SBWriteResPair<WriteFSqrtY,   [SBPort0,SBPort05,SBFPDivider], 29, [2,1,28], 3, 7>;
 | 
			
		||||
defm : SBWriteResPair<WriteFSqrtZ,   [SBPort0,SBPort05,SBFPDivider], 29, [2,1,28], 3, 7>;
 | 
			
		||||
defm : SBWriteResPair<WriteFSqrt64,  [SBPort0,SBFPDivider], 21, [1,21], 1, 6>;
 | 
			
		||||
defm : SBWriteResPair<WriteFSqrt64X, [SBPort0,SBFPDivider], 21, [1,21], 1, 6>;
 | 
			
		||||
defm : SBWriteResPair<WriteFSqrt64Y, [SBPort0,SBPort05,SBFPDivider], 45, [2,1,44], 3, 7>;
 | 
			
		||||
defm : SBWriteResPair<WriteFSqrt64Z, [SBPort0,SBPort05,SBFPDivider], 45, [2,1,44], 3, 7>;
 | 
			
		||||
defm : SBWriteResPair<WriteFSqrt80,  [SBPort0,SBFPDivider], 24, [1,24], 1, 6>;
 | 
			
		||||
 | 
			
		||||
defm : SBWriteResPair<WriteDPPD,   [SBPort0,SBPort1,SBPort5],  9, [1,1,1], 3, 6>;
 | 
			
		||||
defm : SBWriteResPair<WriteDPPS,   [SBPort0,SBPort1,SBPort5], 12, [1,2,1], 4, 6>;
 | 
			
		||||
defm : SBWriteResPair<WriteDPPSY,  [SBPort0,SBPort1,SBPort5], 12, [1,2,1], 4, 7>;
 | 
			
		||||
| 
						 | 
				
			
			@ -951,14 +964,6 @@ def: InstRW<[SBWriteResGroup59a], (instregex "MMX_PADD(B|D|W)irm",
 | 
			
		|||
                                             "MMX_P(MAX|MIN)(SW|UB)irm",
 | 
			
		||||
                                             "MMX_PSUB(B|D|Q|W)irm")>;
 | 
			
		||||
 | 
			
		||||
def SBWriteResGroup61 : SchedWriteRes<[SBPort0,SBPort05]> {
 | 
			
		||||
  let Latency = 7;
 | 
			
		||||
  let NumMicroOps = 3;
 | 
			
		||||
  let ResourceCycles = [2,1];
 | 
			
		||||
}
 | 
			
		||||
def: InstRW<[SBWriteResGroup61], (instregex "VRCPPSYr",
 | 
			
		||||
                                            "VRSQRTPSYr")>;
 | 
			
		||||
 | 
			
		||||
def SBWriteResGroup62 : SchedWriteRes<[SBPort5,SBPort23]> {
 | 
			
		||||
  let Latency = 7;
 | 
			
		||||
  let NumMicroOps = 3;
 | 
			
		||||
| 
						 | 
				
			
			@ -1361,18 +1366,8 @@ def SBWriteResGroup116 : SchedWriteRes<[SBPort0,SBFPDivider]> {
 | 
			
		|||
  let NumMicroOps = 1;
 | 
			
		||||
  let ResourceCycles = [1,14];
 | 
			
		||||
}
 | 
			
		||||
def: InstRW<[SBWriteResGroup116], (instregex "(V?)SQRTSSr",
 | 
			
		||||
                                             "(V?)DIVPSrr",
 | 
			
		||||
                                             "(V?)DIVSSrr",
 | 
			
		||||
                                             "(V?)SQRTPSr")>;
 | 
			
		||||
 | 
			
		||||
def SBWriteResGroup118 : SchedWriteRes<[SBPort0,SBPort23,SBPort05]> {
 | 
			
		||||
  let Latency = 14;
 | 
			
		||||
  let NumMicroOps = 4;
 | 
			
		||||
  let ResourceCycles = [2,1,1];
 | 
			
		||||
}
 | 
			
		||||
def: InstRW<[SBWriteResGroup118], (instregex "VRCPPSYm",
 | 
			
		||||
                                             "VRSQRTPSYm")>;
 | 
			
		||||
def: InstRW<[SBWriteResGroup116], (instregex "(V?)DIVPSrr",
 | 
			
		||||
                                             "(V?)DIVSSrr")>;
 | 
			
		||||
 | 
			
		||||
def SBWriteResGroup119 : SchedWriteRes<[SBPort0,SBPort1,SBPort23]> {
 | 
			
		||||
  let Latency = 15;
 | 
			
		||||
| 
						 | 
				
			
			@ -1386,26 +1381,8 @@ def SBWriteResGroup123 : SchedWriteRes<[SBPort0,SBPort23,SBFPDivider]> {
 | 
			
		|||
  let NumMicroOps = 2;
 | 
			
		||||
  let ResourceCycles = [1,1,14];
 | 
			
		||||
}
 | 
			
		||||
def: InstRW<[SBWriteResGroup123], (instregex "(V?)SQRTSSm",
 | 
			
		||||
                                             "(V?)DIVPSrm",
 | 
			
		||||
                                             "(V?)DIVSSrm",
 | 
			
		||||
                                             "(V?)SQRTPSm")>;
 | 
			
		||||
 | 
			
		||||
def SBWriteResGroup124 : SchedWriteRes<[SBPort0,SBFPDivider]> {
 | 
			
		||||
  let Latency = 21;
 | 
			
		||||
  let NumMicroOps = 1;
 | 
			
		||||
  let ResourceCycles = [1,21];
 | 
			
		||||
}
 | 
			
		||||
def: InstRW<[SBWriteResGroup124], (instregex "(V?)SQRTPDr",
 | 
			
		||||
                                             "(V?)SQRTSDr")>;
 | 
			
		||||
 | 
			
		||||
def SBWriteResGroup125 : SchedWriteRes<[SBPort0,SBPort23,SBFPDivider]> {
 | 
			
		||||
  let Latency = 27;
 | 
			
		||||
  let NumMicroOps = 2;
 | 
			
		||||
  let ResourceCycles = [1,1,21];
 | 
			
		||||
}
 | 
			
		||||
def: InstRW<[SBWriteResGroup125], (instregex "(V?)SQRTPDm",
 | 
			
		||||
                                             "(V?)SQRTSDm")>;
 | 
			
		||||
def: InstRW<[SBWriteResGroup123], (instregex "(V?)DIVPSrm",
 | 
			
		||||
                                             "(V?)DIVSSrm")>;
 | 
			
		||||
 | 
			
		||||
def SBWriteResGroup126 : SchedWriteRes<[SBPort0,SBFPDivider]> {
 | 
			
		||||
  let Latency = 22;
 | 
			
		||||
| 
						 | 
				
			
			@ -1428,8 +1405,7 @@ def SBWriteResGroup129 : SchedWriteRes<[SBPort0,SBPort05,SBFPDivider]> {
 | 
			
		|||
  let NumMicroOps = 3;
 | 
			
		||||
  let ResourceCycles = [2,1,28];
 | 
			
		||||
}
 | 
			
		||||
def: InstRW<[SBWriteResGroup129], (instregex "VDIVPSYrr",
 | 
			
		||||
                                             "VSQRTPSYr")>;
 | 
			
		||||
def: InstRW<[SBWriteResGroup129], (instregex "VDIVPSYrr")>;
 | 
			
		||||
 | 
			
		||||
def SBWriteResGroup130 : SchedWriteRes<[SBPort0,SBPort23]> {
 | 
			
		||||
  let Latency = 31;
 | 
			
		||||
| 
						 | 
				
			
			@ -1450,23 +1426,20 @@ def SBWriteResGroup132 : SchedWriteRes<[SBPort0,SBPort23,SBPort05,SBFPDivider]>
 | 
			
		|||
  let NumMicroOps = 4;
 | 
			
		||||
  let ResourceCycles = [2,1,1,28];
 | 
			
		||||
}
 | 
			
		||||
def: InstRW<[SBWriteResGroup132], (instregex "VDIVPSYrm",
 | 
			
		||||
                                             "VSQRTPSYm")>;
 | 
			
		||||
def: InstRW<[SBWriteResGroup132], (instregex "VDIVPSYrm")>;
 | 
			
		||||
 | 
			
		||||
def SBWriteResGroup133 : SchedWriteRes<[SBPort0,SBPort05,SBFPDivider]> {
 | 
			
		||||
  let Latency = 45;
 | 
			
		||||
  let NumMicroOps = 3;
 | 
			
		||||
  let ResourceCycles = [2,1,44];
 | 
			
		||||
}
 | 
			
		||||
def: InstRW<[SBWriteResGroup133], (instregex "VDIVPDYrr",
 | 
			
		||||
                                             "VSQRTPDYr")>;
 | 
			
		||||
def: InstRW<[SBWriteResGroup133], (instregex "VDIVPDYrr")>;
 | 
			
		||||
 | 
			
		||||
def SBWriteResGroup134 : SchedWriteRes<[SBPort0,SBPort23,SBPort05,SBFPDivider]> {
 | 
			
		||||
  let Latency = 52;
 | 
			
		||||
  let NumMicroOps = 4;
 | 
			
		||||
  let ResourceCycles = [2,1,1,44];
 | 
			
		||||
}
 | 
			
		||||
def: InstRW<[SBWriteResGroup134], (instregex "VDIVPDYrm",
 | 
			
		||||
                                             "VSQRTPDYm")>;
 | 
			
		||||
def: InstRW<[SBWriteResGroup134], (instregex "VDIVPDYrm")>;
 | 
			
		||||
 | 
			
		||||
} // SchedModel
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -161,12 +161,25 @@ defm : SKLWriteResPair<WriteFMul,  [SKLPort01],  4, [1], 1, 6>; // Floating poin
 | 
			
		|||
defm : SKLWriteResPair<WriteFMulY, [SKLPort01],  4, [1], 1, 7>; // Floating point multiplication (YMM/ZMM).
 | 
			
		||||
defm : SKLWriteResPair<WriteFDiv,   [SKLPort0], 12, [1], 1, 5>; // 10-14 cycles. // Floating point division.
 | 
			
		||||
defm : SKLWriteResPair<WriteFDivY,  [SKLPort0], 12, [1], 1, 7>; // 10-14 cycles. // Floating point division (YMM/ZMM).
 | 
			
		||||
defm : SKLWriteResPair<WriteFSqrt,  [SKLPort0], 15, [1], 1, 5>; // Floating point square root.
 | 
			
		||||
defm : SKLWriteResPair<WriteFSqrtY, [SKLPort0], 15, [1], 1, 7>; // Floating point square root (YMM/ZMM).
 | 
			
		||||
 | 
			
		||||
defm : SKLWriteResPair<WriteFSqrt,    [SKLPort0,SKLFPDivider], 12, [1,3], 1, 5>; // Floating point square root.
 | 
			
		||||
defm : SKLWriteResPair<WriteFSqrtX,   [SKLPort0,SKLFPDivider], 12, [1,3], 1, 6>; // Floating point square root (XMM).
 | 
			
		||||
defm : SKLWriteResPair<WriteFSqrtY,   [SKLPort0,SKLFPDivider], 12, [1,6], 1, 7>; // Floating point square root (YMM).
 | 
			
		||||
defm : SKLWriteResPair<WriteFSqrtZ,   [SKLPort0,SKLFPDivider], 12, [1,6], 1, 7>; // Floating point square root (ZMM).
 | 
			
		||||
defm : SKLWriteResPair<WriteFSqrt64,  [SKLPort0,SKLFPDivider], 18, [1,6], 1, 5>; // Floating point double square root.
 | 
			
		||||
defm : SKLWriteResPair<WriteFSqrt64X, [SKLPort0,SKLFPDivider], 18, [1,6], 1, 6>; // Floating point double square root (XMM).
 | 
			
		||||
defm : SKLWriteResPair<WriteFSqrt64Y, [SKLPort0,SKLFPDivider], 18, [1,12],1, 7>; // Floating point double square root (YMM).
 | 
			
		||||
defm : SKLWriteResPair<WriteFSqrt64Z, [SKLPort0,SKLFPDivider], 18, [1,12],1, 7>; // Floating point double square root (ZMM).
 | 
			
		||||
defm : SKLWriteResPair<WriteFSqrt80,  [SKLPort0,SKLFPDivider], 21, [1,7]>; // Floating point long double square root.
 | 
			
		||||
 | 
			
		||||
defm : SKLWriteResPair<WriteFRcp,   [SKLPort0], 4, [1], 1, 5>; // Floating point reciprocal estimate.
 | 
			
		||||
defm : SKLWriteResPair<WriteFRcpY,  [SKLPort0], 4, [1], 1, 5>; // Floating point reciprocal estimate (YMM/ZMM).
 | 
			
		||||
defm : SKLWriteResPair<WriteFRcpX,  [SKLPort0], 4, [1], 1, 6>; // Floating point reciprocal estimate (XMM).
 | 
			
		||||
defm : SKLWriteResPair<WriteFRcpY,  [SKLPort0], 4, [1], 1, 7>; // Floating point reciprocal estimate (YMM/ZMM).
 | 
			
		||||
 | 
			
		||||
defm : SKLWriteResPair<WriteFRsqrt, [SKLPort0], 4, [1], 1, 5>; // Floating point reciprocal square root estimate.
 | 
			
		||||
defm : SKLWriteResPair<WriteFRsqrtY,[SKLPort0], 4, [1], 1, 5>; // Floating point reciprocal square root estimate (YMM/ZMM).
 | 
			
		||||
defm : SKLWriteResPair<WriteFRsqrtX,[SKLPort0], 4, [1], 1, 6>; // Floating point reciprocal square root estimate (XMM).
 | 
			
		||||
defm : SKLWriteResPair<WriteFRsqrtY,[SKLPort0], 4, [1], 1, 7>; // Floating point reciprocal square root estimate (YMM/ZMM).
 | 
			
		||||
 | 
			
		||||
defm : SKLWriteResPair<WriteFMA,    [SKLPort01], 4, [1], 1, 5>; // Fused Multiply Add.
 | 
			
		||||
defm : SKLWriteResPair<WriteFMAX,   [SKLPort01], 4, [1], 1, 6>; // Fused Multiply Add (XMM).
 | 
			
		||||
defm : SKLWriteResPair<WriteFMAY,   [SKLPort01], 4, [1], 1, 7>; // Fused Multiply Add (YMM/ZMM).
 | 
			
		||||
| 
						 | 
				
			
			@ -1531,14 +1544,6 @@ def SKLWriteResGroup131 : SchedWriteRes<[SKLPort1,SKLPort6,SKLPort23,SKLPort0156
 | 
			
		|||
def: InstRW<[SKLWriteResGroup131], (instregex "LAR(16|32|64)rm",
 | 
			
		||||
                                              "LSL(16|32|64)rm")>;
 | 
			
		||||
 | 
			
		||||
def SKLWriteResGroup132 : SchedWriteRes<[SKLPort0,SKLPort23]> {
 | 
			
		||||
  let Latency = 10;
 | 
			
		||||
  let NumMicroOps = 2;
 | 
			
		||||
  let ResourceCycles = [1,1];
 | 
			
		||||
}
 | 
			
		||||
def: InstRW<[SKLWriteResGroup132], (instregex "(V?)RCPPSm",
 | 
			
		||||
                                              "(V?)RSQRTPSm")>;
 | 
			
		||||
 | 
			
		||||
def SKLWriteResGroup133 : SchedWriteRes<[SKLPort5,SKLPort23]> {
 | 
			
		||||
  let Latency = 10;
 | 
			
		||||
  let NumMicroOps = 2;
 | 
			
		||||
| 
						 | 
				
			
			@ -1621,9 +1626,7 @@ def SKLWriteResGroup146 : SchedWriteRes<[SKLPort0,SKLPort23]> {
 | 
			
		|||
  let NumMicroOps = 2;
 | 
			
		||||
  let ResourceCycles = [1,1];
 | 
			
		||||
}
 | 
			
		||||
def: InstRW<[SKLWriteResGroup146], (instregex "MUL_F(32|64)m",
 | 
			
		||||
                                              "VRCPPSYm",
 | 
			
		||||
                                              "VRSQRTPSYm")>;
 | 
			
		||||
def: InstRW<[SKLWriteResGroup146], (instregex "MUL_F(32|64)m")>;
 | 
			
		||||
 | 
			
		||||
def SKLWriteResGroup147 : SchedWriteRes<[SKLPort01,SKLPort23]> {
 | 
			
		||||
  let Latency = 11;
 | 
			
		||||
| 
						 | 
				
			
			@ -1707,21 +1710,6 @@ def SKLWriteResGroup156 : SchedWriteRes<[SKLPort06,SKLPort0156]> {
 | 
			
		|||
}
 | 
			
		||||
def: InstRW<[SKLWriteResGroup156], (instrs LOOPE, LOOPNE)>;
 | 
			
		||||
 | 
			
		||||
def SKLWriteResGroup157 : SchedWriteRes<[SKLPort0,SKLFPDivider]> {
 | 
			
		||||
  let Latency = 12;
 | 
			
		||||
  let NumMicroOps = 1;
 | 
			
		||||
  let ResourceCycles = [1,3];
 | 
			
		||||
}
 | 
			
		||||
def: InstRW<[SKLWriteResGroup157], (instregex "(V?)SQRTPSr",
 | 
			
		||||
                                              "(V?)SQRTSSr")>;
 | 
			
		||||
 | 
			
		||||
def SKLWriteResGroup158 : SchedWriteRes<[SKLPort0,SKLFPDivider]> {
 | 
			
		||||
  let Latency = 12;
 | 
			
		||||
  let NumMicroOps = 1;
 | 
			
		||||
  let ResourceCycles = [1,6];
 | 
			
		||||
}
 | 
			
		||||
def: InstRW<[SKLWriteResGroup158], (instregex "VSQRTPSYr")>;
 | 
			
		||||
 | 
			
		||||
def SKLWriteResGroup160 : SchedWriteRes<[SKLPort0,SKLPort5,SKLPort23,SKLPort01]> {
 | 
			
		||||
  let Latency = 12;
 | 
			
		||||
  let NumMicroOps = 4;
 | 
			
		||||
| 
						 | 
				
			
			@ -1816,13 +1804,6 @@ def SKLWriteResGroup179 : SchedWriteRes<[SKLPort0,SKLPort23,SKLFPDivider]> {
 | 
			
		|||
}
 | 
			
		||||
def: InstRW<[SKLWriteResGroup179], (instregex "(V?)DIVPSrm")>;
 | 
			
		||||
 | 
			
		||||
def SKLWriteResGroup179_1 : SchedWriteRes<[SKLPort0,SKLPort23,SKLFPDivider]> {
 | 
			
		||||
  let Latency = 17;
 | 
			
		||||
  let NumMicroOps = 2;
 | 
			
		||||
  let ResourceCycles = [1,1,3];
 | 
			
		||||
}
 | 
			
		||||
def: InstRW<[SKLWriteResGroup179_1], (instregex "(V?)SQRTSSm")>;
 | 
			
		||||
 | 
			
		||||
def SKLWriteResGroup180 : SchedWriteRes<[SKLPort0,SKLPort1,SKLPort5,SKLPort6,SKLPort05,SKLPort0156]> {
 | 
			
		||||
  let Latency = 17;
 | 
			
		||||
  let NumMicroOps = 15;
 | 
			
		||||
| 
						 | 
				
			
			@ -1830,21 +1811,6 @@ def SKLWriteResGroup180 : SchedWriteRes<[SKLPort0,SKLPort1,SKLPort5,SKLPort6,SKL
 | 
			
		|||
}
 | 
			
		||||
def: InstRW<[SKLWriteResGroup180], (instrs XCH_F)>;
 | 
			
		||||
 | 
			
		||||
def SKLWriteResGroup181 : SchedWriteRes<[SKLPort0,SKLFPDivider]> {
 | 
			
		||||
  let Latency = 18;
 | 
			
		||||
  let NumMicroOps = 1;
 | 
			
		||||
  let ResourceCycles = [1,6];
 | 
			
		||||
}
 | 
			
		||||
def: InstRW<[SKLWriteResGroup181], (instregex "(V?)SQRTPDr",
 | 
			
		||||
                                              "(V?)SQRTSDr")>;
 | 
			
		||||
 | 
			
		||||
def SKLWriteResGroup181_1 : SchedWriteRes<[SKLPort0,SKLFPDivider]> {
 | 
			
		||||
  let Latency = 18;
 | 
			
		||||
  let NumMicroOps = 1;
 | 
			
		||||
  let ResourceCycles = [1,12];
 | 
			
		||||
}
 | 
			
		||||
def: InstRW<[SKLWriteResGroup181_1], (instregex "VSQRTPDYr")>;
 | 
			
		||||
 | 
			
		||||
def SKLWriteResGroup182 : SchedWriteRes<[SKLPort0,SKLPort23,SKLFPDivider]> {
 | 
			
		||||
  let Latency = 18;
 | 
			
		||||
  let NumMicroOps = 2;
 | 
			
		||||
| 
						 | 
				
			
			@ -1852,13 +1818,6 @@ def SKLWriteResGroup182 : SchedWriteRes<[SKLPort0,SKLPort23,SKLFPDivider]> {
 | 
			
		|||
}
 | 
			
		||||
def: InstRW<[SKLWriteResGroup182], (instregex "VDIVPSYrm")>;
 | 
			
		||||
 | 
			
		||||
def SKLWriteResGroup183 : SchedWriteRes<[SKLPort0,SKLPort23,SKLFPDivider]> {
 | 
			
		||||
  let Latency = 18;
 | 
			
		||||
  let NumMicroOps = 2;
 | 
			
		||||
  let ResourceCycles = [1,1,3];
 | 
			
		||||
}
 | 
			
		||||
def: InstRW<[SKLWriteResGroup183], (instregex "(V?)SQRTPSm")>;
 | 
			
		||||
 | 
			
		||||
def SKLWriteResGroup184 : SchedWriteRes<[SKLPort5,SKLPort6,SKLPort06,SKLPort0156]> {
 | 
			
		||||
  let Latency = 18;
 | 
			
		||||
  let NumMicroOps = 8;
 | 
			
		||||
| 
						 | 
				
			
			@ -1880,13 +1839,6 @@ def SKLWriteResGroup186 : SchedWriteRes<[SKLPort0,SKLPort23,SKLFPDivider]> {
 | 
			
		|||
}
 | 
			
		||||
def: InstRW<[SKLWriteResGroup186], (instregex "(V?)DIVSDrm")>;
 | 
			
		||||
 | 
			
		||||
def SKLWriteResGroup186_1 : SchedWriteRes<[SKLPort0,SKLPort23,SKLFPDivider]> {
 | 
			
		||||
  let Latency = 19;
 | 
			
		||||
  let NumMicroOps = 2;
 | 
			
		||||
  let ResourceCycles = [1,1,6];
 | 
			
		||||
}
 | 
			
		||||
def: InstRW<[SKLWriteResGroup186_1], (instregex "VSQRTPSYm")>;
 | 
			
		||||
 | 
			
		||||
def SKLWriteResGroup189 : SchedWriteRes<[SKLPort0]> {
 | 
			
		||||
  let Latency = 20;
 | 
			
		||||
  let NumMicroOps = 1;
 | 
			
		||||
| 
						 | 
				
			
			@ -1959,13 +1911,6 @@ def: InstRW<[SKLWriteResGroup196_2], (instrs VGATHERDPSYrm,
 | 
			
		|||
                                             VPGATHERQQYrm,
 | 
			
		||||
                                             VGATHERDPDYrm)>;
 | 
			
		||||
 | 
			
		||||
def SKLWriteResGroup197 : SchedWriteRes<[SKLPort0,SKLPort23,SKLFPDivider]> {
 | 
			
		||||
  let Latency = 23;
 | 
			
		||||
  let NumMicroOps = 2;
 | 
			
		||||
  let ResourceCycles = [1,1,6];
 | 
			
		||||
}
 | 
			
		||||
def: InstRW<[SKLWriteResGroup197], (instregex "(V?)SQRTSDm")>;
 | 
			
		||||
 | 
			
		||||
def SKLWriteResGroup198 : SchedWriteRes<[SKLPort0,SKLPort4,SKLPort5,SKLPort23,SKLPort237,SKLPort06,SKLPort0156]> {
 | 
			
		||||
  let Latency = 23;
 | 
			
		||||
  let NumMicroOps = 19;
 | 
			
		||||
| 
						 | 
				
			
			@ -1973,20 +1918,6 @@ def SKLWriteResGroup198 : SchedWriteRes<[SKLPort0,SKLPort4,SKLPort5,SKLPort23,SK
 | 
			
		|||
}
 | 
			
		||||
def: InstRW<[SKLWriteResGroup198], (instregex "CMPXCHG16B")>;
 | 
			
		||||
 | 
			
		||||
def SKLWriteResGroup199 : SchedWriteRes<[SKLPort0,SKLPort23,SKLFPDivider]> {
 | 
			
		||||
  let Latency = 24;
 | 
			
		||||
  let NumMicroOps = 2;
 | 
			
		||||
  let ResourceCycles = [1,1,6];
 | 
			
		||||
}
 | 
			
		||||
def: InstRW<[SKLWriteResGroup199], (instregex "(V?)SQRTPDm")>;
 | 
			
		||||
 | 
			
		||||
def SKLWriteResGroup201 : SchedWriteRes<[SKLPort0,SKLPort23,SKLFPDivider]> {
 | 
			
		||||
  let Latency = 25;
 | 
			
		||||
  let NumMicroOps = 2;
 | 
			
		||||
  let ResourceCycles = [1,1,12];
 | 
			
		||||
}
 | 
			
		||||
def: InstRW<[SKLWriteResGroup201], (instregex "VSQRTPDYm")>;
 | 
			
		||||
 | 
			
		||||
def SKLWriteResGroup202 : SchedWriteRes<[SKLPort0,SKLPort5,SKLPort23]> {
 | 
			
		||||
  let Latency = 25;
 | 
			
		||||
  let NumMicroOps = 3;
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -161,12 +161,25 @@ defm : SKXWriteResPair<WriteFMul, [SKXPort015],  4, [1], 1, 6>; // Floating poin
 | 
			
		|||
defm : SKXWriteResPair<WriteFMulY,[SKXPort015],  4, [1], 1, 7>; // Floating point multiplication (YMM/ZMM).
 | 
			
		||||
defm : SKXWriteResPair<WriteFDiv,   [SKXPort0], 12, [1], 1, 5>; // 10-14 cycles. // Floating point division.
 | 
			
		||||
defm : SKXWriteResPair<WriteFDivY,  [SKXPort0], 12, [1], 1, 7>; // 10-14 cycles. // Floating point division (YMM/ZMM).
 | 
			
		||||
defm : SKXWriteResPair<WriteFSqrt,  [SKXPort0], 15, [1], 1, 5>; // Floating point square root.
 | 
			
		||||
defm : SKXWriteResPair<WriteFSqrtY, [SKXPort0], 15, [1], 1, 7>; // Floating point square root (YMM/ZMM).
 | 
			
		||||
defm : SKXWriteResPair<WriteFRcp,   [SKXPort0],  4, [1], 1, 6>; // Floating point reciprocal estimate.
 | 
			
		||||
 | 
			
		||||
defm : SKXWriteResPair<WriteFSqrt,    [SKXPort0,SKXFPDivider], 12, [1,3], 1, 5>; // Floating point square root.
 | 
			
		||||
defm : SKXWriteResPair<WriteFSqrtX,   [SKXPort0,SKXFPDivider], 12, [1,3], 1, 6>; // Floating point square root (XMM).
 | 
			
		||||
defm : SKXWriteResPair<WriteFSqrtY,   [SKXPort0,SKXFPDivider], 12, [1,6], 1, 7>; // Floating point square root (YMM).
 | 
			
		||||
defm : SKXWriteResPair<WriteFSqrtZ,   [SKXPort0,SKXPort5,SKXFPDivider], 20, [2,1,12], 3, 7>; // Floating point square root (ZMM).
 | 
			
		||||
defm : SKXWriteResPair<WriteFSqrt64,  [SKXPort0,SKXFPDivider], 18, [1,6], 1, 5>; // Floating point double square root.
 | 
			
		||||
defm : SKXWriteResPair<WriteFSqrt64X, [SKXPort0,SKXFPDivider], 18, [1,6], 1, 6>; // Floating point double square root (XMM).
 | 
			
		||||
defm : SKXWriteResPair<WriteFSqrt64Y, [SKXPort0,SKXFPDivider], 18, [1,12],1, 7>; // Floating point double square root (YMM).
 | 
			
		||||
defm : SKXWriteResPair<WriteFSqrt64Z, [SKXPort0,SKXPort5,SKXFPDivider], 32, [2,1,24], 3, 7>; // Floating point double square root (ZMM).
 | 
			
		||||
defm : SKXWriteResPair<WriteFSqrt80,  [SKXPort0,SKXFPDivider], 21, [1,7]>; // Floating point long double square root.
 | 
			
		||||
 | 
			
		||||
defm : SKXWriteResPair<WriteFRcp,   [SKXPort0],  4, [1], 1, 5>; // Floating point reciprocal estimate.
 | 
			
		||||
defm : SKXWriteResPair<WriteFRcpX,  [SKXPort0],  4, [1], 1, 6>; // Floating point reciprocal estimate (XMM).
 | 
			
		||||
defm : SKXWriteResPair<WriteFRcpY,  [SKXPort0],  4, [1], 1, 7>; // Floating point reciprocal estimate (YMM/ZMM).
 | 
			
		||||
defm : SKXWriteResPair<WriteFRsqrt, [SKXPort0],  4, [1], 1, 6>; // Floating point reciprocal square root estimate.
 | 
			
		||||
 | 
			
		||||
defm : SKXWriteResPair<WriteFRsqrt, [SKXPort0],  4, [1], 1, 5>; // Floating point reciprocal square root estimate.
 | 
			
		||||
defm : SKXWriteResPair<WriteFRsqrtX,[SKXPort0],  4, [1], 1, 6>; // Floating point reciprocal square root estimate (XMM).
 | 
			
		||||
defm : SKXWriteResPair<WriteFRsqrtY,[SKXPort0],  4, [1], 1, 7>; // Floating point reciprocal square root estimate (YMM/ZMM).
 | 
			
		||||
 | 
			
		||||
defm : SKXWriteResPair<WriteFMA,  [SKXPort015],  4, [1], 1, 5>; // Fused Multiply Add.
 | 
			
		||||
defm : SKXWriteResPair<WriteFMAX, [SKXPort015],  4, [1], 1, 6>; // Fused Multiply Add (XMM).
 | 
			
		||||
defm : SKXWriteResPair<WriteFMAY, [SKXPort015],  4, [1], 1, 7>; // Fused Multiply Add (YMM/ZMM).
 | 
			
		||||
| 
						 | 
				
			
			@ -2388,10 +2401,6 @@ def SKXWriteResGroup135 : SchedWriteRes<[SKXPort0,SKXPort23]> {
 | 
			
		|||
  let ResourceCycles = [1,1];
 | 
			
		||||
}
 | 
			
		||||
def: InstRW<[SKXWriteResGroup135], (instregex "MMX_CVTPI2PSirm",
 | 
			
		||||
                                              "RCPSSm",
 | 
			
		||||
                                              "RSQRTSSm",
 | 
			
		||||
                                              "VRCPSSm",
 | 
			
		||||
                                              "VRSQRTSSm",
 | 
			
		||||
                                              "VTESTPDYrm",
 | 
			
		||||
                                              "VTESTPSYrm")>;
 | 
			
		||||
 | 
			
		||||
| 
						 | 
				
			
			@ -2877,21 +2886,6 @@ def SKXWriteResGroup171 : SchedWriteRes<[SKXPort06,SKXPort0156]> {
 | 
			
		|||
}
 | 
			
		||||
def: InstRW<[SKXWriteResGroup171], (instrs LOOPE, LOOPNE)>;
 | 
			
		||||
 | 
			
		||||
def SKXWriteResGroup172 : SchedWriteRes<[SKXPort0,SKXFPDivider]> {
 | 
			
		||||
  let Latency = 12;
 | 
			
		||||
  let NumMicroOps = 1;
 | 
			
		||||
  let ResourceCycles = [1,3];
 | 
			
		||||
}
 | 
			
		||||
def: InstRW<[SKXWriteResGroup172], (instregex "(V?)SQRTPS(Z128)?r",
 | 
			
		||||
                                              "(V?)SQRTSS(Z?)r")>;
 | 
			
		||||
 | 
			
		||||
def SKXWriteResGroup173 : SchedWriteRes<[SKXPort0,SKXFPDivider]> {
 | 
			
		||||
  let Latency = 12;
 | 
			
		||||
  let NumMicroOps = 1;
 | 
			
		||||
  let ResourceCycles = [1,6];
 | 
			
		||||
}
 | 
			
		||||
def: InstRW<[SKXWriteResGroup173], (instregex "VSQRTPS(Y|Z256)r")>;
 | 
			
		||||
 | 
			
		||||
def SKXWriteResGroup174 : SchedWriteRes<[SKXPort015]> {
 | 
			
		||||
  let Latency = 12;
 | 
			
		||||
  let NumMicroOps = 3;
 | 
			
		||||
| 
						 | 
				
			
			@ -3072,13 +3066,6 @@ def SKXWriteResGroup201 : SchedWriteRes<[SKXPort0,SKXPort23,SKXFPDivider]> {
 | 
			
		|||
}
 | 
			
		||||
def: InstRW<[SKXWriteResGroup201], (instregex "(V?)DIVPS(Z128)?rm")>;
 | 
			
		||||
 | 
			
		||||
def SKXWriteResGroup201_1 : SchedWriteRes<[SKXPort0,SKXPort23,SKXFPDivider]> {
 | 
			
		||||
  let Latency = 17;
 | 
			
		||||
  let NumMicroOps = 2;
 | 
			
		||||
  let ResourceCycles = [1,1,3];
 | 
			
		||||
}
 | 
			
		||||
def: InstRW<[SKXWriteResGroup201_1], (instregex "(V?)SQRTSS(Z?)m")>;
 | 
			
		||||
 | 
			
		||||
def SKXWriteResGroup202 : SchedWriteRes<[SKXPort0,SKXPort1,SKXPort5,SKXPort6,SKXPort05,SKXPort0156]> {
 | 
			
		||||
  let Latency = 17;
 | 
			
		||||
  let NumMicroOps = 15;
 | 
			
		||||
| 
						 | 
				
			
			@ -3086,21 +3073,6 @@ def SKXWriteResGroup202 : SchedWriteRes<[SKXPort0,SKXPort1,SKXPort5,SKXPort6,SKX
 | 
			
		|||
}
 | 
			
		||||
def: InstRW<[SKXWriteResGroup202], (instrs XCH_F)>;
 | 
			
		||||
 | 
			
		||||
def SKXWriteResGroup203 : SchedWriteRes<[SKXPort0,SKXFPDivider]> {
 | 
			
		||||
  let Latency = 18;
 | 
			
		||||
  let NumMicroOps = 1;
 | 
			
		||||
  let ResourceCycles = [1,6];
 | 
			
		||||
}
 | 
			
		||||
def: InstRW<[SKXWriteResGroup203], (instregex "(V?)SQRTPD(Z128)?r",
 | 
			
		||||
                                              "(V?)SQRTSD(Z?)r")>;
 | 
			
		||||
 | 
			
		||||
def SKXWriteResGroup203_1 : SchedWriteRes<[SKXPort0,SKXFPDivider]> {
 | 
			
		||||
  let Latency = 18;
 | 
			
		||||
  let NumMicroOps = 1;
 | 
			
		||||
  let ResourceCycles = [1,12];
 | 
			
		||||
}
 | 
			
		||||
def: InstRW<[SKXWriteResGroup203_1], (instregex "VSQRTPD(Y|Z256)r")>;
 | 
			
		||||
 | 
			
		||||
def SKXWriteResGroup204 : SchedWriteRes<[SKXPort0,SKXPort23,SKXFPDivider]> {
 | 
			
		||||
  let Latency = 18;
 | 
			
		||||
  let NumMicroOps = 2;
 | 
			
		||||
| 
						 | 
				
			
			@ -3108,13 +3080,6 @@ def SKXWriteResGroup204 : SchedWriteRes<[SKXPort0,SKXPort23,SKXFPDivider]> {
 | 
			
		|||
}
 | 
			
		||||
def: InstRW<[SKXWriteResGroup204], (instregex "VDIVPS(Y|Z256)rm")>;
 | 
			
		||||
 | 
			
		||||
def SKXWriteResGroup204_1 : SchedWriteRes<[SKXPort0,SKXPort23,SKXFPDivider]> {
 | 
			
		||||
  let Latency = 18;
 | 
			
		||||
  let NumMicroOps = 2;
 | 
			
		||||
  let ResourceCycles = [1,1,3];
 | 
			
		||||
}
 | 
			
		||||
def: InstRW<[SKXWriteResGroup204_1], (instregex "(V?)SQRTPS(Z128)?m")>;
 | 
			
		||||
 | 
			
		||||
def SKXWriteResGroup205 : SchedWriteRes<[SKXPort23,SKXPort015]> {
 | 
			
		||||
  let Latency = 18;
 | 
			
		||||
  let NumMicroOps = 4;
 | 
			
		||||
| 
						 | 
				
			
			@ -3143,20 +3108,6 @@ def SKXWriteResGroup209 : SchedWriteRes<[SKXPort0,SKXPort23,SKXFPDivider]> {
 | 
			
		|||
}
 | 
			
		||||
def: InstRW<[SKXWriteResGroup209], (instregex "(V?)DIVSD(Z?)rm")>;
 | 
			
		||||
 | 
			
		||||
def SKXWriteResGroup209_1 : SchedWriteRes<[SKXPort0,SKXPort23,SKXFPDivider]> {
 | 
			
		||||
  let Latency = 19;
 | 
			
		||||
  let NumMicroOps = 2;
 | 
			
		||||
  let ResourceCycles = [1,1,6];
 | 
			
		||||
}
 | 
			
		||||
def: InstRW<[SKXWriteResGroup209_1], (instregex "VSQRTPS(Y|Z256)m")>;
 | 
			
		||||
 | 
			
		||||
def SKXWriteResGroup210 : SchedWriteRes<[SKXPort0,SKXPort5,SKXFPDivider]> {
 | 
			
		||||
  let Latency = 20;
 | 
			
		||||
  let NumMicroOps = 3;
 | 
			
		||||
  let ResourceCycles = [2,1,12];
 | 
			
		||||
}
 | 
			
		||||
def: InstRW<[SKXWriteResGroup210], (instregex "VSQRTPSZr")>;
 | 
			
		||||
 | 
			
		||||
def SKXWriteResGroup211 : SchedWriteRes<[SKXPort23,SKXPort015]> {
 | 
			
		||||
  let Latency = 19;
 | 
			
		||||
  let NumMicroOps = 4;
 | 
			
		||||
| 
						 | 
				
			
			@ -3287,13 +3238,6 @@ def SKXWriteResGroup225 : SchedWriteRes<[SKXPort5,SKXPort01,SKXPort015]> {
 | 
			
		|||
def: InstRW<[SKXWriteResGroup225], (instregex "VPCONFLICTDZ128rr",
 | 
			
		||||
                                              "VPCONFLICTQZ256rr")>;
 | 
			
		||||
 | 
			
		||||
def SKXWriteResGroup226 : SchedWriteRes<[SKXPort0,SKXPort23,SKXFPDivider]> {
 | 
			
		||||
  let Latency = 23;
 | 
			
		||||
  let NumMicroOps = 2;
 | 
			
		||||
  let ResourceCycles = [1,1,6];
 | 
			
		||||
}
 | 
			
		||||
def: InstRW<[SKXWriteResGroup226], (instregex "(V?)SQRTSD(Z?)m")>;
 | 
			
		||||
 | 
			
		||||
def SKXWriteResGroup227 : SchedWriteRes<[SKXPort0,SKXPort5,SKXFPDivider]> {
 | 
			
		||||
  let Latency = 23;
 | 
			
		||||
  let NumMicroOps = 3;
 | 
			
		||||
| 
						 | 
				
			
			@ -3315,13 +3259,6 @@ def SKXWriteResGroup228 : SchedWriteRes<[SKXPort0,SKXPort4,SKXPort5,SKXPort23,SK
 | 
			
		|||
}
 | 
			
		||||
def: InstRW<[SKXWriteResGroup228], (instregex "CMPXCHG16B")>;
 | 
			
		||||
 | 
			
		||||
def SKXWriteResGroup229 : SchedWriteRes<[SKXPort0,SKXPort23,SKXFPDivider]> {
 | 
			
		||||
  let Latency = 24;
 | 
			
		||||
  let NumMicroOps = 2;
 | 
			
		||||
  let ResourceCycles = [1,1,6];
 | 
			
		||||
}
 | 
			
		||||
def: InstRW<[SKXWriteResGroup229], (instregex "(V?)SQRTPD(Z128)?m")>;
 | 
			
		||||
 | 
			
		||||
def SKXWriteResGroup230 : SchedWriteRes<[SKXPort0,SKXPort23,SKXPort5,SKXFPDivider]> {
 | 
			
		||||
  let Latency = 25;
 | 
			
		||||
  let NumMicroOps = 4;
 | 
			
		||||
| 
						 | 
				
			
			@ -3329,13 +3266,6 @@ def SKXWriteResGroup230 : SchedWriteRes<[SKXPort0,SKXPort23,SKXPort5,SKXFPDivide
 | 
			
		|||
}
 | 
			
		||||
def: InstRW<[SKXWriteResGroup230], (instregex "VDIVPSZrm(b?)")>;
 | 
			
		||||
 | 
			
		||||
def SKXWriteResGroup232 : SchedWriteRes<[SKXPort0,SKXPort23,SKXFPDivider]> {
 | 
			
		||||
  let Latency = 25;
 | 
			
		||||
  let NumMicroOps = 2;
 | 
			
		||||
  let ResourceCycles = [1,1,12];
 | 
			
		||||
}
 | 
			
		||||
def: InstRW<[SKXWriteResGroup232], (instregex "VSQRTPD(Y|Z256)m")>;
 | 
			
		||||
 | 
			
		||||
def SKXWriteResGroup233 : SchedWriteRes<[SKXPort0,SKXPort5,SKXPort23]> {
 | 
			
		||||
  let Latency = 25;
 | 
			
		||||
  let NumMicroOps = 3;
 | 
			
		||||
| 
						 | 
				
			
			@ -3354,13 +3284,6 @@ def: InstRW<[SKXWriteResGroup234], (instrs VGATHERDPDZ256rm,
 | 
			
		|||
                                           VPGATHERQDZrm,
 | 
			
		||||
                                           VPGATHERQQZ256rm)>;
 | 
			
		||||
 | 
			
		||||
def SKXWriteResGroup237 : SchedWriteRes<[SKXPort0,SKXPort23,SKXPort5,SKXFPDivider]> {
 | 
			
		||||
  let Latency = 27;
 | 
			
		||||
  let NumMicroOps = 4;
 | 
			
		||||
  let ResourceCycles = [2,1,1,12];
 | 
			
		||||
}
 | 
			
		||||
def: InstRW<[SKXWriteResGroup237], (instregex "VSQRTPSZm(b?)")>;
 | 
			
		||||
 | 
			
		||||
def SKXWriteResGroup238 : SchedWriteRes<[SKXPort0,SKXPort23,SKXPort015,SKXPort0156]> {
 | 
			
		||||
  let Latency = 26;
 | 
			
		||||
  let NumMicroOps = 5;
 | 
			
		||||
| 
						 | 
				
			
			@ -3422,13 +3345,6 @@ def SKXWriteResGroup245 : SchedWriteRes<[SKXPort0,SKXPort23,SKXPort015,SKXPort01
 | 
			
		|||
def: InstRW<[SKXWriteResGroup245], (instrs VGATHERDPSZrm,
 | 
			
		||||
                                           VPGATHERDDZrm)>;
 | 
			
		||||
 | 
			
		||||
def SKXWriteResGroup246 : SchedWriteRes<[SKXPort0,SKXPort5,SKXFPDivider]> {
 | 
			
		||||
  let Latency = 32;
 | 
			
		||||
  let NumMicroOps = 3;
 | 
			
		||||
  let ResourceCycles = [2,1,24];
 | 
			
		||||
}
 | 
			
		||||
def: InstRW<[SKXWriteResGroup246], (instregex "VSQRTPDZr")>;
 | 
			
		||||
 | 
			
		||||
def SKXWriteResGroup247 : SchedWriteRes<[SKXPort5,SKXPort6,SKXPort23,SKXPort06,SKXPort0156]> {
 | 
			
		||||
  let Latency = 35;
 | 
			
		||||
  let NumMicroOps = 23;
 | 
			
		||||
| 
						 | 
				
			
			@ -3460,13 +3376,6 @@ def SKXWriteResGroup250 : SchedWriteRes<[SKXPort1,SKXPort6,SKXPort23,SKXPort0156
 | 
			
		|||
}
 | 
			
		||||
def: InstRW<[SKXWriteResGroup250], (instregex "XRSTOR(64)?")>;
 | 
			
		||||
 | 
			
		||||
def SKXWriteResGroup251 : SchedWriteRes<[SKXPort0,SKXPort23,SKXPort5,SKXFPDivider]> {
 | 
			
		||||
  let Latency = 39;
 | 
			
		||||
  let NumMicroOps = 4;
 | 
			
		||||
  let ResourceCycles = [2,1,1,24];
 | 
			
		||||
}
 | 
			
		||||
def: InstRW<[SKXWriteResGroup251], (instregex "VSQRTPDZm(b?)")>;
 | 
			
		||||
 | 
			
		||||
def SKXWriteResGroup252 : SchedWriteRes<[SKXPort1,SKXPort4,SKXPort5,SKXPort6,SKXPort23,SKXPort237,SKXPort15,SKXPort0156]> {
 | 
			
		||||
  let Latency = 40;
 | 
			
		||||
  let NumMicroOps = 18;
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -61,6 +61,13 @@ class X86SchedWriteWidths<X86FoldableSchedWrite sScl,
 | 
			
		|||
  X86FoldableSchedWrite ZMM = s512; // ZMM operations.
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
// Class that wraps X86SchedWriteWidths for each fp vector type.
 | 
			
		||||
class X86SchedWriteSizes<X86SchedWriteWidths sPS,
 | 
			
		||||
                         X86SchedWriteWidths sPD> {
 | 
			
		||||
  X86SchedWriteWidths PS = sPS;
 | 
			
		||||
  X86SchedWriteWidths PD = sPD;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
// Loads, stores, and moves, not folded with other operations.
 | 
			
		||||
def WriteLoad  : SchedWrite;
 | 
			
		||||
def WriteStore : SchedWrite;
 | 
			
		||||
| 
						 | 
				
			
			@ -111,10 +118,19 @@ defm WriteFMulY  : X86SchedWritePair; // Floating point multiplication (YMM/ZMM)
 | 
			
		|||
defm WriteFDiv   : X86SchedWritePair; // Floating point division.
 | 
			
		||||
defm WriteFDivY  : X86SchedWritePair; // Floating point division (YMM/ZMM).
 | 
			
		||||
defm WriteFSqrt  : X86SchedWritePair; // Floating point square root.
 | 
			
		||||
defm WriteFSqrtY : X86SchedWritePair; // Floating point square root (YMM/ZMM).
 | 
			
		||||
defm WriteFSqrtX : X86SchedWritePair; // Floating point square root (XMM).
 | 
			
		||||
defm WriteFSqrtY : X86SchedWritePair; // Floating point square root (YMM).
 | 
			
		||||
defm WriteFSqrtZ : X86SchedWritePair; // Floating point square root (ZMM).
 | 
			
		||||
defm WriteFSqrt64  : X86SchedWritePair; // Floating point double square root.
 | 
			
		||||
defm WriteFSqrt64X : X86SchedWritePair; // Floating point double square root (XMM).
 | 
			
		||||
defm WriteFSqrt64Y : X86SchedWritePair; // Floating point double square root (YMM).
 | 
			
		||||
defm WriteFSqrt64Z : X86SchedWritePair; // Floating point double square root (ZMM).
 | 
			
		||||
defm WriteFSqrt80  : X86SchedWritePair; // Floating point long double square root.
 | 
			
		||||
defm WriteFRcp   : X86SchedWritePair; // Floating point reciprocal estimate.
 | 
			
		||||
defm WriteFRcpX  : X86SchedWritePair; // Floating point reciprocal estimate (XMM).
 | 
			
		||||
defm WriteFRcpY  : X86SchedWritePair; // Floating point reciprocal estimate (YMM/ZMM).
 | 
			
		||||
defm WriteFRsqrt : X86SchedWritePair; // Floating point reciprocal square root estimate.
 | 
			
		||||
defm WriteFRsqrtX: X86SchedWritePair; // Floating point reciprocal square root estimate (XMM).
 | 
			
		||||
defm WriteFRsqrtY: X86SchedWritePair; // Floating point reciprocal square root estimate (YMM/ZMM).
 | 
			
		||||
defm WriteFMA    : X86SchedWritePair; // Fused Multiply Add.
 | 
			
		||||
defm WriteFMAX   : X86SchedWritePair; // Fused Multiply Add (XMM).
 | 
			
		||||
| 
						 | 
				
			
			@ -261,11 +277,15 @@ def SchedWriteDPPS
 | 
			
		|||
def SchedWriteFDiv
 | 
			
		||||
 : X86SchedWriteWidths<WriteFDiv, WriteFDiv, WriteFDivY, WriteFDivY>;
 | 
			
		||||
def SchedWriteFSqrt
 | 
			
		||||
 : X86SchedWriteWidths<WriteFSqrt, WriteFSqrt, WriteFSqrtY, WriteFSqrtY>;
 | 
			
		||||
 : X86SchedWriteWidths<WriteFSqrt, WriteFSqrtX,
 | 
			
		||||
                       WriteFSqrtY, WriteFSqrtZ>;
 | 
			
		||||
def SchedWriteFSqrt64
 | 
			
		||||
 : X86SchedWriteWidths<WriteFSqrt64, WriteFSqrt64X,
 | 
			
		||||
                       WriteFSqrt64Y, WriteFSqrt64Z>;
 | 
			
		||||
def SchedWriteFRcp
 | 
			
		||||
 : X86SchedWriteWidths<WriteFRcp, WriteFRcp, WriteFRcpY, WriteFRcpY>;
 | 
			
		||||
 : X86SchedWriteWidths<WriteFRcp, WriteFRcpX, WriteFRcpY, WriteFRcpY>;
 | 
			
		||||
def SchedWriteFRsqrt
 | 
			
		||||
 : X86SchedWriteWidths<WriteFRsqrt, WriteFRsqrt, WriteFRsqrtY, WriteFRsqrtY>;
 | 
			
		||||
 : X86SchedWriteWidths<WriteFRsqrt, WriteFRsqrtX, WriteFRsqrtY, WriteFRsqrtY>;
 | 
			
		||||
def SchedWriteFRnd
 | 
			
		||||
 : X86SchedWriteWidths<WriteFRnd, WriteFRnd, WriteFRndY, WriteFRndY>;
 | 
			
		||||
def SchedWriteFLogic
 | 
			
		||||
| 
						 | 
				
			
			@ -324,6 +344,16 @@ def SchedWriteVarBlend
 | 
			
		|||
 : X86SchedWriteWidths<WriteVarBlend, WriteVarBlend,
 | 
			
		||||
                       WriteVarBlendY, WriteVarBlendY>;
 | 
			
		||||
 | 
			
		||||
// Vector size wrappers.
 | 
			
		||||
def SchedWriteFAddSizes
 | 
			
		||||
 : X86SchedWriteSizes<SchedWriteFAdd, SchedWriteFAdd>;
 | 
			
		||||
def SchedWriteFMulSizes
 | 
			
		||||
 : X86SchedWriteSizes<SchedWriteFMul, SchedWriteFMul>;
 | 
			
		||||
def SchedWriteFDivSizes
 | 
			
		||||
 : X86SchedWriteSizes<SchedWriteFDiv, SchedWriteFDiv>;
 | 
			
		||||
def SchedWriteFSqrtSizes
 | 
			
		||||
 : X86SchedWriteSizes<SchedWriteFSqrt, SchedWriteFSqrt64>;
 | 
			
		||||
 | 
			
		||||
//===----------------------------------------------------------------------===//
 | 
			
		||||
// Generic Processor Scheduler Models.
 | 
			
		||||
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -211,13 +211,22 @@ defm : AtomWriteResPair<WriteFCom,           [AtomPort0],  [AtomPort0],  5,  5,
 | 
			
		|||
defm : AtomWriteResPair<WriteFMul,           [AtomPort0],  [AtomPort0],  4,  4,  [4],  [4]>;
 | 
			
		||||
defm : AtomWriteResPair<WriteFMulY,          [AtomPort0],  [AtomPort0],  4,  4,  [4],  [4]>;
 | 
			
		||||
defm : AtomWriteResPair<WriteFRcp,           [AtomPort0],  [AtomPort0],  4,  4,  [4],  [4]>;
 | 
			
		||||
defm : AtomWriteResPair<WriteFRcpY,          [AtomPort0],  [AtomPort0],  4,  4,  [4],  [4]>;
 | 
			
		||||
defm : AtomWriteResPair<WriteFRcpX,         [AtomPort01], [AtomPort01],  9, 10,  [9], [10]>;
 | 
			
		||||
defm : AtomWriteResPair<WriteFRcpY,         [AtomPort01], [AtomPort01],  9, 10,  [9], [10]>;
 | 
			
		||||
defm : AtomWriteResPair<WriteFRsqrt,         [AtomPort0],  [AtomPort0],  4,  4,  [4],  [4]>;
 | 
			
		||||
defm : AtomWriteResPair<WriteFRsqrtY,        [AtomPort0],  [AtomPort0],  4,  4,  [4],  [4]>;
 | 
			
		||||
defm : AtomWriteResPair<WriteFRsqrtX,       [AtomPort01], [AtomPort01],  9, 10,  [9], [10]>;
 | 
			
		||||
defm : AtomWriteResPair<WriteFRsqrtY,       [AtomPort01], [AtomPort01],  9, 10,  [9], [10]>;
 | 
			
		||||
defm : AtomWriteResPair<WriteFDiv,          [AtomPort01], [AtomPort01], 34, 34, [34], [34]>;
 | 
			
		||||
defm : AtomWriteResPair<WriteFDivY,         [AtomPort01], [AtomPort01], 34, 34, [34], [34]>;
 | 
			
		||||
defm : AtomWriteResPair<WriteFSqrt,         [AtomPort01], [AtomPort01], 34, 34, [34], [34]>;
 | 
			
		||||
defm : AtomWriteResPair<WriteFSqrtY,        [AtomPort01], [AtomPort01], 34, 34, [34], [34]>;
 | 
			
		||||
defm : AtomWriteResPair<WriteFSqrtX,        [AtomPort01], [AtomPort01], 70, 70, [70], [70]>;
 | 
			
		||||
defm : AtomWriteResPair<WriteFSqrtY,        [AtomPort01], [AtomPort01], 70, 70, [70], [70]>;
 | 
			
		||||
defm : AtomWriteResPair<WriteFSqrtZ,        [AtomPort01], [AtomPort01], 70, 70, [70], [70]>;
 | 
			
		||||
defm : AtomWriteResPair<WriteFSqrt64,       [AtomPort01], [AtomPort01], 62, 62, [62], [62]>;
 | 
			
		||||
defm : AtomWriteResPair<WriteFSqrt64X,      [AtomPort01], [AtomPort01],125,125,[125],[125]>;
 | 
			
		||||
defm : AtomWriteResPair<WriteFSqrt64Y,      [AtomPort01], [AtomPort01],125,125,[125],[125]>;
 | 
			
		||||
defm : AtomWriteResPair<WriteFSqrt64Z,      [AtomPort01], [AtomPort01],125,125,[125],[125]>;
 | 
			
		||||
defm : AtomWriteResPair<WriteFSqrt80,       [AtomPort01], [AtomPort01], 71, 71, [71], [71]>;
 | 
			
		||||
defm : AtomWriteResPair<WriteFSign,          [AtomPort1],  [AtomPort1]>;
 | 
			
		||||
defm : AtomWriteResPair<WriteFRnd,           [AtomPort0],  [AtomPort0],  5,  5,  [5],  [5]>;
 | 
			
		||||
defm : AtomWriteResPair<WriteFRndY,          [AtomPort0],  [AtomPort0],  5,  5,  [5],  [5]>;
 | 
			
		||||
| 
						 | 
				
			
			@ -557,7 +566,7 @@ def : InstRW<[AtomWrite01_9], (instrs BT16mr, BT32mr, BT64mr,
 | 
			
		|||
                                      SHLD64mri8, SHRD64mri8,
 | 
			
		||||
                                      SHLD64rri8, SHRD64rri8,
 | 
			
		||||
                                      CMPXCHG8rr,
 | 
			
		||||
                                      MULPDrr, RCPPSr, RSQRTPSr)>;
 | 
			
		||||
                                      MULPDrr)>;
 | 
			
		||||
def : InstRW<[AtomWrite01_9], (instregex "CMOV(B|BE|E|P|NB|NBE|NE|NP)_F",
 | 
			
		||||
                                         "(U)?COM_FI", "TST_F",
 | 
			
		||||
                                         "(U)?COMIS(D|S)rr",
 | 
			
		||||
| 
						 | 
				
			
			@ -568,7 +577,7 @@ def AtomWrite01_10 : SchedWriteRes<[AtomPort01]> {
 | 
			
		|||
  let ResourceCycles = [10];
 | 
			
		||||
}
 | 
			
		||||
def : InstRW<[AtomWrite01_10], (instrs FLDL2E, FLDL2T, FLDLG2, FLDLN2, FLDPI,
 | 
			
		||||
                                       MULPDrm, RCPPSm, RSQRTPSm)>;
 | 
			
		||||
                                       MULPDrm)>;
 | 
			
		||||
def : InstRW<[AtomWrite01_10], (instregex "(U)?COMIS(D|S)rm",
 | 
			
		||||
                                          "CVT(T)?SS2SI64rm(_Int)?")>;
 | 
			
		||||
 | 
			
		||||
| 
						 | 
				
			
			@ -697,8 +706,7 @@ def AtomWrite01_62 : SchedWriteRes<[AtomPort01]> {
 | 
			
		|||
  let Latency = 62;
 | 
			
		||||
  let ResourceCycles = [62];
 | 
			
		||||
}
 | 
			
		||||
def : InstRW<[AtomWrite01_62], (instregex "DIVSD(r|m)(_Int)?",
 | 
			
		||||
                                          "SQRTSD(r|m)(_Int)?")>;
 | 
			
		||||
def : InstRW<[AtomWrite01_62], (instregex "DIVSD(r|m)(_Int)?")>;
 | 
			
		||||
 | 
			
		||||
def AtomWrite01_63 : SchedWriteRes<[AtomPort01]> {
 | 
			
		||||
  let Latency = 63;
 | 
			
		||||
| 
						 | 
				
			
			@ -716,7 +724,7 @@ def AtomWrite01_70 : SchedWriteRes<[AtomPort01]> {
 | 
			
		|||
  let Latency = 70;
 | 
			
		||||
  let ResourceCycles = [70];
 | 
			
		||||
}
 | 
			
		||||
def : InstRW<[AtomWrite01_70], (instrs DIVPSrr, DIVPSrm, SQRTPSr, SQRTPSm)>;
 | 
			
		||||
def : InstRW<[AtomWrite01_70], (instrs DIVPSrr, DIVPSrm)>;
 | 
			
		||||
 | 
			
		||||
def AtomWrite01_71 : SchedWriteRes<[AtomPort01]> {
 | 
			
		||||
  let Latency = 71;
 | 
			
		||||
| 
						 | 
				
			
			@ -724,7 +732,6 @@ def AtomWrite01_71 : SchedWriteRes<[AtomPort01]> {
 | 
			
		|||
}
 | 
			
		||||
def : InstRW<[AtomWrite01_71], (instrs FPREM1,
 | 
			
		||||
                                       INVLPG, INVLPGA32, INVLPGA64)>;
 | 
			
		||||
def : InstRW<[AtomWrite01_71], (instregex "SQRT_F")>;
 | 
			
		||||
 | 
			
		||||
def AtomWrite01_72 : SchedWriteRes<[AtomPort01]> {
 | 
			
		||||
  let Latency = 72;
 | 
			
		||||
| 
						 | 
				
			
			@ -785,7 +792,7 @@ def AtomWrite01_125 : SchedWriteRes<[AtomPort01]> {
 | 
			
		|||
  let Latency = 125;
 | 
			
		||||
  let ResourceCycles = [125];
 | 
			
		||||
}
 | 
			
		||||
def : InstRW<[AtomWrite01_125], (instrs DIVPDrr, DIVPDrm, SQRTPDr, SQRTPDm)>;
 | 
			
		||||
def : InstRW<[AtomWrite01_125], (instrs DIVPDrr, DIVPDrm)>;
 | 
			
		||||
 | 
			
		||||
def AtomWrite01_127 : SchedWriteRes<[AtomPort01]> {
 | 
			
		||||
  let Latency = 127;
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -330,13 +330,22 @@ defm : JWriteResFpuPair<WriteDPPD,   [JFPU1, JFPM, JFPA],  9, [1, 3, 3],  3>;
 | 
			
		|||
defm : JWriteResFpuPair<WriteDPPS,   [JFPU1, JFPM, JFPA], 11, [1, 3, 3],  5>;
 | 
			
		||||
defm : JWriteResYMMPair<WriteDPPSY,  [JFPU1, JFPM, JFPA], 12, [2, 6, 6], 10>;
 | 
			
		||||
defm : JWriteResFpuPair<WriteFRcp,         [JFPU1, JFPM],  2>;
 | 
			
		||||
defm : JWriteResFpuPair<WriteFRcpX,        [JFPU1, JFPM],  2>;
 | 
			
		||||
defm : JWriteResYMMPair<WriteFRcpY,        [JFPU1, JFPM],  2, [2,2], 2>;
 | 
			
		||||
defm : JWriteResFpuPair<WriteFRsqrt,       [JFPU1, JFPM],  2>;
 | 
			
		||||
defm : JWriteResFpuPair<WriteFRsqrtX,      [JFPU1, JFPM],  2>;
 | 
			
		||||
defm : JWriteResYMMPair<WriteFRsqrtY,      [JFPU1, JFPM],  2, [2,2], 2>;
 | 
			
		||||
defm : JWriteResFpuPair<WriteFDiv,         [JFPU1, JFPM], 19, [1, 19]>;
 | 
			
		||||
defm : JWriteResYMMPair<WriteFDivY,        [JFPU1, JFPM], 38, [2, 38], 2>;
 | 
			
		||||
defm : JWriteResFpuPair<WriteFSqrt,        [JFPU1, JFPM], 21, [1, 21]>;
 | 
			
		||||
defm : JWriteResFpuPair<WriteFSqrtX,       [JFPU1, JFPM], 21, [1, 21]>;
 | 
			
		||||
defm : JWriteResYMMPair<WriteFSqrtY,       [JFPU1, JFPM], 42, [2, 42], 2>;
 | 
			
		||||
defm : JWriteResYMMPair<WriteFSqrtZ,       [JFPU1, JFPM], 42, [2, 42], 2>;
 | 
			
		||||
defm : JWriteResFpuPair<WriteFSqrt64,      [JFPU1, JFPM], 27, [1, 27]>;
 | 
			
		||||
defm : JWriteResFpuPair<WriteFSqrt64X,     [JFPU1, JFPM], 27, [1, 27]>;
 | 
			
		||||
defm : JWriteResYMMPair<WriteFSqrt64Y,     [JFPU1, JFPM], 54, [2, 54], 2>;
 | 
			
		||||
defm : JWriteResYMMPair<WriteFSqrt64Z,     [JFPU1, JFPM], 54, [2, 54], 2>;
 | 
			
		||||
defm : JWriteResFpuPair<WriteFSqrt80,      [JFPU1, JFPM], 35, [1, 35]>;
 | 
			
		||||
defm : JWriteResFpuPair<WriteFSign,        [JFPU1, JFPM],  2>;
 | 
			
		||||
defm : JWriteResFpuPair<WriteFRnd,         [JFPU1, JSTC],  3>;
 | 
			
		||||
defm : JWriteResYMMPair<WriteFRndY,        [JFPU1, JSTC],  3, [2,2], 2>;
 | 
			
		||||
| 
						 | 
				
			
			@ -667,36 +676,6 @@ def JWriteVTESTLd: SchedWriteRes<[JLAGU, JFPU0, JFPA, JALU0]> {
 | 
			
		|||
}
 | 
			
		||||
def : InstRW<[JWriteVTESTLd], (instrs PTESTrm, VPTESTrm, VTESTPDrm, VTESTPSrm)>;
 | 
			
		||||
 | 
			
		||||
def JWriteVSQRTPD: SchedWriteRes<[JFPU1, JFPM]> {
 | 
			
		||||
  let Latency = 27;
 | 
			
		||||
  let ResourceCycles = [1, 27];
 | 
			
		||||
}
 | 
			
		||||
def : InstRW<[JWriteVSQRTPD], (instrs SQRTPDr, VSQRTPDr,
 | 
			
		||||
                                      SQRTSDr, VSQRTSDr,
 | 
			
		||||
                                      SQRTSDr_Int, VSQRTSDr_Int)>;
 | 
			
		||||
 | 
			
		||||
def JWriteVSQRTPDLd: SchedWriteRes<[JLAGU, JFPU1, JFPM]> {
 | 
			
		||||
  let Latency = 32;
 | 
			
		||||
  let ResourceCycles = [1, 1, 27];
 | 
			
		||||
}
 | 
			
		||||
def : InstRW<[JWriteVSQRTPDLd], (instrs SQRTPDm, VSQRTPDm,
 | 
			
		||||
                                        SQRTSDm, VSQRTSDm,
 | 
			
		||||
                                        SQRTSDm_Int, VSQRTSDm_Int)>;
 | 
			
		||||
 | 
			
		||||
def JWriteVSQRTYPD: SchedWriteRes<[JFPU1, JFPM]> {
 | 
			
		||||
  let Latency = 54; // each uOp is 27cy.
 | 
			
		||||
  let ResourceCycles = [2, 54];
 | 
			
		||||
  let NumMicroOps = 2;
 | 
			
		||||
}
 | 
			
		||||
def : InstRW<[JWriteVSQRTYPD], (instrs VSQRTPDYr)>;
 | 
			
		||||
 | 
			
		||||
def JWriteVSQRTYPDLd: SchedWriteRes<[JLAGU, JFPU1, JFPM]> {
 | 
			
		||||
  let Latency = 59; // each uOp is 27cy (+5cy of memory load).
 | 
			
		||||
  let ResourceCycles = [2, 2, 54];
 | 
			
		||||
  let NumMicroOps = 2;
 | 
			
		||||
}
 | 
			
		||||
def : InstRW<[JWriteVSQRTYPDLd], (instrs VSQRTPDYm)>;
 | 
			
		||||
 | 
			
		||||
def JWriteJVZEROALL: SchedWriteRes<[]> {
 | 
			
		||||
  let Latency = 90;
 | 
			
		||||
  let NumMicroOps = 73;
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -140,11 +140,20 @@ defm : SLMWriteResPair<WriteFMulY,  [SLM_FPC_RSV0, SLMFPMultiplier], 5, [1,2]>;
 | 
			
		|||
defm : SLMWriteResPair<WriteFDiv,   [SLM_FPC_RSV0, SLMFPDivider], 34, [1,34]>;
 | 
			
		||||
defm : SLMWriteResPair<WriteFDivY,  [SLM_FPC_RSV0, SLMFPDivider], 34, [1,34]>;
 | 
			
		||||
defm : SLMWriteResPair<WriteFRcp,     [SLM_FPC_RSV0], 5>;
 | 
			
		||||
defm : SLMWriteResPair<WriteFRcpX,    [SLM_FPC_RSV0], 5>;
 | 
			
		||||
defm : SLMWriteResPair<WriteFRcpY,    [SLM_FPC_RSV0], 5>;
 | 
			
		||||
defm : SLMWriteResPair<WriteFRsqrt,   [SLM_FPC_RSV0], 5>;
 | 
			
		||||
defm : SLMWriteResPair<WriteFRsqrtX,  [SLM_FPC_RSV0], 5>;
 | 
			
		||||
defm : SLMWriteResPair<WriteFRsqrtY,  [SLM_FPC_RSV0], 5>;
 | 
			
		||||
defm : SLMWriteResPair<WriteFSqrt,  [SLM_FPC_RSV0], 15>;
 | 
			
		||||
defm : SLMWriteResPair<WriteFSqrtY, [SLM_FPC_RSV0], 15>;
 | 
			
		||||
defm : SLMWriteResPair<WriteFSqrt,    [SLM_FPC_RSV0,SLMFPDivider], 20, [1,20], 1, 3>;
 | 
			
		||||
defm : SLMWriteResPair<WriteFSqrtX,   [SLM_FPC_RSV0,SLMFPDivider], 41, [1,40], 1, 3>;
 | 
			
		||||
defm : SLMWriteResPair<WriteFSqrtY,   [SLM_FPC_RSV0,SLMFPDivider], 41, [1,40], 1, 3>;
 | 
			
		||||
defm : SLMWriteResPair<WriteFSqrtZ,   [SLM_FPC_RSV0,SLMFPDivider], 41, [1,40], 1, 3>;
 | 
			
		||||
defm : SLMWriteResPair<WriteFSqrt64,  [SLM_FPC_RSV0,SLMFPDivider], 35, [1,35], 1, 3>;
 | 
			
		||||
defm : SLMWriteResPair<WriteFSqrt64X, [SLM_FPC_RSV0,SLMFPDivider], 71, [1,70], 1, 3>;
 | 
			
		||||
defm : SLMWriteResPair<WriteFSqrt64Y, [SLM_FPC_RSV0,SLMFPDivider], 71, [1,70], 1, 3>;
 | 
			
		||||
defm : SLMWriteResPair<WriteFSqrt64Z, [SLM_FPC_RSV0,SLMFPDivider], 71, [1,70], 1, 3>;
 | 
			
		||||
defm : SLMWriteResPair<WriteFSqrt80,  [SLM_FPC_RSV0,SLMFPDivider], 40, [1,40]>;
 | 
			
		||||
defm : SLMWriteResPair<WriteDPPD,   [SLM_FPC_RSV1], 3>;
 | 
			
		||||
defm : SLMWriteResPair<WriteDPPS,   [SLM_FPC_RSV1], 3>;
 | 
			
		||||
defm : SLMWriteResPair<WriteDPPSY,  [SLM_FPC_RSV1], 3>;
 | 
			
		||||
| 
						 | 
				
			
			@ -382,60 +391,4 @@ def SLMriteResGroup8 : SchedWriteRes<[SLM_MEC_RSV,SLM_FPC_RSV0,SLMFPDivider]> {
 | 
			
		|||
}
 | 
			
		||||
def: InstRW<[SLMriteResGroup8], (instregex "(V?)DIVSSrm")>;
 | 
			
		||||
 | 
			
		||||
def SLMriteResGroup9 : SchedWriteRes<[SLM_FPC_RSV0,SLMFPDivider]> {
 | 
			
		||||
  let Latency = 71;
 | 
			
		||||
  let NumMicroOps = 1;
 | 
			
		||||
  let ResourceCycles = [1,70];
 | 
			
		||||
}
 | 
			
		||||
def: InstRW<[SLMriteResGroup9], (instregex "(V?)SQRTPDr")>;
 | 
			
		||||
 | 
			
		||||
def SLMriteResGroup10 : SchedWriteRes<[SLM_FPC_RSV0,SLMFPDivider]> {
 | 
			
		||||
  let Latency = 41;
 | 
			
		||||
  let NumMicroOps = 1;
 | 
			
		||||
  let ResourceCycles = [1,40];
 | 
			
		||||
}
 | 
			
		||||
def: InstRW<[SLMriteResGroup10], (instregex "(V?)SQRTPSr")>;
 | 
			
		||||
 | 
			
		||||
def SLMriteResGroup11 : SchedWriteRes<[SLM_FPC_RSV0,SLMFPDivider]> {
 | 
			
		||||
  let Latency = 35;
 | 
			
		||||
  let NumMicroOps = 1;
 | 
			
		||||
  let ResourceCycles = [1,35];
 | 
			
		||||
}
 | 
			
		||||
def: InstRW<[SLMriteResGroup11], (instregex "(V?)SQRTSDr")>;
 | 
			
		||||
 | 
			
		||||
def SLMriteResGroup12 : SchedWriteRes<[SLM_FPC_RSV0,SLMFPDivider]> {
 | 
			
		||||
  let Latency = 20;
 | 
			
		||||
  let NumMicroOps = 1;
 | 
			
		||||
  let ResourceCycles = [1,20];
 | 
			
		||||
}
 | 
			
		||||
def: InstRW<[SLMriteResGroup12], (instregex "(V?)SQRTSSr")>;
 | 
			
		||||
 | 
			
		||||
def SLMriteResGroup13 : SchedWriteRes<[SLM_MEC_RSV,SLM_FPC_RSV0,SLMFPDivider]> {
 | 
			
		||||
  let Latency = 74;
 | 
			
		||||
  let NumMicroOps = 1;
 | 
			
		||||
  let ResourceCycles = [1,1,70];
 | 
			
		||||
}
 | 
			
		||||
def: InstRW<[SLMriteResGroup13], (instregex "(V?)SQRTPDm")>;
 | 
			
		||||
 | 
			
		||||
def SLMriteResGroup14 : SchedWriteRes<[SLM_MEC_RSV,SLM_FPC_RSV0,SLMFPDivider]> {
 | 
			
		||||
  let Latency = 44;
 | 
			
		||||
  let NumMicroOps = 1;
 | 
			
		||||
  let ResourceCycles = [1,1,40];
 | 
			
		||||
}
 | 
			
		||||
def: InstRW<[SLMriteResGroup14], (instregex "(V?)SQRTPSm")>;
 | 
			
		||||
 | 
			
		||||
def SLMriteResGroup15 : SchedWriteRes<[SLM_MEC_RSV,SLM_FPC_RSV0,SLMFPDivider]> {
 | 
			
		||||
  let Latency = 38;
 | 
			
		||||
  let NumMicroOps = 1;
 | 
			
		||||
  let ResourceCycles = [1,1,35];
 | 
			
		||||
}
 | 
			
		||||
def: InstRW<[SLMriteResGroup15], (instregex "(V?)SQRTSDm")>;
 | 
			
		||||
 | 
			
		||||
def SLMriteResGroup16 : SchedWriteRes<[SLM_MEC_RSV,SLM_FPC_RSV0,SLMFPDivider]> {
 | 
			
		||||
  let Latency = 23;
 | 
			
		||||
  let NumMicroOps = 1;
 | 
			
		||||
  let ResourceCycles = [1,1,20];
 | 
			
		||||
}
 | 
			
		||||
def: InstRW<[SLMriteResGroup16], (instregex "(V?)SQRTSSm")>;
 | 
			
		||||
 | 
			
		||||
} // SchedModel
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -223,11 +223,20 @@ defm : ZnWriteResFpuPair<WriteFMA,       [ZnFPU03], 5>;
 | 
			
		|||
defm : ZnWriteResFpuPair<WriteFMAX,      [ZnFPU03], 5>;
 | 
			
		||||
defm : ZnWriteResFpuPair<WriteFMAY,      [ZnFPU03], 5>;
 | 
			
		||||
defm : ZnWriteResFpuPair<WriteFRcp,      [ZnFPU01], 5>;
 | 
			
		||||
defm : ZnWriteResFpuPair<WriteFRcpY,     [ZnFPU01], 5>;
 | 
			
		||||
defm : ZnWriteResFpuPair<WriteFRsqrt,    [ZnFPU01], 5>;
 | 
			
		||||
defm : ZnWriteResFpuPair<WriteFRsqrtY,   [ZnFPU01], 5>;
 | 
			
		||||
defm : ZnWriteResFpuPair<WriteFSqrt,     [ZnFPU3], 20>;
 | 
			
		||||
defm : ZnWriteResFpuPair<WriteFSqrtY,    [ZnFPU3], 20>;
 | 
			
		||||
defm : ZnWriteResFpuPair<WriteFRcpX,     [ZnFPU01], 5>;
 | 
			
		||||
//defm : ZnWriteResFpuPair<WriteFRcpY,     [ZnFPU01], 5, [1], 1, 7, 1>;
 | 
			
		||||
//defm : ZnWriteResFpuPair<WriteFRsqrt,    [ZnFPU02], 5>;
 | 
			
		||||
defm : ZnWriteResFpuPair<WriteFRsqrtX,   [ZnFPU01], 5, [1], 1, 7, 1>;
 | 
			
		||||
//defm : ZnWriteResFpuPair<WriteFRsqrtY,   [ZnFPU01], 5, [2], 2>;
 | 
			
		||||
defm : ZnWriteResFpuPair<WriteFSqrt,     [ZnFPU3], 20, [20]>;
 | 
			
		||||
defm : ZnWriteResFpuPair<WriteFSqrtX,    [ZnFPU3], 20, [20]>;
 | 
			
		||||
defm : ZnWriteResFpuPair<WriteFSqrtY,    [ZnFPU3], 28, [28], 1, 7, 1>;
 | 
			
		||||
defm : ZnWriteResFpuPair<WriteFSqrtZ,    [ZnFPU3], 28, [28], 1, 7, 1>;
 | 
			
		||||
defm : ZnWriteResFpuPair<WriteFSqrt64,   [ZnFPU3], 20, [20]>;
 | 
			
		||||
defm : ZnWriteResFpuPair<WriteFSqrt64X,  [ZnFPU3], 20, [20]>;
 | 
			
		||||
defm : ZnWriteResFpuPair<WriteFSqrt64Y,  [ZnFPU3], 40, [40], 1, 7, 1>;
 | 
			
		||||
defm : ZnWriteResFpuPair<WriteFSqrt64Z,  [ZnFPU3], 40, [40], 1, 7, 1>;
 | 
			
		||||
defm : ZnWriteResFpuPair<WriteFSqrt80,   [ZnFPU3], 20, [20]>;
 | 
			
		||||
def  : WriteRes<WriteCvtF2FSt, [ZnFPU3, ZnAGU]>;
 | 
			
		||||
 | 
			
		||||
// Vector integer operations which use FPU units
 | 
			
		||||
| 
						 | 
				
			
			@ -1504,18 +1513,19 @@ def ZnWriteVDIVPDYLd : SchedWriteRes<[ZnAGU, ZnFPU3]> {
 | 
			
		|||
def : InstRW<[ZnWriteVDIVPDYLd], (instregex "VDIVPDYrm")>;
 | 
			
		||||
 | 
			
		||||
// VRCPPS.
 | 
			
		||||
// TODO - convert to ZnWriteResFpuPair
 | 
			
		||||
// y,y.
 | 
			
		||||
def ZnWriteVRCPPSr : SchedWriteRes<[ZnFPU01]> {
 | 
			
		||||
def ZnWriteVRCPPSYr : SchedWriteRes<[ZnFPU01]> {
 | 
			
		||||
  let Latency = 5;
 | 
			
		||||
}
 | 
			
		||||
def : InstRW<[ZnWriteVRCPPSr], (instregex "VRCPPSYr")>;
 | 
			
		||||
def : SchedAlias<WriteFRcpY,   ZnWriteVRCPPSYr>;
 | 
			
		||||
 | 
			
		||||
// y,m256.
 | 
			
		||||
def ZnWriteVRCPPSLd : SchedWriteRes<[ZnAGU, ZnFPU01]> {
 | 
			
		||||
def ZnWriteVRCPPSYLd : SchedWriteRes<[ZnAGU, ZnFPU01]> {
 | 
			
		||||
  let Latency = 12;
 | 
			
		||||
  let NumMicroOps = 3;
 | 
			
		||||
}
 | 
			
		||||
def : InstRW<[ZnWriteVRCPPSLd], (instregex "VRCPPSYm")>;
 | 
			
		||||
def : SchedAlias<WriteFRcpYLd, ZnWriteVRCPPSYLd>;
 | 
			
		||||
 | 
			
		||||
// DPPS.
 | 
			
		||||
// x,x,i / v,v,v,i.
 | 
			
		||||
| 
						 | 
				
			
			@ -1533,83 +1543,38 @@ def : SchedAlias<WriteDPPD,   ZnWriteMicrocoded>;
 | 
			
		|||
// x,m,i.
 | 
			
		||||
def : SchedAlias<WriteDPPDLd, ZnWriteMicrocoded>;
 | 
			
		||||
 | 
			
		||||
// VSQRTPS.
 | 
			
		||||
// y,y.
 | 
			
		||||
def ZnWriteVSQRTPSYr : SchedWriteRes<[ZnFPU3]> {
 | 
			
		||||
  let Latency = 28;
 | 
			
		||||
  let ResourceCycles = [28];
 | 
			
		||||
}
 | 
			
		||||
def : InstRW<[ZnWriteVSQRTPSYr], (instregex "VSQRTPSYr")>;
 | 
			
		||||
 | 
			
		||||
// y,m256.
 | 
			
		||||
def ZnWriteVSQRTPSYLd : SchedWriteRes<[ZnAGU, ZnFPU3]> {
 | 
			
		||||
  let Latency = 35;
 | 
			
		||||
  let ResourceCycles = [1,35];
 | 
			
		||||
  let NumMicroOps = 2;
 | 
			
		||||
}
 | 
			
		||||
def : InstRW<[ZnWriteVSQRTPSYLd], (instregex "VSQRTPSYm")>;
 | 
			
		||||
 | 
			
		||||
// VSQRTPD.
 | 
			
		||||
// y,y.
 | 
			
		||||
def ZnWriteVSQRTPDYr : SchedWriteRes<[ZnFPU3]> {
 | 
			
		||||
  let Latency = 40;
 | 
			
		||||
  let ResourceCycles = [40];
 | 
			
		||||
}
 | 
			
		||||
def : InstRW<[ZnWriteVSQRTPDYr], (instregex "VSQRTPDYr")>;
 | 
			
		||||
 | 
			
		||||
// y,m256.
 | 
			
		||||
def ZnWriteVSQRTPDYLd : SchedWriteRes<[ZnAGU, ZnFPU3]> {
 | 
			
		||||
  let Latency = 47;
 | 
			
		||||
  let NumMicroOps = 2;
 | 
			
		||||
  let ResourceCycles = [1,47];
 | 
			
		||||
}
 | 
			
		||||
def : InstRW<[ZnWriteVSQRTPDYLd], (instregex "VSQRTPDYm")>;
 | 
			
		||||
 | 
			
		||||
// RSQRTSS
 | 
			
		||||
// TODO - convert to ZnWriteResFpuPair
 | 
			
		||||
// x,x.
 | 
			
		||||
def ZnWriteRSQRTSSr : SchedWriteRes<[ZnFPU02]> {
 | 
			
		||||
  let Latency = 5;
 | 
			
		||||
}
 | 
			
		||||
def : InstRW<[ZnWriteRSQRTSSr], (instregex "(V?)RSQRTSS(Y?)r")>;
 | 
			
		||||
def : SchedAlias<WriteFRsqrt, ZnWriteRSQRTSSr>;
 | 
			
		||||
 | 
			
		||||
// RSQRTPS
 | 
			
		||||
// x,x.
 | 
			
		||||
def ZnWriteRSQRTPSr : SchedWriteRes<[ZnFPU01]> {
 | 
			
		||||
  let Latency = 5;
 | 
			
		||||
}
 | 
			
		||||
def : InstRW<[ZnWriteRSQRTPSr], (instregex "(V?)RSQRTPSr")>;
 | 
			
		||||
 | 
			
		||||
// RSQRTSSm
 | 
			
		||||
// x,m128.
 | 
			
		||||
def ZnWriteRSQRTSSLd: SchedWriteRes<[ZnAGU, ZnFPU02]> {
 | 
			
		||||
  let Latency = 12;
 | 
			
		||||
  let NumMicroOps = 2;
 | 
			
		||||
  let ResourceCycles = [1,2];
 | 
			
		||||
  let ResourceCycles = [1,2]; // FIXME: Is this right?
 | 
			
		||||
}
 | 
			
		||||
def : InstRW<[ZnWriteRSQRTSSLd], (instregex "(V?)RSQRTSSm")>;
 | 
			
		||||
def : SchedAlias<WriteFRsqrtLd, ZnWriteRSQRTSSLd>;
 | 
			
		||||
 | 
			
		||||
// RSQRTPSm
 | 
			
		||||
def ZnWriteRSQRTPSLd : SchedWriteRes<[ZnAGU, ZnFPU01]> {
 | 
			
		||||
  let Latency = 12;
 | 
			
		||||
  let NumMicroOps = 2;
 | 
			
		||||
}
 | 
			
		||||
def : InstRW<[ZnWriteRSQRTPSLd], (instregex "(V?)RSQRTPSm")>;
 | 
			
		||||
 | 
			
		||||
// RSQRTPS 256.
 | 
			
		||||
// RSQRTPS
 | 
			
		||||
// TODO - convert to ZnWriteResFpuPair
 | 
			
		||||
// y,y.
 | 
			
		||||
def ZnWriteRSQRTPSYr : SchedWriteRes<[ZnFPU01]> {
 | 
			
		||||
  let Latency = 5;
 | 
			
		||||
  let NumMicroOps = 2;
 | 
			
		||||
  let ResourceCycles = [2];
 | 
			
		||||
}
 | 
			
		||||
def : InstRW<[ZnWriteRSQRTPSYr], (instregex "VRSQRTPSYr")>;
 | 
			
		||||
def : SchedAlias<WriteFRsqrtY, ZnWriteRSQRTPSYr>;
 | 
			
		||||
 | 
			
		||||
// y,m256.
 | 
			
		||||
def ZnWriteRSQRTPSYLd : SchedWriteRes<[ZnAGU, ZnFPU01]> {
 | 
			
		||||
  let Latency = 12;
 | 
			
		||||
  let NumMicroOps = 2;
 | 
			
		||||
}
 | 
			
		||||
def : InstRW<[ZnWriteRSQRTPSYLd], (instregex "VRSQRTPSYm")>;
 | 
			
		||||
def : SchedAlias<WriteFRsqrtYLd, ZnWriteRSQRTPSYLd>;
 | 
			
		||||
 | 
			
		||||
//-- Other instructions --//
 | 
			
		||||
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -4450,7 +4450,7 @@ define <4 x double> @test_sqrtpd(<4 x double> %a0, <4 x double> *%a1) {
 | 
			
		|||
;
 | 
			
		||||
; ZNVER1-LABEL: test_sqrtpd:
 | 
			
		||||
; ZNVER1:       # %bb.0:
 | 
			
		||||
; ZNVER1-NEXT:    vsqrtpd (%rdi), %ymm1 # sched: [47:47.00]
 | 
			
		||||
; ZNVER1-NEXT:    vsqrtpd (%rdi), %ymm1 # sched: [47:40.00]
 | 
			
		||||
; ZNVER1-NEXT:    vsqrtpd %ymm0, %ymm0 # sched: [40:40.00]
 | 
			
		||||
; ZNVER1-NEXT:    vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
 | 
			
		||||
; ZNVER1-NEXT:    retq # sched: [1:0.50]
 | 
			
		||||
| 
						 | 
				
			
			@ -4514,7 +4514,7 @@ define <8 x float> @test_sqrtps(<8 x float> %a0, <8 x float> *%a1) {
 | 
			
		|||
;
 | 
			
		||||
; ZNVER1-LABEL: test_sqrtps:
 | 
			
		||||
; ZNVER1:       # %bb.0:
 | 
			
		||||
; ZNVER1-NEXT:    vsqrtps (%rdi), %ymm1 # sched: [35:35.00]
 | 
			
		||||
; ZNVER1-NEXT:    vsqrtps (%rdi), %ymm1 # sched: [35:28.00]
 | 
			
		||||
; ZNVER1-NEXT:    vsqrtps %ymm0, %ymm0 # sched: [28:28.00]
 | 
			
		||||
; ZNVER1-NEXT:    vaddps %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
 | 
			
		||||
; ZNVER1-NEXT:    retq # sched: [1:0.50]
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -600,7 +600,7 @@ declare <16 x float> @llvm.sqrt.v16f32(<16 x float>)
 | 
			
		|||
define <16 x float> @sqrtD(<16 x float> %a) nounwind {
 | 
			
		||||
; GENERIC-LABEL: sqrtD:
 | 
			
		||||
; GENERIC:       # %bb.0:
 | 
			
		||||
; GENERIC-NEXT:    vsqrtps %zmm0, %zmm0 # sched: [14:1.00]
 | 
			
		||||
; GENERIC-NEXT:    vsqrtps %zmm0, %zmm0 # sched: [29:28.00]
 | 
			
		||||
; GENERIC-NEXT:    retq # sched: [1:1.00]
 | 
			
		||||
;
 | 
			
		||||
; SKX-LABEL: sqrtD:
 | 
			
		||||
| 
						 | 
				
			
			@ -615,7 +615,7 @@ declare <8 x double> @llvm.sqrt.v8f64(<8 x double>)
 | 
			
		|||
define <8 x double> @sqrtE(<8 x double> %a) nounwind {
 | 
			
		||||
; GENERIC-LABEL: sqrtE:
 | 
			
		||||
; GENERIC:       # %bb.0:
 | 
			
		||||
; GENERIC-NEXT:    vsqrtpd %zmm0, %zmm0 # sched: [14:1.00]
 | 
			
		||||
; GENERIC-NEXT:    vsqrtpd %zmm0, %zmm0 # sched: [45:44.00]
 | 
			
		||||
; GENERIC-NEXT:    retq # sched: [1:1.00]
 | 
			
		||||
;
 | 
			
		||||
; SKX-LABEL: sqrtE:
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -1024,7 +1024,7 @@ define <16 x float> @v16f32_one_step(<16 x float> %x) #1 {
 | 
			
		|||
;
 | 
			
		||||
; KNL-LABEL: v16f32_one_step:
 | 
			
		||||
; KNL:       # %bb.0:
 | 
			
		||||
; KNL-NEXT:    vrcp14ps %zmm0, %zmm1 # sched: [5:1.00]
 | 
			
		||||
; KNL-NEXT:    vrcp14ps %zmm0, %zmm1 # sched: [11:2.00]
 | 
			
		||||
; KNL-NEXT:    vfnmadd213ps {{.*#+}} zmm0 = -(zmm1 * zmm0) + mem sched: [12:0.50]
 | 
			
		||||
; KNL-NEXT:    vfmadd132ps {{.*#+}} zmm0 = (zmm0 * zmm1) + zmm1 sched: [5:0.50]
 | 
			
		||||
; KNL-NEXT:    retq # sched: [7:1.00]
 | 
			
		||||
| 
						 | 
				
			
			@ -1224,7 +1224,7 @@ define <16 x float> @v16f32_two_step(<16 x float> %x) #2 {
 | 
			
		|||
;
 | 
			
		||||
; KNL-LABEL: v16f32_two_step:
 | 
			
		||||
; KNL:       # %bb.0:
 | 
			
		||||
; KNL-NEXT:    vrcp14ps %zmm0, %zmm1 # sched: [5:1.00]
 | 
			
		||||
; KNL-NEXT:    vrcp14ps %zmm0, %zmm1 # sched: [11:2.00]
 | 
			
		||||
; KNL-NEXT:    vbroadcastss {{.*#+}} zmm2 = [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1] sched: [10:1.00]
 | 
			
		||||
; KNL-NEXT:    vmovaps %zmm1, %zmm3 # sched: [1:1.00]
 | 
			
		||||
; KNL-NEXT:    vfnmadd213ps {{.*#+}} zmm3 = -(zmm0 * zmm3) + zmm2 sched: [5:0.50]
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -1323,7 +1323,7 @@ define <16 x float> @v16f32_one_step2(<16 x float> %x) #1 {
 | 
			
		|||
;
 | 
			
		||||
; KNL-LABEL: v16f32_one_step2:
 | 
			
		||||
; KNL:       # %bb.0:
 | 
			
		||||
; KNL-NEXT:    vrcp14ps %zmm0, %zmm1 # sched: [5:1.00]
 | 
			
		||||
; KNL-NEXT:    vrcp14ps %zmm0, %zmm1 # sched: [11:2.00]
 | 
			
		||||
; KNL-NEXT:    vfnmadd213ps {{.*#+}} zmm0 = -(zmm1 * zmm0) + mem sched: [12:0.50]
 | 
			
		||||
; KNL-NEXT:    vfmadd132ps {{.*#+}} zmm0 = (zmm0 * zmm1) + zmm1 sched: [5:0.50]
 | 
			
		||||
; KNL-NEXT:    vmulps {{.*}}(%rip), %zmm0, %zmm0 # sched: [12:0.50]
 | 
			
		||||
| 
						 | 
				
			
			@ -1489,7 +1489,7 @@ define <16 x float> @v16f32_one_step_2_divs(<16 x float> %x) #1 {
 | 
			
		|||
;
 | 
			
		||||
; KNL-LABEL: v16f32_one_step_2_divs:
 | 
			
		||||
; KNL:       # %bb.0:
 | 
			
		||||
; KNL-NEXT:    vrcp14ps %zmm0, %zmm1 # sched: [5:1.00]
 | 
			
		||||
; KNL-NEXT:    vrcp14ps %zmm0, %zmm1 # sched: [11:2.00]
 | 
			
		||||
; KNL-NEXT:    vfnmadd213ps {{.*#+}} zmm0 = -(zmm1 * zmm0) + mem sched: [12:0.50]
 | 
			
		||||
; KNL-NEXT:    vfmadd132ps {{.*#+}} zmm0 = (zmm0 * zmm1) + zmm1 sched: [5:0.50]
 | 
			
		||||
; KNL-NEXT:    vmulps {{.*}}(%rip), %zmm0, %zmm1 # sched: [12:0.50]
 | 
			
		||||
| 
						 | 
				
			
			@ -1709,7 +1709,7 @@ define <16 x float> @v16f32_two_step2(<16 x float> %x) #2 {
 | 
			
		|||
;
 | 
			
		||||
; KNL-LABEL: v16f32_two_step2:
 | 
			
		||||
; KNL:       # %bb.0:
 | 
			
		||||
; KNL-NEXT:    vrcp14ps %zmm0, %zmm1 # sched: [5:1.00]
 | 
			
		||||
; KNL-NEXT:    vrcp14ps %zmm0, %zmm1 # sched: [11:2.00]
 | 
			
		||||
; KNL-NEXT:    vbroadcastss {{.*#+}} zmm2 = [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1] sched: [10:1.00]
 | 
			
		||||
; KNL-NEXT:    vmovaps %zmm1, %zmm3 # sched: [1:1.00]
 | 
			
		||||
; KNL-NEXT:    vfnmadd213ps {{.*#+}} zmm3 = -(zmm0 * zmm3) + zmm2 sched: [5:0.50]
 | 
			
		||||
| 
						 | 
				
			
			@ -1781,7 +1781,7 @@ define <16 x float> @v16f32_no_step(<16 x float> %x) #3 {
 | 
			
		|||
;
 | 
			
		||||
; KNL-LABEL: v16f32_no_step:
 | 
			
		||||
; KNL:       # %bb.0:
 | 
			
		||||
; KNL-NEXT:    vrcp14ps %zmm0, %zmm0 # sched: [5:1.00]
 | 
			
		||||
; KNL-NEXT:    vrcp14ps %zmm0, %zmm0 # sched: [11:2.00]
 | 
			
		||||
; KNL-NEXT:    retq # sched: [7:1.00]
 | 
			
		||||
;
 | 
			
		||||
; SKX-LABEL: v16f32_no_step:
 | 
			
		||||
| 
						 | 
				
			
			@ -1855,7 +1855,7 @@ define <16 x float> @v16f32_no_step2(<16 x float> %x) #3 {
 | 
			
		|||
;
 | 
			
		||||
; KNL-LABEL: v16f32_no_step2:
 | 
			
		||||
; KNL:       # %bb.0:
 | 
			
		||||
; KNL-NEXT:    vrcp14ps %zmm0, %zmm0 # sched: [5:1.00]
 | 
			
		||||
; KNL-NEXT:    vrcp14ps %zmm0, %zmm0 # sched: [11:2.00]
 | 
			
		||||
; KNL-NEXT:    vmulps {{.*}}(%rip), %zmm0, %zmm0 # sched: [12:0.50]
 | 
			
		||||
; KNL-NEXT:    retq # sched: [7:1.00]
 | 
			
		||||
;
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -5007,15 +5007,15 @@ define <4 x float> @test_sqrtps(<4 x float> %a0, <4 x float> *%a1) {
 | 
			
		|||
;
 | 
			
		||||
; ZNVER1-SSE-LABEL: test_sqrtps:
 | 
			
		||||
; ZNVER1-SSE:       # %bb.0:
 | 
			
		||||
; ZNVER1-SSE-NEXT:    sqrtps %xmm0, %xmm1 # sched: [20:1.00]
 | 
			
		||||
; ZNVER1-SSE-NEXT:    sqrtps (%rdi), %xmm0 # sched: [27:1.00]
 | 
			
		||||
; ZNVER1-SSE-NEXT:    sqrtps %xmm0, %xmm1 # sched: [20:20.00]
 | 
			
		||||
; ZNVER1-SSE-NEXT:    sqrtps (%rdi), %xmm0 # sched: [27:20.00]
 | 
			
		||||
; ZNVER1-SSE-NEXT:    addps %xmm1, %xmm0 # sched: [3:1.00]
 | 
			
		||||
; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
 | 
			
		||||
;
 | 
			
		||||
; ZNVER1-LABEL: test_sqrtps:
 | 
			
		||||
; ZNVER1:       # %bb.0:
 | 
			
		||||
; ZNVER1-NEXT:    vsqrtps (%rdi), %xmm1 # sched: [27:1.00]
 | 
			
		||||
; ZNVER1-NEXT:    vsqrtps %xmm0, %xmm0 # sched: [20:1.00]
 | 
			
		||||
; ZNVER1-NEXT:    vsqrtps (%rdi), %xmm1 # sched: [27:20.00]
 | 
			
		||||
; ZNVER1-NEXT:    vsqrtps %xmm0, %xmm0 # sched: [20:20.00]
 | 
			
		||||
; ZNVER1-NEXT:    vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
 | 
			
		||||
; ZNVER1-NEXT:    retq # sched: [1:0.50]
 | 
			
		||||
  %1 = call <4 x float> @llvm.x86.sse.sqrt.ps(<4 x float> %a0)
 | 
			
		||||
| 
						 | 
				
			
			@ -5152,16 +5152,16 @@ define <4 x float> @test_sqrtss(<4 x float> %a0, <4 x float> *%a1) {
 | 
			
		|||
; ZNVER1-SSE-LABEL: test_sqrtss:
 | 
			
		||||
; ZNVER1-SSE:       # %bb.0:
 | 
			
		||||
; ZNVER1-SSE-NEXT:    movaps (%rdi), %xmm1 # sched: [8:0.50]
 | 
			
		||||
; ZNVER1-SSE-NEXT:    sqrtss %xmm0, %xmm0 # sched: [20:1.00]
 | 
			
		||||
; ZNVER1-SSE-NEXT:    sqrtss %xmm1, %xmm1 # sched: [20:1.00]
 | 
			
		||||
; ZNVER1-SSE-NEXT:    sqrtss %xmm0, %xmm0 # sched: [20:20.00]
 | 
			
		||||
; ZNVER1-SSE-NEXT:    sqrtss %xmm1, %xmm1 # sched: [20:20.00]
 | 
			
		||||
; ZNVER1-SSE-NEXT:    addps %xmm1, %xmm0 # sched: [3:1.00]
 | 
			
		||||
; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
 | 
			
		||||
;
 | 
			
		||||
; ZNVER1-LABEL: test_sqrtss:
 | 
			
		||||
; ZNVER1:       # %bb.0:
 | 
			
		||||
; ZNVER1-NEXT:    vmovaps (%rdi), %xmm1 # sched: [8:0.50]
 | 
			
		||||
; ZNVER1-NEXT:    vsqrtss %xmm0, %xmm0, %xmm0 # sched: [20:1.00]
 | 
			
		||||
; ZNVER1-NEXT:    vsqrtss %xmm1, %xmm1, %xmm1 # sched: [20:1.00]
 | 
			
		||||
; ZNVER1-NEXT:    vsqrtss %xmm0, %xmm0, %xmm0 # sched: [20:20.00]
 | 
			
		||||
; ZNVER1-NEXT:    vsqrtss %xmm1, %xmm1, %xmm1 # sched: [20:20.00]
 | 
			
		||||
; ZNVER1-NEXT:    vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
 | 
			
		||||
; ZNVER1-NEXT:    retq # sched: [1:0.50]
 | 
			
		||||
  %1 = call <4 x float> @llvm.x86.sse.sqrt.ss(<4 x float> %a0)
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -14247,15 +14247,15 @@ define <2 x double> @test_sqrtpd(<2 x double> %a0, <2 x double> *%a1) {
 | 
			
		|||
;
 | 
			
		||||
; ZNVER1-SSE-LABEL: test_sqrtpd:
 | 
			
		||||
; ZNVER1-SSE:       # %bb.0:
 | 
			
		||||
; ZNVER1-SSE-NEXT:    sqrtpd %xmm0, %xmm1 # sched: [20:1.00]
 | 
			
		||||
; ZNVER1-SSE-NEXT:    sqrtpd (%rdi), %xmm0 # sched: [27:1.00]
 | 
			
		||||
; ZNVER1-SSE-NEXT:    sqrtpd %xmm0, %xmm1 # sched: [20:20.00]
 | 
			
		||||
; ZNVER1-SSE-NEXT:    sqrtpd (%rdi), %xmm0 # sched: [27:20.00]
 | 
			
		||||
; ZNVER1-SSE-NEXT:    addpd %xmm1, %xmm0 # sched: [3:1.00]
 | 
			
		||||
; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
 | 
			
		||||
;
 | 
			
		||||
; ZNVER1-LABEL: test_sqrtpd:
 | 
			
		||||
; ZNVER1:       # %bb.0:
 | 
			
		||||
; ZNVER1-NEXT:    vsqrtpd (%rdi), %xmm1 # sched: [27:1.00]
 | 
			
		||||
; ZNVER1-NEXT:    vsqrtpd %xmm0, %xmm0 # sched: [20:1.00]
 | 
			
		||||
; ZNVER1-NEXT:    vsqrtpd (%rdi), %xmm1 # sched: [27:20.00]
 | 
			
		||||
; ZNVER1-NEXT:    vsqrtpd %xmm0, %xmm0 # sched: [20:20.00]
 | 
			
		||||
; ZNVER1-NEXT:    vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
 | 
			
		||||
; ZNVER1-NEXT:    retq # sched: [1:0.50]
 | 
			
		||||
  %1 = call <2 x double> @llvm.x86.sse2.sqrt.pd(<2 x double> %a0)
 | 
			
		||||
| 
						 | 
				
			
			@ -14392,16 +14392,16 @@ define <2 x double> @test_sqrtsd(<2 x double> %a0, <2 x double> *%a1) {
 | 
			
		|||
; ZNVER1-SSE-LABEL: test_sqrtsd:
 | 
			
		||||
; ZNVER1-SSE:       # %bb.0:
 | 
			
		||||
; ZNVER1-SSE-NEXT:    movapd (%rdi), %xmm1 # sched: [8:0.50]
 | 
			
		||||
; ZNVER1-SSE-NEXT:    sqrtsd %xmm0, %xmm0 # sched: [20:1.00]
 | 
			
		||||
; ZNVER1-SSE-NEXT:    sqrtsd %xmm1, %xmm1 # sched: [20:1.00]
 | 
			
		||||
; ZNVER1-SSE-NEXT:    sqrtsd %xmm0, %xmm0 # sched: [20:20.00]
 | 
			
		||||
; ZNVER1-SSE-NEXT:    sqrtsd %xmm1, %xmm1 # sched: [20:20.00]
 | 
			
		||||
; ZNVER1-SSE-NEXT:    addpd %xmm1, %xmm0 # sched: [3:1.00]
 | 
			
		||||
; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
 | 
			
		||||
;
 | 
			
		||||
; ZNVER1-LABEL: test_sqrtsd:
 | 
			
		||||
; ZNVER1:       # %bb.0:
 | 
			
		||||
; ZNVER1-NEXT:    vmovapd (%rdi), %xmm1 # sched: [8:0.50]
 | 
			
		||||
; ZNVER1-NEXT:    vsqrtsd %xmm0, %xmm0, %xmm0 # sched: [20:1.00]
 | 
			
		||||
; ZNVER1-NEXT:    vsqrtsd %xmm1, %xmm1, %xmm1 # sched: [20:1.00]
 | 
			
		||||
; ZNVER1-NEXT:    vsqrtsd %xmm0, %xmm0, %xmm0 # sched: [20:20.00]
 | 
			
		||||
; ZNVER1-NEXT:    vsqrtsd %xmm1, %xmm1, %xmm1 # sched: [20:20.00]
 | 
			
		||||
; ZNVER1-NEXT:    vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
 | 
			
		||||
; ZNVER1-NEXT:    retq # sched: [1:0.50]
 | 
			
		||||
  %1 = call <2 x double> @llvm.x86.sse2.sqrt.sd(<2 x double> %a0)
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -4083,56 +4083,56 @@ define void @test_fsqrt() optsize {
 | 
			
		|||
; SLM-LABEL: test_fsqrt:
 | 
			
		||||
; SLM:       # %bb.0:
 | 
			
		||||
; SLM-NEXT:    #APP
 | 
			
		||||
; SLM-NEXT:    fsqrt # sched: [15:1.00]
 | 
			
		||||
; SLM-NEXT:    fsqrt # sched: [40:40.00]
 | 
			
		||||
; SLM-NEXT:    #NO_APP
 | 
			
		||||
; SLM-NEXT:    retl # sched: [4:1.00]
 | 
			
		||||
;
 | 
			
		||||
; SANDY-LABEL: test_fsqrt:
 | 
			
		||||
; SANDY:       # %bb.0:
 | 
			
		||||
; SANDY-NEXT:    #APP
 | 
			
		||||
; SANDY-NEXT:    fsqrt # sched: [14:1.00]
 | 
			
		||||
; SANDY-NEXT:    fsqrt # sched: [24:24.00]
 | 
			
		||||
; SANDY-NEXT:    #NO_APP
 | 
			
		||||
; SANDY-NEXT:    retl # sched: [6:1.00]
 | 
			
		||||
;
 | 
			
		||||
; HASWELL-LABEL: test_fsqrt:
 | 
			
		||||
; HASWELL:       # %bb.0:
 | 
			
		||||
; HASWELL-NEXT:    #APP
 | 
			
		||||
; HASWELL-NEXT:    fsqrt # sched: [15:1.00]
 | 
			
		||||
; HASWELL-NEXT:    fsqrt # sched: [23:17.00]
 | 
			
		||||
; HASWELL-NEXT:    #NO_APP
 | 
			
		||||
; HASWELL-NEXT:    retl # sched: [7:1.00]
 | 
			
		||||
;
 | 
			
		||||
; BROADWELL-LABEL: test_fsqrt:
 | 
			
		||||
; BROADWELL:       # %bb.0:
 | 
			
		||||
; BROADWELL-NEXT:    #APP
 | 
			
		||||
; BROADWELL-NEXT:    fsqrt # sched: [15:1.00]
 | 
			
		||||
; BROADWELL-NEXT:    fsqrt # sched: [23:9.00]
 | 
			
		||||
; BROADWELL-NEXT:    #NO_APP
 | 
			
		||||
; BROADWELL-NEXT:    retl # sched: [6:0.50]
 | 
			
		||||
;
 | 
			
		||||
; SKYLAKE-LABEL: test_fsqrt:
 | 
			
		||||
; SKYLAKE:       # %bb.0:
 | 
			
		||||
; SKYLAKE-NEXT:    #APP
 | 
			
		||||
; SKYLAKE-NEXT:    fsqrt # sched: [15:1.00]
 | 
			
		||||
; SKYLAKE-NEXT:    fsqrt # sched: [21:7.00]
 | 
			
		||||
; SKYLAKE-NEXT:    #NO_APP
 | 
			
		||||
; SKYLAKE-NEXT:    retl # sched: [6:0.50]
 | 
			
		||||
;
 | 
			
		||||
; SKX-LABEL: test_fsqrt:
 | 
			
		||||
; SKX:       # %bb.0:
 | 
			
		||||
; SKX-NEXT:    #APP
 | 
			
		||||
; SKX-NEXT:    fsqrt # sched: [15:1.00]
 | 
			
		||||
; SKX-NEXT:    fsqrt # sched: [21:7.00]
 | 
			
		||||
; SKX-NEXT:    #NO_APP
 | 
			
		||||
; SKX-NEXT:    retl # sched: [6:0.50]
 | 
			
		||||
;
 | 
			
		||||
; BTVER2-LABEL: test_fsqrt:
 | 
			
		||||
; BTVER2:       # %bb.0:
 | 
			
		||||
; BTVER2-NEXT:    #APP
 | 
			
		||||
; BTVER2-NEXT:    fsqrt # sched: [21:21.00]
 | 
			
		||||
; BTVER2-NEXT:    fsqrt # sched: [35:35.00]
 | 
			
		||||
; BTVER2-NEXT:    #NO_APP
 | 
			
		||||
; BTVER2-NEXT:    retl # sched: [4:1.00]
 | 
			
		||||
;
 | 
			
		||||
; ZNVER1-LABEL: test_fsqrt:
 | 
			
		||||
; ZNVER1:       # %bb.0:
 | 
			
		||||
; ZNVER1-NEXT:    #APP
 | 
			
		||||
; ZNVER1-NEXT:    fsqrt # sched: [20:1.00]
 | 
			
		||||
; ZNVER1-NEXT:    fsqrt # sched: [20:20.00]
 | 
			
		||||
; ZNVER1-NEXT:    #NO_APP
 | 
			
		||||
; ZNVER1-NEXT:    retl # sched: [1:0.50]
 | 
			
		||||
  tail call void asm sideeffect "fsqrt", ""() nounwind
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -307,7 +307,7 @@ fyl2xp1
 | 
			
		|||
# CHECK-NEXT:  1      100   0.25                  * 	fscale
 | 
			
		||||
# CHECK-NEXT:  1      100   0.25                  * 	fsin
 | 
			
		||||
# CHECK-NEXT:  1      100   0.25                  * 	fsincos
 | 
			
		||||
# CHECK-NEXT:  1      15    1.00                  * 	fsqrt
 | 
			
		||||
# CHECK-NEXT:  1      23    9.00                  * 	fsqrt
 | 
			
		||||
# CHECK-NEXT:  1      1     0.25                  * 	fst	%st(0)
 | 
			
		||||
# CHECK-NEXT:  1      1     1.00           *      * 	fsts	(%edx)
 | 
			
		||||
# CHECK-NEXT:  1      1     1.00           *      * 	fstl	(%ecx)
 | 
			
		||||
| 
						 | 
				
			
			@ -369,7 +369,7 @@ fyl2xp1
 | 
			
		|||
 | 
			
		||||
# CHECK:      Resource pressure per iteration:
 | 
			
		||||
# CHECK-NEXT: [0]    [1]    [2]    [3]    [4]    [5]    [6]    [7]    [8]    [9]
 | 
			
		||||
# CHECK-NEXT:  -      -     111.67 140.67 49.00  49.00  27.00  60.67  71.00  9.00
 | 
			
		||||
# CHECK-NEXT:  -     9.00   111.67 140.67 49.00  49.00  27.00  60.67  71.00  9.00
 | 
			
		||||
 | 
			
		||||
# CHECK:      Resource pressure by instruction:
 | 
			
		||||
# CHECK-NEXT: [0]    [1]    [2]    [3]    [4]    [5]    [6]    [7]    [8]    [9]    	Instructions:
 | 
			
		||||
| 
						 | 
				
			
			@ -474,7 +474,7 @@ fyl2xp1
 | 
			
		|||
# CHECK-NEXT:  -      -     0.25   0.25    -      -      -     0.25   0.25    -     	fscale
 | 
			
		||||
# CHECK-NEXT:  -      -     0.25   0.25    -      -      -     0.25   0.25    -     	fsin
 | 
			
		||||
# CHECK-NEXT:  -      -     0.25   0.25    -      -      -     0.25   0.25    -     	fsincos
 | 
			
		||||
# CHECK-NEXT:  -      -     1.00    -      -      -      -      -      -      -     	fsqrt
 | 
			
		||||
# CHECK-NEXT:  -     9.00   1.00    -      -      -      -      -      -      -     	fsqrt
 | 
			
		||||
# CHECK-NEXT:  -      -     0.25   0.25    -      -      -     0.25   0.25    -     	fst	%st(0)
 | 
			
		||||
# CHECK-NEXT:  -      -      -      -     0.33   0.33   1.00    -      -     0.33   	fsts	(%edx)
 | 
			
		||||
# CHECK-NEXT:  -      -      -      -     0.33   0.33   1.00    -      -     0.33   	fstl	(%ecx)
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -307,7 +307,7 @@ fyl2xp1
 | 
			
		|||
# CHECK-NEXT:  1      100   0.50                  * 	fscale
 | 
			
		||||
# CHECK-NEXT:  1      100   0.50                  * 	fsin
 | 
			
		||||
# CHECK-NEXT:  1      100   0.50                  * 	fsincos
 | 
			
		||||
# CHECK-NEXT:  1      21    21.00                 * 	fsqrt
 | 
			
		||||
# CHECK-NEXT:  1      35    35.00                 * 	fsqrt
 | 
			
		||||
# CHECK-NEXT:  1      1     0.50                  * 	fst	%st(0)
 | 
			
		||||
# CHECK-NEXT:  1      1     1.00           *      * 	fsts	(%edx)
 | 
			
		||||
# CHECK-NEXT:  1      1     1.00           *      * 	fstl	(%ecx)
 | 
			
		||||
| 
						 | 
				
			
			@ -373,7 +373,7 @@ fyl2xp1
 | 
			
		|||
 | 
			
		||||
# CHECK:      Resource pressure per iteration:
 | 
			
		||||
# CHECK-NEXT: [0]    [1]    [2]    [3]    [4]    [5]    [6]    [7]    [8]    [9]    [10]   [11]   [12]   [13]
 | 
			
		||||
# CHECK-NEXT: 44.50  22.50   -     54.00  335.00 54.00  27.00  39.00   -     13.00   -      -      -      -
 | 
			
		||||
# CHECK-NEXT: 44.50  22.50   -     54.00  349.00 54.00  27.00  39.00   -     13.00   -      -      -      -
 | 
			
		||||
 | 
			
		||||
# CHECK:      Resource pressure by instruction:
 | 
			
		||||
# CHECK-NEXT: [0]    [1]    [2]    [3]    [4]    [5]    [6]    [7]    [8]    [9]    [10]   [11]   [12]   [13]   	Instructions:
 | 
			
		||||
| 
						 | 
				
			
			@ -478,7 +478,7 @@ fyl2xp1
 | 
			
		|||
# CHECK-NEXT: 0.50   0.50    -      -      -      -      -      -      -      -      -      -      -      -     	fscale
 | 
			
		||||
# CHECK-NEXT: 0.50   0.50    -      -      -      -      -      -      -      -      -      -      -      -     	fsin
 | 
			
		||||
# CHECK-NEXT: 0.50   0.50    -      -      -      -      -      -      -      -      -      -      -      -     	fsincos
 | 
			
		||||
# CHECK-NEXT:  -      -      -      -     21.00   -     1.00    -      -      -      -      -      -      -     	fsqrt
 | 
			
		||||
# CHECK-NEXT:  -      -      -      -     35.00   -     1.00    -      -      -      -      -      -      -     	fsqrt
 | 
			
		||||
# CHECK-NEXT: 0.50   0.50    -      -      -      -      -      -      -      -      -      -      -      -     	fst	%st(0)
 | 
			
		||||
# CHECK-NEXT:  -      -      -      -      -      -      -      -      -     1.00    -      -      -      -     	fsts	(%edx)
 | 
			
		||||
# CHECK-NEXT:  -      -      -      -      -      -      -      -      -     1.00    -      -      -      -     	fstl	(%ecx)
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -307,7 +307,7 @@ fyl2xp1
 | 
			
		|||
# CHECK-NEXT:  50     75     -                    * 	fscale
 | 
			
		||||
# CHECK-NEXT:  1      100   0.25                  * 	fsin
 | 
			
		||||
# CHECK-NEXT:  1      100   0.25                  * 	fsincos
 | 
			
		||||
# CHECK-NEXT:  1      15    1.00                  * 	fsqrt
 | 
			
		||||
# CHECK-NEXT:  1      23    17.00                 * 	fsqrt
 | 
			
		||||
# CHECK-NEXT:  1      1     0.50                  * 	fst	%st(0)
 | 
			
		||||
# CHECK-NEXT:  1      1     1.00           *      * 	fsts	(%edx)
 | 
			
		||||
# CHECK-NEXT:  1      1     1.00           *      * 	fstl	(%ecx)
 | 
			
		||||
| 
						 | 
				
			
			@ -369,7 +369,7 @@ fyl2xp1
 | 
			
		|||
 | 
			
		||||
# CHECK:      Resource pressure per iteration:
 | 
			
		||||
# CHECK-NEXT: [0]    [1]    [2]    [3]    [4]    [5]    [6]    [7]    [8]    [9]
 | 
			
		||||
# CHECK-NEXT:  -      -     117.92 141.92 49.00  49.00  27.00  56.92  65.25  9.00
 | 
			
		||||
# CHECK-NEXT:  -     17.00  117.92 141.92 49.00  49.00  27.00  56.92  65.25  9.00
 | 
			
		||||
 | 
			
		||||
# CHECK:      Resource pressure by instruction:
 | 
			
		||||
# CHECK-NEXT: [0]    [1]    [2]    [3]    [4]    [5]    [6]    [7]    [8]    [9]    	Instructions:
 | 
			
		||||
| 
						 | 
				
			
			@ -474,7 +474,7 @@ fyl2xp1
 | 
			
		|||
# CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -     	fscale
 | 
			
		||||
# CHECK-NEXT:  -      -     0.25   0.25    -      -      -     0.25   0.25    -     	fsin
 | 
			
		||||
# CHECK-NEXT:  -      -     0.25   0.25    -      -      -     0.25   0.25    -     	fsincos
 | 
			
		||||
# CHECK-NEXT:  -      -     1.00    -      -      -      -      -      -      -     	fsqrt
 | 
			
		||||
# CHECK-NEXT:  -     17.00  1.00    -      -      -      -      -      -      -     	fsqrt
 | 
			
		||||
# CHECK-NEXT:  -      -     0.50   0.50    -      -      -      -      -      -     	fst	%st(0)
 | 
			
		||||
# CHECK-NEXT:  -      -      -      -     0.33   0.33   1.00    -      -     0.33   	fsts	(%edx)
 | 
			
		||||
# CHECK-NEXT:  -      -      -      -     0.33   0.33   1.00    -      -     0.33   	fstl	(%ecx)
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -307,7 +307,7 @@ fyl2xp1
 | 
			
		|||
# CHECK-NEXT:  1      100   1.00                  * 	fscale
 | 
			
		||||
# CHECK-NEXT:  1      100   1.00                  * 	fsin
 | 
			
		||||
# CHECK-NEXT:  1      100   1.00                  * 	fsincos
 | 
			
		||||
# CHECK-NEXT:  1      15    1.00                  * 	fsqrt
 | 
			
		||||
# CHECK-NEXT:  1      40    40.00                 * 	fsqrt
 | 
			
		||||
# CHECK-NEXT:  1      1     0.50                  * 	fst	%st(0)
 | 
			
		||||
# CHECK-NEXT:  1      1     1.00           *      * 	fsts	(%edx)
 | 
			
		||||
# CHECK-NEXT:  1      1     1.00           *      * 	fstl	(%ecx)
 | 
			
		||||
| 
						 | 
				
			
			@ -367,7 +367,7 @@ fyl2xp1
 | 
			
		|||
 | 
			
		||||
# CHECK:      Resource pressure per iteration:
 | 
			
		||||
# CHECK-NEXT: [0]    [1]    [2]    [3]    [4]    [5]    [6]    [7]
 | 
			
		||||
# CHECK-NEXT:  -     544.00 16.00  64.00  55.00  9.50   9.50   52.00
 | 
			
		||||
# CHECK-NEXT:  -     584.00 16.00  64.00  55.00  9.50   9.50   52.00
 | 
			
		||||
 | 
			
		||||
# CHECK:      Resource pressure by instruction:
 | 
			
		||||
# CHECK-NEXT: [0]    [1]    [2]    [3]    [4]    [5]    [6]    [7]    	Instructions:
 | 
			
		||||
| 
						 | 
				
			
			@ -472,7 +472,7 @@ fyl2xp1
 | 
			
		|||
# CHECK-NEXT:  -      -      -     1.00    -      -      -      -     	fscale
 | 
			
		||||
# CHECK-NEXT:  -      -      -     1.00    -      -      -      -     	fsin
 | 
			
		||||
# CHECK-NEXT:  -      -      -     1.00    -      -      -      -     	fsincos
 | 
			
		||||
# CHECK-NEXT:  -      -      -     1.00    -      -      -      -     	fsqrt
 | 
			
		||||
# CHECK-NEXT:  -     40.00   -     1.00    -      -      -      -     	fsqrt
 | 
			
		||||
# CHECK-NEXT:  -      -      -      -      -     0.50   0.50    -     	fst	%st(0)
 | 
			
		||||
# CHECK-NEXT:  -      -      -      -      -     0.50   0.50   1.00   	fsts	(%edx)
 | 
			
		||||
# CHECK-NEXT:  -      -      -      -      -     0.50   0.50   1.00   	fstl	(%ecx)
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -307,7 +307,7 @@ fyl2xp1
 | 
			
		|||
# CHECK-NEXT:  1      100   0.33                  * 	fscale
 | 
			
		||||
# CHECK-NEXT:  1      100   0.33                  * 	fsin
 | 
			
		||||
# CHECK-NEXT:  1      100   0.33                  * 	fsincos
 | 
			
		||||
# CHECK-NEXT:  1      14    1.00                  * 	fsqrt
 | 
			
		||||
# CHECK-NEXT:  1      24    24.00                 * 	fsqrt
 | 
			
		||||
# CHECK-NEXT:  1      1     1.00                  * 	fst	%st(0)
 | 
			
		||||
# CHECK-NEXT:  3      6     1.00           *      * 	fsts	(%edx)
 | 
			
		||||
# CHECK-NEXT:  3      6     1.00           *      * 	fstl	(%ecx)
 | 
			
		||||
| 
						 | 
				
			
			@ -367,7 +367,7 @@ fyl2xp1
 | 
			
		|||
 | 
			
		||||
# CHECK:      Resource pressure per iteration:
 | 
			
		||||
# CHECK-NEXT: [0]    [1]    [2]    [3]    [4]    [5]    [6.0]  [6.1]
 | 
			
		||||
# CHECK-NEXT:  -      -     48.33  87.33  17.00  54.33  34.00  34.00
 | 
			
		||||
# CHECK-NEXT:  -     24.00  48.33  87.33  17.00  54.33  34.00  34.00
 | 
			
		||||
 | 
			
		||||
# CHECK:      Resource pressure by instruction:
 | 
			
		||||
# CHECK-NEXT: [0]    [1]    [2]    [3]    [4]    [5]    [6.0]  [6.1]  	Instructions:
 | 
			
		||||
| 
						 | 
				
			
			@ -472,7 +472,7 @@ fyl2xp1
 | 
			
		|||
# CHECK-NEXT:  -      -     0.33   0.33    -     0.33    -      -     	fscale
 | 
			
		||||
# CHECK-NEXT:  -      -     0.33   0.33    -     0.33    -      -     	fsin
 | 
			
		||||
# CHECK-NEXT:  -      -     0.33   0.33    -     0.33    -      -     	fsincos
 | 
			
		||||
# CHECK-NEXT:  -      -     1.00    -      -      -      -      -     	fsqrt
 | 
			
		||||
# CHECK-NEXT:  -     24.00  1.00    -      -      -      -      -     	fsqrt
 | 
			
		||||
# CHECK-NEXT:  -      -      -      -      -     1.00    -      -     	fst	%st(0)
 | 
			
		||||
# CHECK-NEXT:  -      -      -      -     1.00    -     1.00   1.00   	fsts	(%edx)
 | 
			
		||||
# CHECK-NEXT:  -      -      -      -     1.00    -     1.00   1.00   	fstl	(%ecx)
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -307,7 +307,7 @@ fyl2xp1
 | 
			
		|||
# CHECK-NEXT:  1      100   0.25                  * 	fscale
 | 
			
		||||
# CHECK-NEXT:  1      100   0.25                  * 	fsin
 | 
			
		||||
# CHECK-NEXT:  1      100   0.25                  * 	fsincos
 | 
			
		||||
# CHECK-NEXT:  1      15    1.00                  * 	fsqrt
 | 
			
		||||
# CHECK-NEXT:  1      21    7.00                  * 	fsqrt
 | 
			
		||||
# CHECK-NEXT:  1      1     0.25                  * 	fst	%st(0)
 | 
			
		||||
# CHECK-NEXT:  1      1     1.00           *      * 	fsts	(%edx)
 | 
			
		||||
# CHECK-NEXT:  1      1     1.00           *      * 	fstl	(%ecx)
 | 
			
		||||
| 
						 | 
				
			
			@ -369,7 +369,7 @@ fyl2xp1
 | 
			
		|||
 | 
			
		||||
# CHECK:      Resource pressure per iteration:
 | 
			
		||||
# CHECK-NEXT: [0]    [1]    [2]    [3]    [4]    [5]    [6]    [7]    [8]    [9]
 | 
			
		||||
# CHECK-NEXT:  -      -     121.50 53.50  49.00  49.00  27.00  144.50 70.50  9.00
 | 
			
		||||
# CHECK-NEXT:  -     7.00   121.50 53.50  49.00  49.00  27.00  144.50 70.50  9.00
 | 
			
		||||
 | 
			
		||||
# CHECK:      Resource pressure by instruction:
 | 
			
		||||
# CHECK-NEXT: [0]    [1]    [2]    [3]    [4]    [5]    [6]    [7]    [8]    [9]    	Instructions:
 | 
			
		||||
| 
						 | 
				
			
			@ -474,7 +474,7 @@ fyl2xp1
 | 
			
		|||
# CHECK-NEXT:  -      -     0.25   0.25    -      -      -     0.25   0.25    -     	fscale
 | 
			
		||||
# CHECK-NEXT:  -      -     0.25   0.25    -      -      -     0.25   0.25    -     	fsin
 | 
			
		||||
# CHECK-NEXT:  -      -     0.25   0.25    -      -      -     0.25   0.25    -     	fsincos
 | 
			
		||||
# CHECK-NEXT:  -      -     1.00    -      -      -      -      -      -      -     	fsqrt
 | 
			
		||||
# CHECK-NEXT:  -     7.00   1.00    -      -      -      -      -      -      -     	fsqrt
 | 
			
		||||
# CHECK-NEXT:  -      -     0.25   0.25    -      -      -     0.25   0.25    -     	fst	%st(0)
 | 
			
		||||
# CHECK-NEXT:  -      -      -      -     0.33   0.33   1.00    -      -     0.33   	fsts	(%edx)
 | 
			
		||||
# CHECK-NEXT:  -      -      -      -     0.33   0.33   1.00    -      -     0.33   	fstl	(%ecx)
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -307,7 +307,7 @@ fyl2xp1
 | 
			
		|||
# CHECK-NEXT:  1      100   0.25                  * 	fscale
 | 
			
		||||
# CHECK-NEXT:  1      100   0.25                  * 	fsin
 | 
			
		||||
# CHECK-NEXT:  1      100   0.25                  * 	fsincos
 | 
			
		||||
# CHECK-NEXT:  1      15    1.00                  * 	fsqrt
 | 
			
		||||
# CHECK-NEXT:  1      21    7.00                  * 	fsqrt
 | 
			
		||||
# CHECK-NEXT:  1      1     0.25                  * 	fst	%st(0)
 | 
			
		||||
# CHECK-NEXT:  1      1     1.00           *      * 	fsts	(%edx)
 | 
			
		||||
# CHECK-NEXT:  1      1     1.00           *      * 	fstl	(%ecx)
 | 
			
		||||
| 
						 | 
				
			
			@ -369,7 +369,7 @@ fyl2xp1
 | 
			
		|||
 | 
			
		||||
# CHECK:      Resource pressure per iteration:
 | 
			
		||||
# CHECK-NEXT: [0]    [1]    [2]    [3]    [4]    [5]    [6]    [7]    [8]    [9]
 | 
			
		||||
# CHECK-NEXT:  -      -     121.50 53.50  49.00  49.00  27.00  144.50 70.50  9.00
 | 
			
		||||
# CHECK-NEXT:  -     7.00   121.50 53.50  49.00  49.00  27.00  144.50 70.50  9.00
 | 
			
		||||
 | 
			
		||||
# CHECK:      Resource pressure by instruction:
 | 
			
		||||
# CHECK-NEXT: [0]    [1]    [2]    [3]    [4]    [5]    [6]    [7]    [8]    [9]    	Instructions:
 | 
			
		||||
| 
						 | 
				
			
			@ -474,7 +474,7 @@ fyl2xp1
 | 
			
		|||
# CHECK-NEXT:  -      -     0.25   0.25    -      -      -     0.25   0.25    -     	fscale
 | 
			
		||||
# CHECK-NEXT:  -      -     0.25   0.25    -      -      -     0.25   0.25    -     	fsin
 | 
			
		||||
# CHECK-NEXT:  -      -     0.25   0.25    -      -      -     0.25   0.25    -     	fsincos
 | 
			
		||||
# CHECK-NEXT:  -      -     1.00    -      -      -      -      -      -      -     	fsqrt
 | 
			
		||||
# CHECK-NEXT:  -     7.00   1.00    -      -      -      -      -      -      -     	fsqrt
 | 
			
		||||
# CHECK-NEXT:  -      -     0.25   0.25    -      -      -     0.25   0.25    -     	fst	%st(0)
 | 
			
		||||
# CHECK-NEXT:  -      -      -      -     0.33   0.33   1.00    -      -     0.33   	fsts	(%edx)
 | 
			
		||||
# CHECK-NEXT:  -      -      -      -     0.33   0.33   1.00    -      -     0.33   	fstl	(%ecx)
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -1638,18 +1638,18 @@ vzeroupper
 | 
			
		|||
# CHECK-NEXT:  1      8     0.50    *               	vshufps	$1, (%rax), %xmm1, %xmm2
 | 
			
		||||
# CHECK-NEXT:  1      1     0.50                    	vshufps	$1, %ymm0, %ymm1, %ymm2
 | 
			
		||||
# CHECK-NEXT:  1      8     0.50    *               	vshufps	$1, (%rax), %ymm1, %ymm2
 | 
			
		||||
# CHECK-NEXT:  1      20    1.00                    	vsqrtpd	%xmm0, %xmm2
 | 
			
		||||
# CHECK-NEXT:  1      27    1.00    *               	vsqrtpd	(%rax), %xmm2
 | 
			
		||||
# CHECK-NEXT:  1      20    20.00                   	vsqrtpd	%xmm0, %xmm2
 | 
			
		||||
# CHECK-NEXT:  1      27    20.00   *               	vsqrtpd	(%rax), %xmm2
 | 
			
		||||
# CHECK-NEXT:  1      40    40.00                   	vsqrtpd	%ymm0, %ymm2
 | 
			
		||||
# CHECK-NEXT:  2      47    47.00   *               	vsqrtpd	(%rax), %ymm2
 | 
			
		||||
# CHECK-NEXT:  1      20    1.00                    	vsqrtps	%xmm0, %xmm2
 | 
			
		||||
# CHECK-NEXT:  1      27    1.00    *               	vsqrtps	(%rax), %xmm2
 | 
			
		||||
# CHECK-NEXT:  2      47    40.00   *               	vsqrtpd	(%rax), %ymm2
 | 
			
		||||
# CHECK-NEXT:  1      20    20.00                   	vsqrtps	%xmm0, %xmm2
 | 
			
		||||
# CHECK-NEXT:  1      27    20.00   *               	vsqrtps	(%rax), %xmm2
 | 
			
		||||
# CHECK-NEXT:  1      28    28.00                   	vsqrtps	%ymm0, %ymm2
 | 
			
		||||
# CHECK-NEXT:  2      35    35.00   *               	vsqrtps	(%rax), %ymm2
 | 
			
		||||
# CHECK-NEXT:  1      20    1.00                    	vsqrtsd	%xmm0, %xmm1, %xmm2
 | 
			
		||||
# CHECK-NEXT:  1      27    1.00    *               	vsqrtsd	(%rax), %xmm1, %xmm2
 | 
			
		||||
# CHECK-NEXT:  1      20    1.00                    	vsqrtss	%xmm0, %xmm1, %xmm2
 | 
			
		||||
# CHECK-NEXT:  1      27    1.00    *               	vsqrtss	(%rax), %xmm1, %xmm2
 | 
			
		||||
# CHECK-NEXT:  2      35    28.00   *               	vsqrtps	(%rax), %ymm2
 | 
			
		||||
# CHECK-NEXT:  1      20    20.00                   	vsqrtsd	%xmm0, %xmm1, %xmm2
 | 
			
		||||
# CHECK-NEXT:  1      27    20.00   *               	vsqrtsd	(%rax), %xmm1, %xmm2
 | 
			
		||||
# CHECK-NEXT:  1      20    20.00                   	vsqrtss	%xmm0, %xmm1, %xmm2
 | 
			
		||||
# CHECK-NEXT:  1      27    20.00   *               	vsqrtss	(%rax), %xmm1, %xmm2
 | 
			
		||||
# CHECK-NEXT:  1      100    -      *      *      * 	vstmxcsr	(%rax)
 | 
			
		||||
# CHECK-NEXT:  1      3     1.00                    	vsubpd	%xmm0, %xmm1, %xmm2
 | 
			
		||||
# CHECK-NEXT:  1      10    1.00    *               	vsubpd	(%rax), %xmm1, %xmm2
 | 
			
		||||
| 
						 | 
				
			
			@ -1718,7 +1718,7 @@ vzeroupper
 | 
			
		|||
 | 
			
		||||
# CHECK:      Resource pressure per iteration:
 | 
			
		||||
# CHECK-NEXT: [0]    [1]    [2]    [3]    [4]    [5]    [6]    [7]    [8]    [9]    [10]   [11]
 | 
			
		||||
# CHECK-NEXT: 168.00 168.00  -      -      -      -      -     194.25 142.75 168.25 366.75  -
 | 
			
		||||
# CHECK-NEXT: 168.00 168.00  -      -      -      -      -     194.25 142.75 168.25 504.75  -
 | 
			
		||||
 | 
			
		||||
# CHECK:      Resource pressure by instruction:
 | 
			
		||||
# CHECK-NEXT: [0]    [1]    [2]    [3]    [4]    [5]    [6]    [7]    [8]    [9]    [10]   [11]   	Instructions:
 | 
			
		||||
| 
						 | 
				
			
			@ -2342,18 +2342,18 @@ vzeroupper
 | 
			
		|||
# CHECK-NEXT: 0.50   0.50    -      -      -      -      -      -     0.50   0.50    -      -     	vshufps	$1, (%rax), %xmm1, %xmm2
 | 
			
		||||
# CHECK-NEXT:  -      -      -      -      -      -      -      -     0.50   0.50    -      -     	vshufps	$1, %ymm0, %ymm1, %ymm2
 | 
			
		||||
# CHECK-NEXT: 0.50   0.50    -      -      -      -      -      -     0.50   0.50    -      -     	vshufps	$1, (%rax), %ymm1, %ymm2
 | 
			
		||||
# CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -     1.00    -     	vsqrtpd	%xmm0, %xmm2
 | 
			
		||||
# CHECK-NEXT: 0.50   0.50    -      -      -      -      -      -      -      -     1.00    -     	vsqrtpd	(%rax), %xmm2
 | 
			
		||||
# CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -     20.00   -     	vsqrtpd	%xmm0, %xmm2
 | 
			
		||||
# CHECK-NEXT: 0.50   0.50    -      -      -      -      -      -      -      -     20.00   -     	vsqrtpd	(%rax), %xmm2
 | 
			
		||||
# CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -     40.00   -     	vsqrtpd	%ymm0, %ymm2
 | 
			
		||||
# CHECK-NEXT: 0.50   0.50    -      -      -      -      -      -      -      -     47.00   -     	vsqrtpd	(%rax), %ymm2
 | 
			
		||||
# CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -     1.00    -     	vsqrtps	%xmm0, %xmm2
 | 
			
		||||
# CHECK-NEXT: 0.50   0.50    -      -      -      -      -      -      -      -     1.00    -     	vsqrtps	(%rax), %xmm2
 | 
			
		||||
# CHECK-NEXT: 0.50   0.50    -      -      -      -      -      -      -      -     40.00   -     	vsqrtpd	(%rax), %ymm2
 | 
			
		||||
# CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -     20.00   -     	vsqrtps	%xmm0, %xmm2
 | 
			
		||||
# CHECK-NEXT: 0.50   0.50    -      -      -      -      -      -      -      -     20.00   -     	vsqrtps	(%rax), %xmm2
 | 
			
		||||
# CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -     28.00   -     	vsqrtps	%ymm0, %ymm2
 | 
			
		||||
# CHECK-NEXT: 0.50   0.50    -      -      -      -      -      -      -      -     35.00   -     	vsqrtps	(%rax), %ymm2
 | 
			
		||||
# CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -     1.00    -     	vsqrtsd	%xmm0, %xmm1, %xmm2
 | 
			
		||||
# CHECK-NEXT: 0.50   0.50    -      -      -      -      -      -      -      -     1.00    -     	vsqrtsd	(%rax), %xmm1, %xmm2
 | 
			
		||||
# CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -     1.00    -     	vsqrtss	%xmm0, %xmm1, %xmm2
 | 
			
		||||
# CHECK-NEXT: 0.50   0.50    -      -      -      -      -      -      -      -     1.00    -     	vsqrtss	(%rax), %xmm1, %xmm2
 | 
			
		||||
# CHECK-NEXT: 0.50   0.50    -      -      -      -      -      -      -      -     28.00   -     	vsqrtps	(%rax), %ymm2
 | 
			
		||||
# CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -     20.00   -     	vsqrtsd	%xmm0, %xmm1, %xmm2
 | 
			
		||||
# CHECK-NEXT: 0.50   0.50    -      -      -      -      -      -      -      -     20.00   -     	vsqrtsd	(%rax), %xmm1, %xmm2
 | 
			
		||||
# CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -     20.00   -     	vsqrtss	%xmm0, %xmm1, %xmm2
 | 
			
		||||
# CHECK-NEXT: 0.50   0.50    -      -      -      -      -      -      -      -     20.00   -     	vsqrtss	(%rax), %xmm1, %xmm2
 | 
			
		||||
# CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -      -      -     	vstmxcsr	(%rax)
 | 
			
		||||
# CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -      -      -     	vsubpd	%xmm0, %xmm1, %xmm2
 | 
			
		||||
# CHECK-NEXT: 0.50   0.50    -      -      -      -      -     1.00    -      -      -      -     	vsubpd	(%rax), %xmm1, %xmm2
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -301,10 +301,10 @@ xorps       (%rax), %xmm2
 | 
			
		|||
# CHECK-NEXT:  1      1     0.50    *      *      * 	sfence
 | 
			
		||||
# CHECK-NEXT:  1      1     0.50                    	shufps	$1, %xmm0, %xmm2
 | 
			
		||||
# CHECK-NEXT:  1      8     0.50    *               	shufps	$1, (%rax), %xmm2
 | 
			
		||||
# CHECK-NEXT:  1      20    1.00                    	sqrtps	%xmm0, %xmm2
 | 
			
		||||
# CHECK-NEXT:  1      27    1.00    *               	sqrtps	(%rax), %xmm2
 | 
			
		||||
# CHECK-NEXT:  1      20    1.00                    	sqrtss	%xmm0, %xmm2
 | 
			
		||||
# CHECK-NEXT:  1      27    1.00    *               	sqrtss	(%rax), %xmm2
 | 
			
		||||
# CHECK-NEXT:  1      20    20.00                   	sqrtps	%xmm0, %xmm2
 | 
			
		||||
# CHECK-NEXT:  1      27    20.00   *               	sqrtps	(%rax), %xmm2
 | 
			
		||||
# CHECK-NEXT:  1      20    20.00                   	sqrtss	%xmm0, %xmm2
 | 
			
		||||
# CHECK-NEXT:  1      27    20.00   *               	sqrtss	(%rax), %xmm2
 | 
			
		||||
# CHECK-NEXT:  1      100    -      *      *      * 	stmxcsr	(%rax)
 | 
			
		||||
# CHECK-NEXT:  1      3     1.00                    	subps	%xmm0, %xmm2
 | 
			
		||||
# CHECK-NEXT:  1      10    1.00    *               	subps	(%rax), %xmm2
 | 
			
		||||
| 
						 | 
				
			
			@ -335,7 +335,7 @@ xorps       (%rax), %xmm2
 | 
			
		|||
 | 
			
		||||
# CHECK:      Resource pressure per iteration:
 | 
			
		||||
# CHECK-NEXT: [0]    [1]    [2]    [3]    [4]    [5]    [6]    [7]    [8]    [9]    [10]   [11]
 | 
			
		||||
# CHECK-NEXT: 32.50  32.50   -      -      -      -      -     41.00  21.50  22.00  32.50   -
 | 
			
		||||
# CHECK-NEXT: 32.50  32.50   -      -      -      -      -     41.00  21.50  22.00  108.50  -
 | 
			
		||||
 | 
			
		||||
# CHECK:      Resource pressure by instruction:
 | 
			
		||||
# CHECK-NEXT: [0]    [1]    [2]    [3]    [4]    [5]    [6]    [7]    [8]    [9]    [10]   [11]   	Instructions:
 | 
			
		||||
| 
						 | 
				
			
			@ -446,10 +446,10 @@ xorps       (%rax), %xmm2
 | 
			
		|||
# CHECK-NEXT: 0.50   0.50    -      -      -      -      -      -      -      -      -      -     	sfence
 | 
			
		||||
# CHECK-NEXT:  -      -      -      -      -      -      -      -     0.50   0.50    -      -     	shufps	$1, %xmm0, %xmm2
 | 
			
		||||
# CHECK-NEXT: 0.50   0.50    -      -      -      -      -      -     0.50   0.50    -      -     	shufps	$1, (%rax), %xmm2
 | 
			
		||||
# CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -     1.00    -     	sqrtps	%xmm0, %xmm2
 | 
			
		||||
# CHECK-NEXT: 0.50   0.50    -      -      -      -      -      -      -      -     1.00    -     	sqrtps	(%rax), %xmm2
 | 
			
		||||
# CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -     1.00    -     	sqrtss	%xmm0, %xmm2
 | 
			
		||||
# CHECK-NEXT: 0.50   0.50    -      -      -      -      -      -      -      -     1.00    -     	sqrtss	(%rax), %xmm2
 | 
			
		||||
# CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -     20.00   -     	sqrtps	%xmm0, %xmm2
 | 
			
		||||
# CHECK-NEXT: 0.50   0.50    -      -      -      -      -      -      -      -     20.00   -     	sqrtps	(%rax), %xmm2
 | 
			
		||||
# CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -     20.00   -     	sqrtss	%xmm0, %xmm2
 | 
			
		||||
# CHECK-NEXT: 0.50   0.50    -      -      -      -      -      -      -      -     20.00   -     	sqrtss	(%rax), %xmm2
 | 
			
		||||
# CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -      -      -     	stmxcsr	(%rax)
 | 
			
		||||
# CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -      -      -     	subps	%xmm0, %xmm2
 | 
			
		||||
# CHECK-NEXT: 0.50   0.50    -      -      -      -      -     1.00    -      -      -      -     	subps	(%rax), %xmm2
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -650,10 +650,10 @@ xorpd       (%rax), %xmm2
 | 
			
		|||
# CHECK-NEXT:  1      8     0.50    *               	pxor	(%rax), %xmm2
 | 
			
		||||
# CHECK-NEXT:  1      1     0.50                    	shufpd	$1, %xmm0, %xmm2
 | 
			
		||||
# CHECK-NEXT:  1      8     0.50    *               	shufpd	$1, (%rax), %xmm2
 | 
			
		||||
# CHECK-NEXT:  1      20    1.00                    	sqrtpd	%xmm0, %xmm2
 | 
			
		||||
# CHECK-NEXT:  1      27    1.00    *               	sqrtpd	(%rax), %xmm2
 | 
			
		||||
# CHECK-NEXT:  1      20    1.00                    	sqrtsd	%xmm0, %xmm2
 | 
			
		||||
# CHECK-NEXT:  1      27    1.00    *               	sqrtsd	(%rax), %xmm2
 | 
			
		||||
# CHECK-NEXT:  1      20    20.00                   	sqrtpd	%xmm0, %xmm2
 | 
			
		||||
# CHECK-NEXT:  1      27    20.00   *               	sqrtpd	(%rax), %xmm2
 | 
			
		||||
# CHECK-NEXT:  1      20    20.00                   	sqrtsd	%xmm0, %xmm2
 | 
			
		||||
# CHECK-NEXT:  1      27    20.00   *               	sqrtsd	(%rax), %xmm2
 | 
			
		||||
# CHECK-NEXT:  1      3     1.00                    	subpd	%xmm0, %xmm2
 | 
			
		||||
# CHECK-NEXT:  1      10    1.00    *               	subpd	(%rax), %xmm2
 | 
			
		||||
# CHECK-NEXT:  1      3     1.00                    	subsd	%xmm0, %xmm2
 | 
			
		||||
| 
						 | 
				
			
			@ -683,7 +683,7 @@ xorpd       (%rax), %xmm2
 | 
			
		|||
 | 
			
		||||
# CHECK:      Resource pressure per iteration:
 | 
			
		||||
# CHECK-NEXT: [0]    [1]    [2]    [3]    [4]    [5]    [6]    [7]    [8]    [9]    [10]   [11]
 | 
			
		||||
# CHECK-NEXT: 65.00  65.00  0.50   0.50   0.50   0.50    -     74.08  39.58  69.25  77.08   -
 | 
			
		||||
# CHECK-NEXT: 65.00  65.00  0.50   0.50   0.50   0.50    -     74.08  39.58  69.25  153.08  -
 | 
			
		||||
 | 
			
		||||
# CHECK:      Resource pressure by instruction:
 | 
			
		||||
# CHECK-NEXT: [0]    [1]    [2]    [3]    [4]    [5]    [6]    [7]    [8]    [9]    [10]   [11]   	Instructions:
 | 
			
		||||
| 
						 | 
				
			
			@ -935,10 +935,10 @@ xorpd       (%rax), %xmm2
 | 
			
		|||
# CHECK-NEXT: 0.50   0.50    -      -      -      -      -     0.25   0.25   0.25   0.25    -     	pxor	(%rax), %xmm2
 | 
			
		||||
# CHECK-NEXT:  -      -      -      -      -      -      -      -     0.50   0.50    -      -     	shufpd	$1, %xmm0, %xmm2
 | 
			
		||||
# CHECK-NEXT: 0.50   0.50    -      -      -      -      -      -     0.50   0.50    -      -     	shufpd	$1, (%rax), %xmm2
 | 
			
		||||
# CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -     1.00    -     	sqrtpd	%xmm0, %xmm2
 | 
			
		||||
# CHECK-NEXT: 0.50   0.50    -      -      -      -      -      -      -      -     1.00    -     	sqrtpd	(%rax), %xmm2
 | 
			
		||||
# CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -     1.00    -     	sqrtsd	%xmm0, %xmm2
 | 
			
		||||
# CHECK-NEXT: 0.50   0.50    -      -      -      -      -      -      -      -     1.00    -     	sqrtsd	(%rax), %xmm2
 | 
			
		||||
# CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -     20.00   -     	sqrtpd	%xmm0, %xmm2
 | 
			
		||||
# CHECK-NEXT: 0.50   0.50    -      -      -      -      -      -      -      -     20.00   -     	sqrtpd	(%rax), %xmm2
 | 
			
		||||
# CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -     20.00   -     	sqrtsd	%xmm0, %xmm2
 | 
			
		||||
# CHECK-NEXT: 0.50   0.50    -      -      -      -      -      -      -      -     20.00   -     	sqrtsd	(%rax), %xmm2
 | 
			
		||||
# CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -      -      -     	subpd	%xmm0, %xmm2
 | 
			
		||||
# CHECK-NEXT: 0.50   0.50    -      -      -      -      -     1.00    -      -      -      -     	subpd	(%rax), %xmm2
 | 
			
		||||
# CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -      -      -     	subsd	%xmm0, %xmm2
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -307,7 +307,7 @@ fyl2xp1
 | 
			
		|||
# CHECK-NEXT:  1      100    -                    * 	fscale
 | 
			
		||||
# CHECK-NEXT:  1      100    -                    * 	fsin
 | 
			
		||||
# CHECK-NEXT:  1      100    -                    * 	fsincos
 | 
			
		||||
# CHECK-NEXT:  1      20    1.00                  * 	fsqrt
 | 
			
		||||
# CHECK-NEXT:  1      20    20.00                 * 	fsqrt
 | 
			
		||||
# CHECK-NEXT:  2      5     0.50                  * 	fst	%st(0)
 | 
			
		||||
# CHECK-NEXT:  1      1     0.50           *      * 	fsts	(%edx)
 | 
			
		||||
# CHECK-NEXT:  1      1     0.50           *      * 	fstl	(%ecx)
 | 
			
		||||
| 
						 | 
				
			
			@ -371,7 +371,7 @@ fyl2xp1
 | 
			
		|||
 | 
			
		||||
# CHECK:      Resource pressure per iteration:
 | 
			
		||||
# CHECK-NEXT: [0]    [1]    [2]    [3]    [4]    [5]    [6]    [7]    [8]    [9]    [10]   [11]
 | 
			
		||||
# CHECK-NEXT: 32.50  32.50   -      -      -      -      -     58.50  2.00   8.00   45.50   -
 | 
			
		||||
# CHECK-NEXT: 32.50  32.50   -      -      -      -      -     58.50  2.00   8.00   64.50   -
 | 
			
		||||
 | 
			
		||||
# CHECK:      Resource pressure by instruction:
 | 
			
		||||
# CHECK-NEXT: [0]    [1]    [2]    [3]    [4]    [5]    [6]    [7]    [8]    [9]    [10]   [11]   	Instructions:
 | 
			
		||||
| 
						 | 
				
			
			@ -476,7 +476,7 @@ fyl2xp1
 | 
			
		|||
# CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -      -      -     	fscale
 | 
			
		||||
# CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -      -      -     	fsin
 | 
			
		||||
# CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -      -      -     	fsincos
 | 
			
		||||
# CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -     1.00    -     	fsqrt
 | 
			
		||||
# CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -     20.00   -     	fsqrt
 | 
			
		||||
# CHECK-NEXT:  -      -      -      -      -      -      -      -      -     0.50   0.50    -     	fst	%st(0)
 | 
			
		||||
# CHECK-NEXT: 0.50   0.50    -      -      -      -      -      -      -      -      -      -     	fsts	(%edx)
 | 
			
		||||
# CHECK-NEXT: 0.50   0.50    -      -      -      -      -      -      -      -      -      -     	fstl	(%ecx)
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
		Loading…
	
		Reference in New Issue