forked from OSchip/llvm-project
				
			1. Use pxor instead of xorps / xorpd to clear FR32 / FR64 registers. This
proves to be worth 20% on Ptrdist/ks. Might be related to dependency-breaking support.
2. Added FsMOVAPSrr and FsMOVAPDrr as aliases of MOVAPSrr and MOVAPDrr. These are used for FR32 / FR64 reg-to-reg copies.
3. Tell the register allocator to generate MOVSSrm / MOVSDrm and MOVSSmr / MOVSDmr when folding a spill / restore into FsMOVAPSrr and FsMOVAPDrr.
llvm-svn: 26241
This commit is contained in:
		
							parent
							
								
									3f99628939
								
							
						
					
					
						commit
						24c461b51e
					
				| 
						 | 
					@ -29,6 +29,7 @@ bool X86InstrInfo::isMoveInstr(const MachineInstr& MI,
 | 
				
			||||||
  MachineOpCode oc = MI.getOpcode();
 | 
					  MachineOpCode oc = MI.getOpcode();
 | 
				
			||||||
  if (oc == X86::MOV8rr || oc == X86::MOV16rr || oc == X86::MOV32rr ||
 | 
					  if (oc == X86::MOV8rr || oc == X86::MOV16rr || oc == X86::MOV32rr ||
 | 
				
			||||||
      oc == X86::FpMOV  || oc == X86::MOVSSrr || oc == X86::MOVSDrr ||
 | 
					      oc == X86::FpMOV  || oc == X86::MOVSSrr || oc == X86::MOVSDrr ||
 | 
				
			||||||
 | 
					      oc == X86::FsMOVAPSrr || oc == X86::FsMOVAPDrr ||
 | 
				
			||||||
      oc == X86::MOVAPSrr || oc == X86::MOVAPDrr) {
 | 
					      oc == X86::MOVAPSrr || oc == X86::MOVAPDrr) {
 | 
				
			||||||
      assert(MI.getNumOperands() == 2 &&
 | 
					      assert(MI.getNumOperands() == 2 &&
 | 
				
			||||||
             MI.getOperand(0).isRegister() &&
 | 
					             MI.getOperand(0).isRegister() &&
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -2487,13 +2487,13 @@ def UCOMISDrm: I<0x2E, MRMSrcMem, (ops FR64:$src1, f64mem:$src2),
 | 
				
			||||||
                 [(X86cmp FR64:$src1, (loadf64 addr:$src2))]>,
 | 
					                 [(X86cmp FR64:$src1, (loadf64 addr:$src2))]>,
 | 
				
			||||||
               Requires<[HasSSE2]>, TB, OpSize;
 | 
					               Requires<[HasSSE2]>, TB, OpSize;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
// Pseudo-instructions that map fld0 to xorps/xorpd for sse.
 | 
					// Pseudo-instructions that map fld0 to pxor for sse.
 | 
				
			||||||
// FIXME: remove when we can teach regalloc that xor reg, reg is ok.
 | 
					// FIXME: remove when we can teach regalloc that xor reg, reg is ok.
 | 
				
			||||||
def FLD0SS : I<0x57, MRMInitReg, (ops FR32:$dst),
 | 
					def FLD0SS : I<0xEF, MRMInitReg, (ops FR32:$dst),
 | 
				
			||||||
               "xorps $dst, $dst", [(set FR32:$dst, fp32imm0)]>,
 | 
					               "pxor $dst, $dst", [(set FR32:$dst, fp32imm0)]>,
 | 
				
			||||||
             Requires<[HasSSE1]>, TB;
 | 
					             Requires<[HasSSE1]>, TB;
 | 
				
			||||||
def FLD0SD : I<0x57, MRMInitReg, (ops FR64:$dst),
 | 
					def FLD0SD : I<0xEF, MRMInitReg, (ops FR64:$dst),
 | 
				
			||||||
               "xorpd $dst, $dst", [(set FR64:$dst, fp64imm0)]>,
 | 
					               "pxor $dst, $dst", [(set FR64:$dst, fp64imm0)]>,
 | 
				
			||||||
             Requires<[HasSSE2]>, TB, OpSize;
 | 
					             Requires<[HasSSE2]>, TB, OpSize;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
let isTwoAddress = 1 in {
 | 
					let isTwoAddress = 1 in {
 | 
				
			||||||
| 
						 | 
					@ -3033,13 +3033,22 @@ def MOVAPDmr : I<0x29, MRMDestMem, (ops f128mem:$dst, V2F8:$src),
 | 
				
			||||||
                "movapd {$src, $dst|$dst, $src}",[]>,
 | 
					                "movapd {$src, $dst|$dst, $src}",[]>,
 | 
				
			||||||
               Requires<[HasSSE2]>, TB, OpSize;
 | 
					               Requires<[HasSSE2]>, TB, OpSize;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
// Pseudo-instructions to load FR32 / FR64 from f128mem using movaps / movapd.
 | 
					// Alias instructions to do FR32 / FR64 reg-to-reg copy using movaps / movapd.
 | 
				
			||||||
// Upper bits are disregarded.
 | 
					// Upper bits are disregarded.
 | 
				
			||||||
def MOVSAPSrm : I<0x28, MRMSrcMem, (ops FR32:$dst, f128mem:$src),
 | 
					def FsMOVAPSrr : I<0x28, MRMSrcReg, (ops V4F4:$dst, V4F4:$src),
 | 
				
			||||||
 | 
					                   "movaps {$src, $dst|$dst, $src}", []>,
 | 
				
			||||||
 | 
					                 Requires<[HasSSE1]>, TB;
 | 
				
			||||||
 | 
					def FsMOVAPDrr : I<0x28, MRMSrcReg, (ops V2F8:$dst, V2F8:$src),
 | 
				
			||||||
 | 
					                   "movapd {$src, $dst|$dst, $src}", []>,
 | 
				
			||||||
 | 
					                 Requires<[HasSSE2]>, TB, OpSize;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					// Alias instructions to load FR32 / FR64 from f128mem using movaps / movapd.
 | 
				
			||||||
 | 
					// Upper bits are disregarded.
 | 
				
			||||||
 | 
					def FsMOVAPSrm : I<0x28, MRMSrcMem, (ops FR32:$dst, f128mem:$src),
 | 
				
			||||||
                   "movaps {$src, $dst|$dst, $src}",
 | 
					                   "movaps {$src, $dst|$dst, $src}",
 | 
				
			||||||
                   [(set FR32:$dst, (X86loadpf32 addr:$src))]>,
 | 
					                   [(set FR32:$dst, (X86loadpf32 addr:$src))]>,
 | 
				
			||||||
                 Requires<[HasSSE1]>, TB;
 | 
					                 Requires<[HasSSE1]>, TB;
 | 
				
			||||||
def MOVSAPDrm : I<0x28, MRMSrcMem, (ops FR64:$dst, f128mem:$src),
 | 
					def FsMOVAPDrm : I<0x28, MRMSrcMem, (ops FR64:$dst, f128mem:$src),
 | 
				
			||||||
                  "movapd {$src, $dst|$dst, $src}",
 | 
					                  "movapd {$src, $dst|$dst, $src}",
 | 
				
			||||||
                  [(set FR64:$dst, (X86loadpf64 addr:$src))]>,
 | 
					                  [(set FR64:$dst, (X86loadpf64 addr:$src))]>,
 | 
				
			||||||
                Requires<[HasSSE2]>, TB, OpSize;
 | 
					                Requires<[HasSSE2]>, TB, OpSize;
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -114,9 +114,9 @@ void X86RegisterInfo::copyRegToReg(MachineBasicBlock &MBB,
 | 
				
			||||||
  } else if (RC == &X86::RFPRegClass || RC == &X86::RSTRegClass) {
 | 
					  } else if (RC == &X86::RFPRegClass || RC == &X86::RSTRegClass) {
 | 
				
			||||||
    Opc = X86::FpMOV;
 | 
					    Opc = X86::FpMOV;
 | 
				
			||||||
  } else if (RC == &X86::FR32RegClass || RC == &X86::V4F4RegClass) {
 | 
					  } else if (RC == &X86::FR32RegClass || RC == &X86::V4F4RegClass) {
 | 
				
			||||||
    Opc = X86::MOVAPSrr;
 | 
					    Opc = X86::FsMOVAPSrr;
 | 
				
			||||||
  } else if (RC == &X86::FR64RegClass || RC == &X86::V2F8RegClass) {
 | 
					  } else if (RC == &X86::FR64RegClass || RC == &X86::V2F8RegClass) {
 | 
				
			||||||
    Opc = X86::MOVAPDrr;
 | 
					    Opc = X86::FsMOVAPDrr;
 | 
				
			||||||
  } else {
 | 
					  } else {
 | 
				
			||||||
    assert(0 && "Unknown regclass");
 | 
					    assert(0 && "Unknown regclass");
 | 
				
			||||||
    abort();
 | 
					    abort();
 | 
				
			||||||
| 
						 | 
					@ -313,6 +313,9 @@ MachineInstr* X86RegisterInfo::foldMemoryOperand(MachineInstr* MI,
 | 
				
			||||||
    case X86::CMP8ri:    return MakeMIInst(X86::CMP8mi , FrameIndex, MI);
 | 
					    case X86::CMP8ri:    return MakeMIInst(X86::CMP8mi , FrameIndex, MI);
 | 
				
			||||||
    case X86::CMP16ri:   return MakeMIInst(X86::CMP16mi, FrameIndex, MI);
 | 
					    case X86::CMP16ri:   return MakeMIInst(X86::CMP16mi, FrameIndex, MI);
 | 
				
			||||||
    case X86::CMP32ri:   return MakeMIInst(X86::CMP32mi, FrameIndex, MI);
 | 
					    case X86::CMP32ri:   return MakeMIInst(X86::CMP32mi, FrameIndex, MI);
 | 
				
			||||||
 | 
					    // Alias scalar SSE instructions
 | 
				
			||||||
 | 
					    case X86::FsMOVAPSrr: return MakeMRInst(X86::MOVSSmr, FrameIndex, MI);
 | 
				
			||||||
 | 
					    case X86::FsMOVAPDrr: return MakeMRInst(X86::MOVSDmr, FrameIndex, MI);
 | 
				
			||||||
    // Scalar SSE instructions
 | 
					    // Scalar SSE instructions
 | 
				
			||||||
    case X86::MOVSSrr:   return MakeMRInst(X86::MOVSSmr, FrameIndex, MI);
 | 
					    case X86::MOVSSrr:   return MakeMRInst(X86::MOVSSmr, FrameIndex, MI);
 | 
				
			||||||
    case X86::MOVSDrr:   return MakeMRInst(X86::MOVSDmr, FrameIndex, MI);
 | 
					    case X86::MOVSDrr:   return MakeMRInst(X86::MOVSDmr, FrameIndex, MI);
 | 
				
			||||||
| 
						 | 
					@ -393,6 +396,9 @@ MachineInstr* X86RegisterInfo::foldMemoryOperand(MachineInstr* MI,
 | 
				
			||||||
    case X86::MOVZX16rr8:return MakeRMInst(X86::MOVZX16rm8 , FrameIndex, MI);
 | 
					    case X86::MOVZX16rr8:return MakeRMInst(X86::MOVZX16rm8 , FrameIndex, MI);
 | 
				
			||||||
    case X86::MOVZX32rr8:return MakeRMInst(X86::MOVZX32rm8, FrameIndex, MI);
 | 
					    case X86::MOVZX32rr8:return MakeRMInst(X86::MOVZX32rm8, FrameIndex, MI);
 | 
				
			||||||
    case X86::MOVZX32rr16:return MakeRMInst(X86::MOVZX32rm16, FrameIndex, MI);
 | 
					    case X86::MOVZX32rr16:return MakeRMInst(X86::MOVZX32rm16, FrameIndex, MI);
 | 
				
			||||||
 | 
					    // Alias scalar SSE instructions
 | 
				
			||||||
 | 
					    case X86::FsMOVAPSrr:return MakeRMInst(X86::MOVSSrm, FrameIndex, MI);
 | 
				
			||||||
 | 
					    case X86::FsMOVAPDrr:return MakeRMInst(X86::MOVSDrm, FrameIndex, MI);
 | 
				
			||||||
    // Scalar SSE instructions
 | 
					    // Scalar SSE instructions
 | 
				
			||||||
    case X86::MOVSSrr:   return MakeRMInst(X86::MOVSSrm, FrameIndex, MI);
 | 
					    case X86::MOVSSrr:   return MakeRMInst(X86::MOVSSrm, FrameIndex, MI);
 | 
				
			||||||
    case X86::MOVSDrr:   return MakeRMInst(X86::MOVSDrm, FrameIndex, MI);
 | 
					    case X86::MOVSDrr:   return MakeRMInst(X86::MOVSDrm, FrameIndex, MI);
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
		Loading…
	
		Reference in New Issue