703 lines
		
	
	
		
			25 KiB
		
	
	
	
		
			C++
		
	
	
	
			
		
		
	
	
			703 lines
		
	
	
		
			25 KiB
		
	
	
	
		
			C++
		
	
	
	
| //===-- X86FixupLEAs.cpp - use or replace LEA instructions -----------===//
 | |
| //
 | |
| // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 | |
| // See https://llvm.org/LICENSE.txt for license information.
 | |
| // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 | |
| //
 | |
| //===----------------------------------------------------------------------===//
 | |
| //
 | |
| // This file defines the pass that finds instructions that can be
 | |
| // re-written as LEA instructions in order to reduce pipeline delays.
 | |
| // It replaces LEAs with ADD/INC/DEC when that is better for size/speed.
 | |
| //
 | |
| //===----------------------------------------------------------------------===//
 | |
| 
 | |
| #include "X86.h"
 | |
| #include "X86InstrInfo.h"
 | |
| #include "X86Subtarget.h"
 | |
| #include "llvm/ADT/Statistic.h"
 | |
| #include "llvm/Analysis/ProfileSummaryInfo.h"
 | |
| #include "llvm/CodeGen/LazyMachineBlockFrequencyInfo.h"
 | |
| #include "llvm/CodeGen/MachineFunctionPass.h"
 | |
| #include "llvm/CodeGen/MachineInstrBuilder.h"
 | |
| #include "llvm/CodeGen/MachineSizeOpts.h"
 | |
| #include "llvm/CodeGen/Passes.h"
 | |
| #include "llvm/CodeGen/TargetSchedule.h"
 | |
| #include "llvm/Support/Debug.h"
 | |
| #include "llvm/Support/raw_ostream.h"
 | |
| using namespace llvm;
 | |
| 
 | |
| #define FIXUPLEA_DESC "X86 LEA Fixup"
 | |
| #define FIXUPLEA_NAME "x86-fixup-LEAs"
 | |
| 
 | |
| #define DEBUG_TYPE FIXUPLEA_NAME
 | |
| 
 | |
| STATISTIC(NumLEAs, "Number of LEA instructions created");
 | |
| 
 | |
| namespace {
 | |
| class FixupLEAPass : public MachineFunctionPass {
 | |
|   enum RegUsageState { RU_NotUsed, RU_Write, RU_Read };
 | |
| 
 | |
|   /// Given a machine register, look for the instruction
 | |
|   /// which writes it in the current basic block. If found,
 | |
|   /// try to replace it with an equivalent LEA instruction.
 | |
|   /// If replacement succeeds, then also process the newly created
 | |
|   /// instruction.
 | |
|   void seekLEAFixup(MachineOperand &p, MachineBasicBlock::iterator &I,
 | |
|                     MachineBasicBlock &MBB);
 | |
| 
 | |
|   /// Given a memory access or LEA instruction
 | |
|   /// whose address mode uses a base and/or index register, look for
 | |
|   /// an opportunity to replace the instruction which sets the base or index
 | |
|   /// register with an equivalent LEA instruction.
 | |
|   void processInstruction(MachineBasicBlock::iterator &I,
 | |
|                           MachineBasicBlock &MBB);
 | |
| 
 | |
|   /// Given a LEA instruction which is unprofitable
 | |
|   /// on SlowLEA targets try to replace it with an equivalent ADD instruction.
 | |
|   void processInstructionForSlowLEA(MachineBasicBlock::iterator &I,
 | |
|                                     MachineBasicBlock &MBB);
 | |
| 
 | |
|   /// Given a LEA instruction which is unprofitable
 | |
|   /// on SNB+ try to replace it with other instructions.
 | |
|   /// According to Intel's Optimization Reference Manual:
 | |
|   /// " For LEA instructions with three source operands and some specific
 | |
|   ///   situations, instruction latency has increased to 3 cycles, and must
 | |
|   ///   dispatch via port 1:
 | |
|   /// - LEA that has all three source operands: base, index, and offset
 | |
|   /// - LEA that uses base and index registers where the base is EBP, RBP,
 | |
|   ///   or R13
 | |
|   /// - LEA that uses RIP relative addressing mode
 | |
|   /// - LEA that uses 16-bit addressing mode "
 | |
|   /// This function currently handles the first 2 cases only.
 | |
|   void processInstrForSlow3OpLEA(MachineBasicBlock::iterator &I,
 | |
|                                  MachineBasicBlock &MBB, bool OptIncDec);
 | |
| 
 | |
|   /// Look for LEAs that are really two address LEAs that we might be able to
 | |
|   /// turn into regular ADD instructions.
 | |
|   bool optTwoAddrLEA(MachineBasicBlock::iterator &I,
 | |
|                      MachineBasicBlock &MBB, bool OptIncDec,
 | |
|                      bool UseLEAForSP) const;
 | |
| 
 | |
|   /// Determine if an instruction references a machine register
 | |
|   /// and, if so, whether it reads or writes the register.
 | |
|   RegUsageState usesRegister(MachineOperand &p, MachineBasicBlock::iterator I);
 | |
| 
 | |
|   /// Step backwards through a basic block, looking
 | |
|   /// for an instruction which writes a register within
 | |
|   /// a maximum of INSTR_DISTANCE_THRESHOLD instruction latency cycles.
 | |
|   MachineBasicBlock::iterator searchBackwards(MachineOperand &p,
 | |
|                                               MachineBasicBlock::iterator &I,
 | |
|                                               MachineBasicBlock &MBB);
 | |
| 
 | |
|   /// if an instruction can be converted to an
 | |
|   /// equivalent LEA, insert the new instruction into the basic block
 | |
|   /// and return a pointer to it. Otherwise, return zero.
 | |
|   MachineInstr *postRAConvertToLEA(MachineBasicBlock &MBB,
 | |
|                                    MachineBasicBlock::iterator &MBBI) const;
 | |
| 
 | |
| public:
 | |
|   static char ID;
 | |
| 
 | |
|   StringRef getPassName() const override { return FIXUPLEA_DESC; }
 | |
| 
 | |
|   FixupLEAPass() : MachineFunctionPass(ID) { }
 | |
| 
 | |
|   /// Loop over all of the basic blocks,
 | |
|   /// replacing instructions by equivalent LEA instructions
 | |
|   /// if needed and when possible.
 | |
|   bool runOnMachineFunction(MachineFunction &MF) override;
 | |
| 
 | |
|   // This pass runs after regalloc and doesn't support VReg operands.
 | |
|   MachineFunctionProperties getRequiredProperties() const override {
 | |
|     return MachineFunctionProperties().set(
 | |
|         MachineFunctionProperties::Property::NoVRegs);
 | |
|   }
 | |
| 
 | |
|   void getAnalysisUsage(AnalysisUsage &AU) const override {
 | |
|     AU.addRequired<ProfileSummaryInfoWrapperPass>();
 | |
|     AU.addRequired<LazyMachineBlockFrequencyInfoPass>();
 | |
|     MachineFunctionPass::getAnalysisUsage(AU);
 | |
|   }
 | |
| 
 | |
| private:
 | |
|   TargetSchedModel TSM;
 | |
|   const X86InstrInfo *TII = nullptr;
 | |
|   const X86RegisterInfo *TRI = nullptr;
 | |
| };
 | |
| }
 | |
| 
 | |
// Out-of-class definition of the static pass identifier declared in
// FixupLEAPass; the FixupLEAPass constructor hands it to MachineFunctionPass.
| char FixupLEAPass::ID = 0;
 | |
| 
 | |
// Pass registration using the FIXUPLEA_NAME / FIXUPLEA_DESC strings defined
// above. NOTE(review): the two trailing `false` arguments are presumably the
// "CFG only" and "is analysis" flags of INITIALIZE_PASS -- confirm against the
// macro definition.
| INITIALIZE_PASS(FixupLEAPass, FIXUPLEA_NAME, FIXUPLEA_DESC, false, false)
 | |
| 
 | |
| MachineInstr *
 | |
| FixupLEAPass::postRAConvertToLEA(MachineBasicBlock &MBB,
 | |
|                                  MachineBasicBlock::iterator &MBBI) const {
 | |
|   MachineInstr &MI = *MBBI;
 | |
|   switch (MI.getOpcode()) {
 | |
|   case X86::MOV32rr:
 | |
|   case X86::MOV64rr: {
 | |
|     const MachineOperand &Src = MI.getOperand(1);
 | |
|     const MachineOperand &Dest = MI.getOperand(0);
 | |
|     MachineInstr *NewMI =
 | |
|         BuildMI(MBB, MBBI, MI.getDebugLoc(),
 | |
|                 TII->get(MI.getOpcode() == X86::MOV32rr ? X86::LEA32r
 | |
|                                                         : X86::LEA64r))
 | |
|             .add(Dest)
 | |
|             .add(Src)
 | |
|             .addImm(1)
 | |
|             .addReg(0)
 | |
|             .addImm(0)
 | |
|             .addReg(0);
 | |
|     return NewMI;
 | |
|   }
 | |
|   }
 | |
| 
 | |
|   if (!MI.isConvertibleTo3Addr())
 | |
|     return nullptr;
 | |
| 
 | |
|   switch (MI.getOpcode()) {
 | |
|   default:
 | |
|     // Only convert instructions that we've verified are safe.
 | |
|     return nullptr;
 | |
|   case X86::ADD64ri32:
 | |
|   case X86::ADD64ri8:
 | |
|   case X86::ADD64ri32_DB:
 | |
|   case X86::ADD64ri8_DB:
 | |
|   case X86::ADD32ri:
 | |
|   case X86::ADD32ri8:
 | |
|   case X86::ADD32ri_DB:
 | |
|   case X86::ADD32ri8_DB:
 | |
|     if (!MI.getOperand(2).isImm()) {
 | |
|       // convertToThreeAddress will call getImm()
 | |
|       // which requires isImm() to be true
 | |
|       return nullptr;
 | |
|     }
 | |
|     break;
 | |
|   case X86::SHL64ri:
 | |
|   case X86::SHL32ri:
 | |
|   case X86::INC64r:
 | |
|   case X86::INC32r:
 | |
|   case X86::DEC64r:
 | |
|   case X86::DEC32r:
 | |
|   case X86::ADD64rr:
 | |
|   case X86::ADD64rr_DB:
 | |
|   case X86::ADD32rr:
 | |
|   case X86::ADD32rr_DB:
 | |
|     // These instructions are all fine to convert.
 | |
|     break;
 | |
|   }
 | |
|   MachineFunction::iterator MFI = MBB.getIterator();
 | |
|   return TII->convertToThreeAddress(MFI, MI, nullptr);
 | |
| }
 | |
| 
 | |
| FunctionPass *llvm::createX86FixupLEAs() { return new FixupLEAPass(); }
 | |
| 
 | |
| static bool isLEA(unsigned Opcode) {
 | |
|   return Opcode == X86::LEA32r || Opcode == X86::LEA64r ||
 | |
|          Opcode == X86::LEA64_32r;
 | |
| }
 | |
| 
 | |
| bool FixupLEAPass::runOnMachineFunction(MachineFunction &MF) {
 | |
|   if (skipFunction(MF.getFunction()))
 | |
|     return false;
 | |
| 
 | |
|   const X86Subtarget &ST = MF.getSubtarget<X86Subtarget>();
 | |
|   bool IsSlowLEA = ST.slowLEA();
 | |
|   bool IsSlow3OpsLEA = ST.slow3OpsLEA();
 | |
|   bool LEAUsesAG = ST.LEAusesAG();
 | |
| 
 | |
|   bool OptIncDec = !ST.slowIncDec() || MF.getFunction().hasOptSize();
 | |
|   bool UseLEAForSP = ST.useLeaForSP();
 | |
| 
 | |
|   TSM.init(&ST);
 | |
|   TII = ST.getInstrInfo();
 | |
|   TRI = ST.getRegisterInfo();
 | |
|   auto *PSI = &getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();
 | |
|   auto *MBFI = (PSI && PSI->hasProfileSummary())
 | |
|                    ? &getAnalysis<LazyMachineBlockFrequencyInfoPass>().getBFI()
 | |
|                    : nullptr;
 | |
| 
 | |
|   LLVM_DEBUG(dbgs() << "Start X86FixupLEAs\n";);
 | |
|   for (MachineBasicBlock &MBB : MF) {
 | |
|     // First pass. Try to remove or optimize existing LEAs.
 | |
|     bool OptIncDecPerBB =
 | |
|         OptIncDec || llvm::shouldOptimizeForSize(&MBB, PSI, MBFI);
 | |
|     for (MachineBasicBlock::iterator I = MBB.begin(); I != MBB.end(); ++I) {
 | |
|       if (!isLEA(I->getOpcode()))
 | |
|         continue;
 | |
| 
 | |
|       if (optTwoAddrLEA(I, MBB, OptIncDecPerBB, UseLEAForSP))
 | |
|         continue;
 | |
| 
 | |
|       if (IsSlowLEA)
 | |
|         processInstructionForSlowLEA(I, MBB);
 | |
|       else if (IsSlow3OpsLEA)
 | |
|         processInstrForSlow3OpLEA(I, MBB, OptIncDecPerBB);
 | |
|     }
 | |
| 
 | |
|     // Second pass for creating LEAs. This may reverse some of the
 | |
|     // transformations above.
 | |
|     if (LEAUsesAG) {
 | |
|       for (MachineBasicBlock::iterator I = MBB.begin(); I != MBB.end(); ++I)
 | |
|         processInstruction(I, MBB);
 | |
|     }
 | |
|   }
 | |
| 
 | |
|   LLVM_DEBUG(dbgs() << "End X86FixupLEAs\n";);
 | |
| 
 | |
|   return true;
 | |
| }
 | |
| 
 | |
| FixupLEAPass::RegUsageState
 | |
| FixupLEAPass::usesRegister(MachineOperand &p, MachineBasicBlock::iterator I) {
 | |
|   RegUsageState RegUsage = RU_NotUsed;
 | |
|   MachineInstr &MI = *I;
 | |
| 
 | |
|   for (unsigned i = 0; i < MI.getNumOperands(); ++i) {
 | |
|     MachineOperand &opnd = MI.getOperand(i);
 | |
|     if (opnd.isReg() && opnd.getReg() == p.getReg()) {
 | |
|       if (opnd.isDef())
 | |
|         return RU_Write;
 | |
|       RegUsage = RU_Read;
 | |
|     }
 | |
|   }
 | |
|   return RegUsage;
 | |
| }
 | |
| 
 | |
| /// getPreviousInstr - Given a reference to an instruction in a basic
 | |
| /// block, return a reference to the previous instruction in the block,
 | |
| /// wrapping around to the last instruction of the block if the block
 | |
| /// branches to itself.
 | |
| static inline bool getPreviousInstr(MachineBasicBlock::iterator &I,
 | |
|                                     MachineBasicBlock &MBB) {
 | |
|   if (I == MBB.begin()) {
 | |
|     if (MBB.isPredecessor(&MBB)) {
 | |
|       I = --MBB.end();
 | |
|       return true;
 | |
|     } else
 | |
|       return false;
 | |
|   }
 | |
|   --I;
 | |
|   return true;
 | |
| }
 | |
| 
 | |
| MachineBasicBlock::iterator
 | |
| FixupLEAPass::searchBackwards(MachineOperand &p, MachineBasicBlock::iterator &I,
 | |
|                               MachineBasicBlock &MBB) {
 | |
|   int InstrDistance = 1;
 | |
|   MachineBasicBlock::iterator CurInst;
 | |
|   static const int INSTR_DISTANCE_THRESHOLD = 5;
 | |
| 
 | |
|   CurInst = I;
 | |
|   bool Found;
 | |
|   Found = getPreviousInstr(CurInst, MBB);
 | |
|   while (Found && I != CurInst) {
 | |
|     if (CurInst->isCall() || CurInst->isInlineAsm())
 | |
|       break;
 | |
|     if (InstrDistance > INSTR_DISTANCE_THRESHOLD)
 | |
|       break; // too far back to make a difference
 | |
|     if (usesRegister(p, CurInst) == RU_Write) {
 | |
|       return CurInst;
 | |
|     }
 | |
|     InstrDistance += TSM.computeInstrLatency(&*CurInst);
 | |
|     Found = getPreviousInstr(CurInst, MBB);
 | |
|   }
 | |
|   return MachineBasicBlock::iterator();
 | |
| }
 | |
| 
 | |
| static inline bool isInefficientLEAReg(unsigned Reg) {
 | |
|   return Reg == X86::EBP || Reg == X86::RBP ||
 | |
|          Reg == X86::R13D || Reg == X86::R13;
 | |
| }
 | |
| 
 | |
| /// Returns true if this LEA uses base an index registers, and the base register
 | |
| /// is known to be inefficient for the subtarget.
 | |
| // TODO: use a variant scheduling class to model the latency profile
 | |
| // of LEA instructions, and implement this logic as a scheduling predicate.
 | |
| static inline bool hasInefficientLEABaseReg(const MachineOperand &Base,
 | |
|                                             const MachineOperand &Index) {
 | |
|   return Base.isReg() && isInefficientLEAReg(Base.getReg()) && Index.isReg() &&
 | |
|          Index.getReg() != X86::NoRegister;
 | |
| }
 | |
| 
 | |
| static inline bool hasLEAOffset(const MachineOperand &Offset) {
 | |
|   return (Offset.isImm() && Offset.getImm() != 0) || Offset.isGlobal();
 | |
| }
 | |
| 
 | |
| static inline unsigned getADDrrFromLEA(unsigned LEAOpcode) {
 | |
|   switch (LEAOpcode) {
 | |
|   default:
 | |
|     llvm_unreachable("Unexpected LEA instruction");
 | |
|   case X86::LEA32r:
 | |
|   case X86::LEA64_32r:
 | |
|     return X86::ADD32rr;
 | |
|   case X86::LEA64r:
 | |
|     return X86::ADD64rr;
 | |
|   }
 | |
| }
 | |
| 
 | |
| static inline unsigned getADDriFromLEA(unsigned LEAOpcode,
 | |
|                                        const MachineOperand &Offset) {
 | |
|   bool IsInt8 = Offset.isImm() && isInt<8>(Offset.getImm());
 | |
|   switch (LEAOpcode) {
 | |
|   default:
 | |
|     llvm_unreachable("Unexpected LEA instruction");
 | |
|   case X86::LEA32r:
 | |
|   case X86::LEA64_32r:
 | |
|     return IsInt8 ? X86::ADD32ri8 : X86::ADD32ri;
 | |
|   case X86::LEA64r:
 | |
|     return IsInt8 ? X86::ADD64ri8 : X86::ADD64ri32;
 | |
|   }
 | |
| }
 | |
| 
 | |
| static inline unsigned getINCDECFromLEA(unsigned LEAOpcode, bool IsINC) {
 | |
|   switch (LEAOpcode) {
 | |
|   default:
 | |
|     llvm_unreachable("Unexpected LEA instruction");
 | |
|   case X86::LEA32r:
 | |
|   case X86::LEA64_32r:
 | |
|     return IsINC ? X86::INC32r : X86::DEC32r;
 | |
|   case X86::LEA64r:
 | |
|     return IsINC ? X86::INC64r : X86::DEC64r;
 | |
|   }
 | |
| }
 | |
| 
 | |
| bool FixupLEAPass::optTwoAddrLEA(MachineBasicBlock::iterator &I,
 | |
|                                  MachineBasicBlock &MBB, bool OptIncDec,
 | |
|                                  bool UseLEAForSP) const {
 | |
|   MachineInstr &MI = *I;
 | |
| 
 | |
|   const MachineOperand &Base =    MI.getOperand(1 + X86::AddrBaseReg);
 | |
|   const MachineOperand &Scale =   MI.getOperand(1 + X86::AddrScaleAmt);
 | |
|   const MachineOperand &Index =   MI.getOperand(1 + X86::AddrIndexReg);
 | |
|   const MachineOperand &Disp =    MI.getOperand(1 + X86::AddrDisp);
 | |
|   const MachineOperand &Segment = MI.getOperand(1 + X86::AddrSegmentReg);
 | |
| 
 | |
|   if (Segment.getReg() != 0 || !Disp.isImm() || Scale.getImm() > 1 ||
 | |
|       MBB.computeRegisterLiveness(TRI, X86::EFLAGS, I) !=
 | |
|           MachineBasicBlock::LQR_Dead)
 | |
|     return false;
 | |
| 
 | |
|   Register DestReg = MI.getOperand(0).getReg();
 | |
|   Register BaseReg = Base.getReg();
 | |
|   Register IndexReg = Index.getReg();
 | |
| 
 | |
|   // Don't change stack adjustment LEAs.
 | |
|   if (UseLEAForSP && (DestReg == X86::ESP || DestReg == X86::RSP))
 | |
|     return false;
 | |
| 
 | |
|   // LEA64_32 has 64-bit operands but 32-bit result.
 | |
|   if (MI.getOpcode() == X86::LEA64_32r) {
 | |
|     if (BaseReg != 0)
 | |
|       BaseReg = TRI->getSubReg(BaseReg, X86::sub_32bit);
 | |
|     if (IndexReg != 0)
 | |
|       IndexReg = TRI->getSubReg(IndexReg, X86::sub_32bit);
 | |
|   }
 | |
| 
 | |
|   MachineInstr *NewMI = nullptr;
 | |
| 
 | |
|   // Look for lea(%reg1, %reg2), %reg1 or lea(%reg2, %reg1), %reg1
 | |
|   // which can be turned into add %reg2, %reg1
 | |
|   if (BaseReg != 0 && IndexReg != 0 && Disp.getImm() == 0 &&
 | |
|       (DestReg == BaseReg || DestReg == IndexReg)) {
 | |
|     unsigned NewOpcode = getADDrrFromLEA(MI.getOpcode());
 | |
|     if (DestReg != BaseReg)
 | |
|       std::swap(BaseReg, IndexReg);
 | |
| 
 | |
|     if (MI.getOpcode() == X86::LEA64_32r) {
 | |
|       // TODO: Do we need the super register implicit use?
 | |
|       NewMI = BuildMI(MBB, I, MI.getDebugLoc(), TII->get(NewOpcode), DestReg)
 | |
|         .addReg(BaseReg).addReg(IndexReg)
 | |
|         .addReg(Base.getReg(), RegState::Implicit)
 | |
|         .addReg(Index.getReg(), RegState::Implicit);
 | |
|     } else {
 | |
|       NewMI = BuildMI(MBB, I, MI.getDebugLoc(), TII->get(NewOpcode), DestReg)
 | |
|         .addReg(BaseReg).addReg(IndexReg);
 | |
|     }
 | |
|   } else if (DestReg == BaseReg && IndexReg == 0) {
 | |
|     // This is an LEA with only a base register and a displacement,
 | |
|     // We can use ADDri or INC/DEC.
 | |
| 
 | |
|     // Does this LEA have one these forms:
 | |
|     // lea  %reg, 1(%reg)
 | |
|     // lea  %reg, -1(%reg)
 | |
|     if (OptIncDec && (Disp.getImm() == 1 || Disp.getImm() == -1)) {
 | |
|       bool IsINC = Disp.getImm() == 1;
 | |
|       unsigned NewOpcode = getINCDECFromLEA(MI.getOpcode(), IsINC);
 | |
| 
 | |
|       if (MI.getOpcode() == X86::LEA64_32r) {
 | |
|         // TODO: Do we need the super register implicit use?
 | |
|         NewMI = BuildMI(MBB, I, MI.getDebugLoc(), TII->get(NewOpcode), DestReg)
 | |
|           .addReg(BaseReg).addReg(Base.getReg(), RegState::Implicit);
 | |
|       } else {
 | |
|         NewMI = BuildMI(MBB, I, MI.getDebugLoc(), TII->get(NewOpcode), DestReg)
 | |
|           .addReg(BaseReg);
 | |
|       }
 | |
|     } else {
 | |
|       unsigned NewOpcode = getADDriFromLEA(MI.getOpcode(), Disp);
 | |
|       if (MI.getOpcode() == X86::LEA64_32r) {
 | |
|         // TODO: Do we need the super register implicit use?
 | |
|         NewMI = BuildMI(MBB, I, MI.getDebugLoc(), TII->get(NewOpcode), DestReg)
 | |
|           .addReg(BaseReg).addImm(Disp.getImm())
 | |
|           .addReg(Base.getReg(), RegState::Implicit);
 | |
|       } else {
 | |
|         NewMI = BuildMI(MBB, I, MI.getDebugLoc(), TII->get(NewOpcode), DestReg)
 | |
|           .addReg(BaseReg).addImm(Disp.getImm());
 | |
|       }
 | |
|     }
 | |
|   } else
 | |
|     return false;
 | |
| 
 | |
|   MBB.getParent()->substituteDebugValuesForInst(*I, *NewMI, 1);
 | |
|   MBB.erase(I);
 | |
|   I = NewMI;
 | |
|   return true;
 | |
| }
 | |
| 
 | |
| void FixupLEAPass::processInstruction(MachineBasicBlock::iterator &I,
 | |
|                                       MachineBasicBlock &MBB) {
 | |
|   // Process a load, store, or LEA instruction.
 | |
|   MachineInstr &MI = *I;
 | |
|   const MCInstrDesc &Desc = MI.getDesc();
 | |
|   int AddrOffset = X86II::getMemoryOperandNo(Desc.TSFlags);
 | |
|   if (AddrOffset >= 0) {
 | |
|     AddrOffset += X86II::getOperandBias(Desc);
 | |
|     MachineOperand &p = MI.getOperand(AddrOffset + X86::AddrBaseReg);
 | |
|     if (p.isReg() && p.getReg() != X86::ESP) {
 | |
|       seekLEAFixup(p, I, MBB);
 | |
|     }
 | |
|     MachineOperand &q = MI.getOperand(AddrOffset + X86::AddrIndexReg);
 | |
|     if (q.isReg() && q.getReg() != X86::ESP) {
 | |
|       seekLEAFixup(q, I, MBB);
 | |
|     }
 | |
|   }
 | |
| }
 | |
| 
 | |
| void FixupLEAPass::seekLEAFixup(MachineOperand &p,
 | |
|                                 MachineBasicBlock::iterator &I,
 | |
|                                 MachineBasicBlock &MBB) {
 | |
|   MachineBasicBlock::iterator MBI = searchBackwards(p, I, MBB);
 | |
|   if (MBI != MachineBasicBlock::iterator()) {
 | |
|     MachineInstr *NewMI = postRAConvertToLEA(MBB, MBI);
 | |
|     if (NewMI) {
 | |
|       ++NumLEAs;
 | |
|       LLVM_DEBUG(dbgs() << "FixLEA: Candidate to replace:"; MBI->dump(););
 | |
|       // now to replace with an equivalent LEA...
 | |
|       LLVM_DEBUG(dbgs() << "FixLEA: Replaced by: "; NewMI->dump(););
 | |
|       MBB.getParent()->substituteDebugValuesForInst(*MBI, *NewMI, 1);
 | |
|       MBB.erase(MBI);
 | |
|       MachineBasicBlock::iterator J =
 | |
|           static_cast<MachineBasicBlock::iterator>(NewMI);
 | |
|       processInstruction(J, MBB);
 | |
|     }
 | |
|   }
 | |
| }
 | |
| 
 | |
| void FixupLEAPass::processInstructionForSlowLEA(MachineBasicBlock::iterator &I,
 | |
|                                                 MachineBasicBlock &MBB) {
 | |
|   MachineInstr &MI = *I;
 | |
|   const unsigned Opcode = MI.getOpcode();
 | |
| 
 | |
|   const MachineOperand &Dst =     MI.getOperand(0);
 | |
|   const MachineOperand &Base =    MI.getOperand(1 + X86::AddrBaseReg);
 | |
|   const MachineOperand &Scale =   MI.getOperand(1 + X86::AddrScaleAmt);
 | |
|   const MachineOperand &Index =   MI.getOperand(1 + X86::AddrIndexReg);
 | |
|   const MachineOperand &Offset =  MI.getOperand(1 + X86::AddrDisp);
 | |
|   const MachineOperand &Segment = MI.getOperand(1 + X86::AddrSegmentReg);
 | |
| 
 | |
|   if (Segment.getReg() != 0 || !Offset.isImm() ||
 | |
|       MBB.computeRegisterLiveness(TRI, X86::EFLAGS, I, 4) !=
 | |
|           MachineBasicBlock::LQR_Dead)
 | |
|     return;
 | |
|   const Register DstR = Dst.getReg();
 | |
|   const Register SrcR1 = Base.getReg();
 | |
|   const Register SrcR2 = Index.getReg();
 | |
|   if ((SrcR1 == 0 || SrcR1 != DstR) && (SrcR2 == 0 || SrcR2 != DstR))
 | |
|     return;
 | |
|   if (Scale.getImm() > 1)
 | |
|     return;
 | |
|   LLVM_DEBUG(dbgs() << "FixLEA: Candidate to replace:"; I->dump(););
 | |
|   LLVM_DEBUG(dbgs() << "FixLEA: Replaced by: ";);
 | |
|   MachineInstr *NewMI = nullptr;
 | |
|   // Make ADD instruction for two registers writing to LEA's destination
 | |
|   if (SrcR1 != 0 && SrcR2 != 0) {
 | |
|     const MCInstrDesc &ADDrr = TII->get(getADDrrFromLEA(Opcode));
 | |
|     const MachineOperand &Src = SrcR1 == DstR ? Index : Base;
 | |
|     NewMI =
 | |
|         BuildMI(MBB, I, MI.getDebugLoc(), ADDrr, DstR).addReg(DstR).add(Src);
 | |
|     LLVM_DEBUG(NewMI->dump(););
 | |
|   }
 | |
|   // Make ADD instruction for immediate
 | |
|   if (Offset.getImm() != 0) {
 | |
|     const MCInstrDesc &ADDri =
 | |
|         TII->get(getADDriFromLEA(Opcode, Offset));
 | |
|     const MachineOperand &SrcR = SrcR1 == DstR ? Base : Index;
 | |
|     NewMI = BuildMI(MBB, I, MI.getDebugLoc(), ADDri, DstR)
 | |
|                 .add(SrcR)
 | |
|                 .addImm(Offset.getImm());
 | |
|     LLVM_DEBUG(NewMI->dump(););
 | |
|   }
 | |
|   if (NewMI) {
 | |
|     MBB.getParent()->substituteDebugValuesForInst(*I, *NewMI, 1);
 | |
|     MBB.erase(I);
 | |
|     I = NewMI;
 | |
|   }
 | |
| }
 | |
| 
 | |
| void FixupLEAPass::processInstrForSlow3OpLEA(MachineBasicBlock::iterator &I,
 | |
|                                              MachineBasicBlock &MBB,
 | |
|                                              bool OptIncDec) {
 | |
|   MachineInstr &MI = *I;
 | |
|   const unsigned LEAOpcode = MI.getOpcode();
 | |
| 
 | |
|   const MachineOperand &Dest =    MI.getOperand(0);
 | |
|   const MachineOperand &Base =    MI.getOperand(1 + X86::AddrBaseReg);
 | |
|   const MachineOperand &Scale =   MI.getOperand(1 + X86::AddrScaleAmt);
 | |
|   const MachineOperand &Index =   MI.getOperand(1 + X86::AddrIndexReg);
 | |
|   const MachineOperand &Offset =  MI.getOperand(1 + X86::AddrDisp);
 | |
|   const MachineOperand &Segment = MI.getOperand(1 + X86::AddrSegmentReg);
 | |
| 
 | |
|   if (!(TII->isThreeOperandsLEA(MI) || hasInefficientLEABaseReg(Base, Index)) ||
 | |
|       MBB.computeRegisterLiveness(TRI, X86::EFLAGS, I, 4) !=
 | |
|           MachineBasicBlock::LQR_Dead ||
 | |
|       Segment.getReg() != X86::NoRegister)
 | |
|     return;
 | |
| 
 | |
|   Register DestReg = Dest.getReg();
 | |
|   Register BaseReg = Base.getReg();
 | |
|   Register IndexReg = Index.getReg();
 | |
| 
 | |
|   if (MI.getOpcode() == X86::LEA64_32r) {
 | |
|     if (BaseReg != 0)
 | |
|       BaseReg = TRI->getSubReg(BaseReg, X86::sub_32bit);
 | |
|     if (IndexReg != 0)
 | |
|       IndexReg = TRI->getSubReg(IndexReg, X86::sub_32bit);
 | |
|   }
 | |
| 
 | |
|   bool IsScale1 = Scale.getImm() == 1;
 | |
|   bool IsInefficientBase = isInefficientLEAReg(BaseReg);
 | |
|   bool IsInefficientIndex = isInefficientLEAReg(IndexReg);
 | |
| 
 | |
|   // Skip these cases since it takes more than 2 instructions
 | |
|   // to replace the LEA instruction.
 | |
|   if (IsInefficientBase && DestReg == BaseReg && !IsScale1)
 | |
|     return;
 | |
| 
 | |
|   LLVM_DEBUG(dbgs() << "FixLEA: Candidate to replace:"; MI.dump(););
 | |
|   LLVM_DEBUG(dbgs() << "FixLEA: Replaced by: ";);
 | |
| 
 | |
|   MachineInstr *NewMI = nullptr;
 | |
| 
 | |
|   // First try to replace LEA with one or two (for the 3-op LEA case)
 | |
|   // add instructions:
 | |
|   // 1.lea (%base,%index,1), %base => add %index,%base
 | |
|   // 2.lea (%base,%index,1), %index => add %base,%index
 | |
|   if (IsScale1 && (DestReg == BaseReg || DestReg == IndexReg)) {
 | |
|     unsigned NewOpc = getADDrrFromLEA(MI.getOpcode());
 | |
|     if (DestReg != BaseReg)
 | |
|       std::swap(BaseReg, IndexReg);
 | |
| 
 | |
|     if (MI.getOpcode() == X86::LEA64_32r) {
 | |
|       // TODO: Do we need the super register implicit use?
 | |
|       NewMI = BuildMI(MBB, I, MI.getDebugLoc(), TII->get(NewOpc), DestReg)
 | |
|                   .addReg(BaseReg)
 | |
|                   .addReg(IndexReg)
 | |
|                   .addReg(Base.getReg(), RegState::Implicit)
 | |
|                   .addReg(Index.getReg(), RegState::Implicit);
 | |
|     } else {
 | |
|       NewMI = BuildMI(MBB, I, MI.getDebugLoc(), TII->get(NewOpc), DestReg)
 | |
|                   .addReg(BaseReg)
 | |
|                   .addReg(IndexReg);
 | |
|     }
 | |
|   } else if (!IsInefficientBase || (!IsInefficientIndex && IsScale1)) {
 | |
|     // If the base is inefficient try switching the index and base operands,
 | |
|     // otherwise just break the 3-Ops LEA inst into 2-Ops LEA + ADD instruction:
 | |
|     // lea offset(%base,%index,scale),%dst =>
 | |
|     // lea (%base,%index,scale); add offset,%dst
 | |
|     NewMI = BuildMI(MBB, MI, MI.getDebugLoc(), TII->get(LEAOpcode))
 | |
|                 .add(Dest)
 | |
|                 .add(IsInefficientBase ? Index : Base)
 | |
|                 .add(Scale)
 | |
|                 .add(IsInefficientBase ? Base : Index)
 | |
|                 .addImm(0)
 | |
|                 .add(Segment);
 | |
|     LLVM_DEBUG(NewMI->dump(););
 | |
|   }
 | |
| 
 | |
|   // If either replacement succeeded above, add the offset if needed, then
 | |
|   // replace the instruction.
 | |
|   if (NewMI) {
 | |
|     // Create ADD instruction for the Offset in case of 3-Ops LEA.
 | |
|     if (hasLEAOffset(Offset)) {
 | |
|       if (OptIncDec && Offset.isImm() &&
 | |
|           (Offset.getImm() == 1 || Offset.getImm() == -1)) {
 | |
|         unsigned NewOpc =
 | |
|             getINCDECFromLEA(MI.getOpcode(), Offset.getImm() == 1);
 | |
|         NewMI = BuildMI(MBB, I, MI.getDebugLoc(), TII->get(NewOpc), DestReg)
 | |
|                     .addReg(DestReg);
 | |
|         LLVM_DEBUG(NewMI->dump(););
 | |
|       } else {
 | |
|         unsigned NewOpc = getADDriFromLEA(MI.getOpcode(), Offset);
 | |
|         NewMI = BuildMI(MBB, I, MI.getDebugLoc(), TII->get(NewOpc), DestReg)
 | |
|                     .addReg(DestReg)
 | |
|                     .add(Offset);
 | |
|         LLVM_DEBUG(NewMI->dump(););
 | |
|       }
 | |
|     }
 | |
| 
 | |
|     MBB.getParent()->substituteDebugValuesForInst(*I, *NewMI, 1);
 | |
|     MBB.erase(I);
 | |
|     I = NewMI;
 | |
|     return;
 | |
|   }
 | |
| 
 | |
|   // Handle the rest of the cases with inefficient base register:
 | |
|   assert(DestReg != BaseReg && "DestReg == BaseReg should be handled already!");
 | |
|   assert(IsInefficientBase && "efficient base should be handled already!");
 | |
| 
 | |
|   // FIXME: Handle LEA64_32r.
 | |
|   if (LEAOpcode == X86::LEA64_32r)
 | |
|     return;
 | |
| 
 | |
|   // lea (%base,%index,1), %dst => mov %base,%dst; add %index,%dst
 | |
|   if (IsScale1 && !hasLEAOffset(Offset)) {
 | |
|     bool BIK = Base.isKill() && BaseReg != IndexReg;
 | |
|     TII->copyPhysReg(MBB, MI, MI.getDebugLoc(), DestReg, BaseReg, BIK);
 | |
|     LLVM_DEBUG(MI.getPrevNode()->dump(););
 | |
| 
 | |
|     unsigned NewOpc = getADDrrFromLEA(MI.getOpcode());
 | |
|     NewMI = BuildMI(MBB, MI, MI.getDebugLoc(), TII->get(NewOpc), DestReg)
 | |
|                 .addReg(DestReg)
 | |
|                 .add(Index);
 | |
|     LLVM_DEBUG(NewMI->dump(););
 | |
| 
 | |
|     MBB.getParent()->substituteDebugValuesForInst(*I, *NewMI, 1);
 | |
|     MBB.erase(I);
 | |
|     I = NewMI;
 | |
|     return;
 | |
|   }
 | |
| 
 | |
|   // lea offset(%base,%index,scale), %dst =>
 | |
|   // lea offset( ,%index,scale), %dst; add %base,%dst
 | |
|   NewMI = BuildMI(MBB, MI, MI.getDebugLoc(), TII->get(LEAOpcode))
 | |
|               .add(Dest)
 | |
|               .addReg(0)
 | |
|               .add(Scale)
 | |
|               .add(Index)
 | |
|               .add(Offset)
 | |
|               .add(Segment);
 | |
|   LLVM_DEBUG(NewMI->dump(););
 | |
| 
 | |
|   unsigned NewOpc = getADDrrFromLEA(MI.getOpcode());
 | |
|   NewMI = BuildMI(MBB, MI, MI.getDebugLoc(), TII->get(NewOpc), DestReg)
 | |
|               .addReg(DestReg)
 | |
|               .add(Base);
 | |
|   LLVM_DEBUG(NewMI->dump(););
 | |
| 
 | |
|   MBB.getParent()->substituteDebugValuesForInst(*I, *NewMI, 1);
 | |
|   MBB.erase(I);
 | |
|   I = NewMI;
 | |
| }
 |