393 lines
		
	
	
		
			15 KiB
		
	
	
	
		
			C++
		
	
	
	
			
		
		
	
	
			393 lines
		
	
	
		
			15 KiB
		
	
	
	
		
			C++
		
	
	
	
| //===-------------- PPCMIPeephole.cpp - MI Peephole Cleanups -------------===//
 | |
| //
 | |
| //                     The LLVM Compiler Infrastructure
 | |
| //
 | |
| // This file is distributed under the University of Illinois Open Source
 | |
| // License. See LICENSE.TXT for details.
 | |
| //
 | |
| //===---------------------------------------------------------------------===//
 | |
| //
 | |
| // This pass performs peephole optimizations to clean up ugly code
 | |
| // sequences at the MachineInstruction layer.  It runs at the end of
 | |
| // the SSA phases, following VSX swap removal.  A pass of dead code
 | |
| // elimination follows this one for quick clean-up of any dead
 | |
| // instructions introduced here.  Although we could do this as callbacks
 | |
| // from the generic peephole pass, this would have a couple of bad
 | |
| // effects:  it might remove optimization opportunities for VSX swap
 | |
| // removal, and it would miss cleanups made possible following VSX
 | |
| // swap removal.
 | |
| //
 | |
| //===---------------------------------------------------------------------===//
 | |
| 
 | |
| #include "PPCInstrInfo.h"
 | |
| #include "PPC.h"
 | |
| #include "PPCInstrBuilder.h"
 | |
| #include "PPCTargetMachine.h"
 | |
| #include "llvm/CodeGen/MachineFunctionPass.h"
 | |
| #include "llvm/CodeGen/MachineInstrBuilder.h"
 | |
| #include "llvm/CodeGen/MachineRegisterInfo.h"
 | |
| #include "llvm/Support/Debug.h"
 | |
| 
 | |
| using namespace llvm;
 | |
| 
 | |
| #define DEBUG_TYPE "ppc-mi-peepholes"
 | |
| 
 | |
| namespace llvm {
 | |
|   void initializePPCMIPeepholePass(PassRegistry&);
 | |
| }
 | |
| 
 | |
| namespace {
 | |
| 
 | |
| struct PPCMIPeephole : public MachineFunctionPass {
 | |
| 
 | |
|   static char ID;
 | |
|   const PPCInstrInfo *TII;
 | |
|   MachineFunction *MF;
 | |
|   MachineRegisterInfo *MRI;
 | |
| 
 | |
|   PPCMIPeephole() : MachineFunctionPass(ID) {
 | |
|     initializePPCMIPeepholePass(*PassRegistry::getPassRegistry());
 | |
|   }
 | |
| 
 | |
| private:
 | |
|   // Initialize class variables.
 | |
|   void initialize(MachineFunction &MFParm);
 | |
| 
 | |
|   // Perform peepholes.
 | |
|   bool simplifyCode(void);
 | |
| 
 | |
|   // Find the "true" register represented by SrcReg (following chains
 | |
|   // of copies and subreg_to_reg operations).
 | |
|   unsigned lookThruCopyLike(unsigned SrcReg);
 | |
| 
 | |
| public:
 | |
|   // Main entry point for this pass.
 | |
|   bool runOnMachineFunction(MachineFunction &MF) override {
 | |
|     if (skipFunction(*MF.getFunction()))
 | |
|       return false;
 | |
|     initialize(MF);
 | |
|     return simplifyCode();
 | |
|   }
 | |
| };
 | |
| 
 | |
| // Initialize class variables.
 | |
| void PPCMIPeephole::initialize(MachineFunction &MFParm) {
 | |
|   MF = &MFParm;
 | |
|   MRI = &MF->getRegInfo();
 | |
|   TII = MF->getSubtarget<PPCSubtarget>().getInstrInfo();
 | |
|   DEBUG(dbgs() << "*** PowerPC MI peephole pass ***\n\n");
 | |
|   DEBUG(MF->dump());
 | |
| }
 | |
| 
 | |
| // Perform peephole optimizations.
 | |
| bool PPCMIPeephole::simplifyCode(void) {
 | |
|   bool Simplified = false;
 | |
|   MachineInstr* ToErase = nullptr;
 | |
| 
 | |
|   for (MachineBasicBlock &MBB : *MF) {
 | |
|     for (MachineInstr &MI : MBB) {
 | |
| 
 | |
|       // If the previous instruction was marked for elimination,
 | |
|       // remove it now.
 | |
|       if (ToErase) {
 | |
|         ToErase->eraseFromParent();
 | |
|         ToErase = nullptr;
 | |
|       }
 | |
| 
 | |
|       // Ignore debug instructions.
 | |
|       if (MI.isDebugValue())
 | |
|         continue;
 | |
| 
 | |
|       // Per-opcode peepholes.
 | |
|       switch (MI.getOpcode()) {
 | |
| 
 | |
|       default:
 | |
|         break;
 | |
| 
 | |
|       case PPC::XXPERMDI: {
 | |
|         // Perform simplifications of 2x64 vector swaps and splats.
 | |
|         // A swap is identified by an immediate value of 2, and a splat
 | |
|         // is identified by an immediate value of 0 or 3.
 | |
|         int Immed = MI.getOperand(3).getImm();
 | |
| 
 | |
|         if (Immed != 1) {
 | |
| 
 | |
|           // For each of these simplifications, we need the two source
 | |
|           // regs to match.  Unfortunately, MachineCSE ignores COPY and
 | |
|           // SUBREG_TO_REG, so for example we can see
 | |
|           //   XXPERMDI t, SUBREG_TO_REG(s), SUBREG_TO_REG(s), immed.
 | |
|           // We have to look through chains of COPY and SUBREG_TO_REG
 | |
|           // to find the real source values for comparison.
 | |
|           unsigned TrueReg1 = lookThruCopyLike(MI.getOperand(1).getReg());
 | |
|           unsigned TrueReg2 = lookThruCopyLike(MI.getOperand(2).getReg());
 | |
| 
 | |
|           if (TrueReg1 == TrueReg2
 | |
|               && TargetRegisterInfo::isVirtualRegister(TrueReg1)) {
 | |
|             MachineInstr *DefMI = MRI->getVRegDef(TrueReg1);
 | |
|             unsigned DefOpc = DefMI ? DefMI->getOpcode() : 0;
 | |
| 
 | |
|             // If this is a splat fed by a splatting load, the splat is
 | |
|             // redundant. Replace with a copy. This doesn't happen directly due
 | |
|             // to code in PPCDAGToDAGISel.cpp, but it can happen when converting
 | |
|             // a load of a double to a vector of 64-bit integers.
 | |
|             auto isConversionOfLoadAndSplat = [=]() -> bool {
 | |
|               if (DefOpc != PPC::XVCVDPSXDS && DefOpc != PPC::XVCVDPUXDS)
 | |
|                 return false;
 | |
|               unsigned DefReg = lookThruCopyLike(DefMI->getOperand(1).getReg());
 | |
|               if (TargetRegisterInfo::isVirtualRegister(DefReg)) {
 | |
|                 MachineInstr *LoadMI = MRI->getVRegDef(DefReg);
 | |
|                 if (LoadMI && LoadMI->getOpcode() == PPC::LXVDSX)
 | |
|                   return true;
 | |
|               }
 | |
|               return false;
 | |
|             };
 | |
|             if (DefMI && (Immed == 0 || Immed == 3)) {
 | |
|               if (DefOpc == PPC::LXVDSX || isConversionOfLoadAndSplat()) {
 | |
|                 DEBUG(dbgs()
 | |
|                       << "Optimizing load-and-splat/splat "
 | |
|                       "to load-and-splat/copy: ");
 | |
|                 DEBUG(MI.dump());
 | |
|                 BuildMI(MBB, &MI, MI.getDebugLoc(), TII->get(PPC::COPY),
 | |
|                         MI.getOperand(0).getReg())
 | |
|                     .add(MI.getOperand(1));
 | |
|                 ToErase = &MI;
 | |
|                 Simplified = true;
 | |
|               }
 | |
|             }
 | |
| 
 | |
|             // If this is a splat or a swap fed by another splat, we
 | |
|             // can replace it with a copy.
 | |
|             if (DefOpc == PPC::XXPERMDI) {
 | |
|               unsigned FeedImmed = DefMI->getOperand(3).getImm();
 | |
|               unsigned FeedReg1
 | |
|                 = lookThruCopyLike(DefMI->getOperand(1).getReg());
 | |
|               unsigned FeedReg2
 | |
|                 = lookThruCopyLike(DefMI->getOperand(2).getReg());
 | |
| 
 | |
|               if ((FeedImmed == 0 || FeedImmed == 3) && FeedReg1 == FeedReg2) {
 | |
|                 DEBUG(dbgs()
 | |
|                       << "Optimizing splat/swap or splat/splat "
 | |
|                       "to splat/copy: ");
 | |
|                 DEBUG(MI.dump());
 | |
|                 BuildMI(MBB, &MI, MI.getDebugLoc(), TII->get(PPC::COPY),
 | |
|                         MI.getOperand(0).getReg())
 | |
|                     .add(MI.getOperand(1));
 | |
|                 ToErase = &MI;
 | |
|                 Simplified = true;
 | |
|               }
 | |
| 
 | |
|               // If this is a splat fed by a swap, we can simplify modify
 | |
|               // the splat to splat the other value from the swap's input
 | |
|               // parameter.
 | |
|               else if ((Immed == 0 || Immed == 3)
 | |
|                        && FeedImmed == 2 && FeedReg1 == FeedReg2) {
 | |
|                 DEBUG(dbgs() << "Optimizing swap/splat => splat: ");
 | |
|                 DEBUG(MI.dump());
 | |
|                 MI.getOperand(1).setReg(DefMI->getOperand(1).getReg());
 | |
|                 MI.getOperand(2).setReg(DefMI->getOperand(2).getReg());
 | |
|                 MI.getOperand(3).setImm(3 - Immed);
 | |
|                 Simplified = true;
 | |
|               }
 | |
| 
 | |
|               // If this is a swap fed by a swap, we can replace it
 | |
|               // with a copy from the first swap's input.
 | |
|               else if (Immed == 2 && FeedImmed == 2 && FeedReg1 == FeedReg2) {
 | |
|                 DEBUG(dbgs() << "Optimizing swap/swap => copy: ");
 | |
|                 DEBUG(MI.dump());
 | |
|                 BuildMI(MBB, &MI, MI.getDebugLoc(), TII->get(PPC::COPY),
 | |
|                         MI.getOperand(0).getReg())
 | |
|                     .add(DefMI->getOperand(1));
 | |
|                 ToErase = &MI;
 | |
|                 Simplified = true;
 | |
|               }
 | |
|             } else if ((Immed == 0 || Immed == 3) && DefOpc == PPC::XXPERMDIs &&
 | |
|                        (DefMI->getOperand(2).getImm() == 0 ||
 | |
|                         DefMI->getOperand(2).getImm() == 3)) {
 | |
|               // Splat fed by another splat - switch the output of the first
 | |
|               // and remove the second.
 | |
|               DefMI->getOperand(0).setReg(MI.getOperand(0).getReg());
 | |
|               ToErase = &MI;
 | |
|               Simplified = true;
 | |
|               DEBUG(dbgs() << "Removing redundant splat: ");
 | |
|               DEBUG(MI.dump());
 | |
|             }
 | |
|           }
 | |
|         }
 | |
|         break;
 | |
|       }
 | |
|       case PPC::VSPLTB:
 | |
|       case PPC::VSPLTH:
 | |
|       case PPC::XXSPLTW: {
 | |
|         unsigned MyOpcode = MI.getOpcode();
 | |
|         unsigned OpNo = MyOpcode == PPC::XXSPLTW ? 1 : 2;
 | |
|         unsigned TrueReg = lookThruCopyLike(MI.getOperand(OpNo).getReg());
 | |
|         if (!TargetRegisterInfo::isVirtualRegister(TrueReg))
 | |
|           break;
 | |
|         MachineInstr *DefMI = MRI->getVRegDef(TrueReg);
 | |
|         if (!DefMI)
 | |
|           break;
 | |
|         unsigned DefOpcode = DefMI->getOpcode();
 | |
|         auto isConvertOfSplat = [=]() -> bool {
 | |
|           if (DefOpcode != PPC::XVCVSPSXWS && DefOpcode != PPC::XVCVSPUXWS)
 | |
|             return false;
 | |
|           unsigned ConvReg = DefMI->getOperand(1).getReg();
 | |
|           if (!TargetRegisterInfo::isVirtualRegister(ConvReg))
 | |
|             return false;
 | |
|           MachineInstr *Splt = MRI->getVRegDef(ConvReg);
 | |
|           return Splt && (Splt->getOpcode() == PPC::LXVWSX ||
 | |
|             Splt->getOpcode() == PPC::XXSPLTW);
 | |
|         };
 | |
|         bool AlreadySplat = (MyOpcode == DefOpcode) ||
 | |
|           (MyOpcode == PPC::VSPLTB && DefOpcode == PPC::VSPLTBs) ||
 | |
|           (MyOpcode == PPC::VSPLTH && DefOpcode == PPC::VSPLTHs) ||
 | |
|           (MyOpcode == PPC::XXSPLTW && DefOpcode == PPC::XXSPLTWs) ||
 | |
|           (MyOpcode == PPC::XXSPLTW && DefOpcode == PPC::LXVWSX) ||
 | |
|           (MyOpcode == PPC::XXSPLTW && DefOpcode == PPC::MTVSRWS)||
 | |
|           (MyOpcode == PPC::XXSPLTW && isConvertOfSplat());
 | |
|         // If the instruction[s] that feed this splat have already splat
 | |
|         // the value, this splat is redundant.
 | |
|         if (AlreadySplat) {
 | |
|           DEBUG(dbgs() << "Changing redundant splat to a copy: ");
 | |
|           DEBUG(MI.dump());
 | |
|           BuildMI(MBB, &MI, MI.getDebugLoc(), TII->get(PPC::COPY),
 | |
|                   MI.getOperand(0).getReg())
 | |
|               .add(MI.getOperand(OpNo));
 | |
|           ToErase = &MI;
 | |
|           Simplified = true;
 | |
|         }
 | |
|         // Splat fed by a shift. Usually when we align value to splat into
 | |
|         // vector element zero.
 | |
|         if (DefOpcode == PPC::XXSLDWI) {
 | |
|           unsigned ShiftRes = DefMI->getOperand(0).getReg();
 | |
|           unsigned ShiftOp1 = DefMI->getOperand(1).getReg();
 | |
|           unsigned ShiftOp2 = DefMI->getOperand(2).getReg();
 | |
|           unsigned ShiftImm = DefMI->getOperand(3).getImm();
 | |
|           unsigned SplatImm = MI.getOperand(2).getImm();
 | |
|           if (ShiftOp1 == ShiftOp2) {
 | |
|             unsigned NewElem = (SplatImm + ShiftImm) & 0x3;
 | |
|             if (MRI->hasOneNonDBGUse(ShiftRes)) {
 | |
|               DEBUG(dbgs() << "Removing redundant shift: ");
 | |
|               DEBUG(DefMI->dump());
 | |
|               ToErase = DefMI;
 | |
|             }
 | |
|             Simplified = true;
 | |
|             DEBUG(dbgs() << "Changing splat immediate from " << SplatImm <<
 | |
|                   " to " << NewElem << " in instruction: ");
 | |
|             DEBUG(MI.dump());
 | |
|             MI.getOperand(1).setReg(ShiftOp1);
 | |
|             MI.getOperand(2).setImm(NewElem);
 | |
|           }
 | |
|         }
 | |
|         break;
 | |
|       }
 | |
|       case PPC::XVCVDPSP: {
 | |
|         // If this is a DP->SP conversion fed by an FRSP, the FRSP is redundant.
 | |
|         unsigned TrueReg = lookThruCopyLike(MI.getOperand(1).getReg());
 | |
|         if (!TargetRegisterInfo::isVirtualRegister(TrueReg))
 | |
|           break;
 | |
|         MachineInstr *DefMI = MRI->getVRegDef(TrueReg);
 | |
| 
 | |
|         // This can occur when building a vector of single precision or integer
 | |
|         // values.
 | |
|         if (DefMI && DefMI->getOpcode() == PPC::XXPERMDI) {
 | |
|           unsigned DefsReg1 = lookThruCopyLike(DefMI->getOperand(1).getReg());
 | |
|           unsigned DefsReg2 = lookThruCopyLike(DefMI->getOperand(2).getReg());
 | |
|           if (!TargetRegisterInfo::isVirtualRegister(DefsReg1) ||
 | |
|               !TargetRegisterInfo::isVirtualRegister(DefsReg2))
 | |
|             break;
 | |
|           MachineInstr *P1 = MRI->getVRegDef(DefsReg1);
 | |
|           MachineInstr *P2 = MRI->getVRegDef(DefsReg2);
 | |
| 
 | |
|           if (!P1 || !P2)
 | |
|             break;
 | |
| 
 | |
|           // Remove the passed FRSP instruction if it only feeds this MI and
 | |
|           // set any uses of that FRSP (in this MI) to the source of the FRSP.
 | |
|           auto removeFRSPIfPossible = [&](MachineInstr *RoundInstr) {
 | |
|             if (RoundInstr->getOpcode() == PPC::FRSP &&
 | |
|                 MRI->hasOneNonDBGUse(RoundInstr->getOperand(0).getReg())) {
 | |
|               Simplified = true;
 | |
|               unsigned ConvReg1 = RoundInstr->getOperand(1).getReg();
 | |
|               unsigned FRSPDefines = RoundInstr->getOperand(0).getReg();
 | |
|               MachineInstr &Use = *(MRI->use_instr_begin(FRSPDefines));
 | |
|               for (int i = 0, e = Use.getNumOperands(); i < e; ++i)
 | |
|                 if (Use.getOperand(i).isReg() &&
 | |
|                     Use.getOperand(i).getReg() == FRSPDefines)
 | |
|                   Use.getOperand(i).setReg(ConvReg1);
 | |
|               DEBUG(dbgs() << "Removing redundant FRSP:\n");
 | |
|               DEBUG(RoundInstr->dump());
 | |
|               DEBUG(dbgs() << "As it feeds instruction:\n");
 | |
|               DEBUG(MI.dump());
 | |
|               DEBUG(dbgs() << "Through instruction:\n");
 | |
|               DEBUG(DefMI->dump());
 | |
|               RoundInstr->eraseFromParent();
 | |
|             }
 | |
|           };
 | |
| 
 | |
|           // If the input to XVCVDPSP is a vector that was built (even
 | |
|           // partially) out of FRSP's, the FRSP(s) can safely be removed
 | |
|           // since this instruction performs the same operation.
 | |
|           if (P1 != P2) {
 | |
|             removeFRSPIfPossible(P1);
 | |
|             removeFRSPIfPossible(P2);
 | |
|             break;
 | |
|           }
 | |
|           removeFRSPIfPossible(P1);
 | |
|         }
 | |
|         break;
 | |
|       }
 | |
|       }
 | |
|     }
 | |
|     // If the last instruction was marked for elimination,
 | |
|     // remove it now.
 | |
|     if (ToErase) {
 | |
|       ToErase->eraseFromParent();
 | |
|       ToErase = nullptr;
 | |
|     }
 | |
|   }
 | |
| 
 | |
|   return Simplified;
 | |
| }
 | |
| 
 | |
| // This is used to find the "true" source register for an
 | |
| // XXPERMDI instruction, since MachineCSE does not handle the
 | |
| // "copy-like" operations (Copy and SubregToReg).  Returns
 | |
| // the original SrcReg unless it is the target of a copy-like
 | |
| // operation, in which case we chain backwards through all
 | |
| // such operations to the ultimate source register.  If a
 | |
| // physical register is encountered, we stop the search.
 | |
| unsigned PPCMIPeephole::lookThruCopyLike(unsigned SrcReg) {
 | |
| 
 | |
|   while (true) {
 | |
| 
 | |
|     MachineInstr *MI = MRI->getVRegDef(SrcReg);
 | |
|     if (!MI->isCopyLike())
 | |
|       return SrcReg;
 | |
| 
 | |
|     unsigned CopySrcReg;
 | |
|     if (MI->isCopy())
 | |
|       CopySrcReg = MI->getOperand(1).getReg();
 | |
|     else {
 | |
|       assert(MI->isSubregToReg() && "bad opcode for lookThruCopyLike");
 | |
|       CopySrcReg = MI->getOperand(2).getReg();
 | |
|     }
 | |
| 
 | |
|     if (!TargetRegisterInfo::isVirtualRegister(CopySrcReg))
 | |
|       return CopySrcReg;
 | |
| 
 | |
|     SrcReg = CopySrcReg;
 | |
|   }
 | |
| }
 | |
| 
 | |
| } // end default namespace
 | |
| 
 | |
| INITIALIZE_PASS_BEGIN(PPCMIPeephole, DEBUG_TYPE,
 | |
|                       "PowerPC MI Peephole Optimization", false, false)
 | |
| INITIALIZE_PASS_END(PPCMIPeephole, DEBUG_TYPE,
 | |
|                     "PowerPC MI Peephole Optimization", false, false)
 | |
| 
 | |
| char PPCMIPeephole::ID = 0;
 | |
| FunctionPass*
 | |
| llvm::createPPCMIPeepholePass() { return new PPCMIPeephole(); }
 | |
| 
 |