383 lines
		
	
	
		
			13 KiB
		
	
	
	
		
			C++
		
	
	
	
			
		
		
	
	
			383 lines
		
	
	
		
			13 KiB
		
	
	
	
		
			C++
		
	
	
	
| //===-- SystemZTDC.cpp - Utilize Test Data Class instruction --------------===//
 | |
| //
 | |
| //                     The LLVM Compiler Infrastructure
 | |
| //
 | |
| // This file is distributed under the University of Illinois Open Source
 | |
| // License. See LICENSE.TXT for details.
 | |
| //
 | |
| //===----------------------------------------------------------------------===//
 | |
| //
 | |
| // This pass looks for instructions that can be replaced by a Test Data Class
 | |
| // instruction, and replaces them when profitable.
 | |
| //
 | |
| // Roughly, the following rules are recognized:
 | |
| //
 | |
| // 1: fcmp pred X, 0 -> tdc X, mask
 | |
| // 2: fcmp pred X, +-inf -> tdc X, mask
 | |
| // 3: fcmp pred X, +-minnorm -> tdc X, mask
 | |
| // 4: tdc (fabs X), mask -> tdc X, newmask
 | |
| // 5: icmp slt (bitcast float X to int), 0 -> tdc X, mask [ie. signbit]
 | |
| // 6: icmp sgt (bitcast float X to int), -1 -> tdc X, mask
 | |
| // 7: icmp ne/eq (call @llvm.s390.tdc.*(X, mask)) -> tdc X, mask/~mask
 | |
| // 8: and i1 (tdc X, M1), (tdc X, M2) -> tdc X, (M1 & M2)
 | |
| // 9: or i1 (tdc X, M1), (tdc X, M2) -> tdc X, (M1 | M2)
 | |
| // 10: xor i1 (tdc X, M1), (tdc X, M2) -> tdc X, (M1 ^ M2)
 | |
| //
 | |
| // The pass works in 4 steps:
 | |
| //
 | |
| // 1. All fcmp and icmp instructions in a function are checked for a match
 | |
| //    with rules 1-3 and 5-7.  Their TDC equivalents are stored in
 | |
| //    the ConvertedInsts mapping.  If the operand of a fcmp instruction is
 | |
| //    a fabs, it's also folded according to rule 4.
 | |
| // 2. All and/or/xor i1 instructions whose both operands have been already
 | |
| //    mapped are mapped according to rules 8-10.  LogicOpsWorklist is used
 | |
| //    as a queue of instructions to check.
 | |
| // 3. All mapped instructions that are considered worthy of conversion (ie.
 | |
| //    replacing them will actually simplify the final code) are replaced
 | |
| //    with a call to the s390.tdc intrinsic.
 | |
| // 4. All intermediate results of replaced instructions are removed if unused.
 | |
| //
 | |
| // Instructions that match rules 1-3 are considered unworthy of conversion
 | |
| // on their own (since a comparison instruction is superior), but are mapped
 | |
| // in the hopes of folding the result using rules 4 and 8-10 (likely removing
 | |
| // the original comparison in the process).
 | |
| //
 | |
| //===----------------------------------------------------------------------===//
 | |
| 
 | |
| #include "SystemZ.h"
 | |
| #include "llvm/ADT/MapVector.h"
 | |
| #include "llvm/IR/Constants.h"
 | |
| #include "llvm/IR/Instructions.h"
 | |
| #include "llvm/IR/InstIterator.h"
 | |
| #include "llvm/IR/IntrinsicInst.h"
 | |
| #include "llvm/IR/IRBuilder.h"
 | |
| #include "llvm/IR/LegacyPassManager.h"
 | |
| #include "llvm/IR/Module.h"
 | |
| #include <deque>
 | |
| #include <set>
 | |
| 
 | |
| using namespace llvm;
 | |
| 
 | |
| namespace llvm {
 | |
|   void initializeSystemZTDCPassPass(PassRegistry&);
 | |
| }
 | |
| 
 | |
| namespace {
 | |
| 
 | |
| class SystemZTDCPass : public FunctionPass {
 | |
| public:
 | |
|   static char ID;
 | |
|   SystemZTDCPass() : FunctionPass(ID) {
 | |
|     initializeSystemZTDCPassPass(*PassRegistry::getPassRegistry());
 | |
|   }
 | |
| 
 | |
|   bool runOnFunction(Function &F) override;
 | |
| private:
 | |
|   // Maps seen instructions that can be mapped to a TDC, values are
 | |
|   // (TDC operand, TDC mask, worthy flag) triples.
 | |
|   MapVector<Instruction *, std::tuple<Value *, int, bool>> ConvertedInsts;
 | |
|   // The queue of and/or/xor i1 instructions to be potentially folded.
 | |
|   std::vector<BinaryOperator *> LogicOpsWorklist;
 | |
|   // Instructions matched while folding, to be removed at the end if unused.
 | |
|   std::set<Instruction *> PossibleJunk;
 | |
| 
 | |
|   // Tries to convert a fcmp instruction.
 | |
|   void convertFCmp(CmpInst &I);
 | |
| 
 | |
|   // Tries to convert an icmp instruction.
 | |
|   void convertICmp(CmpInst &I);
 | |
| 
 | |
|   // Tries to convert an i1 and/or/xor instruction, whose both operands
 | |
|   // have been already converted.
 | |
|   void convertLogicOp(BinaryOperator &I);
 | |
| 
 | |
|   // Marks an instruction as converted - adds it to ConvertedInsts and adds
 | |
|   // any and/or/xor i1 users to the queue.
 | |
|   void converted(Instruction *I, Value *V, int Mask, bool Worthy) {
 | |
|     ConvertedInsts[I] = std::make_tuple(V, Mask, Worthy);
 | |
|     auto &M = *I->getFunction()->getParent();
 | |
|     auto &Ctx = M.getContext();
 | |
|     for (auto *U : I->users()) {
 | |
|       auto *LI = dyn_cast<BinaryOperator>(U);
 | |
|       if (LI && LI->getType() == Type::getInt1Ty(Ctx) &&
 | |
|           (LI->getOpcode() == Instruction::And ||
 | |
|            LI->getOpcode() == Instruction::Or ||
 | |
|            LI->getOpcode() == Instruction::Xor)) {
 | |
|         LogicOpsWorklist.push_back(LI);
 | |
|       }
 | |
|     }
 | |
|   }
 | |
| };
 | |
| 
 | |
| } // end anonymous namespace
 | |
| 
 | |
| char SystemZTDCPass::ID = 0;
 | |
| INITIALIZE_PASS(SystemZTDCPass, "systemz-tdc",
 | |
|                 "SystemZ Test Data Class optimization", false, false)
 | |
| 
 | |
| FunctionPass *llvm::createSystemZTDCPass() {
 | |
|   return new SystemZTDCPass();
 | |
| }
 | |
| 
 | |
| void SystemZTDCPass::convertFCmp(CmpInst &I) {
 | |
|   Value *Op0 = I.getOperand(0);
 | |
|   auto *Const = dyn_cast<ConstantFP>(I.getOperand(1));
 | |
|   auto Pred = I.getPredicate();
 | |
|   // Only comparisons with consts are interesting.
 | |
|   if (!Const)
 | |
|     return;
 | |
|   // Compute the smallest normal number (and its negation).
 | |
|   auto &Sem = Op0->getType()->getFltSemantics();
 | |
|   APFloat Smallest = APFloat::getSmallestNormalized(Sem);
 | |
|   APFloat NegSmallest = Smallest;
 | |
|   NegSmallest.changeSign();
 | |
|   // Check if Const is one of our recognized consts.
 | |
|   int WhichConst;
 | |
|   if (Const->isZero()) {
 | |
|     // All comparisons with 0 can be converted.
 | |
|     WhichConst = 0;
 | |
|   } else if (Const->isInfinity()) {
 | |
|     // Likewise for infinities.
 | |
|     WhichConst = Const->isNegative() ? 2 : 1;
 | |
|   } else if (Const->isExactlyValue(Smallest)) {
 | |
|     // For Smallest, we cannot do EQ separately from GT.
 | |
|     if ((Pred & CmpInst::FCMP_OGE) != CmpInst::FCMP_OGE &&
 | |
|         (Pred & CmpInst::FCMP_OGE) != 0)
 | |
|       return;
 | |
|     WhichConst = 3;
 | |
|   } else if (Const->isExactlyValue(NegSmallest)) {
 | |
|     // Likewise for NegSmallest, we cannot do EQ separately from LT.
 | |
|     if ((Pred & CmpInst::FCMP_OLE) != CmpInst::FCMP_OLE &&
 | |
|         (Pred & CmpInst::FCMP_OLE) != 0)
 | |
|       return;
 | |
|     WhichConst = 4;
 | |
|   } else {
 | |
|     // Not one of our special constants.
 | |
|     return;
 | |
|   }
 | |
|   // Partial masks to use for EQ, GT, LT, UN comparisons, respectively.
 | |
|   static const int Masks[][4] = {
 | |
|     { // 0
 | |
|       SystemZ::TDCMASK_ZERO,              // eq
 | |
|       SystemZ::TDCMASK_POSITIVE,          // gt
 | |
|       SystemZ::TDCMASK_NEGATIVE,          // lt
 | |
|       SystemZ::TDCMASK_NAN,               // un
 | |
|     },
 | |
|     { // inf
 | |
|       SystemZ::TDCMASK_INFINITY_PLUS,     // eq
 | |
|       0,                                  // gt
 | |
|       (SystemZ::TDCMASK_ZERO |
 | |
|        SystemZ::TDCMASK_NEGATIVE |
 | |
|        SystemZ::TDCMASK_NORMAL_PLUS |
 | |
|        SystemZ::TDCMASK_SUBNORMAL_PLUS),  // lt
 | |
|       SystemZ::TDCMASK_NAN,               // un
 | |
|     },
 | |
|     { // -inf
 | |
|       SystemZ::TDCMASK_INFINITY_MINUS,    // eq
 | |
|       (SystemZ::TDCMASK_ZERO |
 | |
|        SystemZ::TDCMASK_POSITIVE |
 | |
|        SystemZ::TDCMASK_NORMAL_MINUS |
 | |
|        SystemZ::TDCMASK_SUBNORMAL_MINUS), // gt
 | |
|       0,                                  // lt
 | |
|       SystemZ::TDCMASK_NAN,               // un
 | |
|     },
 | |
|     { // minnorm
 | |
|       0,                                  // eq (unsupported)
 | |
|       (SystemZ::TDCMASK_NORMAL_PLUS |
 | |
|        SystemZ::TDCMASK_INFINITY_PLUS),   // gt (actually ge)
 | |
|       (SystemZ::TDCMASK_ZERO |
 | |
|        SystemZ::TDCMASK_NEGATIVE |
 | |
|        SystemZ::TDCMASK_SUBNORMAL_PLUS),  // lt
 | |
|       SystemZ::TDCMASK_NAN,               // un
 | |
|     },
 | |
|     { // -minnorm
 | |
|       0,                                  // eq (unsupported)
 | |
|       (SystemZ::TDCMASK_ZERO |
 | |
|        SystemZ::TDCMASK_POSITIVE |
 | |
|        SystemZ::TDCMASK_SUBNORMAL_MINUS), // gt
 | |
|       (SystemZ::TDCMASK_NORMAL_MINUS |
 | |
|        SystemZ::TDCMASK_INFINITY_MINUS),  // lt (actually le)
 | |
|       SystemZ::TDCMASK_NAN,               // un
 | |
|     }
 | |
|   };
 | |
|   // Construct the mask as a combination of the partial masks.
 | |
|   int Mask = 0;
 | |
|   if (Pred & CmpInst::FCMP_OEQ)
 | |
|     Mask |= Masks[WhichConst][0];
 | |
|   if (Pred & CmpInst::FCMP_OGT)
 | |
|     Mask |= Masks[WhichConst][1];
 | |
|   if (Pred & CmpInst::FCMP_OLT)
 | |
|     Mask |= Masks[WhichConst][2];
 | |
|   if (Pred & CmpInst::FCMP_UNO)
 | |
|     Mask |= Masks[WhichConst][3];
 | |
|   // A lone fcmp is unworthy of tdc conversion on its own, but may become
 | |
|   // worthy if combined with fabs.
 | |
|   bool Worthy = false;
 | |
|   if (CallInst *CI = dyn_cast<CallInst>(Op0)) {
 | |
|     Function *F = CI->getCalledFunction();
 | |
|     if (F && F->getIntrinsicID() == Intrinsic::fabs) {
 | |
|       // Fold with fabs - adjust the mask appropriately.
 | |
|       Mask &= SystemZ::TDCMASK_PLUS;
 | |
|       Mask |= Mask >> 1;
 | |
|       Op0 = CI->getArgOperand(0);
 | |
|       // A combination of fcmp with fabs is a win, unless the constant
 | |
|       // involved is 0 (which is handled by later passes).
 | |
|       Worthy = WhichConst != 0;
 | |
|       PossibleJunk.insert(CI);
 | |
|     }
 | |
|   }
 | |
|   converted(&I, Op0, Mask, Worthy);
 | |
| }
 | |
| 
 | |
| void SystemZTDCPass::convertICmp(CmpInst &I) {
 | |
|   Value *Op0 = I.getOperand(0);
 | |
|   auto *Const = dyn_cast<ConstantInt>(I.getOperand(1));
 | |
|   auto Pred = I.getPredicate();
 | |
|   // All our icmp rules involve comparisons with consts.
 | |
|   if (!Const)
 | |
|     return;
 | |
|   if (auto *Cast = dyn_cast<BitCastInst>(Op0)) {
 | |
|     // Check for icmp+bitcast used for signbit.
 | |
|     if (!Cast->getSrcTy()->isFloatTy() &&
 | |
|         !Cast->getSrcTy()->isDoubleTy() &&
 | |
|         !Cast->getSrcTy()->isFP128Ty())
 | |
|       return;
 | |
|     Value *V = Cast->getOperand(0);
 | |
|     int Mask;
 | |
|     if (Pred == CmpInst::ICMP_SLT && Const->isZero()) {
 | |
|       // icmp slt (bitcast X), 0 - set if sign bit true
 | |
|       Mask = SystemZ::TDCMASK_MINUS;
 | |
|     } else if (Pred == CmpInst::ICMP_SGT && Const->isMinusOne()) {
 | |
|       // icmp sgt (bitcast X), -1 - set if sign bit false
 | |
|       Mask = SystemZ::TDCMASK_PLUS;
 | |
|     } else {
 | |
|       // Not a sign bit check.
 | |
|       return;
 | |
|     }
 | |
|     PossibleJunk.insert(Cast);
 | |
|     converted(&I, V, Mask, true);
 | |
|   } else if (auto *CI = dyn_cast<CallInst>(Op0)) {
 | |
|     // Check if this is a pre-existing call of our tdc intrinsic.
 | |
|     Function *F = CI->getCalledFunction();
 | |
|     if (!F || F->getIntrinsicID() != Intrinsic::s390_tdc)
 | |
|       return;
 | |
|     if (!Const->isZero())
 | |
|       return;
 | |
|     Value *V = CI->getArgOperand(0);
 | |
|     auto *MaskC = dyn_cast<ConstantInt>(CI->getArgOperand(1));
 | |
|     // Bail if the mask is not a constant.
 | |
|     if (!MaskC)
 | |
|       return;
 | |
|     int Mask = MaskC->getZExtValue();
 | |
|     Mask &= SystemZ::TDCMASK_ALL;
 | |
|     if (Pred == CmpInst::ICMP_NE) {
 | |
|       // icmp ne (call llvm.s390.tdc(...)), 0 -> simple TDC
 | |
|     } else if (Pred == CmpInst::ICMP_EQ) {
 | |
|       // icmp eq (call llvm.s390.tdc(...)), 0 -> TDC with inverted mask
 | |
|       Mask ^= SystemZ::TDCMASK_ALL;
 | |
|     } else {
 | |
|       // An unknown comparison - ignore.
 | |
|       return;
 | |
|     }
 | |
|     PossibleJunk.insert(CI);
 | |
|     converted(&I, V, Mask, false);
 | |
|   }
 | |
| }
 | |
| 
 | |
| void SystemZTDCPass::convertLogicOp(BinaryOperator &I) {
 | |
|   Value *Op0, *Op1;
 | |
|   int Mask0, Mask1;
 | |
|   bool Worthy0, Worthy1;
 | |
|   std::tie(Op0, Mask0, Worthy0) = ConvertedInsts[cast<Instruction>(I.getOperand(0))];
 | |
|   std::tie(Op1, Mask1, Worthy1) = ConvertedInsts[cast<Instruction>(I.getOperand(1))];
 | |
|   if (Op0 != Op1)
 | |
|     return;
 | |
|   int Mask;
 | |
|   switch (I.getOpcode()) {
 | |
|     case Instruction::And:
 | |
|       Mask = Mask0 & Mask1;
 | |
|       break;
 | |
|     case Instruction::Or:
 | |
|       Mask = Mask0 | Mask1;
 | |
|       break;
 | |
|     case Instruction::Xor:
 | |
|       Mask = Mask0 ^ Mask1;
 | |
|       break;
 | |
|     default:
 | |
|       llvm_unreachable("Unknown op in convertLogicOp");
 | |
|   }
 | |
|   converted(&I, Op0, Mask, true);
 | |
| }
 | |
| 
 | |
| bool SystemZTDCPass::runOnFunction(Function &F) {
 | |
|   ConvertedInsts.clear();
 | |
|   LogicOpsWorklist.clear();
 | |
|   PossibleJunk.clear();
 | |
| 
 | |
|   // Look for icmp+fcmp instructions.
 | |
|   for (auto &I : instructions(F)) {
 | |
|     if (I.getOpcode() == Instruction::FCmp)
 | |
|       convertFCmp(cast<CmpInst>(I));
 | |
|     else if (I.getOpcode() == Instruction::ICmp)
 | |
|       convertICmp(cast<CmpInst>(I));
 | |
|   }
 | |
| 
 | |
|   // If none found, bail already.
 | |
|   if (ConvertedInsts.empty())
 | |
|     return false;
 | |
| 
 | |
|   // Process the queue of logic instructions.
 | |
|   while (!LogicOpsWorklist.empty()) {
 | |
|     BinaryOperator *Op = LogicOpsWorklist.back();
 | |
|     LogicOpsWorklist.pop_back();
 | |
|     // If both operands mapped, and the instruction itself not yet mapped,
 | |
|     // convert it.
 | |
|     if (ConvertedInsts.count(dyn_cast<Instruction>(Op->getOperand(0))) &&
 | |
|         ConvertedInsts.count(dyn_cast<Instruction>(Op->getOperand(1))) &&
 | |
|         !ConvertedInsts.count(Op))
 | |
|       convertLogicOp(*Op);
 | |
|   }
 | |
| 
 | |
|   // Time to actually replace the instructions.  Do it in the reverse order
 | |
|   // of finding them, since there's a good chance the earlier ones will be
 | |
|   // unused (due to being folded into later ones).
 | |
|   Module &M = *F.getParent();
 | |
|   auto &Ctx = M.getContext();
 | |
|   Value *Zero32 = ConstantInt::get(Type::getInt32Ty(Ctx), 0);
 | |
|   bool MadeChange = false;
 | |
|   for (auto &It : reverse(ConvertedInsts)) {
 | |
|     Instruction *I = It.first;
 | |
|     Value *V;
 | |
|     int Mask;
 | |
|     bool Worthy;
 | |
|     std::tie(V, Mask, Worthy) = It.second;
 | |
|     if (!I->user_empty()) {
 | |
|       // If used and unworthy of conversion, skip it.
 | |
|       if (!Worthy)
 | |
|         continue;
 | |
|       // Call the intrinsic, compare result with 0.
 | |
|       Value *TDCFunc = Intrinsic::getDeclaration(&M, Intrinsic::s390_tdc,
 | |
|                                                  V->getType());
 | |
|       IRBuilder<> IRB(I);
 | |
|       Value *MaskVal = ConstantInt::get(Type::getInt64Ty(Ctx), Mask);
 | |
|       Instruction *TDC = IRB.CreateCall(TDCFunc, {V, MaskVal});
 | |
|       Value *ICmp = IRB.CreateICmp(CmpInst::ICMP_NE, TDC, Zero32);
 | |
|       I->replaceAllUsesWith(ICmp);
 | |
|     }
 | |
|     // If unused, or used and converted, remove it.
 | |
|     I->eraseFromParent();
 | |
|     MadeChange = true;
 | |
|   }
 | |
| 
 | |
|   if (!MadeChange)
 | |
|     return false;
 | |
| 
 | |
|   // We've actually done something - now clear misc accumulated junk (fabs,
 | |
|   // bitcast).
 | |
|   for (auto *I : PossibleJunk)
 | |
|     if (I->user_empty())
 | |
|       I->eraseFromParent();
 | |
| 
 | |
|   return true;
 | |
| }
 |