149 lines
		
	
	
		
			4.6 KiB
		
	
	
	
		
			C++
		
	
	
	
			
		
		
	
	
			149 lines
		
	
	
		
			4.6 KiB
		
	
	
	
		
			C++
		
	
	
	
| //===- NVPTXLowerAggrCopies.cpp - ------------------------------*- C++ -*--===//
 | |
| //
 | |
| //                     The LLVM Compiler Infrastructure
 | |
| //
 | |
| // This file is distributed under the University of Illinois Open Source
 | |
| // License. See LICENSE.TXT for details.
 | |
| //
 | |
| //===----------------------------------------------------------------------===//
 | |
| //
 | |
| // \file
 | |
| // Lower aggregate copies and memset, memcpy, memmove intrinsics into loops when
 | |
| // the size is large or is not a compile-time constant.
 | |
| //
 | |
| //===----------------------------------------------------------------------===//
 | |
| 
 | |
| #include "NVPTXLowerAggrCopies.h"
 | |
| #include "llvm/CodeGen/StackProtector.h"
 | |
| #include "llvm/IR/Constants.h"
 | |
| #include "llvm/IR/DataLayout.h"
 | |
| #include "llvm/IR/Function.h"
 | |
| #include "llvm/IR/IRBuilder.h"
 | |
| #include "llvm/IR/Instructions.h"
 | |
| #include "llvm/IR/IntrinsicInst.h"
 | |
| #include "llvm/IR/Intrinsics.h"
 | |
| #include "llvm/IR/LLVMContext.h"
 | |
| #include "llvm/IR/Module.h"
 | |
| #include "llvm/Support/Debug.h"
 | |
| #include "llvm/Transforms/Utils/BasicBlockUtils.h"
 | |
| #include "llvm/Transforms/Utils/LowerMemIntrinsics.h"
 | |
| 
 | |
| #define DEBUG_TYPE "nvptx"
 | |
| 
 | |
| using namespace llvm;
 | |
| 
 | |
| namespace {
 | |
| 
 | |
| // actual analysis class, which is a functionpass
 | |
| struct NVPTXLowerAggrCopies : public FunctionPass {
 | |
|   static char ID;
 | |
| 
 | |
|   NVPTXLowerAggrCopies() : FunctionPass(ID) {}
 | |
| 
 | |
|   void getAnalysisUsage(AnalysisUsage &AU) const override {
 | |
|     AU.addPreserved<StackProtector>();
 | |
|   }
 | |
| 
 | |
|   bool runOnFunction(Function &F) override;
 | |
| 
 | |
|   static const unsigned MaxAggrCopySize = 128;
 | |
| 
 | |
|   StringRef getPassName() const override {
 | |
|     return "Lower aggregate copies/intrinsics into loops";
 | |
|   }
 | |
| };
 | |
| 
 | |
| char NVPTXLowerAggrCopies::ID = 0;
 | |
| 
 | |
| bool NVPTXLowerAggrCopies::runOnFunction(Function &F) {
 | |
|   SmallVector<LoadInst *, 4> AggrLoads;
 | |
|   SmallVector<MemIntrinsic *, 4> MemCalls;
 | |
| 
 | |
|   const DataLayout &DL = F.getParent()->getDataLayout();
 | |
|   LLVMContext &Context = F.getParent()->getContext();
 | |
| 
 | |
|   // Collect all aggregate loads and mem* calls.
 | |
|   for (Function::iterator BI = F.begin(), BE = F.end(); BI != BE; ++BI) {
 | |
|     for (BasicBlock::iterator II = BI->begin(), IE = BI->end(); II != IE;
 | |
|          ++II) {
 | |
|       if (LoadInst *LI = dyn_cast<LoadInst>(II)) {
 | |
|         if (!LI->hasOneUse())
 | |
|           continue;
 | |
| 
 | |
|         if (DL.getTypeStoreSize(LI->getType()) < MaxAggrCopySize)
 | |
|           continue;
 | |
| 
 | |
|         if (StoreInst *SI = dyn_cast<StoreInst>(LI->user_back())) {
 | |
|           if (SI->getOperand(0) != LI)
 | |
|             continue;
 | |
|           AggrLoads.push_back(LI);
 | |
|         }
 | |
|       } else if (MemIntrinsic *IntrCall = dyn_cast<MemIntrinsic>(II)) {
 | |
|         // Convert intrinsic calls with variable size or with constant size
 | |
|         // larger than the MaxAggrCopySize threshold.
 | |
|         if (ConstantInt *LenCI = dyn_cast<ConstantInt>(IntrCall->getLength())) {
 | |
|           if (LenCI->getZExtValue() >= MaxAggrCopySize) {
 | |
|             MemCalls.push_back(IntrCall);
 | |
|           }
 | |
|         } else {
 | |
|           MemCalls.push_back(IntrCall);
 | |
|         }
 | |
|       }
 | |
|     }
 | |
|   }
 | |
| 
 | |
|   if (AggrLoads.size() == 0 && MemCalls.size() == 0) {
 | |
|     return false;
 | |
|   }
 | |
| 
 | |
|   //
 | |
|   // Do the transformation of an aggr load/copy/set to a loop
 | |
|   //
 | |
|   for (LoadInst *LI : AggrLoads) {
 | |
|     StoreInst *SI = dyn_cast<StoreInst>(*LI->user_begin());
 | |
|     Value *SrcAddr = LI->getOperand(0);
 | |
|     Value *DstAddr = SI->getOperand(1);
 | |
|     unsigned NumLoads = DL.getTypeStoreSize(LI->getType());
 | |
|     Value *CopyLen = ConstantInt::get(Type::getInt32Ty(Context), NumLoads);
 | |
| 
 | |
|     createMemCpyLoop(/* ConvertedInst */ SI,
 | |
|                      /* SrcAddr */ SrcAddr, /* DstAddr */ DstAddr,
 | |
|                      /* CopyLen */ CopyLen,
 | |
|                      /* SrcAlign */ LI->getAlignment(),
 | |
|                      /* DestAlign */ SI->getAlignment(),
 | |
|                      /* SrcIsVolatile */ LI->isVolatile(),
 | |
|                      /* DstIsVolatile */ SI->isVolatile());
 | |
| 
 | |
|     SI->eraseFromParent();
 | |
|     LI->eraseFromParent();
 | |
|   }
 | |
| 
 | |
|   // Transform mem* intrinsic calls.
 | |
|   for (MemIntrinsic *MemCall : MemCalls) {
 | |
|     if (MemCpyInst *Memcpy = dyn_cast<MemCpyInst>(MemCall)) {
 | |
|       expandMemCpyAsLoop(Memcpy);
 | |
|     } else if (MemMoveInst *Memmove = dyn_cast<MemMoveInst>(MemCall)) {
 | |
|       expandMemMoveAsLoop(Memmove);
 | |
|     } else if (MemSetInst *Memset = dyn_cast<MemSetInst>(MemCall)) {
 | |
|       expandMemSetAsLoop(Memset);
 | |
|     }
 | |
|     MemCall->eraseFromParent();
 | |
|   }
 | |
| 
 | |
|   return true;
 | |
| }
 | |
| 
 | |
| } // namespace
 | |
| 
 | |
| namespace llvm {
 | |
| void initializeNVPTXLowerAggrCopiesPass(PassRegistry &);
 | |
| }
 | |
| 
 | |
| INITIALIZE_PASS(NVPTXLowerAggrCopies, "nvptx-lower-aggr-copies",
 | |
|                 "Lower aggregate copies, and llvm.mem* intrinsics into loops",
 | |
|                 false, false)
 | |
| 
 | |
| FunctionPass *llvm::createLowerAggrCopies() {
 | |
|   return new NVPTXLowerAggrCopies();
 | |
| }
 |