142 lines
		
	
	
		
			5.2 KiB
		
	
	
	
		
			C++
		
	
	
	
			
		
		
	
	
			142 lines
		
	
	
		
			5.2 KiB
		
	
	
	
		
			C++
		
	
	
	
//===-- WebAssemblyTargetTransformInfo.cpp - WebAssembly-specific TTI -----===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
///
/// \file
/// This file defines the WebAssembly-specific TargetTransformInfo
/// implementation.
///
//===----------------------------------------------------------------------===//
 | 
						|
 | 
						|
#include "WebAssemblyTargetTransformInfo.h"
 | 
						|
#include "llvm/CodeGen/CostTable.h"
 | 
						|
#include "llvm/Support/Debug.h"
 | 
						|
using namespace llvm;
 | 
						|
 | 
						|
#define DEBUG_TYPE "wasmtti"
 | 
						|
 | 
						|
/// Report the level of population-count support for an integer of width
/// \p TyWidth bits.
///
/// \p TyWidth must be a power of two (checked by assertion). Every width is
/// reported as having fast hardware support.
TargetTransformInfo::PopcntSupportKind
WebAssemblyTTIImpl::getPopcntSupport(unsigned TyWidth) const {
  assert(isPowerOf2_32(TyWidth) && "Ty width must be power of 2");

  // The answer does not depend on the width once the precondition holds.
  return TTI::PSK_FastHardware;
}
 | 
						|
 | 
						|
unsigned WebAssemblyTTIImpl::getNumberOfRegisters(unsigned ClassID) const {
 | 
						|
  unsigned Result = BaseT::getNumberOfRegisters(ClassID);
 | 
						|
 | 
						|
  // For SIMD, use at least 16 registers, as a rough guess.
 | 
						|
  bool Vector = (ClassID == 1);
 | 
						|
  if (Vector)
 | 
						|
    Result = std::max(Result, 16u);
 | 
						|
 | 
						|
  return Result;
 | 
						|
}
 | 
						|
 | 
						|
/// Return the register width, in bits, for the register kind \p K.
///
///  - Scalar registers are 64 bits wide.
///  - Fixed-width vector registers are 128 bits when the subtarget has
///    SIMD128, and 64 bits otherwise.
///  - Scalable vector registers are reported with a scalable size of 0.
TypeSize WebAssemblyTTIImpl::getRegisterBitWidth(
    TargetTransformInfo::RegisterKind K) const {
  switch (K) {
  case TTI::RGK_Scalar: {
    return TypeSize::getFixed(64);
  }
  case TTI::RGK_FixedWidthVector: {
    // Width depends on whether the SIMD128 feature is enabled.
    const bool HasSIMD = getST()->hasSIMD128();
    const unsigned VectorBits = HasSIMD ? 128 : 64;
    return TypeSize::getFixed(VectorBits);
  }
  case TTI::RGK_ScalableVector: {
    return TypeSize::getScalable(0);
  }
  }

  // Every enumerator is handled above; reaching here means K is invalid.
  llvm_unreachable("Unsupported register kind");
}
 | 
						|
 | 
						|
/// Estimate the cost of the arithmetic instruction \p Opcode operating on
/// type \p Ty.
///
/// The cost starts from the BasicTTIImplBase estimate. For fixed-width vector
/// shifts (shl, lshr, ashr) whose shift amount (\p Opd2Info) is not uniform,
/// the cost is replaced by a per-element estimate of extract + scalar op +
/// insert. All other cases return the base estimate unchanged.
///
/// \p Args and \p CxtI are accepted for interface compatibility but are not
/// consulted by this function.
InstructionCost WebAssemblyTTIImpl::getArithmeticInstrCost(
    unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind,
    TTI::OperandValueKind Opd1Info, TTI::OperandValueKind Opd2Info,
    TTI::OperandValueProperties Opd1PropInfo,
    TTI::OperandValueProperties Opd2PropInfo, ArrayRef<const Value *> Args,
    const Instruction *CxtI) {

  InstructionCost Cost =
      BasicTTIImplBase<WebAssemblyTTIImpl>::getArithmeticInstrCost(
          Opcode, Ty, CostKind, Opd1Info, Opd2Info, Opd1PropInfo, Opd2PropInfo);

  // The per-element adjustment below needs a known element count, which only
  // fixed-width vectors have. Testing for FixedVectorType directly (instead of
  // testing for VectorType and then using an unchecked cast<>) means any other
  // type simply keeps the base cost rather than tripping the cast assertion.
  auto *VTy = dyn_cast<FixedVectorType>(Ty);
  if (!VTy)
    return Cost;

  switch (Opcode) {
  case Instruction::LShr:
  case Instruction::AShr:
  case Instruction::Shl:
    // SIMD128's shifts currently only accept a scalar shift count. For each
    // element, we'll need to extract, op, insert. The following is a rough
    // approximation.
    if (Opd2Info != TTI::OK_UniformValue &&
        Opd2Info != TTI::OK_UniformConstantValue)
      Cost =
          VTy->getNumElements() *
          (TargetTransformInfo::TCC_Basic +
           getArithmeticInstrCost(Opcode, VTy->getElementType(), CostKind) +
           TargetTransformInfo::TCC_Basic);
    break;
  }
  return Cost;
}
 | 
						|
 | 
						|
/// Estimate the cost of the vector insert/extract instruction \p Opcode on
/// vector type \p Val at lane \p Index.
///
/// The result is the BasicTTIImplBase estimate, with a large penalty added
/// when \p Index is -1u.
InstructionCost WebAssemblyTTIImpl::getVectorInstrCost(unsigned Opcode,
                                                       Type *Val,
                                                       unsigned Index) {
  InstructionCost BaseCost =
      BasicTTIImplBase::getVectorInstrCost(Opcode, Val, Index);

  // Any concrete lane index is costed exactly like the generic estimate.
  const bool HasSentinelIndex = (Index == -1u);
  if (!HasSentinelIndex)
    return BaseCost;

  // SIMD128's insert/extract currently only take constant indices, so the
  // -1u index case is made very expensive.
  return BaseCost + 25 * TargetTransformInfo::TCC_Expensive;
}
 | 
						|
 | 
						|
bool WebAssemblyTTIImpl::areInlineCompatible(const Function *Caller,
 | 
						|
                                             const Function *Callee) const {
 | 
						|
  // Allow inlining only when the Callee has a subset of the Caller's
 | 
						|
  // features. In principle, we should be able to inline regardless of any
 | 
						|
  // features because WebAssembly supports features at module granularity, not
 | 
						|
  // function granularity, but without this restriction it would be possible for
 | 
						|
  // a module to "forget" about features if all the functions that used them
 | 
						|
  // were inlined.
 | 
						|
  const TargetMachine &TM = getTLI()->getTargetMachine();
 | 
						|
 | 
						|
  const FeatureBitset &CallerBits =
 | 
						|
      TM.getSubtargetImpl(*Caller)->getFeatureBits();
 | 
						|
  const FeatureBitset &CalleeBits =
 | 
						|
      TM.getSubtargetImpl(*Callee)->getFeatureBits();
 | 
						|
 | 
						|
  return (CallerBits & CalleeBits) == CalleeBits;
 | 
						|
}
 | 
						|
 | 
						|
/// Fill in the unrolling preferences \p UP for loop \p L.
///
/// If the loop contains a call or invoke of a known function that is lowered
/// to an actual call, \p UP is left untouched. Otherwise partial, runtime and
/// upper-bound unrolling are enabled with a partial threshold of 30, the
/// optimize-for-size thresholds are zeroed, and BEInsns is set to 2.
///
/// \p SE and \p ORE are not consulted by this function.
void WebAssemblyTTIImpl::getUnrollingPreferences(
    Loop *L, ScalarEvolution &SE, TTI::UnrollingPreferences &UP,
    OptimizationRemarkEmitter *ORE) const {
  // Scan the loop: don't unroll loops with calls. This is a standard approach
  // for most (all?) targets.
  for (BasicBlock *Block : L->blocks()) {
    for (Instruction &Inst : *Block) {
      // Only call and invoke instructions are of interest.
      if (!isa<CallInst>(Inst) && !isa<InvokeInst>(Inst))
        continue;

      // Indirect calls have no known callee and are skipped.
      const Function *Target = cast<CallBase>(Inst).getCalledFunction();
      if (!Target)
        continue;

      // A real call in the loop body: leave the preferences unchanged.
      if (isLoweredToCall(Target))
        return;
    }
  }

  // The chosen threshold is within the range of 'LoopMicroOpBufferSize' of
  // the various microarchitectures that use the BasicTTI implementation and
  // has been selected through heuristics across multiple cores and runtimes.
  UP.UpperBound = true;
  UP.Runtime = true;
  UP.Partial = true;
  UP.PartialThreshold = 30;

  // Avoid unrolling when optimizing for size.
  UP.OptSizeThreshold = 0;
  UP.PartialOptSizeThreshold = 0;

  // Set number of instructions optimized when "back edge"
  // becomes "fall through" to default value of 2.
  UP.BEInsns = 2;
}
 |