forked from OSchip/llvm-project
				
			[Constant Hoisting][ARM64] Enable constant hoisting for ARM64.
This implements the target-hooks for ARM64 to enable constant hoisting. This fixes <rdar://problem/14774662> and <rdar://problem/16381500>. llvm-svn: 205791
This commit is contained in:
		
							parent
							
								
									7e9eff664c
								
							
						
					
					
						commit
						c11e8b67bb
					
				| 
						 | 
					@ -22,6 +22,7 @@
 | 
				
			||||||
#include "llvm/Support/Debug.h"
 | 
					#include "llvm/Support/Debug.h"
 | 
				
			||||||
#include "llvm/Target/CostTable.h"
 | 
					#include "llvm/Target/CostTable.h"
 | 
				
			||||||
#include "llvm/Target/TargetLowering.h"
 | 
					#include "llvm/Target/TargetLowering.h"
 | 
				
			||||||
 | 
					#include <algorithm>
 | 
				
			||||||
using namespace llvm;
 | 
					using namespace llvm;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
// Declare the pass initialization routine locally as target-specific passes
 | 
					// Declare the pass initialization routine locally as target-specific passes
 | 
				
			||||||
| 
						 | 
					@ -71,8 +72,12 @@ public:
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  /// \name Scalar TTI Implementations
 | 
					  /// \name Scalar TTI Implementations
 | 
				
			||||||
  /// @{
 | 
					  /// @{
 | 
				
			||||||
 | 
					  unsigned getIntImmCost(int64_t Val) const;
 | 
				
			||||||
  unsigned getIntImmCost(const APInt &Imm, Type *Ty) const override;
 | 
					  unsigned getIntImmCost(const APInt &Imm, Type *Ty) const override;
 | 
				
			||||||
 | 
					  unsigned getIntImmCost(unsigned Opcode, unsigned Idx, const APInt &Imm,
 | 
				
			||||||
 | 
					                         Type *Ty) const override;
 | 
				
			||||||
 | 
					  unsigned getIntImmCost(Intrinsic::ID IID, unsigned Idx, const APInt &Imm,
 | 
				
			||||||
 | 
					                         Type *Ty) const override;
 | 
				
			||||||
  PopcntSupportKind getPopcntSupport(unsigned TyWidth) const override;
 | 
					  PopcntSupportKind getPopcntSupport(unsigned TyWidth) const override;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  /// @}
 | 
					  /// @}
 | 
				
			||||||
| 
						 | 
					@ -128,26 +133,139 @@ llvm::createARM64TargetTransformInfoPass(const ARM64TargetMachine *TM) {
 | 
				
			||||||
  return new ARM64TTI(TM);
 | 
					  return new ARM64TTI(TM);
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					/// \brief Calculate the cost of materializing a 64-bit value. This helper
 | 
				
			||||||
 | 
					/// method might only calculate a fraction of a larger immediate. Therefore it
 | 
				
			||||||
 | 
					/// is valid to return a cost of ZERO.
 | 
				
			||||||
 | 
					unsigned ARM64TTI::getIntImmCost(int64_t Val) const {
 | 
				
			||||||
 | 
					  // Check if the immediate can be encoded within an instruction.
 | 
				
			||||||
 | 
					  if (Val == 0 || ARM64_AM::isLogicalImmediate(Val, 64))
 | 
				
			||||||
 | 
					    return 0;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  if (Val < 0)
 | 
				
			||||||
 | 
					    Val = ~Val;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  // Calculate how many moves we will need to materialize this constant.
 | 
				
			||||||
 | 
					  unsigned LZ = countLeadingZeros((uint64_t)Val);
 | 
				
			||||||
 | 
					  return (64 - LZ + 15) / 16;
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					/// \brief Calculate the cost of materializing the given constant.
 | 
				
			||||||
unsigned ARM64TTI::getIntImmCost(const APInt &Imm, Type *Ty) const {
 | 
					unsigned ARM64TTI::getIntImmCost(const APInt &Imm, Type *Ty) const {
 | 
				
			||||||
  assert(Ty->isIntegerTy());
 | 
					  assert(Ty->isIntegerTy());
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  unsigned BitSize = Ty->getPrimitiveSizeInBits();
 | 
					  unsigned BitSize = Ty->getPrimitiveSizeInBits();
 | 
				
			||||||
  if (BitSize == 0)
 | 
					  if (BitSize == 0 || BitSize > 128)
 | 
				
			||||||
    return ~0U;
 | 
					    return ~0U;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  int64_t Val = Imm.getSExtValue();
 | 
					  // Sign-extend all constants to a multiple of 64-bit.
 | 
				
			||||||
  if (Val == 0 || ARM64_AM::isLogicalImmediate(Val, BitSize))
 | 
					  APInt ImmVal = Imm;
 | 
				
			||||||
    return 1;
 | 
					  if (BitSize & 0x3f)
 | 
				
			||||||
 | 
					    ImmVal = Imm.sext((BitSize + 63) & ~0x3fU);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  if ((int64_t)Val < 0)
 | 
					  // Split the constant into 64-bit chunks and calculate the cost for each
 | 
				
			||||||
    Val = ~Val;
 | 
					  // chunk.
 | 
				
			||||||
  if (BitSize == 32)
 | 
					  unsigned Cost = 0;
 | 
				
			||||||
    Val &= (1LL << 32) - 1;
 | 
					  for (unsigned ShiftVal = 0; ShiftVal < BitSize; ShiftVal += 64) {
 | 
				
			||||||
 | 
					    APInt Tmp = ImmVal.ashr(ShiftVal).getLoBits(64);
 | 
				
			||||||
 | 
					    int64_t Val = Tmp.getSExtValue();
 | 
				
			||||||
 | 
					    Cost += getIntImmCost(Val);
 | 
				
			||||||
 | 
					  }
 | 
				
			||||||
 | 
					  // We need at least one instruction to materialize the constant.
 | 
				
			||||||
 | 
					  return std::max(1U, Cost);
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  unsigned LZ = countLeadingZeros((uint64_t)Val);
 | 
					unsigned ARM64TTI::getIntImmCost(unsigned Opcode, unsigned Idx,
 | 
				
			||||||
  unsigned Shift = (63 - LZ) / 16;
 | 
					                                 const APInt &Imm, Type *Ty) const {
 | 
				
			||||||
  // MOVZ is free so return true for one or fewer MOVK.
 | 
					  assert(Ty->isIntegerTy());
 | 
				
			||||||
  return (Shift == 0) ? 1 : Shift;
 | 
					
 | 
				
			||||||
 | 
					  unsigned BitSize = Ty->getPrimitiveSizeInBits();
 | 
				
			||||||
 | 
					  if (BitSize == 0 || BitSize > 128)
 | 
				
			||||||
 | 
					    return ~0U;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  unsigned ImmIdx = ~0U;
 | 
				
			||||||
 | 
					  switch (Opcode) {
 | 
				
			||||||
 | 
					  default:
 | 
				
			||||||
 | 
					    return TCC_Free;
 | 
				
			||||||
 | 
					  case Instruction::GetElementPtr:
 | 
				
			||||||
 | 
					    // Always hoist the base address of a GetElementPtr.
 | 
				
			||||||
 | 
					    if (Idx == 0)
 | 
				
			||||||
 | 
					      return 2 * TCC_Basic;
 | 
				
			||||||
 | 
					    return TCC_Free;
 | 
				
			||||||
 | 
					  case Instruction::Store:
 | 
				
			||||||
 | 
					    ImmIdx = 0;
 | 
				
			||||||
 | 
					    break;
 | 
				
			||||||
 | 
					  case Instruction::Add:
 | 
				
			||||||
 | 
					  case Instruction::Sub:
 | 
				
			||||||
 | 
					  case Instruction::Mul:
 | 
				
			||||||
 | 
					  case Instruction::UDiv:
 | 
				
			||||||
 | 
					  case Instruction::SDiv:
 | 
				
			||||||
 | 
					  case Instruction::URem:
 | 
				
			||||||
 | 
					  case Instruction::SRem:
 | 
				
			||||||
 | 
					  case Instruction::Shl:
 | 
				
			||||||
 | 
					  case Instruction::LShr:
 | 
				
			||||||
 | 
					  case Instruction::AShr:
 | 
				
			||||||
 | 
					  case Instruction::And:
 | 
				
			||||||
 | 
					  case Instruction::Or:
 | 
				
			||||||
 | 
					  case Instruction::Xor:
 | 
				
			||||||
 | 
					  case Instruction::ICmp:
 | 
				
			||||||
 | 
					    ImmIdx = 1;
 | 
				
			||||||
 | 
					    break;
 | 
				
			||||||
 | 
					  case Instruction::Trunc:
 | 
				
			||||||
 | 
					  case Instruction::ZExt:
 | 
				
			||||||
 | 
					  case Instruction::SExt:
 | 
				
			||||||
 | 
					  case Instruction::IntToPtr:
 | 
				
			||||||
 | 
					  case Instruction::PtrToInt:
 | 
				
			||||||
 | 
					  case Instruction::BitCast:
 | 
				
			||||||
 | 
					  case Instruction::PHI:
 | 
				
			||||||
 | 
					  case Instruction::Call:
 | 
				
			||||||
 | 
					  case Instruction::Select:
 | 
				
			||||||
 | 
					  case Instruction::Ret:
 | 
				
			||||||
 | 
					  case Instruction::Load:
 | 
				
			||||||
 | 
					    break;
 | 
				
			||||||
 | 
					  }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  if (Idx == ImmIdx) {
 | 
				
			||||||
 | 
					    unsigned NumConstants = (BitSize + 63) / 64;
 | 
				
			||||||
 | 
					    unsigned Cost = ARM64TTI::getIntImmCost(Imm, Ty);
 | 
				
			||||||
 | 
					    return (Cost <= NumConstants * TCC_Basic) ? TCC_Free : Cost;
 | 
				
			||||||
 | 
					  }
 | 
				
			||||||
 | 
					  return ARM64TTI::getIntImmCost(Imm, Ty);
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					unsigned ARM64TTI::getIntImmCost(Intrinsic::ID IID, unsigned Idx,
 | 
				
			||||||
 | 
					                                 const APInt &Imm, Type *Ty) const {
 | 
				
			||||||
 | 
					  assert(Ty->isIntegerTy());
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  unsigned BitSize = Ty->getPrimitiveSizeInBits();
 | 
				
			||||||
 | 
					  if (BitSize == 0 || BitSize > 128)
 | 
				
			||||||
 | 
					    return ~0U;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  switch (IID) {
 | 
				
			||||||
 | 
					  default:
 | 
				
			||||||
 | 
					    return TCC_Free;
 | 
				
			||||||
 | 
					  case Intrinsic::sadd_with_overflow:
 | 
				
			||||||
 | 
					  case Intrinsic::uadd_with_overflow:
 | 
				
			||||||
 | 
					  case Intrinsic::ssub_with_overflow:
 | 
				
			||||||
 | 
					  case Intrinsic::usub_with_overflow:
 | 
				
			||||||
 | 
					  case Intrinsic::smul_with_overflow:
 | 
				
			||||||
 | 
					  case Intrinsic::umul_with_overflow:
 | 
				
			||||||
 | 
					    if (Idx == 1) {
 | 
				
			||||||
 | 
					      unsigned NumConstants = (BitSize + 63) / 64;
 | 
				
			||||||
 | 
					      unsigned Cost = ARM64TTI::getIntImmCost(Imm, Ty);
 | 
				
			||||||
 | 
					      return (Cost <= NumConstants * TCC_Basic) ? TCC_Free : Cost;
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					    break;
 | 
				
			||||||
 | 
					  case Intrinsic::experimental_stackmap:
 | 
				
			||||||
 | 
					    if ((Idx < 2) || (Imm.getBitWidth() <= 64 && isInt<64>(Imm.getSExtValue())))
 | 
				
			||||||
 | 
					      return TCC_Free;
 | 
				
			||||||
 | 
					    break;
 | 
				
			||||||
 | 
					  case Intrinsic::experimental_patchpoint_void:
 | 
				
			||||||
 | 
					  case Intrinsic::experimental_patchpoint_i64:
 | 
				
			||||||
 | 
					    if ((Idx < 4) || (Imm.getBitWidth() <= 64 && isInt<64>(Imm.getSExtValue())))
 | 
				
			||||||
 | 
					      return TCC_Free;
 | 
				
			||||||
 | 
					    break;
 | 
				
			||||||
 | 
					  }
 | 
				
			||||||
 | 
					  return ARM64TTI::getIntImmCost(Imm, Ty);
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
ARM64TTI::PopcntSupportKind ARM64TTI::getPopcntSupport(unsigned TyWidth) const {
 | 
					ARM64TTI::PopcntSupportKind ARM64TTI::getPopcntSupport(unsigned TyWidth) const {
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -0,0 +1,23 @@
 | 
				
			||||||
 | 
					; RUN: llc -mtriple=arm64-darwin-unknown < %s | FileCheck %s
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					%T = type { i32, i32, i32, i32 }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					; Test that the constant base address is materialized only once.
 | 
				
			||||||
 | 
					define i32 @test1() nounwind {
 | 
				
			||||||
 | 
					; CHECK-LABEL:  test1
 | 
				
			||||||
 | 
					; CHECK:        movz  x8, #1039, lsl #16
 | 
				
			||||||
 | 
					; CHECK-NEXT:   movk  x8, #49152
 | 
				
			||||||
 | 
					; CHECK-NEXT:   ldp w9, w10, [x8, #4]
 | 
				
			||||||
 | 
					; CHECK:        ldr w8, [x8, #12]
 | 
				
			||||||
 | 
					  %at = inttoptr i64 68141056 to %T*
 | 
				
			||||||
 | 
					  %o1 = getelementptr %T* %at, i32 0, i32 1
 | 
				
			||||||
 | 
					  %t1 = load i32* %o1
 | 
				
			||||||
 | 
					  %o2 = getelementptr %T* %at, i32 0, i32 2
 | 
				
			||||||
 | 
					  %t2 = load i32* %o2
 | 
				
			||||||
 | 
					  %a1 = add i32 %t1, %t2
 | 
				
			||||||
 | 
					  %o3 = getelementptr %T* %at, i32 0, i32 3
 | 
				
			||||||
 | 
					  %t3 = load i32* %o3
 | 
				
			||||||
 | 
					  %a2 = add i32 %a1, %t3
 | 
				
			||||||
 | 
					  ret i32 %a2
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
| 
						 | 
					@ -0,0 +1,23 @@
 | 
				
			||||||
 | 
					; RUN: opt -mtriple=arm64-darwin-unknown -S -consthoist < %s | FileCheck %s
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					%T = type { i32, i32, i32, i32 }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					define i32 @test1() nounwind {
 | 
				
			||||||
 | 
					; CHECK-LABEL: test1
 | 
				
			||||||
 | 
					; CHECK: %const = bitcast i64 68141056 to i64
 | 
				
			||||||
 | 
					; CHECK: %1 = inttoptr i64 %const to %T*
 | 
				
			||||||
 | 
					; CHECK: %o1 = getelementptr %T* %1, i32 0, i32 1
 | 
				
			||||||
 | 
					; CHECK: %o2 = getelementptr %T* %1, i32 0, i32 2
 | 
				
			||||||
 | 
					; CHECK: %o3 = getelementptr %T* %1, i32 0, i32 3
 | 
				
			||||||
 | 
					  %at = inttoptr i64 68141056 to %T*
 | 
				
			||||||
 | 
					  %o1 = getelementptr %T* %at, i32 0, i32 1
 | 
				
			||||||
 | 
					  %t1 = load i32* %o1
 | 
				
			||||||
 | 
					  %o2 = getelementptr %T* %at, i32 0, i32 2
 | 
				
			||||||
 | 
					  %t2 = load i32* %o2
 | 
				
			||||||
 | 
					  %a1 = add i32 %t1, %t2
 | 
				
			||||||
 | 
					  %o3 = getelementptr %T* %at, i32 0, i32 3
 | 
				
			||||||
 | 
					  %t3 = load i32* %o3
 | 
				
			||||||
 | 
					  %a2 = add i32 %a1, %t3
 | 
				
			||||||
 | 
					  ret i32 %a2
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
| 
						 | 
					@ -0,0 +1,3 @@
 | 
				
			||||||
 | 
					targets = set(config.root.targets_to_build.split())
 | 
				
			||||||
 | 
					if not 'ARM64' in targets:
 | 
				
			||||||
 | 
					    config.unsupported = True
 | 
				
			||||||
		Loading…
	
		Reference in New Issue