208 lines
		
	
	
		
			8.4 KiB
		
	
	
	
		
			C++
		
	
	
	
			
		
		
	
	
			208 lines
		
	
	
		
			8.4 KiB
		
	
	
	
		
			C++
		
	
	
	
//=== AArch64CallingConvention.cpp - AArch64 CC impl ------------*- C++ -*-===//
 | 
						|
//
 | 
						|
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 | 
						|
// See https://llvm.org/LICENSE.txt for license information.
 | 
						|
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 | 
						|
//
 | 
						|
//===----------------------------------------------------------------------===//
 | 
						|
//
 | 
						|
// This file contains the table-generated and custom routines for the AArch64
 | 
						|
// Calling Convention.
 | 
						|
//
 | 
						|
//===----------------------------------------------------------------------===//
 | 
						|
 | 
						|
#include "AArch64CallingConvention.h"
 | 
						|
#include "AArch64.h"
 | 
						|
#include "AArch64InstrInfo.h"
 | 
						|
#include "AArch64Subtarget.h"
 | 
						|
#include "llvm/CodeGen/CallingConvLower.h"
 | 
						|
#include "llvm/CodeGen/TargetInstrInfo.h"
 | 
						|
#include "llvm/IR/CallingConv.h"
 | 
						|
using namespace llvm;
 | 
						|
 | 
						|
// Register tables used by CC_AArch64_Custom_Block when it searches for a run
// of consecutive registers to hold an [N x Ty] block. Every table lists
// registers 0 through 7 of one register class in ascending order;
// finishStackBlock additionally walks ZRegList when handling scalable vectors.

/// X0-X7: chosen for i64 members (and for i32 members on Darwin ILP32).
static const MCPhysReg XRegList[] = {
    AArch64::X0, AArch64::X1, AArch64::X2, AArch64::X3,
    AArch64::X4, AArch64::X5, AArch64::X6, AArch64::X7};

/// H0-H7: chosen for f16 members.
static const MCPhysReg HRegList[] = {
    AArch64::H0, AArch64::H1, AArch64::H2, AArch64::H3,
    AArch64::H4, AArch64::H5, AArch64::H6, AArch64::H7};

/// S0-S7: chosen for f32 members and 32-bit vectors.
static const MCPhysReg SRegList[] = {
    AArch64::S0, AArch64::S1, AArch64::S2, AArch64::S3,
    AArch64::S4, AArch64::S5, AArch64::S6, AArch64::S7};

/// D0-D7: chosen for f64 members and 64-bit vectors.
static const MCPhysReg DRegList[] = {
    AArch64::D0, AArch64::D1, AArch64::D2, AArch64::D3,
    AArch64::D4, AArch64::D5, AArch64::D6, AArch64::D7};

/// Q0-Q7: chosen for f128 members and 128-bit vectors.
static const MCPhysReg QRegList[] = {
    AArch64::Q0, AArch64::Q1, AArch64::Q2, AArch64::Q3,
    AArch64::Q4, AArch64::Q5, AArch64::Q6, AArch64::Q7};

/// Z0-Z7: chosen for scalable-vector members.
static const MCPhysReg ZRegList[] = {
    AArch64::Z0, AArch64::Z1, AArch64::Z2, AArch64::Z3,
    AArch64::Z4, AArch64::Z5, AArch64::Z6, AArch64::Z7};
 | 
						|
 | 
						|
/// Give every pending member of an [N x Ty] block its final location once the
/// block could not be placed in a run of consecutive registers, then empty
/// \p PendingMembers.
///
/// Fixed-size members each receive a stack slot. A scalable-vector block is
/// instead handed back to the calling convention's CCAssignFn with all of
/// ZRegList temporarily marked as allocated.
///
/// \param PendingMembers Members of the block that still need a location.
/// \param LocVT          Location type shared by the members.
/// \param ArgFlags       Flags of the current argument; the consecutive-regs
///                       bits are cleared and then set again on the
///                       scalable-vector path.
/// \param State          Calling-convention state that records the result.
/// \param SlotAlign      Lower bound on the alignment of the first stack slot;
///                       later slots use a lower bound of 1.
/// \returns true in all cases.
static bool finishStackBlock(SmallVectorImpl<CCValAssign> &PendingMembers,
                             MVT LocVT, ISD::ArgFlagsTy &ArgFlags,
                             CCState &State, Align SlotAlign) {
  if (!LocVT.isScalableVector()) {
    // Fixed-size members: lay them out on the stack one after another.
    const unsigned BytesPerMember = LocVT.getSizeInBits() / 8;
    const Align StackAlign =
        State.getMachineFunction().getDataLayout().getStackAlignment();
    const Align OrigAlign = ArgFlags.getNonZeroOrigAlign();
    // Never request more than the stack alignment.
    const Align MemberAlign = std::min(OrigAlign, StackAlign);

    for (CCValAssign &Member : PendingMembers) {
      const Align ThisSlotAlign = std::max(MemberAlign, SlotAlign);
      Member.convertToMem(State.AllocateStack(BytesPerMember, ThisSlotAlign));
      State.addLoc(Member);
      // Only the first member is subject to the caller's slot alignment.
      SlotAlign = Align(1);
    }

    // Every pending member now has a location.
    PendingMembers.clear();
    return true;
  }

  const auto &ST = static_cast<const AArch64Subtarget &>(
      State.getMachineFunction().getSubtarget());
  const AArch64TargetLowering *Lowering = ST.getTargetLowering();

  // The auto-generated CCAssignFn is about to be invoked again. These flags
  // must be cleared first, otherwise it would dispatch straight back to the
  // custom handler and loop forever.
  ArgFlags.setInConsecutiveRegs(false);
  ArgFlags.setInConsecutiveRegsLast(false);

  // Per the PCS, an SVE tuple that does not fit in registers must leave the
  // remaining registers unallocated. For the duration of the CCAssignFn call,
  // however, every Z register has to look allocated so that the tuple is
  // passed indirectly. Remember which registers were actually free so they
  // can be released again afterwards.
  SmallVector<MCPhysReg, 8> PreviouslyFree;
  for (MCPhysReg ZReg : ZRegList) {
    if (!State.isAllocated(ZReg))
      PreviouslyFree.push_back(ZReg);
    State.AllocateReg(ZReg);
  }

  const CCValAssign &Head = PendingMembers[0];
  CCAssignFn *Reassign =
      Lowering->CCAssignFnForCall(State.getCallingConv(), /*IsVarArg=*/false);
  const bool Unhandled =
      Reassign(Head.getValNo(), Head.getValVT(), Head.getValVT(),
               CCValAssign::Full, ArgFlags, State);
  if (Unhandled)
    llvm_unreachable("Call operand has unhandled type");

  // Put the flags back.
  ArgFlags.setInConsecutiveRegs(true);
  ArgFlags.setInConsecutiveRegsLast(true);

  // Release the registers that were only blocked temporarily so that smaller
  // types can still use them.
  for (MCPhysReg ZReg : PreviouslyFree)
    State.DeallocateReg(ZReg);

  // Every pending member now has a location.
  PendingMembers.clear();
  return true;
}
 | 
						|
 | 
						|
/// The Darwin variadic PCS places anonymous arguments in 8-byte stack slots. An
 | 
						|
/// [N x Ty] type must still be contiguous in memory though.
 | 
						|
static bool CC_AArch64_Custom_Stack_Block(
 | 
						|
      unsigned &ValNo, MVT &ValVT, MVT &LocVT, CCValAssign::LocInfo &LocInfo,
 | 
						|
      ISD::ArgFlagsTy &ArgFlags, CCState &State) {
 | 
						|
  SmallVectorImpl<CCValAssign> &PendingMembers = State.getPendingLocs();
 | 
						|
 | 
						|
  // Add the argument to the list to be allocated once we know the size of the
 | 
						|
  // block.
 | 
						|
  PendingMembers.push_back(
 | 
						|
      CCValAssign::getPending(ValNo, ValVT, LocVT, LocInfo));
 | 
						|
 | 
						|
  if (!ArgFlags.isInConsecutiveRegsLast())
 | 
						|
    return true;
 | 
						|
 | 
						|
  return finishStackBlock(PendingMembers, LocVT, ArgFlags, State, Align(8));
 | 
						|
}
 | 
						|
 | 
						|
/// Given an [N x Ty] block, it should be passed in a consecutive sequence of
 | 
						|
/// registers. If no such sequence is available, mark the rest of the registers
 | 
						|
/// of that type as used and place the argument on the stack.
 | 
						|
static bool CC_AArch64_Custom_Block(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
 | 
						|
                                    CCValAssign::LocInfo &LocInfo,
 | 
						|
                                    ISD::ArgFlagsTy &ArgFlags, CCState &State) {
 | 
						|
  const AArch64Subtarget &Subtarget = static_cast<const AArch64Subtarget &>(
 | 
						|
      State.getMachineFunction().getSubtarget());
 | 
						|
  bool IsDarwinILP32 = Subtarget.isTargetILP32() && Subtarget.isTargetMachO();
 | 
						|
 | 
						|
  // Try to allocate a contiguous block of registers, each of the correct
 | 
						|
  // size to hold one member.
 | 
						|
  ArrayRef<MCPhysReg> RegList;
 | 
						|
  if (LocVT.SimpleTy == MVT::i64 || (IsDarwinILP32 && LocVT.SimpleTy == MVT::i32))
 | 
						|
    RegList = XRegList;
 | 
						|
  else if (LocVT.SimpleTy == MVT::f16)
 | 
						|
    RegList = HRegList;
 | 
						|
  else if (LocVT.SimpleTy == MVT::f32 || LocVT.is32BitVector())
 | 
						|
    RegList = SRegList;
 | 
						|
  else if (LocVT.SimpleTy == MVT::f64 || LocVT.is64BitVector())
 | 
						|
    RegList = DRegList;
 | 
						|
  else if (LocVT.SimpleTy == MVT::f128 || LocVT.is128BitVector())
 | 
						|
    RegList = QRegList;
 | 
						|
  else if (LocVT.isScalableVector())
 | 
						|
    RegList = ZRegList;
 | 
						|
  else {
 | 
						|
    // Not an array we want to split up after all.
 | 
						|
    return false;
 | 
						|
  }
 | 
						|
 | 
						|
  SmallVectorImpl<CCValAssign> &PendingMembers = State.getPendingLocs();
 | 
						|
 | 
						|
  // Add the argument to the list to be allocated once we know the size of the
 | 
						|
  // block.
 | 
						|
  PendingMembers.push_back(
 | 
						|
      CCValAssign::getPending(ValNo, ValVT, LocVT, LocInfo));
 | 
						|
 | 
						|
  if (!ArgFlags.isInConsecutiveRegsLast())
 | 
						|
    return true;
 | 
						|
 | 
						|
  // [N x i32] arguments get packed into x-registers on Darwin's arm64_32
 | 
						|
  // because that's how the armv7k Clang front-end emits small structs.
 | 
						|
  unsigned EltsPerReg = (IsDarwinILP32 && LocVT.SimpleTy == MVT::i32) ? 2 : 1;
 | 
						|
  unsigned RegResult = State.AllocateRegBlock(
 | 
						|
      RegList, alignTo(PendingMembers.size(), EltsPerReg) / EltsPerReg);
 | 
						|
  if (RegResult && EltsPerReg == 1) {
 | 
						|
    for (auto &It : PendingMembers) {
 | 
						|
      It.convertToReg(RegResult);
 | 
						|
      State.addLoc(It);
 | 
						|
      ++RegResult;
 | 
						|
    }
 | 
						|
    PendingMembers.clear();
 | 
						|
    return true;
 | 
						|
  } else if (RegResult) {
 | 
						|
    assert(EltsPerReg == 2 && "unexpected ABI");
 | 
						|
    bool UseHigh = false;
 | 
						|
    CCValAssign::LocInfo Info;
 | 
						|
    for (auto &It : PendingMembers) {
 | 
						|
      Info = UseHigh ? CCValAssign::AExtUpper : CCValAssign::ZExt;
 | 
						|
      State.addLoc(CCValAssign::getReg(It.getValNo(), MVT::i32, RegResult,
 | 
						|
                                       MVT::i64, Info));
 | 
						|
      UseHigh = !UseHigh;
 | 
						|
      if (!UseHigh)
 | 
						|
        ++RegResult;
 | 
						|
    }
 | 
						|
    PendingMembers.clear();
 | 
						|
    return true;
 | 
						|
  }
 | 
						|
 | 
						|
  if (!LocVT.isScalableVector()) {
 | 
						|
    // Mark all regs in the class as unavailable
 | 
						|
    for (auto Reg : RegList)
 | 
						|
      State.AllocateReg(Reg);
 | 
						|
  }
 | 
						|
 | 
						|
  const Align SlotAlign = Subtarget.isTargetDarwin() ? Align(1) : Align(8);
 | 
						|
 | 
						|
  return finishStackBlock(PendingMembers, LocVT, ArgFlags, State, SlotAlign);
 | 
						|
}
 | 
						|
 | 
						|
// TableGen provides definitions of the calling convention analysis entry
 | 
						|
// points.
 | 
						|
#include "AArch64GenCallingConv.inc"
 |