//===- RISCVInsertVSETVLI.cpp - Insert VSETVLI instructions ---------------===//
|
|
//
|
|
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
|
// See https://llvm.org/LICENSE.txt for license information.
|
|
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
|
//
|
|
//===----------------------------------------------------------------------===//
|
|
//
|
|
// This file implements a function pass that inserts VSETVLI instructions where
|
|
// needed and expands the vl outputs of VLEFF/VLSEGFF to PseudoReadVL
|
|
// instructions.
|
|
//
|
|
// This pass consists of 3 phases:
|
|
//
|
|
// Phase 1 collects how each basic block affects VL/VTYPE.
|
|
//
|
|
// Phase 2 uses the information from phase 1 to do a data flow analysis to
|
|
// propagate the VL/VTYPE changes through the function. This gives us the
|
|
// VL/VTYPE at the start of each basic block.
|
|
//
|
|
// Phase 3 inserts VSETVLI instructions in each basic block. Information from
|
|
// phase 2 is used to prevent inserting a VSETVLI before the first vector
|
|
// instruction in the block if possible.
|
|
//
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
#include "RISCV.h"
|
|
#include "RISCVSubtarget.h"
|
|
#include "llvm/CodeGen/LiveIntervals.h"
|
|
#include "llvm/CodeGen/MachineFunctionPass.h"
|
|
#include <queue>
|
|
using namespace llvm;
|
|
|
|
#define DEBUG_TYPE "riscv-insert-vsetvli"
|
|
#define RISCV_INSERT_VSETVLI_NAME "RISCV Insert VSETVLI pass"
|
|
|
|
// Debugging knob: disables the phase-3 phi-peeking optimization
// (needVSETVLIPHI) which avoids a vsetvli when an AVL phi's incoming values
// are all VL outputs of compatible vsetvlis.
static cl::opt<bool> DisableInsertVSETVLPHIOpt(
    "riscv-disable-insert-vsetvl-phi-opt", cl::init(false), cl::Hidden,
    cl::desc("Disable looking through phis when inserting vsetvlis."));

// When enabled, phase 3 asserts that the locally recomputed state agrees with
// the global dataflow result instead of silently tolerating divergence.
static cl::opt<bool> UseStrictAsserts(
    "riscv-insert-vsetvl-strict-asserts", cl::init(true), cl::Hidden,
    cl::desc("Enable strict assertion checking for the dataflow algorithm"));
|
|
|
|
namespace {
|
|
|
|
static unsigned getVLOpNum(const MachineInstr &MI) {
|
|
return RISCVII::getVLOpNum(MI.getDesc());
|
|
}
|
|
|
|
static unsigned getSEWOpNum(const MachineInstr &MI) {
|
|
return RISCVII::getSEWOpNum(MI.getDesc());
|
|
}
|
|
|
|
static bool isVectorConfigInstr(const MachineInstr &MI) {
|
|
return MI.getOpcode() == RISCV::PseudoVSETVLI ||
|
|
MI.getOpcode() == RISCV::PseudoVSETVLIX0 ||
|
|
MI.getOpcode() == RISCV::PseudoVSETIVLI;
|
|
}
|
|
|
|
/// Return true if this is 'vsetvli x0, x0, vtype' which preserves
|
|
/// VL and only sets VTYPE.
|
|
static bool isVLPreservingConfig(const MachineInstr &MI) {
|
|
if (MI.getOpcode() != RISCV::PseudoVSETVLIX0)
|
|
return false;
|
|
assert(RISCV::X0 == MI.getOperand(1).getReg());
|
|
return RISCV::X0 == MI.getOperand(0).getReg();
|
|
}
|
|
|
|
/// Map an RVV pseudo opcode to the MC opcode of the underlying instruction,
/// or 0 when \p RVVPseudoOpcode is not an RVV pseudo.
static uint16_t getRVVMCOpcode(uint16_t RVVPseudoOpcode) {
  const RISCVVPseudosTable::PseudoInfo *RVV =
      RISCVVPseudosTable::getPseudoInfo(RVVPseudoOpcode);
  return RVV ? RVV->BaseInstr : 0;
}
|
|
|
|
static bool isScalarMoveInstr(const MachineInstr &MI) {
|
|
switch (getRVVMCOpcode(MI.getOpcode())) {
|
|
default:
|
|
return false;
|
|
case RISCV::VMV_S_X:
|
|
case RISCV::VFMV_S_F:
|
|
return true;
|
|
}
|
|
}
|
|
|
|
/// Get the EEW for a load or store instruction. Return None if MI is not
|
|
/// a load or store which ignores SEW.
|
|
static Optional<unsigned> getEEWForLoadStore(const MachineInstr &MI) {
|
|
switch (getRVVMCOpcode(MI.getOpcode())) {
|
|
default:
|
|
return std::nullopt;
|
|
case RISCV::VLE8_V:
|
|
case RISCV::VLSE8_V:
|
|
case RISCV::VSE8_V:
|
|
case RISCV::VSSE8_V:
|
|
return 8;
|
|
case RISCV::VLE16_V:
|
|
case RISCV::VLSE16_V:
|
|
case RISCV::VSE16_V:
|
|
case RISCV::VSSE16_V:
|
|
return 16;
|
|
case RISCV::VLE32_V:
|
|
case RISCV::VLSE32_V:
|
|
case RISCV::VSE32_V:
|
|
case RISCV::VSSE32_V:
|
|
return 32;
|
|
case RISCV::VLE64_V:
|
|
case RISCV::VLSE64_V:
|
|
case RISCV::VSE64_V:
|
|
case RISCV::VSSE64_V:
|
|
return 64;
|
|
}
|
|
}
|
|
|
|
/// Return true if this is an operation on mask registers. Note that
|
|
/// this includes both arithmetic/logical ops and load/store (vlm/vsm).
|
|
static bool isMaskRegOp(const MachineInstr &MI) {
|
|
if (!RISCVII::hasSEWOp(MI.getDesc().TSFlags))
|
|
return false;
|
|
const unsigned Log2SEW = MI.getOperand(getSEWOpNum(MI)).getImm();
|
|
// A Log2SEW of 0 is an operation on mask registers only.
|
|
return Log2SEW == 0;
|
|
}
|
|
|
|
/// Which subfields of VL or VTYPE have values we need to preserve?
struct DemandedFields {
  bool VL = false;           // The VL register value.
  bool SEW = false;          // Selected element width.
  bool LMUL = false;         // Register group multiplier.
  bool SEWLMULRatio = false; // Only the SEW/LMUL ratio (i.e. VLMAX).
  bool TailPolicy = false;   // Tail agnostic/undisturbed bit.
  bool MaskPolicy = false;   // Mask agnostic/undisturbed bit.

  // Return true if any part of VTYPE was used
  bool usedVTYPE() {
    return SEW || LMUL || SEWLMULRatio || TailPolicy || MaskPolicy;
  }

  // Mark all VTYPE subfields and properties as demanded
  void demandVTYPE() {
    SEW = true;
    LMUL = true;
    SEWLMULRatio = true;
    TailPolicy = true;
    MaskPolicy = true;
  }
};
|
|
|
|
/// Return true if the two values of the VTYPE register provided are
/// indistinguishable from the perspective of an instruction (or set of
/// instructions) which use only the Used subfields and properties.
static bool areCompatibleVTYPEs(uint64_t VType1,
                                uint64_t VType2,
                                const DemandedFields &Used) {
  // Each demanded subfield must match exactly; undemanded fields are free to
  // differ.
  if (Used.SEW &&
      RISCVVType::getSEW(VType1) != RISCVVType::getSEW(VType2))
    return false;

  if (Used.LMUL &&
      RISCVVType::getVLMUL(VType1) != RISCVVType::getVLMUL(VType2))
    return false;

  // The ratio check is separate from SEW/LMUL: an instruction may demand
  // only SEW/LMUL (i.e. VLMAX) without demanding either field individually.
  if (Used.SEWLMULRatio) {
    auto Ratio1 = RISCVVType::getSEWLMULRatio(RISCVVType::getSEW(VType1),
                                              RISCVVType::getVLMUL(VType1));
    auto Ratio2 = RISCVVType::getSEWLMULRatio(RISCVVType::getSEW(VType2),
                                              RISCVVType::getVLMUL(VType2));
    if (Ratio1 != Ratio2)
      return false;
  }

  if (Used.TailPolicy &&
      RISCVVType::isTailAgnostic(VType1) != RISCVVType::isTailAgnostic(VType2))
    return false;
  if (Used.MaskPolicy &&
      RISCVVType::isMaskAgnostic(VType1) != RISCVVType::isMaskAgnostic(VType2))
    return false;
  return true;
}
|
|
|
|
/// Return the fields and properties demanded by the provided instruction.
static DemandedFields getDemanded(const MachineInstr &MI) {
  // Warning: This function has to work on both the lowered (i.e. post
  // emitVSETVLIs) and pre-lowering forms.  The main implication of this is
  // that it can't use the value of a SEW, VL, or Policy operand as they might
  // be stale after lowering.

  // Most instructions don't use any of these subfields.
  DemandedFields Res;
  // Start conservative if registers are used
  if (MI.isCall() || MI.isInlineAsm() || MI.readsRegister(RISCV::VL))
    Res.VL = true;
  if (MI.isCall() || MI.isInlineAsm() || MI.readsRegister(RISCV::VTYPE))
    Res.demandVTYPE();
  // Start conservative on the unlowered form too
  uint64_t TSFlags = MI.getDesc().TSFlags;
  if (RISCVII::hasSEWOp(TSFlags)) {
    Res.demandVTYPE();
    if (RISCVII::hasVLOp(TSFlags))
      Res.VL = true;

    // Behavior is independent of mask policy.
    if (!RISCVII::usesMaskPolicy(TSFlags))
      Res.MaskPolicy = false;
  }

  // Loads and stores with implicit EEW do not demand SEW or LMUL directly.
  // They instead demand the ratio of the two which is used in computing
  // EMUL, but which allows us the flexibility to change SEW and LMUL
  // provided we don't change the ratio.
  // Note: We assume that the instructions initial SEW is the EEW encoded
  // in the opcode.  This is asserted when constructing the VSETVLIInfo.
  if (getEEWForLoadStore(MI)) {
    Res.SEW = false;
    Res.LMUL = false;
  }

  // Store instructions don't use the policy fields.
  // (Stores are recognized here as SEW-carrying instructions with no
  // explicit defs.)
  if (RISCVII::hasSEWOp(TSFlags) && MI.getNumExplicitDefs() == 0) {
    Res.TailPolicy = false;
    Res.MaskPolicy = false;
  }

  // If this is a mask reg operation, it only cares about VLMAX.
  // TODO: Possible extensions to this logic
  // * Probably ok if available VLMax is larger than demanded
  // * The policy bits can probably be ignored..
  if (isMaskRegOp(MI)) {
    Res.SEW = false;
    Res.LMUL = false;
  }

  return Res;
}
|
|
|
|
/// Defines the abstract state with which the forward dataflow models the
/// values of the VL and VTYPE registers after insertion.
class VSETVLIInfo {
  // The AVL is either a register or an immediate; the State discriminant
  // below says which union member (if any) is active.
  union {
    Register AVLReg;
    unsigned AVLImm;
  };

  enum : uint8_t {
    Uninitialized, // Lattice bottom: no information yet.
    AVLIsReg,      // AVL known, held in AVLReg.
    AVLIsImm,      // AVL known, held in AVLImm.
    Unknown,       // Lattice top: VL/VTYPE could be anything.
  } State = Uninitialized;

  // Fields from VTYPE.
  RISCVII::VLMUL VLMul = RISCVII::LMUL_1;
  uint8_t SEW = 0;
  uint8_t TailAgnostic : 1;
  uint8_t MaskAgnostic : 1;
  // When set, only the SEW/LMUL ratio (VLMAX) of this state is meaningful;
  // the individual SEW/LMUL/policy fields must not be consulted. Produced by
  // intersect() when predecessors agree on AVL and VLMAX but not full VTYPE.
  uint8_t SEWLMULRatioOnly : 1;

public:
  VSETVLIInfo()
      : AVLImm(0), TailAgnostic(false), MaskAgnostic(false),
        SEWLMULRatioOnly(false) {}

  static VSETVLIInfo getUnknown() {
    VSETVLIInfo Info;
    Info.setUnknown();
    return Info;
  }

  bool isValid() const { return State != Uninitialized; }
  void setUnknown() { State = Unknown; }
  bool isUnknown() const { return State == Unknown; }

  void setAVLReg(Register Reg) {
    AVLReg = Reg;
    State = AVLIsReg;
  }

  void setAVLImm(unsigned Imm) {
    AVLImm = Imm;
    State = AVLIsImm;
  }

  bool hasAVLImm() const { return State == AVLIsImm; }
  bool hasAVLReg() const { return State == AVLIsReg; }
  Register getAVLReg() const {
    assert(hasAVLReg());
    return AVLReg;
  }
  unsigned getAVLImm() const {
    assert(hasAVLImm());
    return AVLImm;
  }

  unsigned getSEW() const { return SEW; }
  RISCVII::VLMUL getVLMUL() const { return VLMul; }

  // Return true if we can prove the AVL (and thus VL) is non-zero.
  bool hasNonZeroAVL() const {
    if (hasAVLImm())
      return getAVLImm() > 0;
    if (hasAVLReg())
      // An X0 AVL source requests VLMAX, which is non-zero.
      return getAVLReg() == RISCV::X0;
    return false;
  }

  bool hasSameAVL(const VSETVLIInfo &Other) const {
    assert(isValid() && Other.isValid() &&
           "Can't compare invalid VSETVLIInfos");
    assert(!isUnknown() && !Other.isUnknown() &&
           "Can't compare AVL in unknown state");
    if (hasAVLReg() && Other.hasAVLReg())
      return getAVLReg() == Other.getAVLReg();

    if (hasAVLImm() && Other.hasAVLImm())
      return getAVLImm() == Other.getAVLImm();

    // Mixed reg/imm AVLs are conservatively treated as different even if the
    // register might dynamically hold the same value.
    return false;
  }

  // Decode an encoded VTYPE value into the individual fields.
  void setVTYPE(unsigned VType) {
    assert(isValid() && !isUnknown() &&
           "Can't set VTYPE for uninitialized or unknown");
    VLMul = RISCVVType::getVLMUL(VType);
    SEW = RISCVVType::getSEW(VType);
    TailAgnostic = RISCVVType::isTailAgnostic(VType);
    MaskAgnostic = RISCVVType::isMaskAgnostic(VType);
  }
  void setVTYPE(RISCVII::VLMUL L, unsigned S, bool TA, bool MA) {
    assert(isValid() && !isUnknown() &&
           "Can't set VTYPE for uninitialized or unknown");
    VLMul = L;
    SEW = S;
    TailAgnostic = TA;
    MaskAgnostic = MA;
  }

  // Encode the VTYPE fields into the immediate format used by vsetvli.
  unsigned encodeVTYPE() const {
    assert(isValid() && !isUnknown() && !SEWLMULRatioOnly &&
           "Can't encode VTYPE for uninitialized or unknown");
    return RISCVVType::encodeVTYPE(VLMul, SEW, TailAgnostic, MaskAgnostic);
  }

  bool hasSEWLMULRatioOnly() const { return SEWLMULRatioOnly; }

  bool hasSameSEW(const VSETVLIInfo &Other) const {
    assert(isValid() && Other.isValid() &&
           "Can't compare invalid VSETVLIInfos");
    assert(!isUnknown() && !Other.isUnknown() &&
           "Can't compare VTYPE in unknown state");
    assert(!SEWLMULRatioOnly && !Other.SEWLMULRatioOnly &&
           "Can't compare when only LMUL/SEW ratio is valid.");
    return SEW == Other.SEW;
  }

  bool hasSameVTYPE(const VSETVLIInfo &Other) const {
    assert(isValid() && Other.isValid() &&
           "Can't compare invalid VSETVLIInfos");
    assert(!isUnknown() && !Other.isUnknown() &&
           "Can't compare VTYPE in unknown state");
    assert(!SEWLMULRatioOnly && !Other.SEWLMULRatioOnly &&
           "Can't compare when only LMUL/SEW ratio is valid.");
    return std::tie(VLMul, SEW, TailAgnostic, MaskAgnostic) ==
           std::tie(Other.VLMul, Other.SEW, Other.TailAgnostic,
                    Other.MaskAgnostic);
  }

  unsigned getSEWLMULRatio() const {
    assert(isValid() && !isUnknown() &&
           "Can't use VTYPE for uninitialized or unknown");
    return RISCVVType::getSEWLMULRatio(SEW, VLMul);
  }

  // Check if the VTYPE for these two VSETVLIInfos produce the same VLMAX.
  // Note that having the same VLMAX ensures that both share the same
  // function from AVL to VL; that is, they must produce the same VL value
  // for any given AVL value.
  bool hasSameVLMAX(const VSETVLIInfo &Other) const {
    assert(isValid() && Other.isValid() &&
           "Can't compare invalid VSETVLIInfos");
    assert(!isUnknown() && !Other.isUnknown() &&
           "Can't compare VTYPE in unknown state");
    return getSEWLMULRatio() == Other.getSEWLMULRatio();
  }

  bool hasSamePolicy(const VSETVLIInfo &Other) const {
    assert(isValid() && Other.isValid() &&
           "Can't compare invalid VSETVLIInfos");
    assert(!isUnknown() && !Other.isUnknown() &&
           "Can't compare VTYPE in unknown state");
    return TailAgnostic == Other.TailAgnostic &&
           MaskAgnostic == Other.MaskAgnostic;
  }

  // Return true if this VTYPE is indistinguishable from Require's VTYPE with
  // respect to the fields MI actually demands.
  bool hasCompatibleVTYPE(const MachineInstr &MI,
                          const VSETVLIInfo &Require) const {
    const DemandedFields Used = getDemanded(MI);
    return areCompatibleVTYPEs(encodeVTYPE(), Require.encodeVTYPE(), Used);
  }

  // Determine whether the vector instructions requirements represented by
  // Require are compatible with the previous vsetvli instruction represented
  // by this.  MI is the instruction whose requirements we're considering.
  bool isCompatible(const MachineInstr &MI, const VSETVLIInfo &Require) const {
    assert(isValid() && Require.isValid() &&
           "Can't compare invalid VSETVLIInfos");
    assert(!Require.SEWLMULRatioOnly &&
           "Expected a valid VTYPE for instruction!");
    // Nothing is compatible with Unknown.
    if (isUnknown() || Require.isUnknown())
      return false;

    // If only our VLMAX ratio is valid, then this isn't compatible.
    if (SEWLMULRatioOnly)
      return false;

    // If the instruction doesn't need an AVLReg and the SEW matches, consider
    // it compatible.
    if (Require.hasAVLReg() && Require.AVLReg == RISCV::NoRegister)
      if (SEW == Require.SEW)
        return true;

    return hasSameAVL(Require) && hasCompatibleVTYPE(MI, Require);
  }

  bool operator==(const VSETVLIInfo &Other) const {
    // Uninitialized is only equal to another Uninitialized.
    if (!isValid())
      return !Other.isValid();
    if (!Other.isValid())
      return !isValid();

    // Unknown is only equal to another Unknown.
    if (isUnknown())
      return Other.isUnknown();
    if (Other.isUnknown())
      return isUnknown();

    if (!hasSameAVL(Other))
      return false;

    // If the SEWLMULRatioOnly bits are different, then they aren't equal.
    if (SEWLMULRatioOnly != Other.SEWLMULRatioOnly)
      return false;

    // If only the VLMAX is valid, check that it is the same.
    if (SEWLMULRatioOnly)
      return hasSameVLMAX(Other);

    // If the full VTYPE is valid, check that it is the same.
    return hasSameVTYPE(Other);
  }

  bool operator!=(const VSETVLIInfo &Other) const {
    return !(*this == Other);
  }

  // Calculate the VSETVLIInfo visible to a block assuming this and Other are
  // both predecessors.  This is the lattice meet operation for the dataflow.
  VSETVLIInfo intersect(const VSETVLIInfo &Other) const {
    // If the new value isn't valid, ignore it.
    if (!Other.isValid())
      return *this;

    // If this value isn't valid, this must be the first predecessor, use it.
    if (!isValid())
      return Other;

    // If either is unknown, the result is unknown.
    if (isUnknown() || Other.isUnknown())
      return VSETVLIInfo::getUnknown();

    // If we have an exact, match return this.
    if (*this == Other)
      return *this;

    // Not an exact match, but maybe the AVL and VLMAX are the same.  If so,
    // return an SEW/LMUL ratio only value.
    if (hasSameAVL(Other) && hasSameVLMAX(Other)) {
      VSETVLIInfo MergeInfo = *this;
      MergeInfo.SEWLMULRatioOnly = true;
      return MergeInfo;
    }

    // Otherwise the result is unknown.
    return VSETVLIInfo::getUnknown();
  }

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  /// Support for debugging, callable in GDB: V->dump()
  LLVM_DUMP_METHOD void dump() const {
    print(dbgs());
    dbgs() << "\n";
  }

  /// Implement operator<<.
  /// @{
  void print(raw_ostream &OS) const {
    OS << "{";
    if (!isValid())
      OS << "Uninitialized";
    if (isUnknown())
      OS << "unknown";
    if (hasAVLReg())
      OS << "AVLReg=" << (unsigned)AVLReg;
    if (hasAVLImm())
      OS << "AVLImm=" << (unsigned)AVLImm;
    OS << ", "
       << "VLMul=" << (unsigned)VLMul << ", "
       << "SEW=" << (unsigned)SEW << ", "
       << "TailAgnostic=" << (bool)TailAgnostic << ", "
       << "MaskAgnostic=" << (bool)MaskAgnostic << ", "
       << "SEWLMULRatioOnly=" << (bool)SEWLMULRatioOnly << "}";
  }
#endif
};
|
|
|
|
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
// Stream insertion for VSETVLIInfo; used by the LLVM_DEBUG state dumps.
LLVM_ATTRIBUTE_USED
inline raw_ostream &operator<<(raw_ostream &OS, const VSETVLIInfo &V) {
  V.print(OS);
  return OS;
}
#endif
|
|
|
|
/// Per-basic-block dataflow facts computed by phases 1 and 2.
struct BlockData {
  // The VSETVLIInfo that represents the net changes to the VL/VTYPE registers
  // made by this block. Calculated in Phase 1.
  VSETVLIInfo Change;

  // The VSETVLIInfo that represents the VL/VTYPE settings on exit from this
  // block. Calculated in Phase 2.
  VSETVLIInfo Exit;

  // The VSETVLIInfo that represents the VL/VTYPE settings from all predecessor
  // blocks. Calculated in Phase 2, and used by Phase 3.
  VSETVLIInfo Pred;

  // Keeps track of whether the block is already in the queue.
  bool InQueue = false;

  BlockData() = default;
};
|
|
|
|
/// Machine function pass inserting vsetvli/vsetivli instructions via the
/// three-phase dataflow described in the file header.
class RISCVInsertVSETVLI : public MachineFunctionPass {
  const TargetInstrInfo *TII;
  MachineRegisterInfo *MRI;

  // Indexed by MachineBasicBlock number; filled in by phases 1 and 2.
  std::vector<BlockData> BlockInfo;
  // Phase-2 worklist of blocks whose entry state may need recomputation.
  std::queue<const MachineBasicBlock *> WorkList;

public:
  static char ID;

  RISCVInsertVSETVLI() : MachineFunctionPass(ID) {
    initializeRISCVInsertVSETVLIPass(*PassRegistry::getPassRegistry());
  }
  bool runOnMachineFunction(MachineFunction &MF) override;

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    // We insert instructions only; the CFG shape is untouched.
    AU.setPreservesCFG();
    MachineFunctionPass::getAnalysisUsage(AU);
  }

  StringRef getPassName() const override { return RISCV_INSERT_VSETVLI_NAME; }

private:
  // True if a vsetvli must be emitted to move CurInfo to Require before MI.
  bool needVSETVLI(const MachineInstr &MI, const VSETVLIInfo &Require,
                   const VSETVLIInfo &CurInfo) const;
  // Phi-peeking variant: true unless all incoming AVL values come from
  // compatible vsetvlis in the predecessors.
  bool needVSETVLIPHI(const VSETVLIInfo &Require,
                      const MachineBasicBlock &MBB) const;
  // Emit the vsetvli realizing Info before MI (or at InsertPt), using
  // PrevInfo to pick cheaper VL-preserving encodings when possible.
  void insertVSETVLI(MachineBasicBlock &MBB, MachineInstr &MI,
                     const VSETVLIInfo &Info, const VSETVLIInfo &PrevInfo);
  void insertVSETVLI(MachineBasicBlock &MBB,
                     MachineBasicBlock::iterator InsertPt, DebugLoc DL,
                     const VSETVLIInfo &Info, const VSETVLIInfo &PrevInfo);

  // Abstract transfer functions for the dataflow: state just before / just
  // after executing MI.
  void transferBefore(VSETVLIInfo &Info, const MachineInstr &MI);
  void transferAfter(VSETVLIInfo &Info, const MachineInstr &MI);
  // Phase 1 per-block scan; returns true if the block contains a vector op.
  bool computeVLVTYPEChanges(const MachineBasicBlock &MBB);
  // Phase 2 fixed-point step for one block.
  void computeIncomingVLVTYPE(const MachineBasicBlock &MBB);
  // Phase 3 insertion, plus local cleanup/PRE passes.
  void emitVSETVLIs(MachineBasicBlock &MBB);
  void doLocalPostpass(MachineBasicBlock &MBB);
  void doPRE(MachineBasicBlock &MBB);
  // Expand VLEFF/VLSEGFF vl outputs to PseudoReadVL.
  void insertReadVL(MachineBasicBlock &MBB);
};
|
|
|
|
} // end anonymous namespace
|
|
|
|
// Pass identification; address of ID is the unique pass identifier.
char RISCVInsertVSETVLI::ID = 0;

INITIALIZE_PASS(RISCVInsertVSETVLI, DEBUG_TYPE, RISCV_INSERT_VSETVLI_NAME,
                false, false)
|
|
|
|
/// Compute the VL/VTYPE state this vector instruction requires: its AVL
/// (immediate, register, or VLMAX/none) and the vtype fields implied by its
/// pseudo encoding and policy operand.
static VSETVLIInfo computeInfoForInstr(const MachineInstr &MI, uint64_t TSFlags,
                                       const MachineRegisterInfo *MRI) {
  VSETVLIInfo InstrInfo;

  bool TailAgnostic, MaskAgnostic;
  unsigned UseOpIdx;
  if (MI.isRegTiedToUseOperand(0, &UseOpIdx)) {
    // Start with undisturbed.
    TailAgnostic = false;
    MaskAgnostic = false;

    // If there is a policy operand, use it.
    if (RISCVII::hasVecPolicyOp(TSFlags)) {
      // The policy operand is always the last explicit operand.
      const MachineOperand &Op = MI.getOperand(MI.getNumExplicitOperands() - 1);
      uint64_t Policy = Op.getImm();
      assert(Policy <= (RISCVII::TAIL_AGNOSTIC | RISCVII::MASK_AGNOSTIC) &&
             "Invalid Policy Value");
      TailAgnostic = Policy & RISCVII::TAIL_AGNOSTIC;
      MaskAgnostic = Policy & RISCVII::MASK_AGNOSTIC;
    }

    // If the tied operand is an IMPLICIT_DEF we can use TailAgnostic and
    // MaskAgnostic: no defined values need to be preserved.
    const MachineOperand &UseMO = MI.getOperand(UseOpIdx);
    MachineInstr *UseMI = MRI->getVRegDef(UseMO.getReg());
    if (UseMI && UseMI->isImplicitDef()) {
      TailAgnostic = true;
      MaskAgnostic = true;
    }
    // Some pseudo instructions force a tail agnostic policy despite having a
    // tied def.
    if (RISCVII::doesForceTailAgnostic(TSFlags))
      TailAgnostic = true;

    if (!RISCVII::usesMaskPolicy(TSFlags))
      MaskAgnostic = true;
  } else {
    // If there is no tied operand, there shouldn't be a policy operand.
    assert(!RISCVII::hasVecPolicyOp(TSFlags) && "Unexpected policy operand");
    // No tied operand: use agnostic policies.
    TailAgnostic = true;
    MaskAgnostic = true;
  }

  RISCVII::VLMUL VLMul = RISCVII::getLMul(TSFlags);

  unsigned Log2SEW = MI.getOperand(getSEWOpNum(MI)).getImm();
  // A Log2SEW of 0 is an operation on mask registers only; treat as SEW=8.
  unsigned SEW = Log2SEW ? 1 << Log2SEW : 8;
  assert(RISCVVType::isValidSEW(SEW) && "Unexpected SEW");

  if (RISCVII::hasVLOp(TSFlags)) {
    const MachineOperand &VLOp = MI.getOperand(getVLOpNum(MI));
    if (VLOp.isImm()) {
      int64_t Imm = VLOp.getImm();
      // Convert the VLMax sentinel to the X0 register form.
      if (Imm == RISCV::VLMaxSentinel)
        InstrInfo.setAVLReg(RISCV::X0);
      else
        InstrInfo.setAVLImm(Imm);
    } else {
      InstrInfo.setAVLReg(VLOp.getReg());
    }
  } else {
    // No VL operand: any AVL is acceptable (NoRegister marker).
    InstrInfo.setAVLReg(RISCV::NoRegister);
  }
#ifndef NDEBUG
  // Cross-check the assumption getDemanded() makes for implicit-EEW
  // loads/stores: the encoded SEW equals the opcode's EEW.
  if (Optional<unsigned> EEW = getEEWForLoadStore(MI)) {
    assert(SEW == EEW && "Initial SEW doesn't match expected EEW");
  }
#endif
  InstrInfo.setVTYPE(VLMul, SEW, TailAgnostic, MaskAgnostic);

  return InstrInfo;
}
|
|
|
|
/// Convenience overload: insert the vsetvli immediately before \p MI,
/// reusing its debug location.
void RISCVInsertVSETVLI::insertVSETVLI(MachineBasicBlock &MBB, MachineInstr &MI,
                                       const VSETVLIInfo &Info,
                                       const VSETVLIInfo &PrevInfo) {
  insertVSETVLI(MBB, MachineBasicBlock::iterator(&MI), MI.getDebugLoc(), Info,
                PrevInfo);
}
|
|
|
|
/// Emit a vsetvli/vsetivli at InsertPt realizing the state Info, choosing
/// the cheapest encoding given the previously known state PrevInfo.
void RISCVInsertVSETVLI::insertVSETVLI(MachineBasicBlock &MBB,
                     MachineBasicBlock::iterator InsertPt, DebugLoc DL,
                     const VSETVLIInfo &Info, const VSETVLIInfo &PrevInfo) {

  // Use X0, X0 form if the AVL is the same and the SEW+LMUL gives the same
  // VLMAX.
  if (PrevInfo.isValid() && !PrevInfo.isUnknown() &&
      Info.hasSameAVL(PrevInfo) && Info.hasSameVLMAX(PrevInfo)) {
    BuildMI(MBB, InsertPt, DL, TII->get(RISCV::PseudoVSETVLIX0))
        .addReg(RISCV::X0, RegState::Define | RegState::Dead)
        .addReg(RISCV::X0, RegState::Kill)
        .addImm(Info.encodeVTYPE())
        // The x0,x0 form reads the current VL; model that dependence.
        .addReg(RISCV::VL, RegState::Implicit);
    return;
  }

  if (Info.hasAVLImm()) {
    BuildMI(MBB, InsertPt, DL, TII->get(RISCV::PseudoVSETIVLI))
        .addReg(RISCV::X0, RegState::Define | RegState::Dead)
        .addImm(Info.getAVLImm())
        .addImm(Info.encodeVTYPE());
    return;
  }

  Register AVLReg = Info.getAVLReg();
  if (AVLReg == RISCV::NoRegister) {
    // NoRegister means the instruction has no VL operand; any AVL works.
    // We can only use x0, x0 if there's no chance of the vtype change causing
    // the previous vl to become invalid.
    if (PrevInfo.isValid() && !PrevInfo.isUnknown() &&
        Info.hasSameVLMAX(PrevInfo)) {
      BuildMI(MBB, InsertPt, DL, TII->get(RISCV::PseudoVSETVLIX0))
          .addReg(RISCV::X0, RegState::Define | RegState::Dead)
          .addReg(RISCV::X0, RegState::Kill)
          .addImm(Info.encodeVTYPE())
          .addReg(RISCV::VL, RegState::Implicit);
      return;
    }
    // Otherwise use an AVL of 0 to avoid depending on previous vl.
    BuildMI(MBB, InsertPt, DL, TII->get(RISCV::PseudoVSETIVLI))
        .addReg(RISCV::X0, RegState::Define | RegState::Dead)
        .addImm(0)
        .addImm(Info.encodeVTYPE());
    return;
  }

  // PseudoVSETVLI requires a non-X0 AVL source register class.
  if (AVLReg.isVirtual())
    MRI->constrainRegClass(AVLReg, &RISCV::GPRNoX0RegClass);

  // Use X0 as the DestReg unless AVLReg is X0. We also need to change the
  // opcode if the AVLReg is X0 as they have different register classes for
  // the AVL operand.
  Register DestReg = RISCV::X0;
  unsigned Opcode = RISCV::PseudoVSETVLI;
  if (AVLReg == RISCV::X0) {
    DestReg = MRI->createVirtualRegister(&RISCV::GPRRegClass);
    Opcode = RISCV::PseudoVSETVLIX0;
  }
  BuildMI(MBB, InsertPt, DL, TII->get(Opcode))
      .addReg(DestReg, RegState::Define | RegState::Dead)
      .addReg(AVLReg)
      .addImm(Info.encodeVTYPE());
}
|
|
|
|
// Return a VSETVLIInfo representing the changes made by this VSETVLI or
// VSETIVLI instruction.
static VSETVLIInfo getInfoForVSETVLI(const MachineInstr &MI) {
  VSETVLIInfo NewInfo;
  if (MI.getOpcode() == RISCV::PseudoVSETIVLI) {
    // vsetivli: operand 1 is an immediate AVL.
    NewInfo.setAVLImm(MI.getOperand(1).getImm());
  } else {
    assert(MI.getOpcode() == RISCV::PseudoVSETVLI ||
           MI.getOpcode() == RISCV::PseudoVSETVLIX0);
    Register AVLReg = MI.getOperand(1).getReg();
    // The x0,x0 (VL-preserving) form doesn't define a new AVL state.
    assert((AVLReg != RISCV::X0 || MI.getOperand(0).getReg() != RISCV::X0) &&
           "Can't handle X0, X0 vsetvli yet");
    NewInfo.setAVLReg(AVLReg);
  }
  // Operand 2 is the encoded vtype immediate in all three forms.
  NewInfo.setVTYPE(MI.getOperand(2).getImm());

  return NewInfo;
}
|
|
|
|
/// Return true if a VSETVLI is required to transition from CurInfo to Require
/// before MI.
bool RISCVInsertVSETVLI::needVSETVLI(const MachineInstr &MI,
                                     const VSETVLIInfo &Require,
                                     const VSETVLIInfo &CurInfo) const {
  // Require must be exactly what MI demands (callers precompute it).
  assert(Require == computeInfoForInstr(MI, MI.getDesc().TSFlags, MRI));

  if (CurInfo.isCompatible(MI, Require))
    return false;

  // With no usable current state there is nothing further to reason about.
  if (!CurInfo.isValid() || CurInfo.isUnknown() || CurInfo.hasSEWLMULRatioOnly())
    return true;

  // For vmv.s.x and vfmv.s.f, there are only two behaviors, VL = 0 and VL > 0.
  // VL=0 is uninteresting (as it should have been deleted already), so it is
  // compatible if we can prove both are non-zero.  Additionally, if writing
  // to an implicit_def operand, we don't need to preserve any other bits and
  // are thus compatible with any larger etype, and can disregard policy bits.
  if (isScalarMoveInstr(MI) &&
      CurInfo.hasNonZeroAVL() && Require.hasNonZeroAVL()) {
    auto *VRegDef = MRI->getVRegDef(MI.getOperand(1).getReg());
    if (VRegDef && VRegDef->isImplicitDef() &&
        CurInfo.getSEW() >= Require.getSEW())
      return false;
    if (CurInfo.hasSameSEW(Require) && CurInfo.hasSamePolicy(Require))
      return false;
  }

  // We didn't find a compatible value. If our AVL is a virtual register,
  // it might be defined by a VSET(I)VLI. If it has the same VLMAX we need
  // and the last VL/VTYPE we observed is the same, we don't need a
  // VSETVLI here.
  if (Require.hasAVLReg() && Require.getAVLReg().isVirtual() &&
      CurInfo.hasCompatibleVTYPE(MI, Require)) {
    if (MachineInstr *DefMI = MRI->getVRegDef(Require.getAVLReg())) {
      if (isVectorConfigInstr(*DefMI)) {
        VSETVLIInfo DefInfo = getInfoForVSETVLI(*DefMI);
        if (DefInfo.hasSameAVL(CurInfo) && DefInfo.hasSameVLMAX(CurInfo))
          return false;
      }
    }
  }

  return true;
}
|
|
|
|
// Given an incoming state reaching MI, modifies that state so that it is minimally
// compatible with MI.  The resulting state is guaranteed to be semantically legal
// for MI, but may not be the state requested by MI.
void RISCVInsertVSETVLI::transferBefore(VSETVLIInfo &Info, const MachineInstr &MI) {
  uint64_t TSFlags = MI.getDesc().TSFlags;
  // Non-vector instructions place no demand on the incoming state.
  if (!RISCVII::hasSEWOp(TSFlags))
    return;

  const VSETVLIInfo NewInfo = computeInfoForInstr(MI, TSFlags, MRI);
  if (Info.isValid() && !needVSETVLI(MI, NewInfo, Info))
    return;

  const VSETVLIInfo PrevInfo = Info;
  Info = NewInfo;

  if (!RISCVII::hasVLOp(TSFlags))
    return;

  // For vmv.s.x and vfmv.s.f, there are only two behaviors, VL = 0 and
  // VL > 0.  We can discard the user requested AVL and just use the last
  // one if we can prove both are non-zero.  This removes a vsetvli entirely
  // if the types match or allows use of cheaper avl preserving variant
  // if VLMAX doesn't change.  If VLMAX might change, we couldn't use
  // the 'vsetvli x0, x0, vtype" variant, so we avoid the transform to
  // prevent extending live range of an avl register operand.
  // TODO: We can probably relax this for immediates.
  if (isScalarMoveInstr(MI) && PrevInfo.isValid() &&
      PrevInfo.hasNonZeroAVL() && Info.hasNonZeroAVL() &&
      Info.hasSameVLMAX(PrevInfo)) {
    if (PrevInfo.hasAVLImm())
      Info.setAVLImm(PrevInfo.getAVLImm());
    else
      Info.setAVLReg(PrevInfo.getAVLReg());
    return;
  }

  // If AVL is defined by a vsetvli with the same VLMAX, we can
  // replace the AVL operand with the AVL of the defining vsetvli.
  // We avoid general register AVLs to avoid extending live ranges
  // without being sure we can kill the original source reg entirely.
  if (!Info.hasAVLReg() || !Info.getAVLReg().isVirtual())
    return;
  MachineInstr *DefMI = MRI->getVRegDef(Info.getAVLReg());
  if (!DefMI || !isVectorConfigInstr(*DefMI))
    return;

  VSETVLIInfo DefInfo = getInfoForVSETVLI(*DefMI);
  // Only forward immediates or X0 (VLMAX) — see live-range note above.
  if (DefInfo.hasSameVLMAX(Info) &&
      (DefInfo.hasAVLImm() || DefInfo.getAVLReg() == RISCV::X0)) {
    if (DefInfo.hasAVLImm())
      Info.setAVLImm(DefInfo.getAVLImm());
    else
      Info.setAVLReg(DefInfo.getAVLReg());
    return;
  }
}
|
|
|
|
// Given a state with which we evaluated MI (see transferBefore above for why
// this might be different than the state MI requested), modify the state to
// reflect the changes MI might make.
void RISCVInsertVSETVLI::transferAfter(VSETVLIInfo &Info, const MachineInstr &MI) {
  // An explicit config instruction defines the state exactly.
  if (isVectorConfigInstr(MI)) {
    Info = getInfoForVSETVLI(MI);
    return;
  }

  if (RISCV::isFaultFirstLoad(MI)) {
    // Update AVL to vl-output of the fault first load.
    Info.setAVLReg(MI.getOperand(1).getReg());
    return;
  }

  // If this is something that updates VL/VTYPE that we don't know about, set
  // the state to unknown.
  if (MI.isCall() || MI.isInlineAsm() || MI.modifiesRegister(RISCV::VL) ||
      MI.modifiesRegister(RISCV::VTYPE))
    Info = VSETVLIInfo::getUnknown();
}
|
|
|
|
/// Phase 1: simulate the block's effect on VL/VTYPE starting from its Pred
/// state, leaving the result in BlockInfo[...].Change.  Returns true if the
/// block contains any vector configuration or vector operation.
bool RISCVInsertVSETVLI::computeVLVTYPEChanges(const MachineBasicBlock &MBB) {
  bool HadVectorOp = false;

  BlockData &BBInfo = BlockInfo[MBB.getNumber()];
  BBInfo.Change = BBInfo.Pred;
  for (const MachineInstr &MI : MBB) {
    transferBefore(BBInfo.Change, MI);

    if (isVectorConfigInstr(MI) || RISCVII::hasSEWOp(MI.getDesc().TSFlags))
      HadVectorOp = true;

    transferAfter(BBInfo.Change, MI);
  }

  return HadVectorOp;
}
|
|
|
|
/// Phase 2: one fixed-point iteration step.  Recompute MBB's entry state as
/// the intersection of all predecessor exit states; if the resulting exit
/// state changes, enqueue the successors for re-evaluation.
void RISCVInsertVSETVLI::computeIncomingVLVTYPE(const MachineBasicBlock &MBB) {

  BlockData &BBInfo = BlockInfo[MBB.getNumber()];

  BBInfo.InQueue = false;

  // Start with the previous entry so that we keep the most conservative state
  // we have ever found.
  VSETVLIInfo InInfo = BBInfo.Pred;
  if (MBB.pred_empty()) {
    // There are no predecessors, so use the default starting status.
    InInfo.setUnknown();
  } else {
    for (MachineBasicBlock *P : MBB.predecessors())
      InInfo = InInfo.intersect(BlockInfo[P->getNumber()].Exit);
  }

  // If we don't have any valid predecessor value, wait until we do.
  if (!InInfo.isValid())
    return;

  // If no change, no need to rerun block
  if (InInfo == BBInfo.Pred)
    return;

  BBInfo.Pred = InInfo;
  LLVM_DEBUG(dbgs() << "Entry state of " << printMBBReference(MBB)
                    << " changed to " << BBInfo.Pred << "\n");

  // Note: It's tempting to cache the state changes here, but due to the
  // compatibility checks performed a blocks output state can change based on
  // the input state.  To cache, we'd have to add logic for finding
  // never-compatible state changes.
  computeVLVTYPEChanges(MBB);
  VSETVLIInfo TmpStatus = BBInfo.Change;

  // If the new exit value matches the old exit value, we don't need to revisit
  // any blocks.
  if (BBInfo.Exit == TmpStatus)
    return;

  BBInfo.Exit = TmpStatus;
  LLVM_DEBUG(dbgs() << "Exit state of " << printMBBReference(MBB)
                    << " changed to " << BBInfo.Exit << "\n");

  // Add the successors to the work list so we can propagate the changed exit
  // status.
  for (MachineBasicBlock *S : MBB.successors())
    if (!BlockInfo[S->getNumber()].InQueue) {
      BlockInfo[S->getNumber()].InQueue = true;
      WorkList.push(S);
    }
}
|
|
|
|
// If we weren't able to prove a vsetvli was directly unneeded, it might still
|
|
// be unneeded if the AVL is a phi node where all incoming values are VL
|
|
// outputs from the last VSETVLI in their respective basic blocks.
|
|
bool RISCVInsertVSETVLI::needVSETVLIPHI(const VSETVLIInfo &Require,
|
|
const MachineBasicBlock &MBB) const {
|
|
if (DisableInsertVSETVLPHIOpt)
|
|
return true;
|
|
|
|
if (!Require.hasAVLReg())
|
|
return true;
|
|
|
|
Register AVLReg = Require.getAVLReg();
|
|
if (!AVLReg.isVirtual())
|
|
return true;
|
|
|
|
// We need the AVL to be produce by a PHI node in this basic block.
|
|
MachineInstr *PHI = MRI->getVRegDef(AVLReg);
|
|
if (!PHI || PHI->getOpcode() != RISCV::PHI || PHI->getParent() != &MBB)
|
|
return true;
|
|
|
|
for (unsigned PHIOp = 1, NumOps = PHI->getNumOperands(); PHIOp != NumOps;
|
|
PHIOp += 2) {
|
|
Register InReg = PHI->getOperand(PHIOp).getReg();
|
|
MachineBasicBlock *PBB = PHI->getOperand(PHIOp + 1).getMBB();
|
|
const BlockData &PBBInfo = BlockInfo[PBB->getNumber()];
|
|
// If the exit from the predecessor has the VTYPE we are looking for
|
|
// we might be able to avoid a VSETVLI.
|
|
if (PBBInfo.Exit.isUnknown() || !PBBInfo.Exit.hasSameVTYPE(Require))
|
|
return true;
|
|
|
|
// We need the PHI input to the be the output of a VSET(I)VLI.
|
|
MachineInstr *DefMI = MRI->getVRegDef(InReg);
|
|
if (!DefMI || !isVectorConfigInstr(*DefMI))
|
|
return true;
|
|
|
|
// We found a VSET(I)VLI make sure it matches the output of the
|
|
// predecessor block.
|
|
VSETVLIInfo DefInfo = getInfoForVSETVLI(*DefMI);
|
|
if (!DefInfo.hasSameAVL(PBBInfo.Exit) ||
|
|
!DefInfo.hasSameVTYPE(PBBInfo.Exit))
|
|
return true;
|
|
}
|
|
|
|
// If all the incoming values to the PHI checked out, we don't need
|
|
// to insert a VSETVLI.
|
|
return false;
|
|
}
|
|
|
|
// Phase 3: walk the block with the dataflow entry state computed in phase 2,
// inserting explicit vsetvli instructions wherever an instruction's demanded
// VL/VTYPE differs from the tracked state, and rewriting vector pseudos to
// read VL/VTYPE as implicit register uses instead of explicit AVL operands.
void RISCVInsertVSETVLI::emitVSETVLIs(MachineBasicBlock &MBB) {
  VSETVLIInfo CurInfo = BlockInfo[MBB.getNumber()].Pred;
  // Track whether the prefix of the block we've scanned is transparent
  // (meaning has not yet changed the abstract state).
  bool PrefixTransparent = true;
  for (MachineInstr &MI : MBB) {
    const VSETVLIInfo PrevInfo = CurInfo;
    transferBefore(CurInfo, MI);

    // If this is an explicit VSETVLI or VSETIVLI, update our state.
    if (isVectorConfigInstr(MI)) {
      // Conservatively, mark the VL and VTYPE as live.
      assert(MI.getOperand(3).getReg() == RISCV::VL &&
             MI.getOperand(4).getReg() == RISCV::VTYPE &&
             "Unexpected operands where VL and VTYPE should be");
      MI.getOperand(3).setIsDead(false);
      MI.getOperand(4).setIsDead(false);
      PrefixTransparent = false;
    }

    uint64_t TSFlags = MI.getDesc().TSFlags;
    if (RISCVII::hasSEWOp(TSFlags)) {
      if (PrevInfo != CurInfo) {
        // If this is the first implicit state change, and the state change
        // requested can be proven to produce the same register contents, we
        // can skip emitting the actual state change and continue as if we
        // had since we know the GPR result of the implicit state change
        // wouldn't be used and VL/VTYPE registers are correct. Note that
        // we *do* need to model the state as if it changed as while the
        // register contents are unchanged, the abstract model can change.
        if (!PrefixTransparent || needVSETVLIPHI(CurInfo, MBB))
          insertVSETVLI(MBB, MI, CurInfo, PrevInfo);
        PrefixTransparent = false;
      }

      if (RISCVII::hasVLOp(TSFlags)) {
        MachineOperand &VLOp = MI.getOperand(getVLOpNum(MI));
        if (VLOp.isReg()) {
          // Erase the AVL operand from the instruction.
          VLOp.setReg(RISCV::NoRegister);
          VLOp.setIsKill(false);
        }
        // The pseudo now reads its VL via this implicit use instead.
        MI.addOperand(MachineOperand::CreateReg(RISCV::VL, /*isDef*/ false,
                                                /*isImp*/ true));
      }
      // Every instruction with an SEW operand reads VTYPE implicitly.
      MI.addOperand(MachineOperand::CreateReg(RISCV::VTYPE, /*isDef*/ false,
                                              /*isImp*/ true));
    }

    // Anything which may clobber VL/VTYPE ends the transparent prefix.
    if (MI.isCall() || MI.isInlineAsm() || MI.modifiesRegister(RISCV::VL) ||
        MI.modifiesRegister(RISCV::VTYPE))
      PrefixTransparent = false;

    transferAfter(CurInfo, MI);
  }

  // If we reach the end of the block and our current info doesn't match the
  // expected info, insert a vsetvli to correct.
  if (!UseStrictAsserts) {
    const VSETVLIInfo &ExitInfo = BlockInfo[MBB.getNumber()].Exit;
    if (CurInfo.isValid() && ExitInfo.isValid() && !ExitInfo.isUnknown() &&
        CurInfo != ExitInfo) {
      // Note there's an implicit assumption here that terminators never use
      // or modify VL or VTYPE. Also, fallthrough will return end().
      auto InsertPt = MBB.getFirstInstrTerminator();
      insertVSETVLI(MBB, InsertPt, MBB.findDebugLoc(InsertPt), ExitInfo,
                    CurInfo);
      CurInfo = ExitInfo;
    }
  }

  // In strict mode, the locally-recomputed end state must agree with the
  // phase 2 dataflow result; dump both states before asserting on mismatch.
  if (UseStrictAsserts && CurInfo.isValid()) {
    const auto &Info = BlockInfo[MBB.getNumber()];
    if (CurInfo != Info.Exit) {
      LLVM_DEBUG(dbgs() << "in block " << printMBBReference(MBB) << "\n");
      LLVM_DEBUG(dbgs() << "  begin state: " << Info.Pred << "\n");
      LLVM_DEBUG(dbgs() << "  expected end state: " << Info.Exit << "\n");
      LLVM_DEBUG(dbgs() << "  actual end state: " << CurInfo << "\n");
    }
    assert(CurInfo == Info.Exit &&
           "InsertVSETVLI dataflow invariant violated");
  }
}
|
|
|
|
/// Return true if the VL value configured must be equal to the requested one.
|
|
static bool hasFixedResult(const VSETVLIInfo &Info, const RISCVSubtarget &ST) {
|
|
if (!Info.hasAVLImm())
|
|
// VLMAX is always the same value.
|
|
// TODO: Could extend to other registers by looking at the associated vreg
|
|
// def placement.
|
|
return RISCV::X0 == Info.getAVLReg();
|
|
|
|
unsigned AVL = Info.getAVLImm();
|
|
unsigned SEW = Info.getSEW();
|
|
unsigned AVLInBits = AVL * SEW;
|
|
|
|
unsigned LMul;
|
|
bool Fractional;
|
|
std::tie(LMul, Fractional) = RISCVVType::decodeVLMUL(Info.getVLMUL());
|
|
|
|
if (Fractional)
|
|
return ST.getRealMinVLen() / LMul >= AVLInBits;
|
|
return ST.getRealMinVLen() * LMul >= AVLInBits;
|
|
}
|
|
|
|
/// Perform simple partial redundancy elimination of the VSETVLI instructions
/// we're about to insert by looking for cases where we can PRE from the
/// beginning of one block to the end of one of its predecessors. Specifically,
/// this is geared to catch the common case of a fixed length vsetvl in a single
/// block loop when it could execute once in the preheader instead.
void RISCVInsertVSETVLI::doPRE(MachineBasicBlock &MBB) {
  const MachineFunction &MF = *MBB.getParent();
  const RISCVSubtarget &ST = MF.getSubtarget<RISCVSubtarget>();

  // Only profitable when the entry state is unknown; otherwise phase 3
  // already has a usable incoming state.
  if (!BlockInfo[MBB.getNumber()].Pred.isUnknown())
    return;

  // Scan the predecessors: we need exactly one with an unknown exit state
  // (the PRE insertion point) and all others agreeing on a single state.
  MachineBasicBlock *UnavailablePred = nullptr;
  VSETVLIInfo AvailableInfo;
  for (MachineBasicBlock *P : MBB.predecessors()) {
    const VSETVLIInfo &PredInfo = BlockInfo[P->getNumber()].Exit;
    if (PredInfo.isUnknown()) {
      if (UnavailablePred)
        return;
      UnavailablePred = P;
    } else if (!AvailableInfo.isValid()) {
      AvailableInfo = PredInfo;
    } else if (AvailableInfo != PredInfo) {
      return;
    }
  }

  // Unreachable, single pred, or full redundancy. Note that FRE is handled by
  // phase 3.
  if (!UnavailablePred || !AvailableInfo.isValid())
    return;

  // Critical edge - TODO: consider splitting?
  if (UnavailablePred->succ_size() != 1)
    return;

  // If VL can be less than AVL, then we can't reduce the frequency of exec.
  if (!hasFixedResult(AvailableInfo, ST))
    return;

  // Does it actually let us remove an implicit transition in MBB?
  // Bail if MBB contains an explicit config instruction, or if the first
  // state-demanding instruction wants something other than AvailableInfo.
  bool Found = false;
  for (auto &MI : MBB) {
    if (isVectorConfigInstr(MI))
      return;

    const uint64_t TSFlags = MI.getDesc().TSFlags;
    if (RISCVII::hasSEWOp(TSFlags)) {
      if (AvailableInfo != computeInfoForInstr(MI, TSFlags, MRI))
        return;
      Found = true;
      break;
    }
  }
  if (!Found)
    return;

  // Finally, update both data flow state and insert the actual vsetvli.
  // Doing both keeps the code in sync with the dataflow results, which
  // is critical for correctness of phase 3.
  auto OldInfo = BlockInfo[UnavailablePred->getNumber()].Exit;
  LLVM_DEBUG(dbgs() << "PRE VSETVLI from " << MBB.getName() << " to "
                    << UnavailablePred->getName() << " with state "
                    << AvailableInfo << "\n");
  BlockInfo[UnavailablePred->getNumber()].Exit = AvailableInfo;
  BlockInfo[MBB.getNumber()].Pred = AvailableInfo;

  // Note there's an implicit assumption here that terminators never use
  // or modify VL or VTYPE. Also, fallthrough will return end().
  auto InsertPt = UnavailablePred->getFirstInstrTerminator();
  insertVSETVLI(*UnavailablePred, InsertPt,
                UnavailablePred->findDebugLoc(InsertPt),
                AvailableInfo, OldInfo);
}
|
|
|
|
// Merge the fields demanded by B into A, so that afterwards A demands
// everything either operand demanded before the call (a set union of the
// per-field demand flags).
static void doUnion(DemandedFields &A, DemandedFields B) {
  A.VL |= B.VL;
  A.SEW |= B.SEW;
  A.LMUL |= B.LMUL;
  A.SEWLMULRatio |= B.SEWLMULRatio;
  A.TailPolicy |= B.TailPolicy;
  A.MaskPolicy |= B.MaskPolicy;
}
|
|
|
|
// Return true if we can mutate PrevMI's VTYPE to match MI's
|
|
// without changing any the fields which have been used.
|
|
// TODO: Restructure code to allow code reuse between this and isCompatible
|
|
// above.
|
|
static bool canMutatePriorConfig(const MachineInstr &PrevMI,
|
|
const MachineInstr &MI,
|
|
const DemandedFields &Used) {
|
|
// TODO: Extend this to handle cases where VL does change, but VL
|
|
// has not been used. (e.g. over a vmv.x.s)
|
|
if (!isVLPreservingConfig(MI))
|
|
// Note: `vsetvli x0, x0, vtype' is the canonical instruction
|
|
// for this case. If you find yourself wanting to add other forms
|
|
// to this "unused VTYPE" case, we're probably missing a
|
|
// canonicalization earlier.
|
|
return false;
|
|
|
|
if (!PrevMI.getOperand(2).isImm() || !MI.getOperand(2).isImm())
|
|
return false;
|
|
|
|
auto PriorVType = PrevMI.getOperand(2).getImm();
|
|
auto VType = MI.getOperand(2).getImm();
|
|
return areCompatibleVTYPEs(PriorVType, VType, Used);
|
|
}
|
|
|
|
// Block-local dead-store elimination and demanded-fields based merging of
// adjacent vector config instructions: a vsetvli whose configured state is
// never demanded before the next config instruction is deleted, and a
// VL-preserving config can be folded into its predecessor by rewriting the
// predecessor's VTYPE immediate.
void RISCVInsertVSETVLI::doLocalPostpass(MachineBasicBlock &MBB) {
  // The most recent config instruction still pending a keep/delete decision.
  MachineInstr *PrevMI = nullptr;
  // Fields demanded by the instructions seen since PrevMI.
  DemandedFields Used;
  // Deletions are deferred so iteration over MBB stays valid.
  SmallVector<MachineInstr*> ToDelete;
  for (MachineInstr &MI : MBB) {
    // Note: Must be *before* vsetvli handling to account for config cases
    // which only change some subfields.
    doUnion(Used, getDemanded(MI));

    if (!isVectorConfigInstr(MI))
      continue;

    if (PrevMI) {
      if (!Used.VL && !Used.usedVTYPE()) {
        // Nothing demanded any field PrevMI set before this config
        // instruction superseded it, so PrevMI is dead.
        ToDelete.push_back(PrevMI);
        // fallthrough
      } else if (canMutatePriorConfig(*PrevMI, MI, Used)) {
        // Fold MI into PrevMI by adopting MI's VTYPE immediate; MI itself
        // becomes redundant.
        PrevMI->getOperand(2).setImm(MI.getOperand(2).getImm());
        ToDelete.push_back(&MI);
        // Leave PrevMI unchanged
        continue;
      }
    }
    PrevMI = &MI;
    Used = getDemanded(MI);
    // If the config instruction's GPR result has (non-debug) uses, its VL
    // computation is observable and must be treated as demanded.
    Register VRegDef = MI.getOperand(0).getReg();
    if (VRegDef != RISCV::X0 &&
        !(VRegDef.isVirtual() && MRI->use_nodbg_empty(VRegDef)))
      Used.VL = true;
  }

  for (auto *MI : ToDelete)
    MI->eraseFromParent();
}
|
|
|
|
// Expand the vl output of each fault-first load (VLEFF/VLSEGFF) into a
// PseudoReadVL inserted immediately after the load, then retarget the
// load's vl operand to X0 since nothing reads it directly anymore.
void RISCVInsertVSETVLI::insertReadVL(MachineBasicBlock &MBB) {
  // Advance the iterator before processing so insertion after the current
  // instruction doesn't disturb our position.
  for (auto It = MBB.begin(); It != MBB.end();) {
    MachineInstr &MI = *It++;
    if (!RISCV::isFaultFirstLoad(MI))
      continue;

    // If anything (other than debug users) still reads the vl result,
    // materialize it via PseudoReadVL right after the load.
    Register VLOut = MI.getOperand(1).getReg();
    if (!MRI->use_nodbg_empty(VLOut))
      BuildMI(MBB, It, MI.getDebugLoc(), TII->get(RISCV::PseudoReadVL),
              VLOut);

    // The load itself no longer needs to define the vl register.
    MI.getOperand(1).setReg(RISCV::X0);
  }
}
|
|
|
|
// Pass entry point: runs the three dataflow phases described in the file
// header, then the local postpass cleanups (config DSE, dead-def rewriting,
// and fault-first-load vl expansion). Returns true iff the function was
// modified.
bool RISCVInsertVSETVLI::runOnMachineFunction(MachineFunction &MF) {
  // Skip if the vector extension is not enabled.
  const RISCVSubtarget &ST = MF.getSubtarget<RISCVSubtarget>();
  if (!ST.hasVInstructions())
    return false;

  LLVM_DEBUG(dbgs() << "Entering InsertVSETVLI for " << MF.getName() << "\n");

  TII = ST.getInstrInfo();
  MRI = &MF.getRegInfo();

  assert(BlockInfo.empty() && "Expect empty block infos");
  BlockInfo.resize(MF.getNumBlockIDs());

  bool HaveVectorOp = false;

  // Phase 1 - determine how VL/VTYPE are affected by the each block.
  for (const MachineBasicBlock &MBB : MF) {
    HaveVectorOp |= computeVLVTYPEChanges(MBB);
    // Initial exit state is whatever change we found in the block.
    BlockData &BBInfo = BlockInfo[MBB.getNumber()];
    BBInfo.Exit = BBInfo.Change;
    LLVM_DEBUG(dbgs() << "Initial exit state of " << printMBBReference(MBB)
                      << " is " << BBInfo.Exit << "\n");

  }

  // If we didn't find any instructions that need VSETVLI, we're done.
  if (!HaveVectorOp) {
    BlockInfo.clear();
    return false;
  }

  // Phase 2 - determine the exit VL/VTYPE from each block. We add all
  // blocks to the list here, but will also add any that need to be revisited
  // during Phase 2 processing.
  for (const MachineBasicBlock &MBB : MF) {
    WorkList.push(&MBB);
    BlockInfo[MBB.getNumber()].InQueue = true;
  }
  while (!WorkList.empty()) {
    const MachineBasicBlock &MBB = *WorkList.front();
    WorkList.pop();
    computeIncomingVLVTYPE(MBB);
  }

  // Perform partial redundancy elimination of vsetvli transitions.
  for (MachineBasicBlock &MBB : MF)
    doPRE(MBB);

  // Phase 3 - add any vsetvli instructions needed in the block. Use the
  // Phase 2 information to avoid adding vsetvlis before the first vector
  // instruction in the block if the VL/VTYPE is satisfied by its
  // predecessors.
  for (MachineBasicBlock &MBB : MF)
    emitVSETVLIs(MBB);

  // Now that all vsetvlis are explicit, go through and do block local
  // DSE and peephole based demanded fields based transforms. Note that
  // this *must* be done outside the main dataflow so long as we allow
  // any cross block analysis within the dataflow. We can't have both
  // demanded fields based mutation and non-local analysis in the
  // dataflow at the same time without introducing inconsistencies.
  for (MachineBasicBlock &MBB : MF)
    doLocalPostpass(MBB);

  // Once we're fully done rewriting all the instructions, do a final pass
  // through to check for VSETVLIs which write to an unused destination.
  // For the non X0, X0 variant, we can replace the destination register
  // with X0 to reduce register pressure. This is really a generic
  // optimization which can be applied to any dead def (TODO: generalize).
  for (MachineBasicBlock &MBB : MF) {
    for (MachineInstr &MI : MBB) {
      if (MI.getOpcode() == RISCV::PseudoVSETVLI ||
          MI.getOpcode() == RISCV::PseudoVSETIVLI) {
        Register VRegDef = MI.getOperand(0).getReg();
        if (VRegDef != RISCV::X0 && MRI->use_nodbg_empty(VRegDef))
          MI.getOperand(0).setReg(RISCV::X0);
      }
    }
  }

  // Insert PseudoReadVL after VLEFF/VLSEGFF and replace it with the vl output
  // of VLEFF/VLSEGFF.
  for (MachineBasicBlock &MBB : MF)
    insertReadVL(MBB);

  BlockInfo.clear();
  return HaveVectorOp;
}
|
|
|
|
/// Returns an instance of the Insert VSETVLI pass. The caller takes
/// ownership of the returned pass (heap-allocated with `new`).
FunctionPass *llvm::createRISCVInsertVSETVLIPass() {
  return new RISCVInsertVSETVLI();
}
|