diff --git a/llvm/include/llvm/Support/TargetParser.h b/llvm/include/llvm/Support/TargetParser.h index bf954b909cf2..99839129e86a 100644 --- a/llvm/include/llvm/Support/TargetParser.h +++ b/llvm/include/llvm/Support/TargetParser.h @@ -157,8 +157,10 @@ IsaVersion getIsaVersion(StringRef GPU); namespace RISCV { +// ARIES: FIXME: Change this to 32 to support zve32* which is needed by +// ventus-gpgpu. // We use 64 bits as the known part in the scalable vector types. -static constexpr unsigned RVVBitsPerBlock = 64; +static constexpr unsigned RVVBitsPerBlock = 32; enum CPUKind : unsigned { #define PROC(ENUM, NAME, FEATURES, DEFAULT_MARCH) CK_##ENUM, diff --git a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp index e1297a4a02f5..fa864a51d539 100644 --- a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp +++ b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp @@ -211,49 +211,6 @@ static SDValue createTuple(SelectionDAG &CurDAG, ArrayRef Regs, return SDValue(nullptr, 0); } -void RISCVDAGToDAGISel::addVectorLoadStoreOperands( - SDNode *Node, unsigned Log2SEW, const SDLoc &DL, unsigned CurOp, - bool IsMasked, bool IsStridedOrIndexed, SmallVectorImpl &Operands, - bool IsLoad, MVT *IndexVT) { - SDValue Chain = Node->getOperand(0); - SDValue Glue; - - Operands.push_back(Node->getOperand(CurOp++)); // Base pointer. - - if (IsStridedOrIndexed) { - Operands.push_back(Node->getOperand(CurOp++)); // Index. - if (IndexVT) - *IndexVT = Operands.back()->getSimpleValueType(0); - } - - if (IsMasked) { - // Mask needs to be copied to V0. - SDValue Mask = Node->getOperand(CurOp++); - Chain = CurDAG->getCopyToReg(Chain, DL, RISCV::V0, Mask, SDValue()); - Glue = Chain.getValue(1); - Operands.push_back(CurDAG->getRegister(RISCV::V0, Mask.getValueType())); - } - SDValue VL; - selectVLOp(Node->getOperand(CurOp++), VL); - Operands.push_back(VL); - - MVT XLenVT = Subtarget->getXLenVT(); - SDValue SEWOp = CurDAG->getTargetConstant(Log2SEW, DL, XLenVT); - Operands.push_back(SEWOp); - - // Masked load has the tail policy argument. - if (IsMasked && IsLoad) { - // Policy must be a constant. - uint64_t Policy = Node->getConstantOperandVal(CurOp++); - SDValue PolicyOp = CurDAG->getTargetConstant(Policy, DL, XLenVT); - Operands.push_back(PolicyOp); - } - - Operands.push_back(Chain); // Chain. - if (Glue) - Operands.push_back(Glue); -} - static bool isAllUndef(ArrayRef Values) { return llvm::all_of(Values, [](SDValue V) { return V->isUndef(); }); } @@ -766,388 +723,6 @@ void RISCVDAGToDAGISel::Select(SDNode *Node) { ReplaceNode(Node, MULHU); return; } - case ISD::INTRINSIC_WO_CHAIN: { - unsigned IntNo = Node->getConstantOperandVal(0); - switch (IntNo) { - // By default we do not custom select any intrinsic. - default: - break; - case Intrinsic::riscv_vmsgeu: - case Intrinsic::riscv_vmsge: { - assert(0 && "TODO"); - SDValue Src1 = Node->getOperand(1); - SDValue Src2 = Node->getOperand(2); - bool IsUnsigned = IntNo == Intrinsic::riscv_vmsgeu; - bool IsCmpUnsignedZero = false; - // Only custom select scalar second operand. - if (Src2.getValueType() != XLenVT) - break; - // Small constants are handled with patterns. 
- if (auto *C = dyn_cast(Src2)) { - int64_t CVal = C->getSExtValue(); - if (CVal >= -15 && CVal <= 16) { - if (!IsUnsigned || CVal != 0) - break; - IsCmpUnsignedZero = true; - } - } - MVT Src1VT = Src1.getSimpleValueType(); - unsigned VMSLTOpcode, VMNANDOpcode, VMSetOpcode; - SDValue SEW = CurDAG->getTargetConstant( - Log2_32(Src1VT.getScalarSizeInBits()), DL, XLenVT); - SDValue VL; - selectVLOp(Node->getOperand(3), VL); - - // If vmsgeu with 0 immediate, expand it to vmset. - if (IsCmpUnsignedZero) { - ReplaceNode(Node, CurDAG->getMachineNode(VMSetOpcode, DL, VT, VL, SEW)); - return; - } - - // Expand to - // vmslt{u}.vx vd, va, x; vmnand.mm vd, vd, vd - SDValue Cmp = SDValue( - CurDAG->getMachineNode(VMSLTOpcode, DL, VT, {Src1, Src2, VL, SEW}), - 0); - ReplaceNode(Node, CurDAG->getMachineNode(VMNANDOpcode, DL, VT, - {Cmp, Cmp, VL, SEW})); - return; - } - case Intrinsic::riscv_vmsgeu_mask: - case Intrinsic::riscv_vmsge_mask: { - assert(0 && "TODO"); - SDValue Src1 = Node->getOperand(2); - SDValue Src2 = Node->getOperand(3); - bool IsUnsigned = IntNo == Intrinsic::riscv_vmsgeu_mask; - bool IsCmpUnsignedZero = false; - // Only custom select scalar second operand. - if (Src2.getValueType() != XLenVT) - break; - // Small constants are handled with patterns. - if (auto *C = dyn_cast(Src2)) { - int64_t CVal = C->getSExtValue(); - if (CVal >= -15 && CVal <= 16) { - if (!IsUnsigned || CVal != 0) - break; - IsCmpUnsignedZero = true; - } - } - MVT Src1VT = Src1.getSimpleValueType(); - unsigned VMSLTOpcode, VMSLTMaskOpcode, VMXOROpcode, VMANDNOpcode, - VMOROpcode; - - SDValue SEW = CurDAG->getTargetConstant( - Log2_32(Src1VT.getScalarSizeInBits()), DL, XLenVT); - SDValue MaskSEW = CurDAG->getTargetConstant(0, DL, XLenVT); - SDValue VL; - selectVLOp(Node->getOperand(5), VL); - SDValue MaskedOff = Node->getOperand(1); - SDValue Mask = Node->getOperand(4); - - // If vmsgeu_mask with 0 immediate, expand it to vmor mask, maskedoff. - if (IsCmpUnsignedZero) { - // We don't need vmor if the MaskedOff and the Mask are the same - // value. - if (Mask == MaskedOff) { - ReplaceUses(Node, Mask.getNode()); - return; - } - ReplaceNode(Node, - CurDAG->getMachineNode(VMOROpcode, DL, VT, - {Mask, MaskedOff, VL, MaskSEW})); - return; - } - - // If the MaskedOff value and the Mask are the same value use - // vmslt{u}.vx vt, va, x; vmandn.mm vd, vd, vt - // This avoids needing to copy v0 to vd before starting the next sequence. - if (Mask == MaskedOff) { - SDValue Cmp = SDValue( - CurDAG->getMachineNode(VMSLTOpcode, DL, VT, {Src1, Src2, VL, SEW}), - 0); - ReplaceNode(Node, CurDAG->getMachineNode(VMANDNOpcode, DL, VT, - {Mask, Cmp, VL, MaskSEW})); - return; - } - - // Mask needs to be copied to V0. - SDValue Chain = CurDAG->getCopyToReg(CurDAG->getEntryNode(), DL, - RISCV::V0, Mask, SDValue()); - SDValue Glue = Chain.getValue(1); - SDValue V0 = CurDAG->getRegister(RISCV::V0, VT); - - // Otherwise use - // vmslt{u}.vx vd, va, x, v0.t; vmxor.mm vd, vd, v0 - // The result is mask undisturbed. - // We use the same instructions to emulate mask agnostic behavior, because - // the agnostic result can be either undisturbed or all 1. - SDValue Cmp = SDValue( - CurDAG->getMachineNode(VMSLTMaskOpcode, DL, VT, - {MaskedOff, Src1, Src2, V0, VL, SEW, Glue}), - 0); - // vmxor.mm vd, vd, v0 is used to update active value. 
- ReplaceNode(Node, CurDAG->getMachineNode(VMXOROpcode, DL, VT, - {Cmp, Mask, VL, MaskSEW})); - return; - } - } - break; - } - case ISD::INTRINSIC_W_CHAIN: { - unsigned IntNo = cast(Node->getOperand(1))->getZExtValue(); - switch (IntNo) { - // By default we do not custom select any intrinsic. - default: - break; - case Intrinsic::riscv_vlm: - case Intrinsic::riscv_vle: - case Intrinsic::riscv_vle_mask: - case Intrinsic::riscv_vlse: - case Intrinsic::riscv_vlse_mask: { - bool IsMasked = IntNo == Intrinsic::riscv_vle_mask || - IntNo == Intrinsic::riscv_vlse_mask; - bool IsStrided = - IntNo == Intrinsic::riscv_vlse || IntNo == Intrinsic::riscv_vlse_mask; - - MVT VT = Node->getSimpleValueType(0); - unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits()); - - unsigned CurOp = 2; - // The riscv_vlm intrinsic are always tail agnostic and no passthru operand. - bool HasPassthruOperand = IntNo != Intrinsic::riscv_vlm; - // Masked intrinsic only have TU version pseduo instructions. - bool IsTU = HasPassthruOperand && - (IsMasked || !Node->getOperand(CurOp).isUndef()); - SmallVector Operands; - if (IsTU) - Operands.push_back(Node->getOperand(CurOp++)); - else if (HasPassthruOperand) - // Skip the undef passthru operand for nomask TA version pseudo - CurOp++; - - addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked, IsStrided, - Operands, /*IsLoad=*/true); - - RISCVII::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT); - assert(0 && "TODO: cut!"); - /* - const RISCV::VLEPseudo *P = - RISCV::getVLEPseudo(IsMasked, IsTU, IsStrided, false, Log2SEW, - static_cast(LMUL)); - MachineSDNode *Load = - CurDAG->getMachineNode(P->Pseudo, DL, Node->getVTList(), Operands); - - if (auto *MemOp = dyn_cast(Node)) - CurDAG->setNodeMemRefs(Load, {MemOp->getMemOperand()}); - - ReplaceNode(Node, Load); - */ - return; - } - } - break; - } - - case ISD::INTRINSIC_VOID: { - unsigned IntNo = cast(Node->getOperand(1))->getZExtValue(); - switch (IntNo) { - case Intrinsic::riscv_vsm: - case Intrinsic::riscv_vse: { - MVT VT = Node->getOperand(2)->getSimpleValueType(0); - unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits()); - - unsigned CurOp = 2; - SmallVector Operands; - Operands.push_back(Node->getOperand(CurOp++)); // Store value. - - addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, 0, 0, - Operands); - - RISCVII::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT); - assert(0 && "TODO: Gen vALU load/store inst."); - /* - const RISCV::VSEPseudo *P = RISCV::getVSEPseudo( - 0, 0, Log2SEW, static_cast(LMUL)); - MachineSDNode *Store = - CurDAG->getMachineNode(P->Pseudo, DL, Node->getVTList(), Operands); - if (auto *MemOp = dyn_cast(Node)) - CurDAG->setNodeMemRefs(Store, {MemOp->getMemOperand()}); - - ReplaceNode(Node, Store); - */ - return; - } - } - break; - } - case ISD::BITCAST: { - MVT SrcVT = Node->getOperand(0).getSimpleValueType(); - // Just drop bitcasts between vectors if both are fixed or both are - // scalable. 
- if ((VT.isScalableVector() && SrcVT.isScalableVector()) || - (VT.isFixedLengthVector() && SrcVT.isFixedLengthVector())) { - ReplaceUses(SDValue(Node, 0), Node->getOperand(0)); - CurDAG->RemoveDeadNode(Node); - return; - } - break; - } - case ISD::INSERT_SUBVECTOR: { - SDValue V = Node->getOperand(0); - SDValue SubV = Node->getOperand(1); - SDLoc DL(SubV); - auto Idx = Node->getConstantOperandVal(2); - MVT SubVecVT = SubV.getSimpleValueType(); - - const RISCVTargetLowering &TLI = *Subtarget->getTargetLowering(); - MVT SubVecContainerVT = SubVecVT; - // Establish the correct scalable-vector types for any fixed-length type. - if (SubVecVT.isFixedLengthVector()) - SubVecContainerVT = TLI.getContainerForFixedLengthVector(SubVecVT); - if (VT.isFixedLengthVector()) - VT = TLI.getContainerForFixedLengthVector(VT); - - const auto *TRI = Subtarget->getRegisterInfo(); - unsigned SubRegIdx; - std::tie(SubRegIdx, Idx) = - RISCVTargetLowering::decomposeSubvectorInsertExtractToSubRegs( - VT, SubVecContainerVT, Idx, TRI); - - // If the Idx hasn't been completely eliminated then this is a subvector - // insert which doesn't naturally align to a vector register. These must - // be handled using instructions to manipulate the vector registers. - if (Idx != 0) - break; - - RISCVII::VLMUL SubVecLMUL = RISCVTargetLowering::getLMUL(SubVecContainerVT); - bool IsSubVecPartReg = SubVecLMUL == RISCVII::VLMUL::LMUL_F2 || - SubVecLMUL == RISCVII::VLMUL::LMUL_F4 || - SubVecLMUL == RISCVII::VLMUL::LMUL_F8; - (void)IsSubVecPartReg; // Silence unused variable warning without asserts. - assert((!IsSubVecPartReg || V.isUndef()) && - "Expecting lowering to have created legal INSERT_SUBVECTORs when " - "the subvector is smaller than a full-sized register"); - - // If we haven't set a SubRegIdx, then we must be going between - // equally-sized LMUL groups (e.g. VR -> VR). This can be done as a copy. - if (SubRegIdx == RISCV::NoSubRegister) { - unsigned InRegClassID = RISCVTargetLowering::getRegClassIDForVecVT(VT); - assert(RISCVTargetLowering::getRegClassIDForVecVT(SubVecContainerVT) == - InRegClassID && - "Unexpected subvector extraction"); - SDValue RC = CurDAG->getTargetConstant(InRegClassID, DL, XLenVT); - SDNode *NewNode = CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS, - DL, VT, SubV, RC); - ReplaceNode(Node, NewNode); - return; - } - - SDValue Insert = CurDAG->getTargetInsertSubreg(SubRegIdx, DL, VT, V, SubV); - ReplaceNode(Node, Insert.getNode()); - return; - } - case ISD::EXTRACT_SUBVECTOR: { - SDValue V = Node->getOperand(0); - auto Idx = Node->getConstantOperandVal(1); - MVT InVT = V.getSimpleValueType(); - SDLoc DL(V); - - const RISCVTargetLowering &TLI = *Subtarget->getTargetLowering(); - MVT SubVecContainerVT = VT; - // Establish the correct scalable-vector types for any fixed-length type. - if (VT.isFixedLengthVector()) - SubVecContainerVT = TLI.getContainerForFixedLengthVector(VT); - if (InVT.isFixedLengthVector()) - InVT = TLI.getContainerForFixedLengthVector(InVT); - - const auto *TRI = Subtarget->getRegisterInfo(); - unsigned SubRegIdx; - std::tie(SubRegIdx, Idx) = - RISCVTargetLowering::decomposeSubvectorInsertExtractToSubRegs( - InVT, SubVecContainerVT, Idx, TRI); - - // If the Idx hasn't been completely eliminated then this is a subvector - // extract which doesn't naturally align to a vector register. These must - // be handled using instructions to manipulate the vector registers. 
- if (Idx != 0) - break; - - // If we haven't set a SubRegIdx, then we must be going between - // equally-sized LMUL types (e.g. VR -> VR). This can be done as a copy. - if (SubRegIdx == RISCV::NoSubRegister) { - unsigned InRegClassID = RISCVTargetLowering::getRegClassIDForVecVT(InVT); - assert(RISCVTargetLowering::getRegClassIDForVecVT(SubVecContainerVT) == - InRegClassID && - "Unexpected subvector extraction"); - SDValue RC = CurDAG->getTargetConstant(InRegClassID, DL, XLenVT); - SDNode *NewNode = - CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS, DL, VT, V, RC); - ReplaceNode(Node, NewNode); - return; - } - - SDValue Extract = CurDAG->getTargetExtractSubreg(SubRegIdx, DL, VT, V); - ReplaceNode(Node, Extract.getNode()); - return; - } - case RISCVISD::VMV_S_X_VL: - case RISCVISD::VFMV_S_F_VL: - case RISCVISD::VMV_V_X_VL: - case RISCVISD::VFMV_V_F_VL: { - // Only if we have optimized zero-stride vector load. - if (!Subtarget->hasOptimizedZeroStrideLoad()) - break; - - // Try to match splat of a scalar load to a strided load with stride of x0. - bool IsScalarMove = Node->getOpcode() == RISCVISD::VMV_S_X_VL || - Node->getOpcode() == RISCVISD::VFMV_S_F_VL; - if (!Node->getOperand(0).isUndef()) - break; - SDValue Src = Node->getOperand(1); - auto *Ld = dyn_cast(Src); - if (!Ld) - break; - EVT MemVT = Ld->getMemoryVT(); - // The memory VT should be the same size as the element type. - if (MemVT.getStoreSize() != VT.getVectorElementType().getStoreSize()) - break; - if (!IsProfitableToFold(Src, Node, Node) || - !IsLegalToFold(Src, Node, Node, TM.getOptLevel())) - break; - - SDValue VL; - if (IsScalarMove) { - // We could deal with more VL if we update the VSETVLI insert pass to - // avoid introducing more VSETVLI. - if (!isOneConstant(Node->getOperand(2))) - break; - selectVLOp(Node->getOperand(2), VL); - } else - selectVLOp(Node->getOperand(2), VL); - - unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits()); - SDValue SEW = CurDAG->getTargetConstant(Log2SEW, DL, XLenVT); - - SDValue Operands[] = {Ld->getBasePtr(), - CurDAG->getRegister(RISCV::X0, XLenVT), VL, SEW, - Ld->getChain()}; - - RISCVII::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT); - assert(0 && "TODO: cut!"); - /* - const RISCV::VLEPseudo *P = RISCV::getVLEPseudo( - false, false, true, false, Log2SEW, static_cast(LMUL)); - MachineSDNode *Load = - CurDAG->getMachineNode(P->Pseudo, DL, {VT, MVT::Other}, Operands); - // Update the chain. - ReplaceUses(Src.getValue(1), SDValue(Load, 1)); - // Record the mem-refs - CurDAG->setNodeMemRefs(Load, {Ld->getMemOperand()}); - // Replace the splat with the vlse. - ReplaceNode(Node, Load); - */ - return; - } } // Select the default instruction. SelectCode(Node); @@ -1710,126 +1285,6 @@ bool RISCVDAGToDAGISel::hasAllNBitUsers(SDNode *Node, unsigned Bits) const { return true; } -// Select VL as a 5 bit immediate or a value that will become a register. This -// allows us to choose betwen VSETIVLI or VSETVLI later. -bool RISCVDAGToDAGISel::selectVLOp(SDValue N, SDValue &VL) { - auto *C = dyn_cast(N); - if (C && isUInt<5>(C->getZExtValue())) { - VL = CurDAG->getTargetConstant(C->getZExtValue(), SDLoc(N), - N->getValueType(0)); - } else if (C && C->isAllOnesValue()) { - // Treat all ones as VLMax. - VL = CurDAG->getTargetConstant(RISCV::VLMaxSentinel, SDLoc(N), - N->getValueType(0)); - } else if (isa(N) && - cast(N)->getReg() == RISCV::X0) { - // All our VL operands use an operand that allows GPRNoX0 or an immediate - // as the register class. 
Convert X0 to a special immediate to pass the - // MachineVerifier. This is recognized specially by the vsetvli insertion - // pass. - VL = CurDAG->getTargetConstant(RISCV::VLMaxSentinel, SDLoc(N), - N->getValueType(0)); - } else { - VL = N; - } - - return true; -} - -bool RISCVDAGToDAGISel::selectVSplat(SDValue N, SDValue &SplatVal) { - if (N.getOpcode() != RISCVISD::VMV_V_X_VL || !N.getOperand(0).isUndef()) - return false; - assert(N.getNumOperands() == 3 && "Unexpected number of operands"); - SplatVal = N.getOperand(1); - return true; -} - -using ValidateFn = bool (*)(int64_t); - -static bool selectVSplatSimmHelper(SDValue N, SDValue &SplatVal, - SelectionDAG &DAG, - const RISCVSubtarget &Subtarget, - ValidateFn ValidateImm) { - if (N.getOpcode() != RISCVISD::VMV_V_X_VL || !N.getOperand(0).isUndef() || - !isa(N.getOperand(1))) - return false; - assert(N.getNumOperands() == 3 && "Unexpected number of operands"); - - int64_t SplatImm = - cast(N.getOperand(1))->getSExtValue(); - - // The semantics of RISCVISD::VMV_V_X_VL is that when the operand - // type is wider than the resulting vector element type: an implicit - // truncation first takes place. Therefore, perform a manual - // truncation/sign-extension in order to ignore any truncated bits and catch - // any zero-extended immediate. - // For example, we wish to match (i8 -1) -> (XLenVT 255) as a simm5 by first - // sign-extending to (XLenVT -1). - MVT XLenVT = Subtarget.getXLenVT(); - assert(XLenVT == N.getOperand(1).getSimpleValueType() && - "Unexpected splat operand type"); - MVT EltVT = N.getSimpleValueType().getVectorElementType(); - if (EltVT.bitsLT(XLenVT)) - SplatImm = SignExtend64(SplatImm, EltVT.getSizeInBits()); - - if (!ValidateImm(SplatImm)) - return false; - - SplatVal = DAG.getTargetConstant(SplatImm, SDLoc(N), XLenVT); - return true; -} - -bool RISCVDAGToDAGISel::selectVSplatSimm5(SDValue N, SDValue &SplatVal) { - return selectVSplatSimmHelper(N, SplatVal, *CurDAG, *Subtarget, - [](int64_t Imm) { return isInt<5>(Imm); }); -} - -bool RISCVDAGToDAGISel::selectVSplatSimm5Plus1(SDValue N, SDValue &SplatVal) { - return selectVSplatSimmHelper( - N, SplatVal, *CurDAG, *Subtarget, - [](int64_t Imm) { return (isInt<5>(Imm) && Imm != -16) || Imm == 16; }); -} - -bool RISCVDAGToDAGISel::selectVSplatSimm5Plus1NonZero(SDValue N, - SDValue &SplatVal) { - return selectVSplatSimmHelper( - N, SplatVal, *CurDAG, *Subtarget, [](int64_t Imm) { - return Imm != 0 && ((isInt<5>(Imm) && Imm != -16) || Imm == 16); - }); -} - -bool RISCVDAGToDAGISel::selectVSplatUimm5(SDValue N, SDValue &SplatVal) { - if (N.getOpcode() != RISCVISD::VMV_V_X_VL || !N.getOperand(0).isUndef() || - !isa(N.getOperand(1))) - return false; - - int64_t SplatImm = - cast(N.getOperand(1))->getSExtValue(); - - if (!isUInt<5>(SplatImm)) - return false; - - SplatVal = - CurDAG->getTargetConstant(SplatImm, SDLoc(N), Subtarget->getXLenVT()); - - return true; -} - -bool RISCVDAGToDAGISel::selectRVVSimm5(SDValue N, unsigned Width, - SDValue &Imm) { - if (auto *C = dyn_cast(N)) { - int64_t ImmVal = SignExtend64(C->getSExtValue(), Width); - - if (!isInt<5>(ImmVal)) - return false; - - Imm = CurDAG->getTargetConstant(ImmVal, SDLoc(N), Subtarget->getXLenVT()); - return true; - } - - return false; -} - // Try to remove sext.w if the input is a W instruction or can be made into // a W instruction cheaply. 
bool RISCVDAGToDAGISel::doPeepholeSExtW(SDNode *N) { @@ -1892,307 +1347,6 @@ bool RISCVDAGToDAGISel::doPeepholeSExtW(SDNode *N) { return false; } -// Return true if we can make sure mask of N is all-ones mask. -static bool usesAllOnesMask(SDNode *N, unsigned MaskOpIdx) { - // Check that we're using V0 as a mask register. - if (!isa(N->getOperand(MaskOpIdx)) || - cast(N->getOperand(MaskOpIdx))->getReg() != RISCV::V0) - return false; - - // The glued user defines V0. - const auto *Glued = N->getGluedNode(); - - if (!Glued || Glued->getOpcode() != ISD::CopyToReg) - return false; - - // Check that we're defining V0 as a mask register. - if (!isa(Glued->getOperand(1)) || - cast(Glued->getOperand(1))->getReg() != RISCV::V0) - return false; - - // Check the instruction defining V0; it needs to be a VMSET pseudo. - SDValue MaskSetter = Glued->getOperand(2); - - const auto IsVMSet = [](unsigned Opc) { - return Opc == RISCV::PseudoVMSET_M_B1 || Opc == RISCV::PseudoVMSET_M_B16 || - Opc == RISCV::PseudoVMSET_M_B2 || Opc == RISCV::PseudoVMSET_M_B32 || - Opc == RISCV::PseudoVMSET_M_B4 || Opc == RISCV::PseudoVMSET_M_B64 || - Opc == RISCV::PseudoVMSET_M_B8; - }; - - // TODO: Check that the VMSET is the expected bitwidth? The pseudo has - // undefined behaviour if it's the wrong bitwidth, so we could choose to - // assume that it's all-ones? Same applies to its VL. - return MaskSetter->isMachineOpcode() && - IsVMSet(MaskSetter.getMachineOpcode()); -} - -// Optimize masked RVV pseudo instructions with a known all-ones mask to their -// corresponding "unmasked" pseudo versions. The mask we're interested in will -// take the form of a V0 physical register operand, with a glued -// register-setting instruction. -bool RISCVDAGToDAGISel::doPeepholeMaskedRVV(SDNode *N) { - const RISCV::RISCVMaskedPseudoInfo *I = - RISCV::getMaskedPseudoInfo(N->getMachineOpcode()); - if (!I) - return false; - - unsigned MaskOpIdx = I->MaskOpIdx; - - if (!usesAllOnesMask(N, MaskOpIdx)) - return false; - - // Retrieve the tail policy operand index, if any. - std::optional TailPolicyOpIdx; - const RISCVInstrInfo &TII = *Subtarget->getInstrInfo(); - const MCInstrDesc &MaskedMCID = TII.get(N->getMachineOpcode()); - - bool IsTA = true; - if (RISCVII::hasVecPolicyOp(MaskedMCID.TSFlags)) { - TailPolicyOpIdx = getVecPolicyOpIdx(N, MaskedMCID); - if (!(N->getConstantOperandVal(*TailPolicyOpIdx) & - RISCVII::TAIL_AGNOSTIC)) { - // Keep the true-masked instruction when there is no unmasked TU - // instruction - if (I->UnmaskedTUPseudo == I->MaskedPseudo && !N->getOperand(0).isUndef()) - return false; - // We can't use TA if the tie-operand is not IMPLICIT_DEF - if (!N->getOperand(0).isUndef()) - IsTA = false; - } - } - - unsigned Opc = IsTA ? I->UnmaskedPseudo : I->UnmaskedTUPseudo; - - // Check that we're dropping the mask operand and any policy operand - // when we transform to this unmasked pseudo. Additionally, if this insturtion - // is tail agnostic, the unmasked instruction should not have a merge op. - uint64_t TSFlags = TII.get(Opc).TSFlags; - assert((IsTA != RISCVII::hasMergeOp(TSFlags)) && - RISCVII::hasDummyMaskOp(TSFlags) && - !RISCVII::hasVecPolicyOp(TSFlags) && - "Unexpected pseudo to transform to"); - (void)TSFlags; - - SmallVector Ops; - // Skip the merge operand at index 0 if IsTA - for (unsigned I = IsTA, E = N->getNumOperands(); I != E; I++) { - // Skip the mask, the policy, and the Glue. 
- SDValue Op = N->getOperand(I); - if (I == MaskOpIdx || I == TailPolicyOpIdx || - Op.getValueType() == MVT::Glue) - continue; - Ops.push_back(Op); - } - - // Transitively apply any node glued to our new node. - const auto *Glued = N->getGluedNode(); - if (auto *TGlued = Glued->getGluedNode()) - Ops.push_back(SDValue(TGlued, TGlued->getNumValues() - 1)); - - SDNode *Result = CurDAG->getMachineNode(Opc, SDLoc(N), N->getVTList(), Ops); - Result->setFlags(N->getFlags()); - ReplaceUses(N, Result); - - return true; -} - -// Try to fold VMERGE_VVM with unmasked intrinsic to masked intrinsic. The -// peephole only deals with VMERGE_VVM which is TU and has false operand same as -// its true operand now. E.g. (VMERGE_VVM_M1_TU False, False, (VADD_M1 ...), -// ...) -> (VADD_VV_M1_MASK) -bool RISCVDAGToDAGISel::performCombineVMergeAndVOps(SDNode *N, bool IsTA) { - unsigned Offset = IsTA ? 0 : 1; - uint64_t Policy = IsTA ? RISCVII::TAIL_AGNOSTIC : /*TUMU*/ 0; - - SDValue False = N->getOperand(0 + Offset); - SDValue True = N->getOperand(1 + Offset); - SDValue Mask = N->getOperand(2 + Offset); - SDValue VL = N->getOperand(3 + Offset); - - assert(True.getResNo() == 0 && - "Expect True is the first output of an instruction."); - - // Need N is the exactly one using True. - if (!True.hasOneUse()) - return false; - - if (!True.isMachineOpcode()) - return false; - - unsigned TrueOpc = True.getMachineOpcode(); - - // Skip if True has merge operand. - // TODO: Deal with True having same merge operand with N. - if (RISCVII::hasMergeOp(TII->get(TrueOpc).TSFlags)) - return false; - - // Skip if True has side effect. - // TODO: Support velff and vlsegff. - if (TII->get(TrueOpc).hasUnmodeledSideEffects()) - return false; - - // Only deal with True when True is unmasked intrinsic now. - const RISCV::RISCVMaskedPseudoInfo *Info = - RISCV::lookupMaskedIntrinsicByUnmaskedTA(TrueOpc); - - if (!Info) - return false; - - // The last operand of unmasked intrinsic should be sew or chain. - bool HasChainOp = - True.getOperand(True.getNumOperands() - 1).getValueType() == MVT::Other; - - if (HasChainOp) { - // Avoid creating cycles in the DAG. We must ensure that none of the other - // operands depend on True through it's Chain. - SmallVector LoopWorklist; - SmallPtrSet Visited; - LoopWorklist.push_back(False.getNode()); - LoopWorklist.push_back(Mask.getNode()); - LoopWorklist.push_back(VL.getNode()); - if (SDNode *Glued = N->getGluedNode()) - LoopWorklist.push_back(Glued); - if (SDNode::hasPredecessorHelper(True.getNode(), Visited, LoopWorklist)) - return false; - } - - // Need True has same VL with N. - unsigned TrueVLIndex = True.getNumOperands() - HasChainOp - 2; - SDValue TrueVL = True.getOperand(TrueVLIndex); - - auto IsNoFPExcept = [this](SDValue N) { - return !this->mayRaiseFPException(N.getNode()) || - N->getFlags().hasNoFPExcept(); - }; - - // Allow the peephole for non-exception True with VLMAX vector length, since - // all the values after VL of N are dependent on Merge. VLMAX should be - // lowered to (XLenVT -1). 
- if (TrueVL != VL && !(IsNoFPExcept(True) && isAllOnesConstant(TrueVL))) - return false; - - SDLoc DL(N); - unsigned MaskedOpc = Info->MaskedPseudo; - assert(RISCVII::hasVecPolicyOp(TII->get(MaskedOpc).TSFlags) && - "Expected instructions with mask have policy operand."); - assert(RISCVII::hasMergeOp(TII->get(MaskedOpc).TSFlags) && - "Expected instructions with mask have merge operand."); - - SmallVector Ops; - Ops.push_back(False); - Ops.append(True->op_begin(), True->op_begin() + TrueVLIndex); - Ops.append({Mask, VL, /* SEW */ True.getOperand(TrueVLIndex + 1)}); - Ops.push_back(CurDAG->getTargetConstant(Policy, DL, Subtarget->getXLenVT())); - - // Result node should have chain operand of True. - if (HasChainOp) - Ops.push_back(True.getOperand(True.getNumOperands() - 1)); - - // Result node should take over glued node of N. - if (N->getGluedNode()) - Ops.push_back(N->getOperand(N->getNumOperands() - 1)); - - SDNode *Result = - CurDAG->getMachineNode(MaskedOpc, DL, True->getVTList(), Ops); - Result->setFlags(True->getFlags()); - - // Replace vmerge.vvm node by Result. - ReplaceUses(SDValue(N, 0), SDValue(Result, 0)); - - // Replace another value of True. E.g. chain and VL. - for (unsigned Idx = 1; Idx < True->getNumValues(); ++Idx) - ReplaceUses(True.getValue(Idx), SDValue(Result, Idx)); - - // Try to transform Result to unmasked intrinsic. - doPeepholeMaskedRVV(Result); - return true; -} - -// Transform (VMERGE_VVM__TU false, false, true, allones, vl, sew) to -// (VADD_VI__TU false, true, 0, vl, sew). It may decrease uses of VMSET. -bool RISCVDAGToDAGISel::performVMergeToVAdd(SDNode *N) { - unsigned NewOpc; - switch (N->getMachineOpcode()) { - default: - llvm_unreachable("Expected VMERGE_VVM__TU instruction."); - case RISCV::PseudoVMERGE_VVM_MF8_TU: - NewOpc = RISCV::PseudoVADD_VI_MF8_TU; - break; - case RISCV::PseudoVMERGE_VVM_MF4_TU: - NewOpc = RISCV::PseudoVADD_VI_MF4_TU; - break; - case RISCV::PseudoVMERGE_VVM_MF2_TU: - NewOpc = RISCV::PseudoVADD_VI_MF2_TU; - break; - case RISCV::PseudoVMERGE_VVM_M1_TU: - NewOpc = RISCV::PseudoVADD_VI_M1_TU; - break; - case RISCV::PseudoVMERGE_VVM_M2_TU: - NewOpc = RISCV::PseudoVADD_VI_M2_TU; - break; - case RISCV::PseudoVMERGE_VVM_M4_TU: - NewOpc = RISCV::PseudoVADD_VI_M4_TU; - break; - case RISCV::PseudoVMERGE_VVM_M8_TU: - NewOpc = RISCV::PseudoVADD_VI_M8_TU; - break; - } - - if (!usesAllOnesMask(N, /* MaskOpIdx */ 3)) - return false; - - SDLoc DL(N); - EVT VT = N->getValueType(0); - SDValue Ops[] = {N->getOperand(1), N->getOperand(2), - CurDAG->getTargetConstant(0, DL, Subtarget->getXLenVT()), - N->getOperand(4), N->getOperand(5)}; - SDNode *Result = CurDAG->getMachineNode(NewOpc, DL, VT, Ops); - ReplaceUses(N, Result); - return true; -} - -bool RISCVDAGToDAGISel::doPeepholeMergeVVMFold() { - bool MadeChange = false; - SelectionDAG::allnodes_iterator Position = CurDAG->allnodes_end(); - - while (Position != CurDAG->allnodes_begin()) { - SDNode *N = &*--Position; - if (N->use_empty() || !N->isMachineOpcode()) - continue; - - auto IsVMergeTU = [](unsigned Opcode) { - return Opcode == RISCV::PseudoVMERGE_VVM_MF8_TU || - Opcode == RISCV::PseudoVMERGE_VVM_MF4_TU || - Opcode == RISCV::PseudoVMERGE_VVM_MF2_TU || - Opcode == RISCV::PseudoVMERGE_VVM_M1_TU || - Opcode == RISCV::PseudoVMERGE_VVM_M2_TU || - Opcode == RISCV::PseudoVMERGE_VVM_M4_TU || - Opcode == RISCV::PseudoVMERGE_VVM_M8_TU; - }; - - auto IsVMergeTA = [](unsigned Opcode) { - return Opcode == RISCV::PseudoVMERGE_VVM_MF8 || - Opcode == RISCV::PseudoVMERGE_VVM_MF4 || - Opcode == 
RISCV::PseudoVMERGE_VVM_MF2 || - Opcode == RISCV::PseudoVMERGE_VVM_M1 || - Opcode == RISCV::PseudoVMERGE_VVM_M2 || - Opcode == RISCV::PseudoVMERGE_VVM_M4 || - Opcode == RISCV::PseudoVMERGE_VVM_M8; - }; - - unsigned Opc = N->getMachineOpcode(); - // The following optimizations require that the merge operand of N is same - // as the false operand of N. - if ((IsVMergeTU(Opc) && N->getOperand(0) == N->getOperand(1)) || - IsVMergeTA(Opc)) - MadeChange |= performCombineVMergeAndVOps(N, IsVMergeTA(Opc)); - if (IsVMergeTU(Opc) && N->getOperand(0) == N->getOperand(1)) - MadeChange |= performVMergeToVAdd(N); - } - return MadeChange; -} - // This pass converts a legalized DAG into a RISCV-specific DAG, ready // for instruction scheduling. FunctionPass *llvm::createRISCVISelDag(RISCVTargetMachine &TM, diff --git a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.h b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.h index 7fe15b4c583a..341989ea3a6c 100644 --- a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.h +++ b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.h @@ -79,25 +79,6 @@ public: bool hasAllHUsers(SDNode *Node) const { return hasAllNBitUsers(Node, 16); } bool hasAllWUsers(SDNode *Node) const { return hasAllNBitUsers(Node, 32); } - bool selectVLOp(SDValue N, SDValue &VL); - - bool selectVSplat(SDValue N, SDValue &SplatVal); - bool selectVSplatSimm5(SDValue N, SDValue &SplatVal); - bool selectVSplatUimm5(SDValue N, SDValue &SplatVal); - bool selectVSplatSimm5Plus1(SDValue N, SDValue &SplatVal); - bool selectVSplatSimm5Plus1NonZero(SDValue N, SDValue &SplatVal); - - bool selectRVVSimm5(SDValue N, unsigned Width, SDValue &Imm); - template bool selectRVVSimm5(SDValue N, SDValue &Imm) { - return selectRVVSimm5(N, Width, Imm); - } - - void addVectorLoadStoreOperands(SDNode *Node, unsigned SEWImm, - const SDLoc &DL, unsigned CurOp, - bool IsMasked, bool IsStridedOrIndexed, - SmallVectorImpl &Operands, - bool IsLoad = false, MVT *IndexVT = nullptr); - // Return the RISC-V condition code that matches the given DAG integer // condition code. The CondCode must be one of those supported by the RISC-V // ISA (see translateSetCCForBranch). diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp index 41071c1e01e3..e426e4903006 100644 --- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp +++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp @@ -434,8 +434,6 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM, if (Subtarget.hasVInstructions()) { setBooleanVectorContents(ZeroOrOneBooleanContent); - setOperationAction(ISD::VSCALE, XLenVT, Custom); - // RVV intrinsics may have illegal operands. // We also need to custom legalize vmv.x.s. 
setOperationAction({ISD::INTRINSIC_WO_CHAIN, ISD::INTRINSIC_W_CHAIN}, @@ -449,83 +447,11 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM, setOperationAction({ISD::INTRINSIC_W_CHAIN, ISD::INTRINSIC_VOID}, MVT::Other, Custom); - static const unsigned IntegerVPOps[] = { - ISD::VP_ADD, ISD::VP_SUB, ISD::VP_MUL, - ISD::VP_SDIV, ISD::VP_UDIV, ISD::VP_SREM, - ISD::VP_UREM, ISD::VP_AND, ISD::VP_OR, - ISD::VP_XOR, ISD::VP_ASHR, ISD::VP_LSHR, - ISD::VP_SHL, ISD::VP_REDUCE_ADD, ISD::VP_REDUCE_AND, - ISD::VP_REDUCE_OR, ISD::VP_REDUCE_XOR, ISD::VP_REDUCE_SMAX, - ISD::VP_REDUCE_SMIN, ISD::VP_REDUCE_UMAX, ISD::VP_REDUCE_UMIN, - ISD::VP_MERGE, ISD::VP_SELECT, ISD::VP_FP_TO_SINT, - ISD::VP_FP_TO_UINT, ISD::VP_SETCC, ISD::VP_SIGN_EXTEND, - ISD::VP_ZERO_EXTEND, ISD::VP_TRUNCATE, ISD::VP_SMIN, - ISD::VP_SMAX, ISD::VP_UMIN, ISD::VP_UMAX}; - - static const unsigned FloatingPointVPOps[] = { - ISD::VP_FADD, ISD::VP_FSUB, ISD::VP_FMUL, - ISD::VP_FDIV, ISD::VP_FNEG, ISD::VP_FABS, - ISD::VP_FMA, ISD::VP_REDUCE_FADD, ISD::VP_REDUCE_SEQ_FADD, - ISD::VP_REDUCE_FMIN, ISD::VP_REDUCE_FMAX, ISD::VP_MERGE, - ISD::VP_SELECT, ISD::VP_SINT_TO_FP, ISD::VP_UINT_TO_FP, - ISD::VP_SETCC, ISD::VP_FP_ROUND, ISD::VP_FP_EXTEND, - ISD::VP_SQRT, ISD::VP_FMINNUM, ISD::VP_FMAXNUM, - ISD::VP_FCEIL, ISD::VP_FFLOOR, ISD::VP_FROUND, - ISD::VP_FROUNDEVEN, ISD::VP_FCOPYSIGN, ISD::VP_FROUNDTOZERO, - ISD::VP_FRINT, ISD::VP_FNEARBYINT}; - - static const unsigned IntegerVecReduceOps[] = { - ISD::VECREDUCE_ADD, ISD::VECREDUCE_AND, ISD::VECREDUCE_OR, - ISD::VECREDUCE_XOR, ISD::VECREDUCE_SMAX, ISD::VECREDUCE_SMIN, - ISD::VECREDUCE_UMAX, ISD::VECREDUCE_UMIN}; - - static const unsigned FloatingPointVecReduceOps[] = { - ISD::VECREDUCE_FADD, ISD::VECREDUCE_SEQ_FADD, ISD::VECREDUCE_FMIN, - ISD::VECREDUCE_FMAX}; - - if (!Subtarget.is64Bit()) { - // We must custom-lower certain vXi64 operations on RV32 due to the vector - // element type being illegal. - setOperationAction({ISD::INSERT_VECTOR_ELT, ISD::EXTRACT_VECTOR_ELT}, - MVT::i64, Custom); - - setOperationAction(IntegerVecReduceOps, MVT::i64, Custom); - - setOperationAction({ISD::VP_REDUCE_ADD, ISD::VP_REDUCE_AND, - ISD::VP_REDUCE_OR, ISD::VP_REDUCE_XOR, - ISD::VP_REDUCE_SMAX, ISD::VP_REDUCE_SMIN, - ISD::VP_REDUCE_UMAX, ISD::VP_REDUCE_UMIN}, - MVT::i64, Custom); - } - for (MVT VT : BoolVecVTs) { if (!isTypeLegal(VT)) continue; - setOperationAction(ISD::SPLAT_VECTOR, VT, Custom); - - // Mask VTs are custom-expanded into a series of standard nodes - setOperationAction({ISD::TRUNCATE, ISD::CONCAT_VECTORS, - ISD::INSERT_SUBVECTOR, ISD::EXTRACT_SUBVECTOR}, - VT, Custom); - - setOperationAction({ISD::INSERT_VECTOR_ELT, ISD::EXTRACT_VECTOR_ELT}, VT, - Custom); - setOperationAction(ISD::SELECT, VT, Custom); - setOperationAction( - {ISD::SELECT_CC, ISD::VSELECT, ISD::VP_MERGE, ISD::VP_SELECT}, VT, - Expand); - - setOperationAction({ISD::VP_AND, ISD::VP_OR, ISD::VP_XOR}, VT, Custom); - - setOperationAction( - {ISD::VECREDUCE_AND, ISD::VECREDUCE_OR, ISD::VECREDUCE_XOR}, VT, - Custom); - - setOperationAction( - {ISD::VP_REDUCE_AND, ISD::VP_REDUCE_OR, ISD::VP_REDUCE_XOR}, VT, - Custom); // RVV has native int->float & float->int conversions where the // element type sizes are within one power-of-two of each other. Any @@ -594,15 +520,11 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM, // Sets common operation actions on RVV floating-point vector types. 
const auto SetCommonVFPActions = [&](MVT VT) { - setOperationAction(ISD::SPLAT_VECTOR, VT, Legal); // RVV has native FP_ROUND & FP_EXTEND conversions where the element type // sizes are within one power-of-two of each other. Therefore conversions // between vXf16 and vXf64 must be lowered as sequences which convert via // vXf32. setOperationAction({ISD::FP_ROUND, ISD::FP_EXTEND}, VT, Custom); - // Custom-lower insert/extract operations to simplify patterns. - setOperationAction({ISD::INSERT_VECTOR_ELT, ISD::EXTRACT_VECTOR_ELT}, VT, - Custom); // Expand various condition codes (explained above). setCondCodeAction(VFPCCToExpand, VT, Expand); @@ -612,8 +534,6 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM, {ISD::FTRUNC, ISD::FCEIL, ISD::FFLOOR, ISD::FROUND, ISD::FROUNDEVEN}, VT, Custom); - setOperationAction(FloatingPointVecReduceOps, VT, Custom); - // Expand FP operations that need libcalls. setOperationAction(ISD::FREM, VT, Expand); setOperationAction(ISD::FPOW, VT, Expand); @@ -632,24 +552,8 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM, setOperationAction({ISD::LOAD, ISD::STORE}, VT, Custom); - setOperationAction({ISD::MLOAD, ISD::MSTORE, ISD::MGATHER, ISD::MSCATTER}, - VT, Custom); - - setOperationAction( - {ISD::VP_LOAD, ISD::VP_STORE, ISD::EXPERIMENTAL_VP_STRIDED_LOAD, - ISD::EXPERIMENTAL_VP_STRIDED_STORE, ISD::VP_GATHER, ISD::VP_SCATTER}, - VT, Custom); - setOperationAction(ISD::SELECT, VT, Custom); setOperationAction(ISD::SELECT_CC, VT, Expand); - - setOperationAction( - {ISD::CONCAT_VECTORS, ISD::INSERT_SUBVECTOR, ISD::EXTRACT_SUBVECTOR}, - VT, Custom); - - setOperationAction({ISD::VECTOR_REVERSE, ISD::VECTOR_SPLICE}, VT, Custom); - - setOperationAction(FloatingPointVPOps, VT, Custom); }; // Sets common extload/truncstore actions on RVV floating-point vector @@ -709,7 +613,6 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM, setOperationAction( {ISD::ANY_EXTEND, ISD::SIGN_EXTEND, ISD::ZERO_EXTEND}, VT, Custom); - setOperationAction(IntegerVPOps, VT, Custom); // Lower CTLZ_ZERO_UNDEF and CTTZ_ZERO_UNDEF if we have a floating point // type that can represent the value exactly. @@ -745,10 +648,6 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM, setOperationAction(ISD::SELECT_CC, VT, Expand); setOperationAction(ISD::BITCAST, VT, Custom); - - setOperationAction(FloatingPointVecReduceOps, VT, Custom); - - setOperationAction(FloatingPointVPOps, VT, Custom); } // Custom-legalize bitcasts from fixed-length vectors to scalar types. @@ -825,10 +724,6 @@ EVT RISCVTargetLowering::getSetCCResultType(const DataLayout &DL, return VT.changeVectorElementTypeToInteger(); } -MVT RISCVTargetLowering::getVPExplicitVectorLengthTy() const { - return Subtarget.getXLenVT(); -} - bool RISCVTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info, const CallInst &I, MachineFunction &MF, @@ -1360,12 +1255,6 @@ unsigned RISCVTargetLowering::getRegClassIDForVecVT(MVT VT) { return RISCV::VGPRRegClassID; } -// Permit combining of mask vectors as BUILD_VECTOR never expands to scalar -// stores for those types. -bool RISCVTargetLowering::mergeStoresAfterLegalization(EVT VT) const { - return !Subtarget.useRVVForFixedLengthVectors() || - (VT.isFixedLengthVector() && VT.getVectorElementType() == MVT::i1); -} // Grow V to consume an entire RVV register. 
static SDValue convertToScalableVector(EVT VT, SDValue V, SelectionDAG &DAG, @@ -1522,12 +1411,6 @@ static RISCVFPRndMode::RoundingMode matchRoundingOp(unsigned Opc) { return RISCVFPRndMode::Invalid; } -struct VIDSequence { - int64_t StepNumerator; - unsigned StepDenominator; - int64_t Addend; -}; - static std::optional getExactInteger(const APFloat &APF, uint32_t BitWidth) { APSInt ValInt(BitWidth, !APF.isNegative()); @@ -1546,1215 +1429,6 @@ static std::optional getExactInteger(const APFloat &APF, return ValInt.extractBitsAsZExtValue(BitWidth, 0); } -// Try to match an arithmetic-sequence BUILD_VECTOR [X,X+S,X+2*S,...,X+(N-1)*S] -// to the (non-zero) step S and start value X. This can be then lowered as the -// RVV sequence (VID * S) + X, for example. -// The step S is represented as an integer numerator divided by a positive -// denominator. Note that the implementation currently only identifies -// sequences in which either the numerator is +/- 1 or the denominator is 1. It -// cannot detect 2/3, for example. -// Note that this method will also match potentially unappealing index -// sequences, like , however it is left to the caller to -// determine whether this is worth generating code for. -static std::optional isSimpleVIDSequence(SDValue Op) { - unsigned NumElts = Op.getNumOperands(); - assert(Op.getOpcode() == ISD::BUILD_VECTOR && "Unexpected BUILD_VECTOR"); - bool IsInteger = Op.getValueType().isInteger(); - - std::optional SeqStepDenom; - std::optional SeqStepNum, SeqAddend; - std::optional> PrevElt; - unsigned EltSizeInBits = Op.getValueType().getScalarSizeInBits(); - for (unsigned Idx = 0; Idx < NumElts; Idx++) { - // Assume undef elements match the sequence; we just have to be careful - // when interpolating across them. - if (Op.getOperand(Idx).isUndef()) - continue; - - uint64_t Val; - if (IsInteger) { - // The BUILD_VECTOR must be all constants. - if (!isa(Op.getOperand(Idx))) - return std::nullopt; - Val = Op.getConstantOperandVal(Idx) & - maskTrailingOnes(EltSizeInBits); - } else { - // The BUILD_VECTOR must be all constants. - if (!isa(Op.getOperand(Idx))) - return std::nullopt; - if (auto ExactInteger = getExactInteger( - cast(Op.getOperand(Idx))->getValueAPF(), - EltSizeInBits)) - Val = *ExactInteger; - else - return std::nullopt; - } - - if (PrevElt) { - // Calculate the step since the last non-undef element, and ensure - // it's consistent across the entire sequence. - unsigned IdxDiff = Idx - PrevElt->second; - int64_t ValDiff = SignExtend64(Val - PrevElt->first, EltSizeInBits); - - // A zero-value value difference means that we're somewhere in the middle - // of a fractional step, e.g. <0,0,0*,0,1,1,1,1>. Wait until we notice a - // step change before evaluating the sequence. - if (ValDiff == 0) - continue; - - int64_t Remainder = ValDiff % IdxDiff; - // Normalize the step if it's greater than 1. - if (Remainder != ValDiff) { - // The difference must cleanly divide the element span. - if (Remainder != 0) - return std::nullopt; - ValDiff /= IdxDiff; - IdxDiff = 1; - } - - if (!SeqStepNum) - SeqStepNum = ValDiff; - else if (ValDiff != SeqStepNum) - return std::nullopt; - - if (!SeqStepDenom) - SeqStepDenom = IdxDiff; - else if (IdxDiff != *SeqStepDenom) - return std::nullopt; - } - - // Record this non-undef element for later. - if (!PrevElt || PrevElt->first != Val) - PrevElt = std::make_pair(Val, Idx); - } - - // We need to have logged a step for this to count as a legal index sequence. 
- if (!SeqStepNum || !SeqStepDenom) - return std::nullopt; - - // Loop back through the sequence and validate elements we might have skipped - // while waiting for a valid step. While doing this, log any sequence addend. - for (unsigned Idx = 0; Idx < NumElts; Idx++) { - if (Op.getOperand(Idx).isUndef()) - continue; - uint64_t Val; - if (IsInteger) { - Val = Op.getConstantOperandVal(Idx) & - maskTrailingOnes(EltSizeInBits); - } else { - Val = *getExactInteger( - cast(Op.getOperand(Idx))->getValueAPF(), - EltSizeInBits); - } - uint64_t ExpectedVal = - (int64_t)(Idx * (uint64_t)*SeqStepNum) / *SeqStepDenom; - int64_t Addend = SignExtend64(Val - ExpectedVal, EltSizeInBits); - if (!SeqAddend) - SeqAddend = Addend; - else if (Addend != SeqAddend) - return std::nullopt; - } - - assert(SeqAddend && "Must have an addend if we have a step"); - - return VIDSequence{*SeqStepNum, *SeqStepDenom, *SeqAddend}; -} - -// Match a splatted value (SPLAT_VECTOR/BUILD_VECTOR) of an EXTRACT_VECTOR_ELT -// and lower it as a VRGATHER_VX_VL from the source vector. -static SDValue matchSplatAsGather(SDValue SplatVal, MVT VT, const SDLoc &DL, - SelectionDAG &DAG, - const RISCVSubtarget &Subtarget) { - if (SplatVal.getOpcode() != ISD::EXTRACT_VECTOR_ELT) - return SDValue(); - SDValue Vec = SplatVal.getOperand(0); - // Only perform this optimization on vectors of the same size for simplicity. - // Don't perform this optimization for i1 vectors. - // FIXME: Support i1 vectors, maybe by promoting to i8? - if (Vec.getValueType() != VT || VT.getVectorElementType() == MVT::i1) - return SDValue(); - SDValue Idx = SplatVal.getOperand(1); - // The index must be a legal type. - if (Idx.getValueType() != Subtarget.getXLenVT()) - return SDValue(); - - MVT ContainerVT = VT; - if (VT.isFixedLengthVector()) { - ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget); - Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget); - } - - auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget); - - SDValue Gather = DAG.getNode(RISCVISD::VRGATHER_VX_VL, DL, ContainerVT, Vec, - Idx, DAG.getUNDEF(ContainerVT), Mask, VL); - - if (!VT.isFixedLengthVector()) - return Gather; - - return convertFromScalableVector(VT, Gather, DAG, Subtarget); -} - -static SDValue lowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG, - const RISCVSubtarget &Subtarget) { - MVT VT = Op.getSimpleValueType(); - assert(VT.isFixedLengthVector() && "Unexpected vector!"); - - MVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget); - - SDLoc DL(Op); - auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget); - - MVT XLenVT = Subtarget.getXLenVT(); - unsigned NumElts = Op.getNumOperands(); - - if (VT.getVectorElementType() == MVT::i1) { - if (ISD::isBuildVectorAllZeros(Op.getNode())) { - SDValue VMClr = DAG.getNode(RISCVISD::VMCLR_VL, DL, ContainerVT, VL); - return convertFromScalableVector(VT, VMClr, DAG, Subtarget); - } - - if (ISD::isBuildVectorAllOnes(Op.getNode())) { - SDValue VMSet = DAG.getNode(RISCVISD::VMSET_VL, DL, ContainerVT, VL); - return convertFromScalableVector(VT, VMSet, DAG, Subtarget); - } - - // Lower constant mask BUILD_VECTORs via an integer vector type, in - // scalar integer chunks whose bit-width depends on the number of mask - // bits and XLEN. - // First, determine the most appropriate scalar integer type to use. 
This - // is at most XLenVT, but may be shrunk to a smaller vector element type - // according to the size of the final vector - use i8 chunks rather than - // XLenVT if we're producing a v8i1. This results in more consistent - // codegen across RV32 and RV64. - unsigned NumViaIntegerBits = - std::min(std::max(NumElts, 8u), Subtarget.getXLen()); - NumViaIntegerBits = std::min(NumViaIntegerBits, Subtarget.getELEN()); - if (ISD::isBuildVectorOfConstantSDNodes(Op.getNode())) { - // If we have to use more than one INSERT_VECTOR_ELT then this - // optimization is likely to increase code size; avoid peforming it in - // such a case. We can use a load from a constant pool in this case. - if (DAG.shouldOptForSize() && NumElts > NumViaIntegerBits) - return SDValue(); - // Now we can create our integer vector type. Note that it may be larger - // than the resulting mask type: v4i1 would use v1i8 as its integer type. - MVT IntegerViaVecVT = - MVT::getVectorVT(MVT::getIntegerVT(NumViaIntegerBits), - divideCeil(NumElts, NumViaIntegerBits)); - - uint64_t Bits = 0; - unsigned BitPos = 0, IntegerEltIdx = 0; - SDValue Vec = DAG.getUNDEF(IntegerViaVecVT); - - for (unsigned I = 0; I < NumElts; I++, BitPos++) { - // Once we accumulate enough bits to fill our scalar type, insert into - // our vector and clear our accumulated data. - if (I != 0 && I % NumViaIntegerBits == 0) { - if (NumViaIntegerBits <= 32) - Bits = SignExtend64<32>(Bits); - SDValue Elt = DAG.getConstant(Bits, DL, XLenVT); - Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, IntegerViaVecVT, Vec, - Elt, DAG.getConstant(IntegerEltIdx, DL, XLenVT)); - Bits = 0; - BitPos = 0; - IntegerEltIdx++; - } - SDValue V = Op.getOperand(I); - bool BitValue = !V.isUndef() && cast(V)->getZExtValue(); - Bits |= ((uint64_t)BitValue << BitPos); - } - - // Insert the (remaining) scalar value into position in our integer - // vector type. - if (NumViaIntegerBits <= 32) - Bits = SignExtend64<32>(Bits); - SDValue Elt = DAG.getConstant(Bits, DL, XLenVT); - Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, IntegerViaVecVT, Vec, Elt, - DAG.getConstant(IntegerEltIdx, DL, XLenVT)); - - if (NumElts < NumViaIntegerBits) { - // If we're producing a smaller vector than our minimum legal integer - // type, bitcast to the equivalent (known-legal) mask type, and extract - // our final mask. - assert(IntegerViaVecVT == MVT::v1i8 && "Unexpected mask vector type"); - Vec = DAG.getBitcast(MVT::v8i1, Vec); - Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, Vec, - DAG.getConstant(0, DL, XLenVT)); - } else { - // Else we must have produced an integer type with the same size as the - // mask type; bitcast for the final result. - assert(VT.getSizeInBits() == IntegerViaVecVT.getSizeInBits()); - Vec = DAG.getBitcast(VT, Vec); - } - - return Vec; - } - - // A BUILD_VECTOR can be lowered as a SETCC. For each fixed-length mask - // vector type, we have a legal equivalently-sized i8 type, so we can use - // that. - MVT WideVecVT = VT.changeVectorElementType(MVT::i8); - SDValue VecZero = DAG.getConstant(0, DL, WideVecVT); - - SDValue WideVec; - if (SDValue Splat = cast(Op)->getSplatValue()) { - // For a splat, perform a scalar truncate before creating the wider - // vector. 
- assert(Splat.getValueType() == XLenVT && - "Unexpected type for i1 splat value"); - Splat = DAG.getNode(ISD::AND, DL, XLenVT, Splat, - DAG.getConstant(1, DL, XLenVT)); - WideVec = DAG.getSplatBuildVector(WideVecVT, DL, Splat); - } else { - SmallVector Ops(Op->op_values()); - WideVec = DAG.getBuildVector(WideVecVT, DL, Ops); - SDValue VecOne = DAG.getConstant(1, DL, WideVecVT); - WideVec = DAG.getNode(ISD::AND, DL, WideVecVT, WideVec, VecOne); - } - - return DAG.getSetCC(DL, VT, WideVec, VecZero, ISD::SETNE); - } - - if (SDValue Splat = cast(Op)->getSplatValue()) { - if (auto Gather = matchSplatAsGather(Splat, VT, DL, DAG, Subtarget)) - return Gather; - unsigned Opc = VT.isFloatingPoint() ? RISCVISD::VFMV_V_F_VL - : RISCVISD::VMV_V_X_VL; - Splat = - DAG.getNode(Opc, DL, ContainerVT, DAG.getUNDEF(ContainerVT), Splat, VL); - return convertFromScalableVector(VT, Splat, DAG, Subtarget); - } - - // Try and match index sequences, which we can lower to the vid instruction - // with optional modifications. An all-undef vector is matched by - // getSplatValue, above. - if (auto SimpleVID = isSimpleVIDSequence(Op)) { - int64_t StepNumerator = SimpleVID->StepNumerator; - unsigned StepDenominator = SimpleVID->StepDenominator; - int64_t Addend = SimpleVID->Addend; - - assert(StepNumerator != 0 && "Invalid step"); - bool Negate = false; - int64_t SplatStepVal = StepNumerator; - unsigned StepOpcode = ISD::MUL; - if (StepNumerator != 1) { - if (isPowerOf2_64(std::abs(StepNumerator))) { - Negate = StepNumerator < 0; - StepOpcode = ISD::SHL; - SplatStepVal = Log2_64(std::abs(StepNumerator)); - } - } - - // Only emit VIDs with suitably-small steps/addends. We use imm5 is a - // threshold since it's the immediate value many RVV instructions accept. - // There is no vmul.vi instruction so ensure multiply constant can fit in - // a single addi instruction. - if (((StepOpcode == ISD::MUL && isInt<12>(SplatStepVal)) || - (StepOpcode == ISD::SHL && isUInt<5>(SplatStepVal))) && - isPowerOf2_32(StepDenominator) && - (SplatStepVal >= 0 || StepDenominator == 1) && isInt<5>(Addend)) { - MVT VIDVT = - VT.isFloatingPoint() ? VT.changeVectorElementTypeToInteger() : VT; - MVT VIDContainerVT = - getContainerForFixedLengthVector(DAG, VIDVT, Subtarget); - SDValue VID = DAG.getNode(RISCVISD::VID_VL, DL, VIDContainerVT, Mask, VL); - // Convert right out of the scalable type so we can use standard ISD - // nodes for the rest of the computation. If we used scalable types with - // these, we'd lose the fixed-length vector info and generate worse - // vsetvli code. - VID = convertFromScalableVector(VIDVT, VID, DAG, Subtarget); - if ((StepOpcode == ISD::MUL && SplatStepVal != 1) || - (StepOpcode == ISD::SHL && SplatStepVal != 0)) { - SDValue SplatStep = DAG.getSplatBuildVector( - VIDVT, DL, DAG.getConstant(SplatStepVal, DL, XLenVT)); - VID = DAG.getNode(StepOpcode, DL, VIDVT, VID, SplatStep); - } - if (StepDenominator != 1) { - SDValue SplatStep = DAG.getSplatBuildVector( - VIDVT, DL, DAG.getConstant(Log2_64(StepDenominator), DL, XLenVT)); - VID = DAG.getNode(ISD::SRL, DL, VIDVT, VID, SplatStep); - } - if (Addend != 0 || Negate) { - SDValue SplatAddend = DAG.getSplatBuildVector( - VIDVT, DL, DAG.getConstant(Addend, DL, XLenVT)); - VID = DAG.getNode(Negate ? ISD::SUB : ISD::ADD, DL, VIDVT, SplatAddend, - VID); - } - if (VT.isFloatingPoint()) { - // TODO: Use vfwcvt to reduce register pressure. 
- VID = DAG.getNode(ISD::SINT_TO_FP, DL, VT, VID); - } - return VID; - } - } - - // Attempt to detect "hidden" splats, which only reveal themselves as splats - // when re-interpreted as a vector with a larger element type. For example, - // v4i16 = build_vector i16 0, i16 1, i16 0, i16 1 - // could be instead splat as - // v2i32 = build_vector i32 0x00010000, i32 0x00010000 - // TODO: This optimization could also work on non-constant splats, but it - // would require bit-manipulation instructions to construct the splat value. - SmallVector Sequence; - unsigned EltBitSize = VT.getScalarSizeInBits(); - const auto *BV = cast(Op); - if (VT.isInteger() && EltBitSize < 64 && - ISD::isBuildVectorOfConstantSDNodes(Op.getNode()) && - BV->getRepeatedSequence(Sequence) && - (Sequence.size() * EltBitSize) <= 64) { - unsigned SeqLen = Sequence.size(); - MVT ViaIntVT = MVT::getIntegerVT(EltBitSize * SeqLen); - MVT ViaVecVT = MVT::getVectorVT(ViaIntVT, NumElts / SeqLen); - assert((ViaIntVT == MVT::i16 || ViaIntVT == MVT::i32 || - ViaIntVT == MVT::i64) && - "Unexpected sequence type"); - - unsigned EltIdx = 0; - uint64_t EltMask = maskTrailingOnes(EltBitSize); - uint64_t SplatValue = 0; - // Construct the amalgamated value which can be splatted as this larger - // vector type. - for (const auto &SeqV : Sequence) { - if (!SeqV.isUndef()) - SplatValue |= ((cast(SeqV)->getZExtValue() & EltMask) - << (EltIdx * EltBitSize)); - EltIdx++; - } - - // On RV64, sign-extend from 32 to 64 bits where possible in order to - // achieve better constant materializion. - if (Subtarget.is64Bit() && ViaIntVT == MVT::i32) - SplatValue = SignExtend64<32>(SplatValue); - - // Since we can't introduce illegal i64 types at this stage, we can only - // perform an i64 splat on RV32 if it is its own sign-extended value. That - // way we can use RVV instructions to splat. - assert((ViaIntVT.bitsLE(XLenVT) || - (!Subtarget.is64Bit() && ViaIntVT == MVT::i64)) && - "Unexpected bitcast sequence"); - if (ViaIntVT.bitsLE(XLenVT) || isInt<32>(SplatValue)) { - SDValue ViaVL = - DAG.getConstant(ViaVecVT.getVectorNumElements(), DL, XLenVT); - MVT ViaContainerVT = - getContainerForFixedLengthVector(DAG, ViaVecVT, Subtarget); - SDValue Splat = - DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ViaContainerVT, - DAG.getUNDEF(ViaContainerVT), - DAG.getConstant(SplatValue, DL, XLenVT), ViaVL); - Splat = convertFromScalableVector(ViaVecVT, Splat, DAG, Subtarget); - return DAG.getBitcast(VT, Splat); - } - } - - // Try and optimize BUILD_VECTORs with "dominant values" - these are values - // which constitute a large proportion of the elements. In such cases we can - // splat a vector with the dominant element and make up the shortfall with - // INSERT_VECTOR_ELTs. - // Note that this includes vectors of 2 elements by association. The - // upper-most element is the "dominant" one, allowing us to use a splat to - // "insert" the upper element, and an insert of the lower element at position - // 0, which improves codegen. - SDValue DominantValue; - unsigned MostCommonCount = 0; - DenseMap ValueCounts; - unsigned NumUndefElts = - count_if(Op->op_values(), [](const SDValue &V) { return V.isUndef(); }); - - // Track the number of scalar loads we know we'd be inserting, estimated as - // any non-zero floating-point constant. Other kinds of element are either - // already in registers or are materialized on demand. The threshold at which - // a vector load is more desirable than several scalar materializion and - // vector-insertion instructions is not known. 
- unsigned NumScalarLoads = 0; - - for (SDValue V : Op->op_values()) { - if (V.isUndef()) - continue; - - ValueCounts.insert(std::make_pair(V, 0)); - unsigned &Count = ValueCounts[V]; - - if (auto *CFP = dyn_cast(V)) - NumScalarLoads += !CFP->isExactlyValue(+0.0); - - // Is this value dominant? In case of a tie, prefer the highest element as - // it's cheaper to insert near the beginning of a vector than it is at the - // end. - if (++Count >= MostCommonCount) { - DominantValue = V; - MostCommonCount = Count; - } - } - - assert(DominantValue && "Not expecting an all-undef BUILD_VECTOR"); - unsigned NumDefElts = NumElts - NumUndefElts; - unsigned DominantValueCountThreshold = NumDefElts <= 2 ? 0 : NumDefElts - 2; - - // Don't perform this optimization when optimizing for size, since - // materializing elements and inserting them tends to cause code bloat. - if (!DAG.shouldOptForSize() && NumScalarLoads < NumElts && - ((MostCommonCount > DominantValueCountThreshold) || - (ValueCounts.size() <= Log2_32(NumDefElts)))) { - // Start by splatting the most common element. - SDValue Vec = DAG.getSplatBuildVector(VT, DL, DominantValue); - - DenseSet Processed{DominantValue}; - MVT SelMaskTy = VT.changeVectorElementType(MVT::i1); - for (const auto &OpIdx : enumerate(Op->ops())) { - const SDValue &V = OpIdx.value(); - if (V.isUndef() || !Processed.insert(V).second) - continue; - if (ValueCounts[V] == 1) { - Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VT, Vec, V, - DAG.getConstant(OpIdx.index(), DL, XLenVT)); - } else { - // Blend in all instances of this value using a VSELECT, using a - // mask where each bit signals whether that element is the one - // we're after. - SmallVector Ops; - transform(Op->op_values(), std::back_inserter(Ops), [&](SDValue V1) { - return DAG.getConstant(V == V1, DL, XLenVT); - }); - Vec = DAG.getNode(ISD::VSELECT, DL, VT, - DAG.getBuildVector(SelMaskTy, DL, Ops), - DAG.getSplatBuildVector(VT, DL, V), Vec); - } - } - - return Vec; - } - - return SDValue(); -} - -static SDValue splatPartsI64WithVL(const SDLoc &DL, MVT VT, SDValue Passthru, - SDValue Lo, SDValue Hi, SDValue VL, - SelectionDAG &DAG) { - if (!Passthru) - Passthru = DAG.getUNDEF(VT); - if (isa(Lo) && isa(Hi)) { - int32_t LoC = cast(Lo)->getSExtValue(); - int32_t HiC = cast(Hi)->getSExtValue(); - // If Hi constant is all the same sign bit as Lo, lower this as a custom - // node in order to try and match RVV vector/scalar instructions. - if ((LoC >> 31) == HiC) - return DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, Passthru, Lo, VL); - - // If vl is equal to XLEN_MAX and Hi constant is equal to Lo, we could use - // vmv.v.x whose EEW = 32 to lower it. - auto *Const = dyn_cast(VL); - if (LoC == HiC && Const && Const->isAllOnesValue()) { - MVT InterVT = MVT::getVectorVT(MVT::i32, VT.getVectorElementCount() * 2); - // TODO: if vl <= min(VLMAX), we can also do this. But we could not - // access the subtarget here now. - auto InterVec = DAG.getNode( - RISCVISD::VMV_V_X_VL, DL, InterVT, DAG.getUNDEF(InterVT), Lo, - DAG.getRegister(RISCV::X0, MVT::i32)); - return DAG.getNode(ISD::BITCAST, DL, VT, InterVec); - } - } - - // Fall back to a stack store and stride x0 vector load. - return DAG.getNode(RISCVISD::SPLAT_VECTOR_SPLIT_I64_VL, DL, VT, Passthru, Lo, - Hi, VL); -} - -// Called by type legalization to handle splat of i64 on RV32. -// FIXME: We can optimize this when the type has sign or zero bits in one -// of the halves. 
-static SDValue splatSplitI64WithVL(const SDLoc &DL, MVT VT, SDValue Passthru, - SDValue Scalar, SDValue VL, - SelectionDAG &DAG) { - assert(Scalar.getValueType() == MVT::i64 && "Unexpected VT!"); - SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, Scalar, - DAG.getConstant(0, DL, MVT::i32)); - SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, Scalar, - DAG.getConstant(1, DL, MVT::i32)); - return splatPartsI64WithVL(DL, VT, Passthru, Lo, Hi, VL, DAG); -} - -// This function lowers a splat of a scalar operand Splat with the vector -// length VL. It ensures the final sequence is type legal, which is useful when -// lowering a splat after type legalization. -static SDValue lowerScalarSplat(SDValue Passthru, SDValue Scalar, SDValue VL, - MVT VT, SDLoc DL, SelectionDAG &DAG, - const RISCVSubtarget &Subtarget) { - bool HasPassthru = Passthru && !Passthru.isUndef(); - if (!HasPassthru && !Passthru) - Passthru = DAG.getUNDEF(VT); - if (VT.isFloatingPoint()) { - // If VL is 1, we could use vfmv.s.f. - if (isOneConstant(VL)) - return DAG.getNode(RISCVISD::VFMV_S_F_VL, DL, VT, Passthru, Scalar, VL); - return DAG.getNode(RISCVISD::VFMV_V_F_VL, DL, VT, Passthru, Scalar, VL); - } - - MVT XLenVT = Subtarget.getXLenVT(); - - // Simplest case is that the operand needs to be promoted to XLenVT. - if (Scalar.getValueType().bitsLE(XLenVT)) { - // If the operand is a constant, sign extend to increase our chances - // of being able to use a .vi instruction. ANY_EXTEND would become a - // a zero extend and the simm5 check in isel would fail. - // FIXME: Should we ignore the upper bits in isel instead? - unsigned ExtOpc = - isa(Scalar) ? ISD::SIGN_EXTEND : ISD::ANY_EXTEND; - Scalar = DAG.getNode(ExtOpc, DL, XLenVT, Scalar); - ConstantSDNode *Const = dyn_cast(Scalar); - // If VL is 1 and the scalar value won't benefit from immediate, we could - // use vmv.s.x. - if (isOneConstant(VL) && - (!Const || isNullConstant(Scalar) || !isInt<5>(Const->getSExtValue()))) - return DAG.getNode(RISCVISD::VMV_S_X_VL, DL, VT, Passthru, Scalar, VL); - return DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, Passthru, Scalar, VL); - } - - assert(XLenVT == MVT::i32 && Scalar.getValueType() == MVT::i64 && - "Unexpected scalar for splat lowering!"); - - if (isOneConstant(VL) && isNullConstant(Scalar)) - return DAG.getNode(RISCVISD::VMV_S_X_VL, DL, VT, Passthru, - DAG.getConstant(0, DL, XLenVT), VL); - - // Otherwise use the more complicated splatting algorithm. - return splatSplitI64WithVL(DL, VT, Passthru, Scalar, VL, DAG); -} - -static bool isInterleaveShuffle(ArrayRef Mask, MVT VT, bool &SwapSources, - const RISCVSubtarget &Subtarget) { - // We need to be able to widen elements to the next larger integer type. - if (VT.getScalarSizeInBits() >= Subtarget.getELEN()) - return false; - - int Size = Mask.size(); - assert(Size == (int)VT.getVectorNumElements() && "Unexpected mask size"); - - int Srcs[] = {-1, -1}; - for (int i = 0; i != Size; ++i) { - // Ignore undef elements. - if (Mask[i] < 0) - continue; - - // Is this an even or odd element. - int Pol = i % 2; - - // Ensure we consistently use the same source for this element polarity. - int Src = Mask[i] / Size; - if (Srcs[Pol] < 0) - Srcs[Pol] = Src; - if (Srcs[Pol] != Src) - return false; - - // Make sure the element within the source is appropriate for this element - // in the destination. - int Elt = Mask[i] % Size; - if (Elt != i / 2) - return false; - } - - // We need to find a source for each polarity and they can't be the same. 
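// Illustrative sketch (not part of the patch): a standalone model of the
// interleave-mask test that isInterleaveShuffle implements -- even result
// lanes must all come from one source, odd lanes from the other, and lane i
// must read element i/2 of its source. Hypothetical names:
#include <cassert>
#include <vector>

static bool isInterleaveMask(const std::vector<int> &Mask, bool &SwapSources) {
  int Size = static_cast<int>(Mask.size());
  int Srcs[2] = {-1, -1};
  for (int I = 0; I != Size; ++I) {
    if (Mask[I] < 0)
      continue;                    // undef lane
    int Pol = I % 2;               // even or odd destination lane
    int Src = Mask[I] / Size;      // which input vector this lane reads
    if (Srcs[Pol] < 0)
      Srcs[Pol] = Src;
    if (Srcs[Pol] != Src || Mask[I] % Size != I / 2)
      return false;
  }
  if (Srcs[0] < 0 || Srcs[1] < 0 || Srcs[0] == Srcs[1])
    return false;
  SwapSources = Srcs[0] > Srcs[1]; // the second source fed the even lanes
  return true;
}

int main() {
  bool Swap = false;
  // Interleaving the low halves of two v8 sources.
  assert(isInterleaveMask({0, 8, 1, 9, 2, 10, 3, 11}, Swap) && !Swap);
  assert(isInterleaveMask({8, 0, 9, 1, 10, 2, 11, 3}, Swap) && Swap);
  assert(!isInterleaveMask({0, 1, 2, 3, 4, 5, 6, 7}, Swap));
  return 0;
}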
- if (Srcs[0] < 0 || Srcs[1] < 0 || Srcs[0] == Srcs[1]) - return false; - - // Swap the sources if the second source was in the even polarity. - SwapSources = Srcs[0] > Srcs[1]; - - return true; -} - -/// Match shuffles that concatenate two vectors, rotate the concatenation, -/// and then extract the original number of elements from the rotated result. -/// This is equivalent to vector.splice or X86's PALIGNR instruction. The -/// returned rotation amount is for a rotate right, where elements move from -/// higher elements to lower elements. \p LoSrc indicates the first source -/// vector of the rotate or -1 for undef. \p HiSrc indicates the second vector -/// of the rotate or -1 for undef. At least one of \p LoSrc and \p HiSrc will be -/// 0 or 1 if a rotation is found. -/// -/// NOTE: We talk about rotate to the right which matches how bit shift and -/// rotate instructions are described where LSBs are on the right, but LLVM IR -/// and the table below write vectors with the lowest elements on the left. -static int isElementRotate(int &LoSrc, int &HiSrc, ArrayRef Mask) { - int Size = Mask.size(); - - // We need to detect various ways of spelling a rotation: - // [11, 12, 13, 14, 15, 0, 1, 2] - // [-1, 12, 13, 14, -1, -1, 1, -1] - // [-1, -1, -1, -1, -1, -1, 1, 2] - // [ 3, 4, 5, 6, 7, 8, 9, 10] - // [-1, 4, 5, 6, -1, -1, 9, -1] - // [-1, 4, 5, 6, -1, -1, -1, -1] - int Rotation = 0; - LoSrc = -1; - HiSrc = -1; - for (int i = 0; i != Size; ++i) { - int M = Mask[i]; - if (M < 0) - continue; - - // Determine where a rotate vector would have started. - int StartIdx = i - (M % Size); - // The identity rotation isn't interesting, stop. - if (StartIdx == 0) - return -1; - - // If we found the tail of a vector the rotation must be the missing - // front. If we found the head of a vector, it must be how much of the - // head. - int CandidateRotation = StartIdx < 0 ? -StartIdx : Size - StartIdx; - - if (Rotation == 0) - Rotation = CandidateRotation; - else if (Rotation != CandidateRotation) - // The rotations don't match, so we can't match this mask. - return -1; - - // Compute which value this mask is pointing at. - int MaskSrc = M < Size ? 0 : 1; - - // Compute which of the two target values this index should be assigned to. - // This reflects whether the high elements are remaining or the low elemnts - // are remaining. - int &TargetSrc = StartIdx < 0 ? HiSrc : LoSrc; - - // Either set up this value if we've not encountered it before, or check - // that it remains consistent. - if (TargetSrc < 0) - TargetSrc = MaskSrc; - else if (TargetSrc != MaskSrc) - // This may be a rotation, but it pulls from the inputs in some - // unsupported interleaving. - return -1; - } - - // Check that we successfully analyzed the mask, and normalize the results. - assert(Rotation != 0 && "Failed to locate a viable rotation!"); - assert((LoSrc >= 0 || HiSrc >= 0) && - "Failed to find a rotated input vector!"); - - return Rotation; -} - -// Lower the following shuffles to vnsrl. -// t34: v8i8 = extract_subvector t11, Constant:i64<0> -// t33: v8i8 = extract_subvector t11, Constant:i64<8> -// a) t35: v8i8 = vector_shuffle<0,2,4,6,8,10,12,14> t34, t33 -// b) t35: v8i8 = vector_shuffle<1,3,5,7,9,11,13,15> t34, t33 -static SDValue lowerVECTOR_SHUFFLEAsVNSRL(const SDLoc &DL, MVT VT, - MVT ContainerVT, SDValue V1, - SDValue V2, SDValue TrueMask, - SDValue VL, ArrayRef Mask, - const RISCVSubtarget &Subtarget, - SelectionDAG &DAG) { - // Need to be able to widen the vector. 
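// Illustrative sketch (not part of the patch): a compact standalone model of
// the rotation check done by isElementRotate above, over a plain mask where
// negative entries mean undef. Returns the slide amount, or -1 when the mask
// is not a rotation of the two concatenated sources. Hypothetical names:
#include <cassert>
#include <vector>

static int detectRotation(const std::vector<int> &Mask, int &LoSrc, int &HiSrc) {
  int Size = static_cast<int>(Mask.size());
  int Rotation = 0;
  LoSrc = HiSrc = -1;
  for (int I = 0; I != Size; ++I) {
    int M = Mask[I];
    if (M < 0)
      continue;
    int StartIdx = I - (M % Size);
    if (StartIdx == 0)
      return -1; // the identity rotation is not interesting
    int Candidate = StartIdx < 0 ? -StartIdx : Size - StartIdx;
    if (Rotation == 0)
      Rotation = Candidate;
    else if (Rotation != Candidate)
      return -1; // inconsistent rotation amounts
    int MaskSrc = M < Size ? 0 : 1;
    int &TargetSrc = StartIdx < 0 ? HiSrc : LoSrc;
    if (TargetSrc < 0)
      TargetSrc = MaskSrc;
    else if (TargetSrc != MaskSrc)
      return -1; // pulls from the inputs in an unsupported interleaving
  }
  return Rotation;
}

int main() {
  int Lo, Hi;
  // First example mask from the comment above: HiV (source 1) slides down by
  // 3 and LoV (source 0) slides up by 8 - 3 = 5.
  assert(detectRotation({11, 12, 13, 14, 15, 0, 1, 2}, Lo, Hi) == 3);
  assert(Lo == 0 && Hi == 1);
  return 0;
}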
- if (VT.getScalarSizeInBits() >= Subtarget.getELEN()) - return SDValue(); - - // Both input must be extracts. - if (V1.getOpcode() != ISD::EXTRACT_SUBVECTOR || - V2.getOpcode() != ISD::EXTRACT_SUBVECTOR) - return SDValue(); - - // Extracting from the same source. - SDValue Src = V1.getOperand(0); - if (Src != V2.getOperand(0)) - return SDValue(); - - // Src needs to have twice the number of elements. - if (Src.getValueType().getVectorNumElements() != (Mask.size() * 2)) - return SDValue(); - - // The extracts must extract the two halves of the source. - if (V1.getConstantOperandVal(1) != 0 || - V2.getConstantOperandVal(1) != Mask.size()) - return SDValue(); - - // First index must be the first even or odd element from V1. - if (Mask[0] != 0 && Mask[0] != 1) - return SDValue(); - - // The others must increase by 2 each time. - // TODO: Support undef elements? - for (unsigned i = 1; i != Mask.size(); ++i) - if (Mask[i] != Mask[i - 1] + 2) - return SDValue(); - - // Convert the source using a container type with twice the elements. Since - // source VT is legal and twice this VT, we know VT isn't LMUL=8 so it is - // safe to double. - MVT DoubleContainerVT = - MVT::getVectorVT(ContainerVT.getVectorElementType(), - ContainerVT.getVectorElementCount() * 2); - Src = convertToScalableVector(DoubleContainerVT, Src, DAG, Subtarget); - - // Convert the vector to a wider integer type with the original element - // count. This also converts FP to int. - unsigned EltBits = ContainerVT.getScalarSizeInBits(); - MVT WideIntEltVT = MVT::getIntegerVT(EltBits * 2); - MVT WideIntContainerVT = - MVT::getVectorVT(WideIntEltVT, ContainerVT.getVectorElementCount()); - Src = DAG.getBitcast(WideIntContainerVT, Src); - - // Convert to the integer version of the container type. - MVT IntEltVT = MVT::getIntegerVT(EltBits); - MVT IntContainerVT = - MVT::getVectorVT(IntEltVT, ContainerVT.getVectorElementCount()); - - // If we want even elements, then the shift amount is 0. Otherwise, shift by - // the original element size. - unsigned Shift = Mask[0] == 0 ? 0 : EltBits; - SDValue SplatShift = DAG.getNode( - RISCVISD::VMV_V_X_VL, DL, IntContainerVT, DAG.getUNDEF(ContainerVT), - DAG.getConstant(Shift, DL, Subtarget.getXLenVT()), VL); - SDValue Res = - DAG.getNode(RISCVISD::VNSRL_VL, DL, IntContainerVT, Src, SplatShift, - DAG.getUNDEF(IntContainerVT), TrueMask, VL); - // Cast back to FP if needed. - Res = DAG.getBitcast(ContainerVT, Res); - - return convertFromScalableVector(VT, Res, DAG, Subtarget); -} - -// Lower the following shuffle to vslidedown. -// a) -// t49: v8i8 = extract_subvector t13, Constant:i64<0> -// t109: v8i8 = extract_subvector t13, Constant:i64<8> -// t108: v8i8 = vector_shuffle<1,2,3,4,5,6,7,8> t49, t106 -// b) -// t69: v16i16 = extract_subvector t68, Constant:i64<0> -// t23: v8i16 = extract_subvector t69, Constant:i64<0> -// t29: v4i16 = extract_subvector t23, Constant:i64<4> -// t26: v8i16 = extract_subvector t69, Constant:i64<8> -// t30: v4i16 = extract_subvector t26, Constant:i64<0> -// t54: v4i16 = vector_shuffle<1,2,3,4> t29, t30 -static SDValue lowerVECTOR_SHUFFLEAsVSlidedown(const SDLoc &DL, MVT VT, - SDValue V1, SDValue V2, - ArrayRef Mask, - const RISCVSubtarget &Subtarget, - SelectionDAG &DAG) { - auto findNonEXTRACT_SUBVECTORParent = - [](SDValue Parent) -> std::pair { - uint64_t Offset = 0; - while (Parent.getOpcode() == ISD::EXTRACT_SUBVECTOR && - // EXTRACT_SUBVECTOR can be used to extract a fixed-width vector from - // a scalable vector. But we don't want to match the case. 
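// Illustrative sketch (not part of the patch): the vnsrl lowering above uses
// the fact that, when two adjacent narrow elements are viewed as one element
// of twice the width, a narrowing shift right by 0 keeps the even-indexed
// elements and a shift by the element width keeps the odd-indexed ones.
// Scalar model with i16 elements viewed as i32 pairs; assumes a
// little-endian host:
#include <cassert>
#include <cstdint>
#include <cstring>
#include <vector>

static std::vector<uint16_t> narrowingShift(const std::vector<uint16_t> &Src,
                                            unsigned ShiftBits) {
  std::vector<uint16_t> Out;
  for (size_t I = 0; I + 1 < Src.size(); I += 2) {
    uint32_t Wide;
    std::memcpy(&Wide, &Src[I], sizeof(Wide));               // widened view
    Out.push_back(static_cast<uint16_t>(Wide >> ShiftBits)); // vnsrl + trunc
  }
  return Out;
}

int main() {
  std::vector<uint16_t> V = {10, 11, 20, 21, 30, 31, 40, 41};
  assert(narrowingShift(V, 0) == std::vector<uint16_t>({10, 20, 30, 40}));
  assert(narrowingShift(V, 16) == std::vector<uint16_t>({11, 21, 31, 41}));
  return 0;
}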
- Parent.getOperand(0).getSimpleValueType().isFixedLengthVector()) { - Offset += Parent.getConstantOperandVal(1); - Parent = Parent.getOperand(0); - } - return std::make_pair(Parent, Offset); - }; - - auto [V1Src, V1IndexOffset] = findNonEXTRACT_SUBVECTORParent(V1); - auto [V2Src, V2IndexOffset] = findNonEXTRACT_SUBVECTORParent(V2); - - // Extracting from the same source. - SDValue Src = V1Src; - if (Src != V2Src) - return SDValue(); - - // Rebuild mask because Src may be from multiple EXTRACT_SUBVECTORs. - SmallVector NewMask(Mask); - for (size_t i = 0; i != NewMask.size(); ++i) { - if (NewMask[i] == -1) - continue; - - if (static_cast(NewMask[i]) < NewMask.size()) { - NewMask[i] = NewMask[i] + V1IndexOffset; - } else { - // Minus NewMask.size() is needed. Otherwise, the b case would be - // <5,6,7,12> instead of <5,6,7,8>. - NewMask[i] = NewMask[i] - NewMask.size() + V2IndexOffset; - } - } - - // First index must be known and non-zero. It will be used as the slidedown - // amount. - if (NewMask[0] <= 0) - return SDValue(); - - // NewMask is also continuous. - for (unsigned i = 1; i != NewMask.size(); ++i) - if (NewMask[i - 1] + 1 != NewMask[i]) - return SDValue(); - - MVT XLenVT = Subtarget.getXLenVT(); - MVT SrcVT = Src.getSimpleValueType(); - MVT ContainerVT = getContainerForFixedLengthVector(DAG, SrcVT, Subtarget); - auto [TrueMask, VL] = getDefaultVLOps(SrcVT, ContainerVT, DL, DAG, Subtarget); - SDValue Slidedown = DAG.getNode( - RISCVISD::VSLIDEDOWN_VL, DL, ContainerVT, DAG.getUNDEF(ContainerVT), - convertToScalableVector(ContainerVT, Src, DAG, Subtarget), - DAG.getConstant(NewMask[0], DL, XLenVT), TrueMask, VL); - return DAG.getNode( - ISD::EXTRACT_SUBVECTOR, DL, VT, - convertFromScalableVector(SrcVT, Slidedown, DAG, Subtarget), - DAG.getConstant(0, DL, XLenVT)); -} - -static SDValue lowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG, - const RISCVSubtarget &Subtarget) { - SDValue V1 = Op.getOperand(0); - SDValue V2 = Op.getOperand(1); - SDLoc DL(Op); - MVT XLenVT = Subtarget.getXLenVT(); - MVT VT = Op.getSimpleValueType(); - unsigned NumElts = VT.getVectorNumElements(); - ShuffleVectorSDNode *SVN = cast(Op.getNode()); - - MVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget); - - auto [TrueMask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget); - - if (SVN->isSplat()) { - const int Lane = SVN->getSplatIndex(); - if (Lane >= 0) { - MVT SVT = VT.getVectorElementType(); - - // Turn splatted vector load into a strided load with an X0 stride. - SDValue V = V1; - // Peek through CONCAT_VECTORS as VectorCombine can concat a vector - // with undef. - // FIXME: Peek through INSERT_SUBVECTOR, EXTRACT_SUBVECTOR, bitcasts? - int Offset = Lane; - if (V.getOpcode() == ISD::CONCAT_VECTORS) { - int OpElements = - V.getOperand(0).getSimpleValueType().getVectorNumElements(); - V = V.getOperand(Offset / OpElements); - Offset %= OpElements; - } - - // We need to ensure the load isn't atomic or volatile. - if (ISD::isNormalLoad(V.getNode()) && cast(V)->isSimple()) { - auto *Ld = cast(V); - Offset *= SVT.getStoreSize(); - SDValue NewAddr = DAG.getMemBasePlusOffset(Ld->getBasePtr(), - TypeSize::Fixed(Offset), DL); - - // If this is SEW=64 on RV32, use a strided load with a stride of x0. 
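// Illustrative sketch (not part of the patch): the slidedown matcher above
// reduces to "after folding the EXTRACT_SUBVECTOR offsets into the mask, the
// indices must be consecutive and start at a known non-zero value; that value
// is the vslidedown amount". Standalone model of the final check
// (hypothetical name):
#include <cassert>
#include <vector>

// Returns the slide amount, or -1 if the mask is not a single slidedown.
static int matchSlidedown(const std::vector<int> &Mask) {
  if (Mask.empty() || Mask[0] <= 0)
    return -1; // first index must be known and non-zero
  for (size_t I = 1; I < Mask.size(); ++I)
    if (Mask[I] != Mask[I - 1] + 1)
      return -1; // indices must be consecutive
  return Mask[0];
}

int main() {
  assert(matchSlidedown({1, 2, 3, 4}) == 1);  // the v4i16 example above
  assert(matchSlidedown({5, 6, 7, 8}) == 5);
  assert(matchSlidedown({0, 1, 2, 3}) == -1); // identity, nothing to slide
  assert(matchSlidedown({2, 4, 6, 8}) == -1); // not consecutive
  return 0;
}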
- if (SVT.isInteger() && SVT.bitsGT(XLenVT)) { - SDVTList VTs = DAG.getVTList({ContainerVT, MVT::Other}); - SDValue IntID = - DAG.getTargetConstant(Intrinsic::riscv_vlse, DL, XLenVT); - SDValue Ops[] = {Ld->getChain(), - IntID, - DAG.getUNDEF(ContainerVT), - NewAddr, - DAG.getRegister(RISCV::X0, XLenVT), - VL}; - SDValue NewLoad = DAG.getMemIntrinsicNode( - ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops, SVT, - DAG.getMachineFunction().getMachineMemOperand( - Ld->getMemOperand(), Offset, SVT.getStoreSize())); - DAG.makeEquivalentMemoryOrdering(Ld, NewLoad); - return convertFromScalableVector(VT, NewLoad, DAG, Subtarget); - } - - // Otherwise use a scalar load and splat. This will give the best - // opportunity to fold a splat into the operation. ISel can turn it into - // the x0 strided load if we aren't able to fold away the select. - if (SVT.isFloatingPoint()) - V = DAG.getLoad(SVT, DL, Ld->getChain(), NewAddr, - Ld->getPointerInfo().getWithOffset(Offset), - Ld->getOriginalAlign(), - Ld->getMemOperand()->getFlags()); - else - V = DAG.getExtLoad(ISD::SEXTLOAD, DL, XLenVT, Ld->getChain(), NewAddr, - Ld->getPointerInfo().getWithOffset(Offset), SVT, - Ld->getOriginalAlign(), - Ld->getMemOperand()->getFlags()); - DAG.makeEquivalentMemoryOrdering(Ld, V); - - unsigned Opc = - VT.isFloatingPoint() ? RISCVISD::VFMV_V_F_VL : RISCVISD::VMV_V_X_VL; - SDValue Splat = - DAG.getNode(Opc, DL, ContainerVT, DAG.getUNDEF(ContainerVT), V, VL); - return convertFromScalableVector(VT, Splat, DAG, Subtarget); - } - - V1 = convertToScalableVector(ContainerVT, V1, DAG, Subtarget); - assert(Lane < (int)NumElts && "Unexpected lane!"); - SDValue Gather = DAG.getNode(RISCVISD::VRGATHER_VX_VL, DL, ContainerVT, - V1, DAG.getConstant(Lane, DL, XLenVT), - DAG.getUNDEF(ContainerVT), TrueMask, VL); - return convertFromScalableVector(VT, Gather, DAG, Subtarget); - } - } - - ArrayRef Mask = SVN->getMask(); - - if (SDValue V = - lowerVECTOR_SHUFFLEAsVSlidedown(DL, VT, V1, V2, Mask, Subtarget, DAG)) - return V; - - // Lower rotations to a SLIDEDOWN and a SLIDEUP. One of the source vectors may - // be undef which can be handled with a single SLIDEDOWN/UP. - int LoSrc, HiSrc; - int Rotation = isElementRotate(LoSrc, HiSrc, Mask); - if (Rotation > 0) { - SDValue LoV, HiV; - if (LoSrc >= 0) { - LoV = LoSrc == 0 ? V1 : V2; - LoV = convertToScalableVector(ContainerVT, LoV, DAG, Subtarget); - } - if (HiSrc >= 0) { - HiV = HiSrc == 0 ? V1 : V2; - HiV = convertToScalableVector(ContainerVT, HiV, DAG, Subtarget); - } - - // We found a rotation. We need to slide HiV down by Rotation. Then we need - // to slide LoV up by (NumElts - Rotation). - unsigned InvRotate = NumElts - Rotation; - - SDValue Res = DAG.getUNDEF(ContainerVT); - if (HiV) { - // If we are doing a SLIDEDOWN+SLIDEUP, reduce the VL for the SLIDEDOWN. 
- // FIXME: If we are only doing a SLIDEDOWN, don't reduce the VL as it - // causes multiple vsetvlis in some test cases such as lowering - // reduce.mul - SDValue DownVL = VL; - if (LoV) - DownVL = DAG.getConstant(InvRotate, DL, XLenVT); - Res = - DAG.getNode(RISCVISD::VSLIDEDOWN_VL, DL, ContainerVT, Res, HiV, - DAG.getConstant(Rotation, DL, XLenVT), TrueMask, DownVL); - } - if (LoV) - Res = DAG.getNode(RISCVISD::VSLIDEUP_VL, DL, ContainerVT, Res, LoV, - DAG.getConstant(InvRotate, DL, XLenVT), TrueMask, VL); - - return convertFromScalableVector(VT, Res, DAG, Subtarget); - } - - if (SDValue V = lowerVECTOR_SHUFFLEAsVNSRL( - DL, VT, ContainerVT, V1, V2, TrueMask, VL, Mask, Subtarget, DAG)) - return V; - - // Detect an interleave shuffle and lower to - // (vmaccu.vx (vwaddu.vx lohalf(V1), lohalf(V2)), lohalf(V2), (2^eltbits - 1)) - bool SwapSources; - if (isInterleaveShuffle(Mask, VT, SwapSources, Subtarget)) { - // Swap sources if needed. - if (SwapSources) - std::swap(V1, V2); - - // Extract the lower half of the vectors. - MVT HalfVT = VT.getHalfNumVectorElementsVT(); - V1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, HalfVT, V1, - DAG.getConstant(0, DL, XLenVT)); - V2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, HalfVT, V2, - DAG.getConstant(0, DL, XLenVT)); - - // Double the element width and halve the number of elements in an int type. - unsigned EltBits = VT.getScalarSizeInBits(); - MVT WideIntEltVT = MVT::getIntegerVT(EltBits * 2); - MVT WideIntVT = - MVT::getVectorVT(WideIntEltVT, VT.getVectorNumElements() / 2); - // Convert this to a scalable vector. We need to base this on the - // destination size to ensure there's always a type with a smaller LMUL. - MVT WideIntContainerVT = - getContainerForFixedLengthVector(DAG, WideIntVT, Subtarget); - - // Convert sources to scalable vectors with the same element count as the - // larger type. - MVT HalfContainerVT = MVT::getVectorVT( - VT.getVectorElementType(), WideIntContainerVT.getVectorElementCount()); - V1 = convertToScalableVector(HalfContainerVT, V1, DAG, Subtarget); - V2 = convertToScalableVector(HalfContainerVT, V2, DAG, Subtarget); - - // Cast sources to integer. - MVT IntEltVT = MVT::getIntegerVT(EltBits); - MVT IntHalfVT = - MVT::getVectorVT(IntEltVT, HalfContainerVT.getVectorElementCount()); - V1 = DAG.getBitcast(IntHalfVT, V1); - V2 = DAG.getBitcast(IntHalfVT, V2); - - // Freeze V2 since we use it twice and we need to be sure that the add and - // multiply see the same value. - V2 = DAG.getFreeze(V2); - - // Recreate TrueMask using the widened type's element count. - TrueMask = getAllOnesMask(HalfContainerVT, VL, DL, DAG); - - // Widen V1 and V2 with 0s and add one copy of V2 to V1. - SDValue Add = - DAG.getNode(RISCVISD::VWADDU_VL, DL, WideIntContainerVT, V1, V2, - DAG.getUNDEF(WideIntContainerVT), TrueMask, VL); - // Create 2^eltbits - 1 copies of V2 by multiplying by the largest integer. - SDValue Multiplier = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, IntHalfVT, - DAG.getUNDEF(IntHalfVT), - DAG.getAllOnesConstant(DL, XLenVT), VL); - SDValue WidenMul = - DAG.getNode(RISCVISD::VWMULU_VL, DL, WideIntContainerVT, V2, Multiplier, - DAG.getUNDEF(WideIntContainerVT), TrueMask, VL); - // Add the new copies to our previous addition giving us 2^eltbits copies of - // V2. This is equivalent to shifting V2 left by eltbits. This should - // combine with the vwmulu.vv above to form vwmaccu.vv. 
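// Illustrative sketch (not part of the patch): the interleave lowering here
// computes vwaddu(V1, V2) and then adds (2^eltbits - 1) * V2 (the ADD_VL that
// follows), which equals zext(V1) + 2^eltbits * zext(V2): V1 lands in the low
// half and V2 in the high half of each widened element, i.e. the interleaved
// pair. Exhaustive scalar check of that identity for 8-bit elements:
#include <cassert>
#include <cstdint>

int main() {
  for (unsigned A = 0; A < 256; ++A) {
    for (unsigned B = 0; B < 256; ++B) {
      uint16_t WAddU = static_cast<uint16_t>(A + B);    // vwaddu piece
      uint16_t WMulU = static_cast<uint16_t>(255u * B); // vwmulu by 2^8 - 1
      uint16_t Sum = static_cast<uint16_t>(WAddU + WMulU);
      assert(Sum == static_cast<uint16_t>(A | (B << 8)));
    }
  }
  return 0;
}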
- Add = DAG.getNode(RISCVISD::ADD_VL, DL, WideIntContainerVT, Add, WidenMul, - DAG.getUNDEF(WideIntContainerVT), TrueMask, VL); - // Cast back to ContainerVT. We need to re-create a new ContainerVT in case - // WideIntContainerVT is a larger fractional LMUL than implied by the fixed - // vector VT. - ContainerVT = - MVT::getVectorVT(VT.getVectorElementType(), - WideIntContainerVT.getVectorElementCount() * 2); - Add = DAG.getBitcast(ContainerVT, Add); - return convertFromScalableVector(VT, Add, DAG, Subtarget); - } - - // Detect shuffles which can be re-expressed as vector selects; these are - // shuffles in which each element in the destination is taken from an element - // at the corresponding index in either source vectors. - bool IsSelect = all_of(enumerate(Mask), [&](const auto &MaskIdx) { - int MaskIndex = MaskIdx.value(); - return MaskIndex < 0 || MaskIdx.index() == (unsigned)MaskIndex % NumElts; - }); - - assert(!V1.isUndef() && "Unexpected shuffle canonicalization"); - - SmallVector MaskVals; - // As a backup, shuffles can be lowered via a vrgather instruction, possibly - // merged with a second vrgather. - SmallVector GatherIndicesLHS, GatherIndicesRHS; - - // By default we preserve the original operand order, and use a mask to - // select LHS as true and RHS as false. However, since RVV vector selects may - // feature splats but only on the LHS, we may choose to invert our mask and - // instead select between RHS and LHS. - bool SwapOps = DAG.isSplatValue(V2) && !DAG.isSplatValue(V1); - bool InvertMask = IsSelect == SwapOps; - - // Keep a track of which non-undef indices are used by each LHS/RHS shuffle - // half. - DenseMap LHSIndexCounts, RHSIndexCounts; - - // Now construct the mask that will be used by the vselect or blended - // vrgather operation. For vrgathers, construct the appropriate indices into - // each vector. - for (int MaskIndex : Mask) { - bool SelectMaskVal = (MaskIndex < (int)NumElts) ^ InvertMask; - MaskVals.push_back(DAG.getConstant(SelectMaskVal, DL, XLenVT)); - if (!IsSelect) { - bool IsLHSOrUndefIndex = MaskIndex < (int)NumElts; - GatherIndicesLHS.push_back(IsLHSOrUndefIndex && MaskIndex >= 0 - ? DAG.getConstant(MaskIndex, DL, XLenVT) - : DAG.getUNDEF(XLenVT)); - GatherIndicesRHS.push_back( - IsLHSOrUndefIndex ? DAG.getUNDEF(XLenVT) - : DAG.getConstant(MaskIndex - NumElts, DL, XLenVT)); - if (IsLHSOrUndefIndex && MaskIndex >= 0) - ++LHSIndexCounts[MaskIndex]; - if (!IsLHSOrUndefIndex) - ++RHSIndexCounts[MaskIndex - NumElts]; - } - } - - if (SwapOps) { - std::swap(V1, V2); - std::swap(GatherIndicesLHS, GatherIndicesRHS); - } - - assert(MaskVals.size() == NumElts && "Unexpected select-like shuffle"); - MVT MaskVT = MVT::getVectorVT(MVT::i1, NumElts); - SDValue SelectMask = DAG.getBuildVector(MaskVT, DL, MaskVals); - - if (IsSelect) - return DAG.getNode(ISD::VSELECT, DL, VT, SelectMask, V1, V2); - - if (VT.getScalarSizeInBits() == 8 && VT.getVectorNumElements() > 256) { - // On such a large vector we're unable to use i8 as the index type. - // FIXME: We could promote the index to i16 and use vrgatherei16, but that - // may involve vector splitting if we're already at LMUL=8, or our - // user-supplied maximum fixed-length LMUL. 
- return SDValue(); - } - - unsigned GatherVXOpc = RISCVISD::VRGATHER_VX_VL; - unsigned GatherVVOpc = RISCVISD::VRGATHER_VV_VL; - MVT IndexVT = VT.changeTypeToInteger(); - // Since we can't introduce illegal index types at this stage, use i16 and - // vrgatherei16 if the corresponding index type for plain vrgather is greater - // than XLenVT. - if (IndexVT.getScalarType().bitsGT(XLenVT)) { - GatherVVOpc = RISCVISD::VRGATHEREI16_VV_VL; - IndexVT = IndexVT.changeVectorElementType(MVT::i16); - } - - MVT IndexContainerVT = - ContainerVT.changeVectorElementType(IndexVT.getScalarType()); - - SDValue Gather; - // TODO: This doesn't trigger for i64 vectors on RV32, since there we - // encounter a bitcasted BUILD_VECTOR with low/high i32 values. - if (SDValue SplatValue = DAG.getSplatValue(V1, /*LegalTypes*/ true)) { - Gather = lowerScalarSplat(SDValue(), SplatValue, VL, ContainerVT, DL, DAG, - Subtarget); - } else { - V1 = convertToScalableVector(ContainerVT, V1, DAG, Subtarget); - // If only one index is used, we can use a "splat" vrgather. - // TODO: We can splat the most-common index and fix-up any stragglers, if - // that's beneficial. - if (LHSIndexCounts.size() == 1) { - int SplatIndex = LHSIndexCounts.begin()->getFirst(); - Gather = DAG.getNode(GatherVXOpc, DL, ContainerVT, V1, - DAG.getConstant(SplatIndex, DL, XLenVT), - DAG.getUNDEF(ContainerVT), TrueMask, VL); - } else { - SDValue LHSIndices = DAG.getBuildVector(IndexVT, DL, GatherIndicesLHS); - LHSIndices = - convertToScalableVector(IndexContainerVT, LHSIndices, DAG, Subtarget); - - Gather = DAG.getNode(GatherVVOpc, DL, ContainerVT, V1, LHSIndices, - DAG.getUNDEF(ContainerVT), TrueMask, VL); - } - } - - // If a second vector operand is used by this shuffle, blend it in with an - // additional vrgather. - if (!V2.isUndef()) { - V2 = convertToScalableVector(ContainerVT, V2, DAG, Subtarget); - - MVT MaskContainerVT = ContainerVT.changeVectorElementType(MVT::i1); - SelectMask = - convertToScalableVector(MaskContainerVT, SelectMask, DAG, Subtarget); - - // If only one index is used, we can use a "splat" vrgather. - // TODO: We can splat the most-common index and fix-up any stragglers, if - // that's beneficial. - if (RHSIndexCounts.size() == 1) { - int SplatIndex = RHSIndexCounts.begin()->getFirst(); - Gather = DAG.getNode(GatherVXOpc, DL, ContainerVT, V2, - DAG.getConstant(SplatIndex, DL, XLenVT), Gather, - SelectMask, VL); - } else { - SDValue RHSIndices = DAG.getBuildVector(IndexVT, DL, GatherIndicesRHS); - RHSIndices = - convertToScalableVector(IndexContainerVT, RHSIndices, DAG, Subtarget); - Gather = DAG.getNode(GatherVVOpc, DL, ContainerVT, V2, RHSIndices, Gather, - SelectMask, VL); - } - } - - return convertFromScalableVector(VT, Gather, DAG, Subtarget); -} - -bool RISCVTargetLowering::isShuffleMaskLegal(ArrayRef M, EVT VT) const { - // Support splats for any type. These should type legalize well. - if (ShuffleVectorSDNode::isSplatMask(M.data(), VT)) - return true; - - // Only support legal VTs for other shuffles for now. - if (!isTypeLegal(VT)) - return false; - - MVT SVT = VT.getSimpleVT(); - - bool SwapSources; - int LoSrc, HiSrc; - return (isElementRotate(LoSrc, HiSrc, M) > 0) || - isInterleaveShuffle(M, SVT, SwapSources, Subtarget); -} - // Lower CTLZ_ZERO_UNDEF or CTTZ_ZERO_UNDEF by converting to FP and extracting // the exponent. 
static SDValue lowerCTLZ_CTTZ_ZERO_UNDEF(SDValue Op, SelectionDAG &DAG) { @@ -2805,66 +1479,6 @@ static SDValue lowerCTLZ_CTTZ_ZERO_UNDEF(SDValue Op, SelectionDAG &DAG) { return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(Adjust, DL, VT), Trunc); } -// While RVV has alignment restrictions, we should always be able to load as a -// legal equivalently-sized byte-typed vector instead. This method is -// responsible for re-expressing a ISD::LOAD via a correctly-aligned type. If -// the load is already correctly-aligned, it returns SDValue(). -SDValue RISCVTargetLowering::expandUnalignedRVVLoad(SDValue Op, - SelectionDAG &DAG) const { - auto *Load = cast(Op); - assert(Load && Load->getMemoryVT().isVector() && "Expected vector load"); - - if (allowsMemoryAccessForAlignment(*DAG.getContext(), DAG.getDataLayout(), - Load->getMemoryVT(), - *Load->getMemOperand())) - return SDValue(); - - SDLoc DL(Op); - MVT VT = Op.getSimpleValueType(); - unsigned EltSizeBits = VT.getScalarSizeInBits(); - assert((EltSizeBits == 16 || EltSizeBits == 32 || EltSizeBits == 64) && - "Unexpected unaligned RVV load type"); - MVT NewVT = - MVT::getVectorVT(MVT::i8, VT.getVectorElementCount() * (EltSizeBits / 8)); - assert(NewVT.isValid() && - "Expecting equally-sized RVV vector types to be legal"); - SDValue L = DAG.getLoad(NewVT, DL, Load->getChain(), Load->getBasePtr(), - Load->getPointerInfo(), Load->getOriginalAlign(), - Load->getMemOperand()->getFlags()); - return DAG.getMergeValues({DAG.getBitcast(VT, L), L.getValue(1)}, DL); -} - -// While RVV has alignment restrictions, we should always be able to store as a -// legal equivalently-sized byte-typed vector instead. This method is -// responsible for re-expressing a ISD::STORE via a correctly-aligned type. It -// returns SDValue() if the store is already correctly aligned. -SDValue RISCVTargetLowering::expandUnalignedRVVStore(SDValue Op, - SelectionDAG &DAG) const { - auto *Store = cast(Op); - assert(Store && Store->getValue().getValueType().isVector() && - "Expected vector store"); - - if (allowsMemoryAccessForAlignment(*DAG.getContext(), DAG.getDataLayout(), - Store->getMemoryVT(), - *Store->getMemOperand())) - return SDValue(); - - SDLoc DL(Op); - SDValue StoredVal = Store->getValue(); - MVT VT = StoredVal.getSimpleValueType(); - unsigned EltSizeBits = VT.getScalarSizeInBits(); - assert((EltSizeBits == 16 || EltSizeBits == 32 || EltSizeBits == 64) && - "Unexpected unaligned RVV store type"); - MVT NewVT = - MVT::getVectorVT(MVT::i8, VT.getVectorElementCount() * (EltSizeBits / 8)); - assert(NewVT.isValid() && - "Expecting equally-sized RVV vector types to be legal"); - StoredVal = DAG.getBitcast(NewVT, StoredVal); - return DAG.getStore(Store->getChain(), DL, StoredVal, Store->getBasePtr(), - Store->getPointerInfo(), Store->getOriginalAlign(), - Store->getMemOperand()->getFlags()); -} - static SDValue lowerConstant(SDValue Op, SelectionDAG &DAG, const RISCVSubtarget &Subtarget) { assert(Op.getValueType() == MVT::i64 && "Unexpected VT"); @@ -2951,46 +1565,11 @@ SDValue RISCVTargetLowering::LowerOperation(SDValue Op, return SDValue(); } - assert(!VT.isScalableVector() && !Op0VT.isScalableVector() && - "Unexpected types"); - - if (VT.isFixedLengthVector()) { - // We can handle fixed length vector bitcasts with a simple replacement - // in isel. - if (Op0VT.isFixedLengthVector()) - return Op; - // When bitcasting from scalar to fixed-length vector, insert the scalar - // into a one-element vector of the result type, and perform a vector - // bitcast. 
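// Illustrative sketch (not part of the patch): expandUnalignedRVVLoad/Store
// above re-express an under-aligned vector access as an equally sized access
// with i8 elements plus a bitcast. The scalar analogue of that trick is a
// byte-wise copy, which carries no alignment requirement:
#include <cassert>
#include <cstdint>
#include <cstring>

static uint32_t loadUnaligned32(const void *P) {
  uint32_t V;
  std::memcpy(&V, P, sizeof(V)); // byte loads; reinterpreting is the "bitcast"
  return V;
}

int main() {
  alignas(4) unsigned char Buf[8] = {0, 0x78, 0x56, 0x34, 0x12, 0, 0, 0};
  assert(loadUnaligned32(Buf + 1) == 0x12345678u); // little-endian host
  return 0;
}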
- if (!Op0VT.isVector()) { - EVT BVT = EVT::getVectorVT(*DAG.getContext(), Op0VT, 1); - if (!isTypeLegal(BVT)) - return SDValue(); - return DAG.getBitcast(VT, DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, BVT, - DAG.getUNDEF(BVT), Op0, - DAG.getConstant(0, DL, XLenVT))); - } - return SDValue(); - } - // Custom-legalize bitcasts from fixed-length vector types to scalar types - // thus: bitcast the vector to a one-element vector type whose element type - // is the same as the result type, and extract the first element. - if (!VT.isVector() && Op0VT.isFixedLengthVector()) { - EVT BVT = EVT::getVectorVT(*DAG.getContext(), VT, 1); - if (!isTypeLegal(BVT)) - return SDValue(); - SDValue BVec = DAG.getBitcast(BVT, Op0); - return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, BVec, - DAG.getConstant(0, DL, XLenVT)); - } + assert(0 && "TODO: vALU!!"); return SDValue(); } case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG); - case ISD::INTRINSIC_W_CHAIN: - return LowerINTRINSIC_W_CHAIN(Op, DAG); - case ISD::INTRINSIC_VOID: - return LowerINTRINSIC_VOID(Op, DAG); case ISD::BITREVERSE: { MVT VT = Op.getSimpleValueType(); SDLoc DL(Op); @@ -3000,39 +1579,6 @@ SDValue RISCVTargetLowering::LowerOperation(SDValue Op, SDValue BSwap = DAG.getNode(ISD::BSWAP, DL, VT, Op.getOperand(0)); return DAG.getNode(RISCVISD::BREV8, DL, VT, BSwap); } - case ISD::VSCALE: { - MVT VT = Op.getSimpleValueType(); - SDLoc DL(Op); - SDValue VLENB = DAG.getNode(RISCVISD::READ_VLENB, DL, VT); - // We define our scalable vector types for lmul=1 to use a 32 bit known - // minimum size. e.g. . VLENB is in bytes so we calculate - // vscale as VLENB / 8. - static_assert(RISCV::RVVBitsPerBlock == 32, "Unexpected bits per block!"); - if (Subtarget.getRealMinVLen() < RISCV::RVVBitsPerBlock) - report_fatal_error("Support for VLEN==32 is incomplete."); - // We assume VLENB is a multiple of 8. We manually choose the best shift - // here because SimplifyDemandedBits isn't always able to simplify it. - uint64_t Val = Op.getConstantOperandVal(0); - if (isPowerOf2_64(Val)) { - uint64_t Log2 = Log2_64(Val); - if (Log2 < 3) - return DAG.getNode(ISD::SRL, DL, VT, VLENB, - DAG.getConstant(3 - Log2, DL, VT)); - if (Log2 > 3) - return DAG.getNode(ISD::SHL, DL, VT, VLENB, - DAG.getConstant(Log2 - 3, DL, VT)); - return VLENB; - } - // If the multiplier is a multiple of 8, scale it down to avoid needing - // to shift the VLENB value. - if ((Val % 8) == 0) - return DAG.getNode(ISD::MUL, DL, VT, VLENB, - DAG.getConstant(Val / 8, DL, VT)); - - SDValue VScale = DAG.getNode(ISD::SRL, DL, VT, VLENB, - DAG.getConstant(3, DL, VT)); - return DAG.getNode(ISD::MUL, DL, VT, VScale, Op.getOperand(0)); - } case ISD::FPOWI: { // Custom promote f16 powi with illegal i32 integer type on RV64. Once // promoted this will be legalized into a libcall by LegalizeIntegerTypes. 
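// Illustrative sketch (not part of the patch): the deleted VSCALE lowering in
// this hunk computes C * vscale as C * (VLENB / 8), using only shifts when C
// is a power of two, or scaling C down when it is a multiple of 8; the
// shift-by-3 arithmetic encodes vscale = VLENB / 8. Standalone model with a
// hypothetical helper:
#include <cassert>
#include <cstdint>
#include <initializer_list>

static uint64_t scaleVLENB(uint64_t VLENB, uint64_t C) {
  if (C != 0 && (C & (C - 1)) == 0) { // power of two: shift VLENB directly
    unsigned Log2 = 0;
    while ((C >> Log2) != 1)
      ++Log2;
    if (Log2 < 3)
      return VLENB >> (3 - Log2);
    if (Log2 > 3)
      return VLENB << (Log2 - 3);
    return VLENB;                     // C == 8
  }
  if (C % 8 == 0)
    return VLENB * (C / 8);           // avoid shifting VLENB at all
  return (VLENB >> 3) * C;            // generic: (VLENB / 8) * C
}

int main() {
  const uint64_t VLENB = 16;          // e.g. VLEN = 128 bits
  for (uint64_t C : {1, 2, 4, 8, 16, 24, 5})
    assert(scaleVLENB(VLENB, C) == (VLENB / 8) * C);
  return 0;
}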
@@ -3076,17 +1622,6 @@ SDValue RISCVTargetLowering::LowerOperation(SDValue Op, return lowerSET_ROUNDING(Op, DAG); case ISD::EH_DWARF_CFA: return lowerEH_DWARF_CFA(Op, DAG); - case ISD::VP_FP_EXTEND: - case ISD::VP_FP_ROUND: - return lowerVectorFPExtendOrRoundLike(Op, DAG); - case ISD::VP_FCEIL: - case ISD::VP_FFLOOR: - case ISD::VP_FRINT: - case ISD::VP_FNEARBYINT: - case ISD::VP_FROUND: - case ISD::VP_FROUNDEVEN: - case ISD::VP_FROUNDTOZERO: - return lowerVectorFTRUNC_FCEIL_FFLOOR_FROUND(Op, DAG, Subtarget); } } @@ -3591,263 +2126,6 @@ SDValue RISCVTargetLowering::lowerShiftRightParts(SDValue Op, SelectionDAG &DAG, return DAG.getMergeValues(Parts, DL); } -SDValue -RISCVTargetLowering::lowerVectorFPExtendOrRoundLike(SDValue Op, - SelectionDAG &DAG) const { - bool IsVP = - Op.getOpcode() == ISD::VP_FP_ROUND || Op.getOpcode() == ISD::VP_FP_EXTEND; - bool IsExtend = - Op.getOpcode() == ISD::VP_FP_EXTEND || Op.getOpcode() == ISD::FP_EXTEND; - // RVV can only do truncate fp to types half the size as the source. We - // custom-lower f64->f16 rounds via RVV's round-to-odd float - // conversion instruction. - SDLoc DL(Op); - MVT VT = Op.getSimpleValueType(); - - assert(VT.isVector() && "Unexpected type for vector truncate lowering"); - - SDValue Src = Op.getOperand(0); - MVT SrcVT = Src.getSimpleValueType(); - - bool IsDirectExtend = IsExtend && (VT.getVectorElementType() != MVT::f64 || - SrcVT.getVectorElementType() != MVT::f16); - bool IsDirectTrunc = !IsExtend && (VT.getVectorElementType() != MVT::f16 || - SrcVT.getVectorElementType() != MVT::f64); - - bool IsDirectConv = IsDirectExtend || IsDirectTrunc; - - // Prepare any fixed-length vector operands. - MVT ContainerVT = VT; - SDValue Mask, VL; - if (IsVP) { - Mask = Op.getOperand(1); - VL = Op.getOperand(2); - } - if (VT.isFixedLengthVector()) { - MVT SrcContainerVT = getContainerForFixedLengthVector(SrcVT); - ContainerVT = - SrcContainerVT.changeVectorElementType(VT.getVectorElementType()); - Src = convertToScalableVector(SrcContainerVT, Src, DAG, Subtarget); - if (IsVP) { - MVT MaskVT = getMaskTypeFor(ContainerVT); - Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget); - } - } - - if (!IsVP) - std::tie(Mask, VL) = - getDefaultVLOps(SrcVT, ContainerVT, DL, DAG, Subtarget); - - unsigned ConvOpc = IsExtend ? RISCVISD::FP_EXTEND_VL : RISCVISD::FP_ROUND_VL; - - if (IsDirectConv) { - Src = DAG.getNode(ConvOpc, DL, ContainerVT, Src, Mask, VL); - if (VT.isFixedLengthVector()) - Src = convertFromScalableVector(VT, Src, DAG, Subtarget); - return Src; - } - - unsigned InterConvOpc = - IsExtend ? RISCVISD::FP_EXTEND_VL : RISCVISD::VFNCVT_ROD_VL; - - MVT InterVT = ContainerVT.changeVectorElementType(MVT::f32); - SDValue IntermediateConv = - DAG.getNode(InterConvOpc, DL, InterVT, Src, Mask, VL); - SDValue Result = - DAG.getNode(ConvOpc, DL, ContainerVT, IntermediateConv, Mask, VL); - if (VT.isFixedLengthVector()) - return convertFromScalableVector(VT, Result, DAG, Subtarget); - return Result; -} - -// Some RVV intrinsics may claim that they want an integer operand to be -// promoted or expanded. -static SDValue lowerVectorIntrinsicScalars(SDValue Op, SelectionDAG &DAG, - const RISCVSubtarget &Subtarget) { - assert((Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN || - Op.getOpcode() == ISD::INTRINSIC_W_CHAIN) && - "Unexpected opcode"); - - if (!Subtarget.hasVInstructions()) - return SDValue(); - - bool HasChain = Op.getOpcode() == ISD::INTRINSIC_W_CHAIN; - unsigned IntNo = Op.getConstantOperandVal(HasChain ? 
1 : 0); - SDLoc DL(Op); - - const RISCVVIntrinsicsTable::RISCVVIntrinsicInfo *II = - RISCVVIntrinsicsTable::getRISCVVIntrinsicInfo(IntNo); - if (!II || !II->hasScalarOperand()) - return SDValue(); - - unsigned SplatOp = II->ScalarOperand + 1 + HasChain; - assert(SplatOp < Op.getNumOperands()); - - SmallVector Operands(Op->op_begin(), Op->op_end()); - SDValue &ScalarOp = Operands[SplatOp]; - MVT OpVT = ScalarOp.getSimpleValueType(); - MVT XLenVT = Subtarget.getXLenVT(); - - // If this isn't a scalar, or its type is XLenVT we're done. - if (!OpVT.isScalarInteger() || OpVT == XLenVT) - return SDValue(); - - // Simplest case is that the operand needs to be promoted to XLenVT. - if (OpVT.bitsLT(XLenVT)) { - // If the operand is a constant, sign extend to increase our chances - // of being able to use a .vi instruction. ANY_EXTEND would become a - // a zero extend and the simm5 check in isel would fail. - // FIXME: Should we ignore the upper bits in isel instead? - unsigned ExtOpc = - isa(ScalarOp) ? ISD::SIGN_EXTEND : ISD::ANY_EXTEND; - ScalarOp = DAG.getNode(ExtOpc, DL, XLenVT, ScalarOp); - return DAG.getNode(Op->getOpcode(), DL, Op->getVTList(), Operands); - } - - // Use the previous operand to get the vXi64 VT. The result might be a mask - // VT for compares. Using the previous operand assumes that the previous - // operand will never have a smaller element size than a scalar operand and - // that a widening operation never uses SEW=64. - // NOTE: If this fails the below assert, we can probably just find the - // element count from any operand or result and use it to construct the VT. - assert(II->ScalarOperand > 0 && "Unexpected splat operand!"); - MVT VT = Op.getOperand(SplatOp - 1).getSimpleValueType(); - - // The more complex case is when the scalar is larger than XLenVT. - assert(XLenVT == MVT::i32 && OpVT == MVT::i64 && - VT.getVectorElementType() == MVT::i64 && "Unexpected VTs!"); - - // If this is a sign-extended 32-bit value, we can truncate it and rely on the - // instruction to sign-extend since SEW>XLEN. - if (DAG.ComputeNumSignBits(ScalarOp) > 32) { - ScalarOp = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, ScalarOp); - return DAG.getNode(Op->getOpcode(), DL, Op->getVTList(), Operands); - } - - switch (IntNo) { - case Intrinsic::riscv_vslide1up: - case Intrinsic::riscv_vslide1down: - case Intrinsic::riscv_vslide1up_mask: - case Intrinsic::riscv_vslide1down_mask: { - // We need to special case these when the scalar is larger than XLen. - unsigned NumOps = Op.getNumOperands(); - bool IsMasked = NumOps == 7; - - // Convert the vector source to the equivalent nxvXi32 vector. - MVT I32VT = MVT::getVectorVT(MVT::i32, VT.getVectorElementCount() * 2); - SDValue Vec = DAG.getBitcast(I32VT, Operands[2]); - - SDValue ScalarLo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, ScalarOp, - DAG.getConstant(0, DL, XLenVT)); - SDValue ScalarHi = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, ScalarOp, - DAG.getConstant(1, DL, XLenVT)); - - // Double the VL since we halved SEW. 
- SDValue AVL = getVLOperand(Op); - SDValue I32VL; - - // Optimize for constant AVL - if (isa(AVL)) { - unsigned EltSize = VT.getScalarSizeInBits(); - unsigned MinSize = VT.getSizeInBits().getKnownMinValue(); - - unsigned VectorBitsMax = Subtarget.getRealMaxVLen(); - unsigned MaxVLMAX = - RISCVTargetLowering::computeVLMAX(VectorBitsMax, EltSize, MinSize); - - unsigned VectorBitsMin = Subtarget.getRealMinVLen(); - unsigned MinVLMAX = - RISCVTargetLowering::computeVLMAX(VectorBitsMin, EltSize, MinSize); - - uint64_t AVLInt = cast(AVL)->getZExtValue(); - if (AVLInt <= MinVLMAX) { - I32VL = DAG.getConstant(2 * AVLInt, DL, XLenVT); - } else if (AVLInt >= 2 * MaxVLMAX) { - // Just set vl to VLMAX in this situation - RISCVII::VLMUL Lmul = 1; - SDValue LMUL = DAG.getConstant(Lmul, DL, XLenVT); - unsigned Sew = RISCVVType::encodeSEW(I32VT.getScalarSizeInBits()); - SDValue SEW = DAG.getConstant(Sew, DL, XLenVT); - SDValue SETVLMAX = DAG.getTargetConstant( - Intrinsic::riscv_vsetvlimax_opt, DL, MVT::i32); - I32VL = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, XLenVT, SETVLMAX, SEW, - LMUL); - } else { - // For AVL between (MinVLMAX, 2 * MaxVLMAX), the actual working vl - // is related to the hardware implementation. - // So let the following code handle - } - } - if (!I32VL) { - RISCVII::VLMUL Lmul = 1; - SDValue LMUL = DAG.getConstant(Lmul, DL, XLenVT); - unsigned Sew = RISCVVType::encodeSEW(VT.getScalarSizeInBits()); - SDValue SEW = DAG.getConstant(Sew, DL, XLenVT); - SDValue SETVL = - DAG.getTargetConstant(Intrinsic::riscv_vsetvli_opt, DL, MVT::i32); - // Using vsetvli instruction to get actually used length which related to - // the hardware implementation - SDValue VL = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, XLenVT, SETVL, AVL, - SEW, LMUL); - I32VL = - DAG.getNode(ISD::SHL, DL, XLenVT, VL, DAG.getConstant(1, DL, XLenVT)); - } - - SDValue I32Mask = getAllOnesMask(I32VT, I32VL, DL, DAG); - - // Shift the two scalar parts in using SEW=32 slide1up/slide1down - // instructions. - SDValue Passthru; - if (IsMasked) - Passthru = DAG.getUNDEF(I32VT); - else - Passthru = DAG.getBitcast(I32VT, Operands[1]); - - if (IntNo == Intrinsic::riscv_vslide1up || - IntNo == Intrinsic::riscv_vslide1up_mask) { - Vec = DAG.getNode(RISCVISD::VSLIDE1UP_VL, DL, I32VT, Passthru, Vec, - ScalarHi, I32Mask, I32VL); - Vec = DAG.getNode(RISCVISD::VSLIDE1UP_VL, DL, I32VT, Passthru, Vec, - ScalarLo, I32Mask, I32VL); - } else { - Vec = DAG.getNode(RISCVISD::VSLIDE1DOWN_VL, DL, I32VT, Passthru, Vec, - ScalarLo, I32Mask, I32VL); - Vec = DAG.getNode(RISCVISD::VSLIDE1DOWN_VL, DL, I32VT, Passthru, Vec, - ScalarHi, I32Mask, I32VL); - } - - // Convert back to nxvXi64. - Vec = DAG.getBitcast(VT, Vec); - - if (!IsMasked) - return Vec; - // Apply mask after the operation. - SDValue Mask = Operands[NumOps - 3]; - SDValue MaskedOff = Operands[1]; - // Assume Policy operand is the last operand. - uint64_t Policy = - cast(Operands[NumOps - 1])->getZExtValue(); - // We don't need to select maskedoff if it's undef. - if (MaskedOff.isUndef()) - return Vec; - // TAMU - if (Policy == RISCVII::TAIL_AGNOSTIC) - return DAG.getNode(RISCVISD::VSELECT_VL, DL, VT, Mask, Vec, MaskedOff, - AVL); - // TUMA or TUMU: Currently we always emit tumu policy regardless of tuma. - // It's fine because vmerge does not care mask policy. - return DAG.getNode(RISCVISD::VP_MERGE_VL, DL, VT, Mask, Vec, MaskedOff, - AVL); - } - } - - // We need to convert the scalar to a splat vector. 
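// Illustrative sketch (not part of the patch): the vslide1up/vslide1down
// handling above splits the 64-bit scalar into two 32-bit halves and issues
// two SEW=32 slides with twice the VL. A small simulation showing the two
// views agree (little-endian element layout; hypothetical helper):
#include <cassert>
#include <cstdint>
#include <vector>

// slide1up: every element moves up one lane and Scalar enters at lane 0.
template <typename T>
static std::vector<T> slide1up(std::vector<T> V, T Scalar) {
  V.insert(V.begin(), Scalar);
  V.pop_back();
  return V;
}

int main() {
  std::vector<uint64_t> V64 = {0x1111222233334444ULL, 0x5555666677778888ULL};
  uint64_t Scalar = 0xAAAABBBBCCCCDDDDULL;

  // SEW=64 reference result.
  std::vector<uint64_t> Ref = slide1up(V64, Scalar);

  // SEW=32 view of the same data: {lo0, hi0, lo1, hi1}, with VL doubled.
  std::vector<uint32_t> V32;
  for (uint64_t E : V64) {
    V32.push_back(static_cast<uint32_t>(E));       // low half first
    V32.push_back(static_cast<uint32_t>(E >> 32)); // then high half
  }
  // Slide in the high half first, then the low half, as the lowering does.
  V32 = slide1up(V32, static_cast<uint32_t>(Scalar >> 32));
  V32 = slide1up(V32, static_cast<uint32_t>(Scalar));

  // Reassemble 64-bit elements and compare against the SEW=64 result.
  for (size_t I = 0; I < Ref.size(); ++I) {
    uint64_t E = V32[2 * I] | (static_cast<uint64_t>(V32[2 * I + 1]) << 32);
    assert(E == Ref[I]);
  }
  return 0;
}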
- SDValue VL = getVLOperand(Op); - assert(VL.getValueType() == XLenVT); - ScalarOp = splatSplitI64WithVL(DL, VT, SDValue(), ScalarOp, VL, DAG); - return DAG.getNode(Op->getOpcode(), DL, Op->getVTList(), Operands); -} - SDValue RISCVTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const { unsigned IntNo = Op.getConstantOperandVal(0); @@ -3878,264 +2156,12 @@ SDValue RISCVTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, return DAG.getNode(RISCVISD::VMV_X_S, DL, Op.getValueType(), Op.getOperand(1)); case Intrinsic::riscv_vmv_v_x: - return lowerScalarSplat(Op.getOperand(1), Op.getOperand(2), - Op.getOperand(3), Op.getSimpleValueType(), DL, DAG, - Subtarget); + assert(0 && "sGPR to vGPR move!"); case Intrinsic::riscv_vfmv_v_f: return DAG.getNode(RISCVISD::VFMV_V_F_VL, DL, Op.getValueType(), Op.getOperand(1), Op.getOperand(2), Op.getOperand(3)); - case Intrinsic::riscv_vmv_s_x: { - SDValue Scalar = Op.getOperand(2); - - if (Scalar.getValueType().bitsLE(XLenVT)) { - Scalar = DAG.getNode(ISD::ANY_EXTEND, DL, XLenVT, Scalar); - return DAG.getNode(RISCVISD::VMV_S_X_VL, DL, Op.getValueType(), - Op.getOperand(1), Scalar, Op.getOperand(3)); - } - - assert(Scalar.getValueType() == MVT::i64 && "Unexpected scalar VT!"); - - // This is an i64 value that lives in two scalar registers. We have to - // insert this in a convoluted way. First we build vXi64 splat containing - // the two values that we assemble using some bit math. Next we'll use - // vid.v and vmseq to build a mask with bit 0 set. Then we'll use that mask - // to merge element 0 from our splat into the source vector. - // FIXME: This is probably not the best way to do this, but it is - // consistent with INSERT_VECTOR_ELT lowering so it is a good starting - // point. - // sw lo, (a0) - // sw hi, 4(a0) - // vlse vX, (a0) - // - // vid.v vVid - // vmseq.vx mMask, vVid, 0 - // vmerge.vvm vDest, vSrc, vVal, mMask - MVT VT = Op.getSimpleValueType(); - SDValue Vec = Op.getOperand(1); - SDValue VL = getVLOperand(Op); - - SDValue SplattedVal = splatSplitI64WithVL(DL, VT, SDValue(), Scalar, VL, DAG); - if (Op.getOperand(1).isUndef()) - return SplattedVal; - SDValue SplattedIdx = - DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, DAG.getUNDEF(VT), - DAG.getConstant(0, DL, MVT::i32), VL); - - MVT MaskVT = getMaskTypeFor(VT); - SDValue Mask = getAllOnesMask(VT, VL, DL, DAG); - SDValue VID = DAG.getNode(RISCVISD::VID_VL, DL, VT, Mask, VL); - SDValue SelectCond = - DAG.getNode(RISCVISD::SETCC_VL, DL, MaskVT, - {VID, SplattedIdx, DAG.getCondCode(ISD::SETEQ), - DAG.getUNDEF(MaskVT), Mask, VL}); - return DAG.getNode(RISCVISD::VSELECT_VL, DL, VT, SelectCond, SplattedVal, - Vec, VL); - } - } - - return lowerVectorIntrinsicScalars(Op, DAG, Subtarget); -} - -SDValue RISCVTargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op, - SelectionDAG &DAG) const { - unsigned IntNo = Op.getConstantOperandVal(1); - switch (IntNo) { - default: - break; - case Intrinsic::riscv_masked_strided_load: { - SDLoc DL(Op); - MVT XLenVT = Subtarget.getXLenVT(); - - // If the mask is known to be all ones, optimize to an unmasked intrinsic; - // the selection of the masked intrinsics doesn't do this for us. 
- SDValue Mask = Op.getOperand(5); - bool IsUnmasked = ISD::isConstantSplatVectorAllOnes(Mask.getNode()); - - MVT VT = Op->getSimpleValueType(0); - MVT ContainerVT = VT; - if (VT.isFixedLengthVector()) - ContainerVT = getContainerForFixedLengthVector(VT); - - SDValue PassThru = Op.getOperand(2); - if (!IsUnmasked) { - MVT MaskVT = getMaskTypeFor(ContainerVT); - if (VT.isFixedLengthVector()) { - Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget); - PassThru = convertToScalableVector(ContainerVT, PassThru, DAG, Subtarget); - } - } - - auto *Load = cast(Op); - SDValue VL = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).second; - SDValue Ptr = Op.getOperand(3); - SDValue Stride = Op.getOperand(4); - SDValue Result, Chain; - - // TODO: We restrict this to unmasked loads currently in consideration of - // the complexity of hanlding all falses masks. - if (IsUnmasked && isNullConstant(Stride)) { - MVT ScalarVT = ContainerVT.getVectorElementType(); - SDValue ScalarLoad = - DAG.getExtLoad(ISD::ZEXTLOAD, DL, XLenVT, Load->getChain(), Ptr, - ScalarVT, Load->getMemOperand()); - Chain = ScalarLoad.getValue(1); - Result = lowerScalarSplat(SDValue(), ScalarLoad, VL, ContainerVT, DL, DAG, - Subtarget); - } else { - SDValue IntID = DAG.getTargetConstant( - IsUnmasked ? Intrinsic::riscv_vlse : Intrinsic::riscv_vlse_mask, DL, - XLenVT); - - SmallVector Ops{Load->getChain(), IntID}; - if (IsUnmasked) - Ops.push_back(DAG.getUNDEF(ContainerVT)); - else - Ops.push_back(PassThru); - Ops.push_back(Ptr); - Ops.push_back(Stride); - if (!IsUnmasked) - Ops.push_back(Mask); - Ops.push_back(VL); - if (!IsUnmasked) { - SDValue Policy = - DAG.getTargetConstant(RISCVII::TAIL_AGNOSTIC, DL, XLenVT); - Ops.push_back(Policy); - } - - SDVTList VTs = DAG.getVTList({ContainerVT, MVT::Other}); - Result = - DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops, - Load->getMemoryVT(), Load->getMemOperand()); - Chain = Result.getValue(1); - } - if (VT.isFixedLengthVector()) - Result = convertFromScalableVector(VT, Result, DAG, Subtarget); - return DAG.getMergeValues({Result, Chain}, DL); - } - case Intrinsic::riscv_seg2_load: - case Intrinsic::riscv_seg3_load: - case Intrinsic::riscv_seg4_load: - case Intrinsic::riscv_seg5_load: - case Intrinsic::riscv_seg6_load: - case Intrinsic::riscv_seg7_load: - case Intrinsic::riscv_seg8_load: { - SDLoc DL(Op); - static const Intrinsic::ID VlsegInts[7] = { - Intrinsic::riscv_vlseg2, Intrinsic::riscv_vlseg3, - Intrinsic::riscv_vlseg4, Intrinsic::riscv_vlseg5, - Intrinsic::riscv_vlseg6, Intrinsic::riscv_vlseg7, - Intrinsic::riscv_vlseg8}; - unsigned NF = Op->getNumValues() - 1; - assert(NF >= 2 && NF <= 8 && "Unexpected seg number"); - MVT XLenVT = Subtarget.getXLenVT(); - MVT VT = Op->getSimpleValueType(0); - MVT ContainerVT = getContainerForFixedLengthVector(VT); - - SDValue VL = getVLOp(VT.getVectorNumElements(), DL, DAG, Subtarget); - SDValue IntID = DAG.getTargetConstant(VlsegInts[NF - 2], DL, XLenVT); - auto *Load = cast(Op); - SmallVector ContainerVTs(NF, ContainerVT); - ContainerVTs.push_back(MVT::Other); - SDVTList VTs = DAG.getVTList(ContainerVTs); - SmallVector Ops = {Load->getChain(), IntID}; - Ops.insert(Ops.end(), NF, DAG.getUNDEF(ContainerVT)); - Ops.push_back(Op.getOperand(2)); - Ops.push_back(VL); - SDValue Result = - DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops, - Load->getMemoryVT(), Load->getMemOperand()); - SmallVector Results; - for (unsigned int RetIdx = 0; RetIdx < NF; RetIdx++) - Results.push_back(convertFromScalableVector(VT, 
Result.getValue(RetIdx), - DAG, Subtarget)); - Results.push_back(Result.getValue(NF)); - return DAG.getMergeValues(Results, DL); - } - } - - return lowerVectorIntrinsicScalars(Op, DAG, Subtarget); -} - -SDValue RISCVTargetLowering::LowerINTRINSIC_VOID(SDValue Op, - SelectionDAG &DAG) const { - unsigned IntNo = Op.getConstantOperandVal(1); - switch (IntNo) { - default: - break; - case Intrinsic::riscv_masked_strided_store: { - SDLoc DL(Op); - MVT XLenVT = Subtarget.getXLenVT(); - - // If the mask is known to be all ones, optimize to an unmasked intrinsic; - // the selection of the masked intrinsics doesn't do this for us. - SDValue Mask = Op.getOperand(5); - bool IsUnmasked = ISD::isConstantSplatVectorAllOnes(Mask.getNode()); - - SDValue Val = Op.getOperand(2); - MVT VT = Val.getSimpleValueType(); - MVT ContainerVT = VT; - if (VT.isFixedLengthVector()) { - ContainerVT = getContainerForFixedLengthVector(VT); - Val = convertToScalableVector(ContainerVT, Val, DAG, Subtarget); - } - if (!IsUnmasked) { - MVT MaskVT = getMaskTypeFor(ContainerVT); - if (VT.isFixedLengthVector()) - Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget); - } - - SDValue VL = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).second; - - SDValue IntID = DAG.getTargetConstant( - IsUnmasked ? Intrinsic::riscv_vsse : Intrinsic::riscv_vsse_mask, DL, - XLenVT); - - auto *Store = cast(Op); - SmallVector Ops{Store->getChain(), IntID}; - Ops.push_back(Val); - Ops.push_back(Op.getOperand(3)); // Ptr - Ops.push_back(Op.getOperand(4)); // Stride - if (!IsUnmasked) - Ops.push_back(Mask); - Ops.push_back(VL); - - return DAG.getMemIntrinsicNode(ISD::INTRINSIC_VOID, DL, Store->getVTList(), - Ops, Store->getMemoryVT(), - Store->getMemOperand()); - } - } - - return SDValue(); -} - - -// Given a reduction op, this function returns the matching reduction opcode, -// the vector SDValue and the scalar SDValue required to lower this to a -// RISCVISD node. -static std::tuple -getRVVFPReductionOpAndOperands(SDValue Op, SelectionDAG &DAG, EVT EltVT) { - SDLoc DL(Op); - auto Flags = Op->getFlags(); - unsigned Opcode = Op.getOpcode(); - unsigned BaseOpcode = ISD::getVecReduceBaseOpcode(Opcode); - switch (Opcode) { - default: - llvm_unreachable("Unhandled reduction"); - case ISD::VECREDUCE_FADD: { - // Use positive zero if we can. It is cheaper to materialize. - SDValue Zero = - DAG.getConstantFP(Flags.hasNoSignedZeros() ? 
0.0 : -0.0, DL, EltVT); - return std::make_tuple(RISCVISD::VECREDUCE_FADD_VL, Op.getOperand(0), Zero); - } - case ISD::VECREDUCE_SEQ_FADD: - return std::make_tuple(RISCVISD::VECREDUCE_SEQ_FADD_VL, Op.getOperand(1), - Op.getOperand(0)); - case ISD::VECREDUCE_FMIN: - return std::make_tuple(RISCVISD::VECREDUCE_FMIN_VL, Op.getOperand(0), - DAG.getNeutralElement(BaseOpcode, DL, EltVT, Flags)); - case ISD::VECREDUCE_FMAX: - return std::make_tuple(RISCVISD::VECREDUCE_FMAX_VL, Op.getOperand(0), - DAG.getNeutralElement(BaseOpcode, DL, EltVT, Flags)); + case Intrinsic::riscv_vmv_s_x: + assert(0 && "vGPR to sGPR move!"); } } @@ -6665,92 +4691,6 @@ SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N, return DAG.getNode(ISD::FCOPYSIGN, DL, VT, N->getOperand(0), DAG.getNode(ISD::FNEG, DL, VT, NewFPExtRound)); } - case ISD::MGATHER: - case ISD::MSCATTER: - case ISD::VP_GATHER: - case ISD::VP_SCATTER: { - if (!DCI.isBeforeLegalize()) - break; - SDValue Index, ScaleOp; - bool IsIndexSigned = false; - if (const auto *VPGSN = dyn_cast(N)) { - Index = VPGSN->getIndex(); - ScaleOp = VPGSN->getScale(); - IsIndexSigned = VPGSN->isIndexSigned(); - assert(!VPGSN->isIndexScaled() && - "Scaled gather/scatter should not be formed"); - } else { - const auto *MGSN = cast(N); - Index = MGSN->getIndex(); - ScaleOp = MGSN->getScale(); - IsIndexSigned = MGSN->isIndexSigned(); - assert(!MGSN->isIndexScaled() && - "Scaled gather/scatter should not be formed"); - - } - EVT IndexVT = Index.getValueType(); - MVT XLenVT = Subtarget.getXLenVT(); - // RISCV indexed loads only support the "unsigned unscaled" addressing - // mode, so anything else must be manually legalized. - bool NeedsIdxLegalization = - (IsIndexSigned && IndexVT.getVectorElementType().bitsLT(XLenVT)); - if (!NeedsIdxLegalization) - break; - - SDLoc DL(N); - - // Any index legalization should first promote to XLenVT, so we don't lose - // bits when scaling. This may create an illegal index type so we let - // LLVM's legalization take care of the splitting. - // FIXME: LLVM can't split VP_GATHER or VP_SCATTER yet. - if (IndexVT.getVectorElementType().bitsLT(XLenVT)) { - IndexVT = IndexVT.changeVectorElementType(XLenVT); - Index = DAG.getNode(IsIndexSigned ? 
ISD::SIGN_EXTEND : ISD::ZERO_EXTEND, - DL, IndexVT, Index); - } - - ISD::MemIndexType NewIndexTy = ISD::UNSIGNED_SCALED; - if (const auto *VPGN = dyn_cast(N)) - return DAG.getGatherVP(N->getVTList(), VPGN->getMemoryVT(), DL, - {VPGN->getChain(), VPGN->getBasePtr(), Index, - ScaleOp, VPGN->getMask(), - VPGN->getVectorLength()}, - VPGN->getMemOperand(), NewIndexTy); - if (const auto *VPSN = dyn_cast(N)) - return DAG.getScatterVP(N->getVTList(), VPSN->getMemoryVT(), DL, - {VPSN->getChain(), VPSN->getValue(), - VPSN->getBasePtr(), Index, ScaleOp, - VPSN->getMask(), VPSN->getVectorLength()}, - VPSN->getMemOperand(), NewIndexTy); - if (const auto *MGN = dyn_cast(N)) - return DAG.getMaskedGather( - N->getVTList(), MGN->getMemoryVT(), DL, - {MGN->getChain(), MGN->getPassThru(), MGN->getMask(), - MGN->getBasePtr(), Index, ScaleOp}, - MGN->getMemOperand(), NewIndexTy, MGN->getExtensionType()); - const auto *MSN = cast(N); - return DAG.getMaskedScatter( - N->getVTList(), MSN->getMemoryVT(), DL, - {MSN->getChain(), MSN->getValue(), MSN->getMask(), MSN->getBasePtr(), - Index, ScaleOp}, - MSN->getMemOperand(), NewIndexTy, MSN->isTruncatingStore()); - } - case RISCVISD::SRA_VL: - case RISCVISD::SRL_VL: - case RISCVISD::SHL_VL: { - SDValue ShAmt = N->getOperand(1); - if (ShAmt.getOpcode() == RISCVISD::SPLAT_VECTOR_SPLIT_I64_VL) { - // We don't need the upper 32 bits of a 64-bit element for a shift amount. - SDLoc DL(N); - SDValue VL = N->getOperand(3); - EVT VT = N->getValueType(0); - ShAmt = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, DAG.getUNDEF(VT), - ShAmt.getOperand(1), VL); - return DAG.getNode(N->getOpcode(), DL, VT, N->getOperand(0), ShAmt, - N->getOperand(2), N->getOperand(3), N->getOperand(4)); - } - break; - } case ISD::SRA: if (SDValue V = performSRACombine(N, DAG, Subtarget)) return V; @@ -6811,77 +4751,6 @@ SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N, return DAG.getNode(NewOpcode, SDLoc(N), N->getValueType(0), A, B, C, Mask, VL); } - case ISD::STORE: { - auto *Store = cast(N); - SDValue Val = Store->getValue(); - // Combine store of vmv.x.s to vse with VL of 1. - // FIXME: Support FP. - if (Val.getOpcode() == RISCVISD::VMV_X_S) { - SDValue Src = Val.getOperand(0); - MVT VecVT = Src.getSimpleValueType(); - EVT MemVT = Store->getMemoryVT(); - // The memory VT and the element type must match. - if (MemVT == VecVT.getVectorElementType()) { - SDLoc DL(N); - MVT MaskVT = getMaskTypeFor(VecVT); - return DAG.getStoreVP( - Store->getChain(), DL, Src, Store->getBasePtr(), Store->getOffset(), - DAG.getConstant(1, DL, MaskVT), - DAG.getConstant(1, DL, Subtarget.getXLenVT()), MemVT, - Store->getMemOperand(), Store->getAddressingMode(), - Store->isTruncatingStore(), /*IsCompress*/ false); - } - } - - break; - } - case ISD::SPLAT_VECTOR: { - EVT VT = N->getValueType(0); - // Only perform this combine on legal MVT types. - if (!isTypeLegal(VT)) - break; - if (auto Gather = matchSplatAsGather(N->getOperand(0), VT.getSimpleVT(), N, - DAG, Subtarget)) - return Gather; - break; - } - case RISCVISD::VMV_V_X_VL: { - // Tail agnostic VMV.V.X only demands the vector element bitwidth from the - // scalar input. 
- unsigned ScalarSize = N->getOperand(1).getValueSizeInBits(); - unsigned EltWidth = N->getValueType(0).getScalarSizeInBits(); - if (ScalarSize > EltWidth && N->getOperand(0).isUndef()) - if (SimplifyDemandedLowBitsHelper(1, EltWidth)) - return SDValue(N, 0); - - break; - } - case ISD::INTRINSIC_WO_CHAIN: { - unsigned IntNo = N->getConstantOperandVal(0); - switch (IntNo) { - // By default we do not combine any intrinsic. - default: - return SDValue(); - case Intrinsic::riscv_vcpop: - case Intrinsic::riscv_vcpop_mask: - case Intrinsic::riscv_vfirst: - case Intrinsic::riscv_vfirst_mask: { - SDValue VL = N->getOperand(2); - if (IntNo == Intrinsic::riscv_vcpop_mask || - IntNo == Intrinsic::riscv_vfirst_mask) - VL = N->getOperand(3); - if (!isNullConstant(VL)) - return SDValue(); - // If VL is 0, vcpop -> li 0, vfirst -> li -1. - SDLoc DL(N); - EVT VT = N->getValueType(0); - if (IntNo == Intrinsic::riscv_vfirst || - IntNo == Intrinsic::riscv_vfirst_mask) - return DAG.getConstant(-1, DL, VT); - return DAG.getConstant(0, DL, VT); - } - } - } case ISD::BITCAST: { assert(Subtarget.useRVVForFixedLengthVectors()); SDValue N0 = N->getOperand(0); diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.h b/llvm/lib/Target/RISCV/RISCVISelLowering.h index 345668247a10..7f0f0214b2bb 100644 --- a/llvm/lib/Target/RISCV/RISCVISelLowering.h +++ b/llvm/lib/Target/RISCV/RISCVISelLowering.h @@ -638,19 +638,13 @@ private: SDValue lowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const; SDValue lowerShiftLeftParts(SDValue Op, SelectionDAG &DAG) const; SDValue lowerShiftRightParts(SDValue Op, SelectionDAG &DAG, bool IsSRA) const; - SDValue lowerVectorFPExtendOrRoundLike(SDValue Op, SelectionDAG &DAG) const; SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const; - SDValue LowerINTRINSIC_W_CHAIN(SDValue Op, SelectionDAG &DAG) const; - SDValue LowerINTRINSIC_VOID(SDValue Op, SelectionDAG &DAG) const; SDValue lowerABS(SDValue Op, SelectionDAG &DAG) const; SDValue lowerGET_ROUNDING(SDValue Op, SelectionDAG &DAG) const; SDValue lowerSET_ROUNDING(SDValue Op, SelectionDAG &DAG) const; SDValue lowerEH_DWARF_CFA(SDValue Op, SelectionDAG &DAG) const; - SDValue expandUnalignedRVVLoad(SDValue Op, SelectionDAG &DAG) const; - SDValue expandUnalignedRVVStore(SDValue Op, SelectionDAG &DAG) const; - bool isEligibleForTailCallOptimization( CCState &CCInfo, CallLoweringInfo &CLI, MachineFunction &MF, const SmallVector &ArgLocs) const; @@ -661,18 +655,6 @@ private: const SmallVectorImpl> &Regs, MachineFunction &MF) const; - bool useRVVForFixedLengthVectorVT(MVT VT) const; - - MVT getVPExplicitVectorLengthTy() const override; - - /// RVV code generation for fixed length vectors does not lower all - /// BUILD_VECTORs. This makes BUILD_VECTOR legalisation a source of stores to - /// merge. However, merging them creates a BUILD_VECTOR that is just as - /// illegal as the original, thus leading to an infinite legalisation loop. - /// NOTE: Once BUILD_VECTOR can be custom lowered for all legal vector types, - /// this override can be removed. 
- bool mergeStoresAfterLegalization(EVT VT) const override; - /// Disable normalizing /// select(N0&N1, X, Y) => select(N0, select(N1, X, Y), Y) and /// select(N0|N1, X, Y) => select(N0, select(N1, X, Y, Y)) diff --git a/llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp b/llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp index 043a36efa82f..1d0ed8111d39 100644 --- a/llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp +++ b/llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp @@ -178,25 +178,6 @@ void RISCVRegisterInfo::adjustReg(MachineBasicBlock &MBB, bool KillSrcReg = false; - if (Offset.getScalable()) { - unsigned ScalableAdjOpc = RISCV::ADD; - int64_t ScalableValue = Offset.getScalable(); - if (ScalableValue < 0) { - ScalableValue = -ScalableValue; - ScalableAdjOpc = RISCV::SUB; - } - // Get vlenb and multiply vlen with the number of vector registers. - Register ScratchReg = DestReg; - if (DestReg == SrcReg) - ScratchReg = MRI.createVirtualRegister(&RISCV::GPRRegClass); - TII->getVLENFactoredAmount(MF, MBB, II, DL, ScratchReg, ScalableValue, Flag); - BuildMI(MBB, II, DL, TII->get(ScalableAdjOpc), DestReg) - .addReg(SrcReg).addReg(ScratchReg, RegState::Kill) - .setMIFlag(Flag); - SrcReg = DestReg; - KillSrcReg = true; - } - int64_t Val = Offset.getFixed(); if (DestReg == SrcReg && Val == 0) return; @@ -264,64 +245,35 @@ bool RISCVRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, Register FrameReg; StackOffset Offset = getFrameLowering(MF)->getFrameIndexReference(MF, FrameIndex, FrameReg); - bool IsRVVSpill = RISCV::isRVVSpill(MI); - if (!IsRVVSpill) - Offset += StackOffset::getFixed(MI.getOperand(FIOperandNum + 1).getImm()); - if (Offset.getScalable() && - ST.getRealMinVLen() == ST.getRealMaxVLen()) { - // For an exact VLEN value, scalable offsets become constant and thus - // can be converted entirely into fixed offsets. - int64_t FixedValue = Offset.getFixed(); - int64_t ScalableValue = Offset.getScalable(); - assert(ScalableValue % 8 == 0 && - "Scalable offset is not a multiple of a single vector size."); - int64_t NumOfVReg = ScalableValue / 8; - int64_t VLENB = ST.getRealMinVLen() / 8; - Offset = StackOffset::getFixed(FixedValue + NumOfVReg * VLENB); - } + Offset += StackOffset::getFixed(MI.getOperand(FIOperandNum + 1).getImm()); if (!isInt<32>(Offset.getFixed())) { report_fatal_error( "Frame offsets outside of the signed 32-bit range not supported"); } - if (!IsRVVSpill) { - if (MI.getOpcode() == RISCV::ADDI && !isInt<12>(Offset.getFixed())) { - // We chose to emit the canonical immediate sequence rather than folding - // the offset into the using add under the theory that doing so doesn't - // save dynamic instruction count and some target may fuse the canonical - // 32 bit immediate sequence. We still need to clear the portion of the - // offset encoded in the immediate. - MI.getOperand(FIOperandNum + 1).ChangeToImmediate(0); - } else { - // We can encode an add with 12 bit signed immediate in the immediate - // operand of our user instruction. As a result, the remaining - // offset can by construction, at worst, a LUI and a ADD. 
- int64_t Val = Offset.getFixed(); - int64_t Lo12 = SignExtend64<12>(Val); - MI.getOperand(FIOperandNum + 1).ChangeToImmediate(Lo12); - Offset = StackOffset::get((uint64_t)Val - (uint64_t)Lo12, - Offset.getScalable()); - } + if (MI.getOpcode() == RISCV::ADDI && !isInt<12>(Offset.getFixed())) { + // We chose to emit the canonical immediate sequence rather than folding + // the offset into the using add under the theory that doing so doesn't + // save dynamic instruction count and some target may fuse the canonical + // 32 bit immediate sequence. We still need to clear the portion of the + // offset encoded in the immediate. + MI.getOperand(FIOperandNum + 1).ChangeToImmediate(0); + } else { + // We can encode an add with 12 bit signed immediate in the immediate + // operand of our user instruction. As a result, the remaining + // offset can by construction, at worst, a LUI and a ADD. + int64_t Val = Offset.getFixed(); + int64_t Lo12 = SignExtend64<12>(Val); + MI.getOperand(FIOperandNum + 1).ChangeToImmediate(Lo12); + Offset = StackOffset::get((uint64_t)Val - (uint64_t)Lo12, + Offset.getScalable()); } - if (Offset.getScalable() || Offset.getFixed()) { - Register DestReg; - if (MI.getOpcode() == RISCV::ADDI) - DestReg = MI.getOperand(0).getReg(); - else - DestReg = MRI.createVirtualRegister(&RISCV::GPRRegClass); - adjustReg(*II->getParent(), II, DL, DestReg, FrameReg, Offset, - MachineInstr::NoFlags, std::nullopt); - MI.getOperand(FIOperandNum).ChangeToRegister(DestReg, /*IsDef*/false, - /*IsImp*/false, - /*IsKill*/true); - } else { - MI.getOperand(FIOperandNum).ChangeToRegister(FrameReg, /*IsDef*/false, - /*IsImp*/false, - /*IsKill*/false); - } + MI.getOperand(FIOperandNum).ChangeToRegister(FrameReg, /*IsDef*/false, + /*IsImp*/false, + /*IsKill*/false); // If after materializing the adjustment, we have a pointless ADDI, remove it if (MI.getOpcode() == RISCV::ADDI && @@ -331,21 +283,6 @@ bool RISCVRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, return true; } - auto ZvlssegInfo = RISCV::isRVVSpillForZvlsseg(MI.getOpcode()); - if (ZvlssegInfo) { - MachineBasicBlock &MBB = *MI.getParent(); - Register VL = MRI.createVirtualRegister(&RISCV::GPRRegClass); - BuildMI(MBB, II, DL, TII->get(RISCV::PseudoReadVLENB), VL); - uint32_t ShiftAmount = Log2_32(ZvlssegInfo->second); - if (ShiftAmount != 0) - BuildMI(MBB, II, DL, TII->get(RISCV::SLLI), VL) - .addReg(VL) - .addImm(ShiftAmount); - // The last argument of pseudo spilling opcode for zvlsseg is the length of - // one element of zvlsseg types. For example, for vint32m2x2_t, it will be - // the length of vint32m2_t. - MI.getOperand(FIOperandNum + 1).ChangeToRegister(VL, /*isDef=*/false); - } return false; } diff --git a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp index d16625d9a405..ee1ce773adbb 100644 --- a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp +++ b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp @@ -244,323 +244,6 @@ RISCVTTIImpl::getMaskedMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment, return getMemoryOpCost(Opcode, Src, Alignment, AddressSpace, CostKind); } -// Currently, these represent both throughput and codesize costs -// for the respective intrinsics. The costs in this table are simply -// instruction counts with the following adjustments made: -// * One vsetvli is considered free. 
-static const CostTblEntry VectorIntrinsicCostTable[]{ - {Intrinsic::floor, MVT::v2f32, 9}, - {Intrinsic::floor, MVT::v4f32, 9}, - {Intrinsic::floor, MVT::v8f32, 9}, - {Intrinsic::floor, MVT::v16f32, 9}, - {Intrinsic::floor, MVT::nxv1f32, 9}, - {Intrinsic::floor, MVT::nxv2f32, 9}, - {Intrinsic::floor, MVT::nxv4f32, 9}, - {Intrinsic::floor, MVT::nxv8f32, 9}, - {Intrinsic::floor, MVT::nxv16f32, 9}, - {Intrinsic::floor, MVT::v2f64, 9}, - {Intrinsic::floor, MVT::v4f64, 9}, - {Intrinsic::floor, MVT::v8f64, 9}, - {Intrinsic::floor, MVT::v16f64, 9}, - {Intrinsic::floor, MVT::nxv1f64, 9}, - {Intrinsic::floor, MVT::nxv2f64, 9}, - {Intrinsic::floor, MVT::nxv4f64, 9}, - {Intrinsic::floor, MVT::nxv8f64, 9}, - {Intrinsic::ceil, MVT::v2f32, 9}, - {Intrinsic::ceil, MVT::v4f32, 9}, - {Intrinsic::ceil, MVT::v8f32, 9}, - {Intrinsic::ceil, MVT::v16f32, 9}, - {Intrinsic::ceil, MVT::nxv1f32, 9}, - {Intrinsic::ceil, MVT::nxv2f32, 9}, - {Intrinsic::ceil, MVT::nxv4f32, 9}, - {Intrinsic::ceil, MVT::nxv8f32, 9}, - {Intrinsic::ceil, MVT::nxv16f32, 9}, - {Intrinsic::ceil, MVT::v2f64, 9}, - {Intrinsic::ceil, MVT::v4f64, 9}, - {Intrinsic::ceil, MVT::v8f64, 9}, - {Intrinsic::ceil, MVT::v16f64, 9}, - {Intrinsic::ceil, MVT::nxv1f64, 9}, - {Intrinsic::ceil, MVT::nxv2f64, 9}, - {Intrinsic::ceil, MVT::nxv4f64, 9}, - {Intrinsic::ceil, MVT::nxv8f64, 9}, - {Intrinsic::trunc, MVT::v2f32, 7}, - {Intrinsic::trunc, MVT::v4f32, 7}, - {Intrinsic::trunc, MVT::v8f32, 7}, - {Intrinsic::trunc, MVT::v16f32, 7}, - {Intrinsic::trunc, MVT::nxv1f32, 7}, - {Intrinsic::trunc, MVT::nxv2f32, 7}, - {Intrinsic::trunc, MVT::nxv4f32, 7}, - {Intrinsic::trunc, MVT::nxv8f32, 7}, - {Intrinsic::trunc, MVT::nxv16f32, 7}, - {Intrinsic::trunc, MVT::v2f64, 7}, - {Intrinsic::trunc, MVT::v4f64, 7}, - {Intrinsic::trunc, MVT::v8f64, 7}, - {Intrinsic::trunc, MVT::v16f64, 7}, - {Intrinsic::trunc, MVT::nxv1f64, 7}, - {Intrinsic::trunc, MVT::nxv2f64, 7}, - {Intrinsic::trunc, MVT::nxv4f64, 7}, - {Intrinsic::trunc, MVT::nxv8f64, 7}, - {Intrinsic::round, MVT::v2f32, 9}, - {Intrinsic::round, MVT::v4f32, 9}, - {Intrinsic::round, MVT::v8f32, 9}, - {Intrinsic::round, MVT::v16f32, 9}, - {Intrinsic::round, MVT::nxv1f32, 9}, - {Intrinsic::round, MVT::nxv2f32, 9}, - {Intrinsic::round, MVT::nxv4f32, 9}, - {Intrinsic::round, MVT::nxv8f32, 9}, - {Intrinsic::round, MVT::nxv16f32, 9}, - {Intrinsic::round, MVT::v2f64, 9}, - {Intrinsic::round, MVT::v4f64, 9}, - {Intrinsic::round, MVT::v8f64, 9}, - {Intrinsic::round, MVT::v16f64, 9}, - {Intrinsic::round, MVT::nxv1f64, 9}, - {Intrinsic::round, MVT::nxv2f64, 9}, - {Intrinsic::round, MVT::nxv4f64, 9}, - {Intrinsic::round, MVT::nxv8f64, 9}, - {Intrinsic::roundeven, MVT::v2f32, 9}, - {Intrinsic::roundeven, MVT::v4f32, 9}, - {Intrinsic::roundeven, MVT::v8f32, 9}, - {Intrinsic::roundeven, MVT::v16f32, 9}, - {Intrinsic::roundeven, MVT::nxv1f32, 9}, - {Intrinsic::roundeven, MVT::nxv2f32, 9}, - {Intrinsic::roundeven, MVT::nxv4f32, 9}, - {Intrinsic::roundeven, MVT::nxv8f32, 9}, - {Intrinsic::roundeven, MVT::nxv16f32, 9}, - {Intrinsic::roundeven, MVT::v2f64, 9}, - {Intrinsic::roundeven, MVT::v4f64, 9}, - {Intrinsic::roundeven, MVT::v8f64, 9}, - {Intrinsic::roundeven, MVT::v16f64, 9}, - {Intrinsic::roundeven, MVT::nxv1f64, 9}, - {Intrinsic::roundeven, MVT::nxv2f64, 9}, - {Intrinsic::roundeven, MVT::nxv4f64, 9}, - {Intrinsic::roundeven, MVT::nxv8f64, 9}, - {Intrinsic::fabs, MVT::v2f32, 1}, - {Intrinsic::fabs, MVT::v4f32, 1}, - {Intrinsic::fabs, MVT::v8f32, 1}, - {Intrinsic::fabs, MVT::v16f32, 1}, - {Intrinsic::fabs, MVT::nxv1f32, 1}, - 
{Intrinsic::fabs, MVT::nxv2f32, 1}, - {Intrinsic::fabs, MVT::nxv4f32, 1}, - {Intrinsic::fabs, MVT::nxv8f32, 1}, - {Intrinsic::fabs, MVT::nxv16f32, 1}, - {Intrinsic::fabs, MVT::v2f64, 1}, - {Intrinsic::fabs, MVT::v4f64, 1}, - {Intrinsic::fabs, MVT::v8f64, 1}, - {Intrinsic::fabs, MVT::v16f64, 1}, - {Intrinsic::fabs, MVT::nxv1f64, 1}, - {Intrinsic::fabs, MVT::nxv2f64, 1}, - {Intrinsic::fabs, MVT::nxv4f64, 1}, - {Intrinsic::fabs, MVT::nxv8f64, 1}, - {Intrinsic::sqrt, MVT::v2f32, 1}, - {Intrinsic::sqrt, MVT::v4f32, 1}, - {Intrinsic::sqrt, MVT::v8f32, 1}, - {Intrinsic::sqrt, MVT::v16f32, 1}, - {Intrinsic::sqrt, MVT::nxv1f32, 1}, - {Intrinsic::sqrt, MVT::nxv2f32, 1}, - {Intrinsic::sqrt, MVT::nxv4f32, 1}, - {Intrinsic::sqrt, MVT::nxv8f32, 1}, - {Intrinsic::sqrt, MVT::nxv16f32, 1}, - {Intrinsic::sqrt, MVT::v2f64, 1}, - {Intrinsic::sqrt, MVT::v4f64, 1}, - {Intrinsic::sqrt, MVT::v8f64, 1}, - {Intrinsic::sqrt, MVT::v16f64, 1}, - {Intrinsic::sqrt, MVT::nxv1f64, 1}, - {Intrinsic::sqrt, MVT::nxv2f64, 1}, - {Intrinsic::sqrt, MVT::nxv4f64, 1}, - {Intrinsic::sqrt, MVT::nxv8f64, 1}, - {Intrinsic::bswap, MVT::v2i16, 3}, - {Intrinsic::bswap, MVT::v4i16, 3}, - {Intrinsic::bswap, MVT::v8i16, 3}, - {Intrinsic::bswap, MVT::v16i16, 3}, - {Intrinsic::bswap, MVT::nxv1i16, 3}, - {Intrinsic::bswap, MVT::nxv2i16, 3}, - {Intrinsic::bswap, MVT::nxv4i16, 3}, - {Intrinsic::bswap, MVT::nxv8i16, 3}, - {Intrinsic::bswap, MVT::nxv16i16, 3}, - {Intrinsic::bswap, MVT::v2i32, 12}, - {Intrinsic::bswap, MVT::v4i32, 12}, - {Intrinsic::bswap, MVT::v8i32, 12}, - {Intrinsic::bswap, MVT::v16i32, 12}, - {Intrinsic::bswap, MVT::nxv1i32, 12}, - {Intrinsic::bswap, MVT::nxv2i32, 12}, - {Intrinsic::bswap, MVT::nxv4i32, 12}, - {Intrinsic::bswap, MVT::nxv8i32, 12}, - {Intrinsic::bswap, MVT::nxv16i32, 12}, - {Intrinsic::bswap, MVT::v2i64, 31}, - {Intrinsic::bswap, MVT::v4i64, 31}, - {Intrinsic::bswap, MVT::v8i64, 31}, - {Intrinsic::bswap, MVT::v16i64, 31}, - {Intrinsic::bswap, MVT::nxv1i64, 31}, - {Intrinsic::bswap, MVT::nxv2i64, 31}, - {Intrinsic::bswap, MVT::nxv4i64, 31}, - {Intrinsic::bswap, MVT::nxv8i64, 31}, - {Intrinsic::vp_bswap, MVT::v2i16, 3}, - {Intrinsic::vp_bswap, MVT::v4i16, 3}, - {Intrinsic::vp_bswap, MVT::v8i16, 3}, - {Intrinsic::vp_bswap, MVT::v16i16, 3}, - {Intrinsic::vp_bswap, MVT::nxv1i16, 3}, - {Intrinsic::vp_bswap, MVT::nxv2i16, 3}, - {Intrinsic::vp_bswap, MVT::nxv4i16, 3}, - {Intrinsic::vp_bswap, MVT::nxv8i16, 3}, - {Intrinsic::vp_bswap, MVT::nxv16i16, 3}, - {Intrinsic::vp_bswap, MVT::v2i32, 12}, - {Intrinsic::vp_bswap, MVT::v4i32, 12}, - {Intrinsic::vp_bswap, MVT::v8i32, 12}, - {Intrinsic::vp_bswap, MVT::v16i32, 12}, - {Intrinsic::vp_bswap, MVT::nxv1i32, 12}, - {Intrinsic::vp_bswap, MVT::nxv2i32, 12}, - {Intrinsic::vp_bswap, MVT::nxv4i32, 12}, - {Intrinsic::vp_bswap, MVT::nxv8i32, 12}, - {Intrinsic::vp_bswap, MVT::nxv16i32, 12}, - {Intrinsic::vp_bswap, MVT::v2i64, 31}, - {Intrinsic::vp_bswap, MVT::v4i64, 31}, - {Intrinsic::vp_bswap, MVT::v8i64, 31}, - {Intrinsic::vp_bswap, MVT::v16i64, 31}, - {Intrinsic::vp_bswap, MVT::nxv1i64, 31}, - {Intrinsic::vp_bswap, MVT::nxv2i64, 31}, - {Intrinsic::vp_bswap, MVT::nxv4i64, 31}, - {Intrinsic::vp_bswap, MVT::nxv8i64, 31}, - {Intrinsic::bitreverse, MVT::v2i8, 17}, - {Intrinsic::bitreverse, MVT::v4i8, 17}, - {Intrinsic::bitreverse, MVT::v8i8, 17}, - {Intrinsic::bitreverse, MVT::v16i8, 17}, - {Intrinsic::bitreverse, MVT::nxv1i8, 17}, - {Intrinsic::bitreverse, MVT::nxv2i8, 17}, - {Intrinsic::bitreverse, MVT::nxv4i8, 17}, - {Intrinsic::bitreverse, MVT::nxv8i8, 17}, - {Intrinsic::bitreverse, 
MVT::nxv16i8, 17}, - {Intrinsic::bitreverse, MVT::v2i16, 24}, - {Intrinsic::bitreverse, MVT::v4i16, 24}, - {Intrinsic::bitreverse, MVT::v8i16, 24}, - {Intrinsic::bitreverse, MVT::v16i16, 24}, - {Intrinsic::bitreverse, MVT::nxv1i16, 24}, - {Intrinsic::bitreverse, MVT::nxv2i16, 24}, - {Intrinsic::bitreverse, MVT::nxv4i16, 24}, - {Intrinsic::bitreverse, MVT::nxv8i16, 24}, - {Intrinsic::bitreverse, MVT::nxv16i16, 24}, - {Intrinsic::bitreverse, MVT::v2i32, 33}, - {Intrinsic::bitreverse, MVT::v4i32, 33}, - {Intrinsic::bitreverse, MVT::v8i32, 33}, - {Intrinsic::bitreverse, MVT::v16i32, 33}, - {Intrinsic::bitreverse, MVT::nxv1i32, 33}, - {Intrinsic::bitreverse, MVT::nxv2i32, 33}, - {Intrinsic::bitreverse, MVT::nxv4i32, 33}, - {Intrinsic::bitreverse, MVT::nxv8i32, 33}, - {Intrinsic::bitreverse, MVT::nxv16i32, 33}, - {Intrinsic::bitreverse, MVT::v2i64, 52}, - {Intrinsic::bitreverse, MVT::v4i64, 52}, - {Intrinsic::bitreverse, MVT::v8i64, 52}, - {Intrinsic::bitreverse, MVT::v16i64, 52}, - {Intrinsic::bitreverse, MVT::nxv1i64, 52}, - {Intrinsic::bitreverse, MVT::nxv2i64, 52}, - {Intrinsic::bitreverse, MVT::nxv4i64, 52}, - {Intrinsic::bitreverse, MVT::nxv8i64, 52}, - {Intrinsic::ctpop, MVT::v2i8, 12}, - {Intrinsic::ctpop, MVT::v4i8, 12}, - {Intrinsic::ctpop, MVT::v8i8, 12}, - {Intrinsic::ctpop, MVT::v16i8, 12}, - {Intrinsic::ctpop, MVT::nxv1i8, 12}, - {Intrinsic::ctpop, MVT::nxv2i8, 12}, - {Intrinsic::ctpop, MVT::nxv4i8, 12}, - {Intrinsic::ctpop, MVT::nxv8i8, 12}, - {Intrinsic::ctpop, MVT::nxv16i8, 12}, - {Intrinsic::ctpop, MVT::v2i16, 19}, - {Intrinsic::ctpop, MVT::v4i16, 19}, - {Intrinsic::ctpop, MVT::v8i16, 19}, - {Intrinsic::ctpop, MVT::v16i16, 19}, - {Intrinsic::ctpop, MVT::nxv1i16, 19}, - {Intrinsic::ctpop, MVT::nxv2i16, 19}, - {Intrinsic::ctpop, MVT::nxv4i16, 19}, - {Intrinsic::ctpop, MVT::nxv8i16, 19}, - {Intrinsic::ctpop, MVT::nxv16i16, 19}, - {Intrinsic::ctpop, MVT::v2i32, 20}, - {Intrinsic::ctpop, MVT::v4i32, 20}, - {Intrinsic::ctpop, MVT::v8i32, 20}, - {Intrinsic::ctpop, MVT::v16i32, 20}, - {Intrinsic::ctpop, MVT::nxv1i32, 20}, - {Intrinsic::ctpop, MVT::nxv2i32, 20}, - {Intrinsic::ctpop, MVT::nxv4i32, 20}, - {Intrinsic::ctpop, MVT::nxv8i32, 20}, - {Intrinsic::ctpop, MVT::nxv16i32, 20}, - {Intrinsic::ctpop, MVT::v2i64, 21}, - {Intrinsic::ctpop, MVT::v4i64, 21}, - {Intrinsic::ctpop, MVT::v8i64, 21}, - {Intrinsic::ctpop, MVT::v16i64, 21}, - {Intrinsic::ctpop, MVT::nxv1i64, 21}, - {Intrinsic::ctpop, MVT::nxv2i64, 21}, - {Intrinsic::ctpop, MVT::nxv4i64, 21}, - {Intrinsic::ctpop, MVT::nxv8i64, 21}, -}; - -InstructionCost -RISCVTTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA, - TTI::TargetCostKind CostKind) { - auto *RetTy = ICA.getReturnType(); - switch (ICA.getID()) { - case Intrinsic::ceil: - case Intrinsic::floor: - case Intrinsic::trunc: - case Intrinsic::rint: - case Intrinsic::round: - case Intrinsic::roundeven: { - // These all use the same code. 
- auto LT = getTypeLegalizationCost(RetTy); - if (!LT.second.isVector() && TLI->isOperationCustom(ISD::FCEIL, LT.second)) - return LT.first * 8; - break; - } - case Intrinsic::umin: - case Intrinsic::umax: - case Intrinsic::smin: - case Intrinsic::smax: { - auto LT = getTypeLegalizationCost(RetTy); - if ((ST->hasVInstructions() && LT.second.isVector()) || - (LT.second.isScalarInteger() && ST->hasStdExtZbb())) - return LT.first; - break; - } - case Intrinsic::sadd_sat: - case Intrinsic::ssub_sat: - case Intrinsic::uadd_sat: - case Intrinsic::usub_sat: { - auto LT = getTypeLegalizationCost(RetTy); - if (ST->hasVInstructions() && LT.second.isVector()) - return LT.first; - break; - } - // TODO: add more intrinsic - case Intrinsic::experimental_stepvector: { - unsigned Cost = 1; // vid - auto LT = getTypeLegalizationCost(RetTy); - return Cost + (LT.first - 1); - } - case Intrinsic::vp_rint: { - // RISC-V target uses at least 5 instructions to lower rounding intrinsics. - unsigned Cost = 5; - auto LT = getTypeLegalizationCost(RetTy); - if (TLI->isOperationCustom(ISD::VP_FRINT, LT.second)) - return Cost * LT.first; - break; - } - case Intrinsic::vp_nearbyint: { - // More one read and one write for fflags than vp_rint. - unsigned Cost = 7; - auto LT = getTypeLegalizationCost(RetTy); - if (TLI->isOperationCustom(ISD::VP_FRINT, LT.second)) - return Cost * LT.first; - break; - } - } - - if (ST->hasVInstructions() && RetTy->isVectorTy()) { - auto LT = getTypeLegalizationCost(RetTy); - if (const auto *Entry = CostTableLookup(VectorIntrinsicCostTable, - ICA.getID(), LT.second)) - return LT.first * Entry->Cost; - } - - return BaseT::getIntrinsicInstrCost(ICA, CostKind); -} InstructionCost RISCVTTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src, diff --git a/llvm/utils/gn/secondary/llvm/lib/Target/RISCV/BUILD.gn b/llvm/utils/gn/secondary/llvm/lib/Target/RISCV/BUILD.gn index ebb9da1a6173..de6cec3ac037 100644 --- a/llvm/utils/gn/secondary/llvm/lib/Target/RISCV/BUILD.gn +++ b/llvm/utils/gn/secondary/llvm/lib/Target/RISCV/BUILD.gn @@ -71,10 +71,8 @@ static_library("LLVMRISCVCodeGen") { "RISCVExpandAtomicPseudoInsts.cpp", "RISCVExpandPseudoInsts.cpp", "RISCVFrameLowering.cpp", - "RISCVGatherScatterLowering.cpp", "RISCVISelDAGToDAG.cpp", "RISCVISelLowering.cpp", - "RISCVInsertVSETVLI.cpp", "RISCVInstrInfo.cpp", "RISCVMCInstLower.cpp", "RISCVMachineFunctionInfo.cpp",
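
Note on the removed MGATHER/MSCATTER/VP_GATHER/VP_SCATTER combine in RISCVISelLowering.cpp above: that code promoted a signed index type narrower than XLEN to XLenVT via SIGN_EXTEND because the RISC-V indexed addressing mode is unsigned and unscaled. A minimal standalone sketch of why the promotion matters, with made-up Base and NarrowIndex values (this is an illustration of the address arithmetic, not the LLVM lowering itself):

#include <cstdint>
#include <cstdio>

int main() {
  uint64_t Base = 0x1000;
  int16_t NarrowIndex = -4; // signed index narrower than XLEN (here XLEN = 64)

  // What the unsigned, unscaled hardware mode would compute if the narrow
  // index were merely zero-extended: 0x1000 + 0xFFFC.
  uint64_t ZExtAddr = Base + (uint64_t)(uint16_t)NarrowIndex;

  // After sign-extension to XLenVT (what the removed combine inserted), the
  // unsigned add still yields the intended address via two's-complement wrap:
  uint64_t SExtAddr = Base + (uint64_t)(int64_t)NarrowIndex; // 0x1000 - 4

  std::printf("zero-extended: 0x%llx, sign-extended: 0x%llx\n",
              (unsigned long long)ZExtAddr, (unsigned long long)SExtAddr);
  return 0;
}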
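
Note on the removed riscv_vcpop/riscv_vfirst combine in PerformDAGCombine above: with an explicit vector length of zero, vcpop counts no mask bits and vfirst finds no set bit, which is why the removed code folded them to the constants 0 and -1. A plain scalar model of that reasoning (these helper functions and the mask are illustrative stand-ins, not the intrinsics):

#include <cstdint>
#include <cstdio>
#include <vector>

// Models vcpop.m: count set mask bits among the first VL elements.
static int64_t vcpop(const std::vector<bool> &Mask, unsigned VL) {
  int64_t N = 0;
  for (unsigned I = 0; I < VL && I < Mask.size(); ++I)
    N += Mask[I];
  return N;
}

// Models vfirst.m: index of the first set bit among the first VL elements,
// or -1 if none.
static int64_t vfirst(const std::vector<bool> &Mask, unsigned VL) {
  for (unsigned I = 0; I < VL && I < Mask.size(); ++I)
    if (Mask[I])
      return I;
  return -1;
}

int main() {
  std::vector<bool> Mask = {false, true, true, false};
  // With VL == 0 the loops never run, matching the removed fold:
  // vcpop -> li 0, vfirst -> li -1.
  std::printf("vcpop(VL=0)=%lld vfirst(VL=0)=%lld\n",
              (long long)vcpop(Mask, 0), (long long)vfirst(Mask, 0));
  return 0;
}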
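
Note on the retained else-branch of eliminateFrameIndex in the RISCVRegisterInfo.cpp hunk above: a fixed frame offset that does not fit a signed 12-bit immediate is split into a sign-extended low-12-bit part (folded into the user instruction's immediate) and a remainder that is, at worst, a LUI plus an ADD. A minimal standalone sketch of that arithmetic; signExtend12 is a local stand-in for LLVM's SignExtend64<12>, and the offset value is made up:

#include <cstdint>
#include <cstdio>

// Interpret the low 12 bits of V as a signed two's-complement value.
static int64_t signExtend12(uint64_t V) {
  V &= 0xFFF;
  return (V & 0x800) ? (int64_t)(V | ~uint64_t(0xFFF)) : (int64_t)V;
}

int main() {
  // Hypothetical frame offset that does not fit in a signed 12-bit immediate.
  int64_t Val = 0x12345;            // 74565
  int64_t Lo12 = signExtend12(Val); // 0x345: encodable in the instruction
  int64_t Rest = Val - Lo12;        // 0x12000: multiple of 4096, so LUI (+ ADD)
  std::printf("val=%lld lo12=%lld rest=0x%llx\n",
              (long long)Val, (long long)Lo12, (unsigned long long)Rest);
  return 0;
}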
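
Note on the removed VectorIntrinsicCostTable and getIntrinsicInstrCost code in RISCVTargetTransformInfo.cpp above: the table maps an (intrinsic ID, legalized vector type) pair to an instruction-count cost, which is then scaled by the number of legal-sized pieces the type was split into during legalization. A simplified, self-contained sketch of that lookup pattern; the enums, lookupCost, and SplitFactor here are hypothetical stand-ins, not the LLVM cost-table API:

#include <cstdio>
#include <optional>

// Hypothetical stand-ins for llvm::Intrinsic::ID and MVT::SimpleValueType.
enum class IntrinsicID { Floor, Ceil, Ctpop };
enum class SimpleVT { v4f32, v8f32, v4i32 };

struct CostTblEntry {
  IntrinsicID ID;
  SimpleVT VT;
  unsigned Cost; // instruction count, with one vsetvli treated as free
};

static const CostTblEntry VectorIntrinsicCostTable[] = {
    {IntrinsicID::Floor, SimpleVT::v4f32, 9},
    {IntrinsicID::Ceil, SimpleVT::v8f32, 9},
    {IntrinsicID::Ctpop, SimpleVT::v4i32, 20},
};

// Linear lookup, mirroring the role of CostTableLookup in the removed code.
static std::optional<unsigned> lookupCost(IntrinsicID ID, SimpleVT VT) {
  for (const CostTblEntry &E : VectorIntrinsicCostTable)
    if (E.ID == ID && E.VT == VT)
      return E.Cost;
  return std::nullopt;
}

int main() {
  // Plays the part of LT.first: how many legal pieces the type split into.
  unsigned SplitFactor = 2;
  if (auto Cost = lookupCost(IntrinsicID::Floor, SimpleVT::v4f32))
    std::printf("estimated cost: %u\n", SplitFactor * *Cost); // 2 * 9 = 18
  return 0;
}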