[RISCV] Add intrinsics for vcompress instruction

This patch defines vcompress intrinsics and lower to V instructions.

We work with @rogfer01 from BSC to come out this patch.

Authored-by: Roger Ferrer Ibanez <rofirrim@gmail.com>
Co-Authored-by: ShihPo Hung <shihpo.hung@sifive.com>

Differential revision: https://reviews.llvm.org/D93809
This commit is contained in:
ShihPo Hung 2020-12-24 18:56:24 -08:00
parent e3e25cfb44
commit 096b02ebbf
4 changed files with 1564 additions and 0 deletions

View File

@ -189,6 +189,13 @@ let TargetPrefix = "riscv" in {
LLVMPointerType<LLVMMatchType<0>>, llvm_anyvector_ty,
LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, llvm_anyint_ty],
[NoCapture<ArgIndex<1>>, IntrWriteMem]>, RISCVVIntrinsic;
// For destination vector type is the same as first source vector (with mask).
// Input: (maskedoff, vector_in, mask, vl)
class RISCVUnaryAAMask
: Intrinsic<[llvm_anyvector_ty],
[LLVMMatchType<0>, LLVMMatchType<0>,
LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, llvm_anyint_ty],
[IntrNoMem]>, RISCVVIntrinsic;
// For destination vector type is the same as first and second source vector.
// Input: (vector_in, vector_in, vl)
class RISCVBinaryAAANoMask
@ -680,6 +687,8 @@ let TargetPrefix = "riscv" in {
defm vrgather : RISCVBinaryAAX;
def "int_riscv_vcompress_mask" : RISCVUnaryAAMask;
defm vaaddu : RISCVSaturatingBinaryAAX;
defm vaadd : RISCVSaturatingBinaryAAX;
defm vasubu : RISCVSaturatingBinaryAAX;

View File

@ -669,6 +669,27 @@ class VPseudoUnaryMOutMask:
let BaseInstr = !cast<Instruction>(PseudoToVInst<NAME>.VInst);
}
// Mask can be V0~V31
class VPseudoUnaryAnyMask<VReg RetClass,
VReg Op1Class> :
Pseudo<(outs RetClass:$rd),
(ins RetClass:$merge,
Op1Class:$rs2,
VR:$vm, GPR:$vl, ixlenimm:$sew),
[]>,
RISCVVPseudo {
let mayLoad = 0;
let mayStore = 0;
let hasSideEffects = 0;
let usesCustomInserter = 1;
let Constraints = "@earlyclobber $rd, $rd = $merge";
let Uses = [VL, VTYPE];
let VLIndex = 4;
let SEWIndex = 5;
let MergeOpIndex = 1;
let BaseInstr = !cast<Instruction>(PseudoToVInst<NAME>.VInst);
}
class VPseudoBinaryNoMask<VReg RetClass,
VReg Op1Class,
DAGOperand Op2Class,
@ -905,6 +926,13 @@ multiclass VPseudoUnaryV_M {
}
}
multiclass VPseudoUnaryV_V_AnyMask {
foreach m = MxList.m in {
let VLMul = m.value in
def _VM # "_" # m.MX # "_MASK" : VPseudoUnaryAnyMask<m.vrclass, m.vrclass>;
}
}
multiclass VPseudoBinary<VReg RetClass,
VReg Op1Class,
DAGOperand Op2Class,
@ -1311,6 +1339,27 @@ class VPatMaskUnaryMask<string intrinsic_name,
(mti.Mask VR:$rs2),
(mti.Mask V0), (NoX0 GPR:$vl), mti.SEW)>;
class VPatUnaryAnyMask<string intrinsic,
string inst,
string kind,
ValueType result_type,
ValueType op1_type,
ValueType mask_type,
int sew,
LMULInfo vlmul,
VReg result_reg_class,
VReg op1_reg_class> :
Pat<(result_type (!cast<Intrinsic>(intrinsic#"_mask")
(result_type result_reg_class:$merge),
(op1_type op1_reg_class:$rs1),
(mask_type VR:$rs2),
(XLenVT GPR:$vl))),
(!cast<Instruction>(inst#"_"#kind#"_"#vlmul.MX#"_MASK")
(result_type result_reg_class:$merge),
(op1_type op1_reg_class:$rs1),
(mask_type VR:$rs2),
(NoX0 GPR:$vl), sew)>;
class VPatBinaryNoMask<string intrinsic_name,
string inst,
string kind,
@ -1541,6 +1590,16 @@ multiclass VPatUnaryS_M<string intrinsic_name,
}
}
multiclass VPatUnaryV_V_AnyMask<string intrinsic, string instruction,
list<VTypeInfo> vtilist> {
foreach vti = vtilist in {
def : VPatUnaryAnyMask<intrinsic, instruction, "VM",
vti.Vector, vti.Vector, vti.Mask,
vti.SEW, vti.LMul, vti.RegClass,
vti.RegClass>;
}
}
multiclass VPatUnaryM_M<string intrinsic,
string inst>
{
@ -2645,6 +2704,11 @@ let Predicates = [HasStdExtV, HasStdExtF] in {
//===----------------------------------------------------------------------===//
defm PseudoVRGATHER : VPseudoBinaryV_VV_VX_VI<uimm5, "@earlyclobber $rd">;
//===----------------------------------------------------------------------===//
// 17.5. Vector Compress Instruction
//===----------------------------------------------------------------------===//
defm PseudoVCOMPRESS : VPseudoUnaryV_V_AnyMask;
//===----------------------------------------------------------------------===//
// Patterns.
//===----------------------------------------------------------------------===//
@ -3201,5 +3265,16 @@ let Predicates = [HasStdExtV, HasStdExtF] in {
AllFloatVectors, uimm5>;
} // Predicates = [HasStdExtV, HasStdExtF]
//===----------------------------------------------------------------------===//
// 17.5. Vector Compress Instruction
//===----------------------------------------------------------------------===//
let Predicates = [HasStdExtV] in {
defm "" : VPatUnaryV_V_AnyMask<"int_riscv_vcompress", "PseudoVCOMPRESS", AllIntegerVectors>;
} // Predicates = [HasStdExtV]
let Predicates = [HasStdExtV, HasStdExtF] in {
defm "" : VPatUnaryV_V_AnyMask<"int_riscv_vcompress", "PseudoVCOMPRESS", AllFloatVectors>;
} // Predicates = [HasStdExtV, HasStdExtF]
// Include the non-intrinsic ISel patterns
include "RISCVInstrInfoVSDPatterns.td"

View File

@ -0,0 +1,650 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=riscv32 -mattr=+experimental-v,+f,+experimental-zfh -verify-machineinstrs \
; RUN: --riscv-no-aliases < %s | FileCheck %s
declare <vscale x 1 x i8> @llvm.riscv.vcompress.mask.nxv1i8(
<vscale x 1 x i8>,
<vscale x 1 x i8>,
<vscale x 1 x i1>,
i32);
define <vscale x 1 x i8> @intrinsic_vcompress_mask_vm_nxv1i8_nxv1i8(<vscale x 1 x i8> %0, <vscale x 1 x i8> %1, <vscale x 1 x i1> %2, i32 %3) nounwind {
; CHECK-LABEL: intrinsic_vcompress_mask_vm_nxv1i8_nxv1i8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli a0, a0, e8,mf8,tu,mu
; CHECK-NEXT: vcompress.vm v16, v17, v0
; CHECK-NEXT: jalr zero, 0(ra)
entry:
%a = call <vscale x 1 x i8> @llvm.riscv.vcompress.mask.nxv1i8(
<vscale x 1 x i8> %0,
<vscale x 1 x i8> %1,
<vscale x 1 x i1> %2,
i32 %3)
ret <vscale x 1 x i8> %a
}
declare <vscale x 2 x i8> @llvm.riscv.vcompress.mask.nxv2i8(
<vscale x 2 x i8>,
<vscale x 2 x i8>,
<vscale x 2 x i1>,
i32);
define <vscale x 2 x i8> @intrinsic_vcompress_mask_vm_nxv2i8_nxv2i8(<vscale x 2 x i8> %0, <vscale x 2 x i8> %1, <vscale x 2 x i1> %2, i32 %3) nounwind {
; CHECK-LABEL: intrinsic_vcompress_mask_vm_nxv2i8_nxv2i8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli a0, a0, e8,mf4,tu,mu
; CHECK-NEXT: vcompress.vm v16, v17, v0
; CHECK-NEXT: jalr zero, 0(ra)
entry:
%a = call <vscale x 2 x i8> @llvm.riscv.vcompress.mask.nxv2i8(
<vscale x 2 x i8> %0,
<vscale x 2 x i8> %1,
<vscale x 2 x i1> %2,
i32 %3)
ret <vscale x 2 x i8> %a
}
declare <vscale x 4 x i8> @llvm.riscv.vcompress.mask.nxv4i8(
<vscale x 4 x i8>,
<vscale x 4 x i8>,
<vscale x 4 x i1>,
i32);
define <vscale x 4 x i8> @intrinsic_vcompress_mask_vm_nxv4i8_nxv4i8(<vscale x 4 x i8> %0, <vscale x 4 x i8> %1, <vscale x 4 x i1> %2, i32 %3) nounwind {
; CHECK-LABEL: intrinsic_vcompress_mask_vm_nxv4i8_nxv4i8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli a0, a0, e8,mf2,tu,mu
; CHECK-NEXT: vcompress.vm v16, v17, v0
; CHECK-NEXT: jalr zero, 0(ra)
entry:
%a = call <vscale x 4 x i8> @llvm.riscv.vcompress.mask.nxv4i8(
<vscale x 4 x i8> %0,
<vscale x 4 x i8> %1,
<vscale x 4 x i1> %2,
i32 %3)
ret <vscale x 4 x i8> %a
}
declare <vscale x 8 x i8> @llvm.riscv.vcompress.mask.nxv8i8(
<vscale x 8 x i8>,
<vscale x 8 x i8>,
<vscale x 8 x i1>,
i32);
define <vscale x 8 x i8> @intrinsic_vcompress_mask_vm_nxv8i8_nxv8i8(<vscale x 8 x i8> %0, <vscale x 8 x i8> %1, <vscale x 8 x i1> %2, i32 %3) nounwind {
; CHECK-LABEL: intrinsic_vcompress_mask_vm_nxv8i8_nxv8i8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli a0, a0, e8,m1,tu,mu
; CHECK-NEXT: vcompress.vm v16, v17, v0
; CHECK-NEXT: jalr zero, 0(ra)
entry:
%a = call <vscale x 8 x i8> @llvm.riscv.vcompress.mask.nxv8i8(
<vscale x 8 x i8> %0,
<vscale x 8 x i8> %1,
<vscale x 8 x i1> %2,
i32 %3)
ret <vscale x 8 x i8> %a
}
declare <vscale x 16 x i8> @llvm.riscv.vcompress.mask.nxv16i8(
<vscale x 16 x i8>,
<vscale x 16 x i8>,
<vscale x 16 x i1>,
i32);
define <vscale x 16 x i8> @intrinsic_vcompress_mask_vm_nxv16i8_nxv16i8(<vscale x 16 x i8> %0, <vscale x 16 x i8> %1, <vscale x 16 x i1> %2, i32 %3) nounwind {
; CHECK-LABEL: intrinsic_vcompress_mask_vm_nxv16i8_nxv16i8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli a0, a0, e8,m2,tu,mu
; CHECK-NEXT: vcompress.vm v16, v18, v0
; CHECK-NEXT: jalr zero, 0(ra)
entry:
%a = call <vscale x 16 x i8> @llvm.riscv.vcompress.mask.nxv16i8(
<vscale x 16 x i8> %0,
<vscale x 16 x i8> %1,
<vscale x 16 x i1> %2,
i32 %3)
ret <vscale x 16 x i8> %a
}
declare <vscale x 32 x i8> @llvm.riscv.vcompress.mask.nxv32i8(
<vscale x 32 x i8>,
<vscale x 32 x i8>,
<vscale x 32 x i1>,
i32);
define <vscale x 32 x i8> @intrinsic_vcompress_mask_vm_nxv32i8_nxv32i8(<vscale x 32 x i8> %0, <vscale x 32 x i8> %1, <vscale x 32 x i1> %2, i32 %3) nounwind {
; CHECK-LABEL: intrinsic_vcompress_mask_vm_nxv32i8_nxv32i8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli a0, a0, e8,m4,tu,mu
; CHECK-NEXT: vcompress.vm v16, v20, v0
; CHECK-NEXT: jalr zero, 0(ra)
entry:
%a = call <vscale x 32 x i8> @llvm.riscv.vcompress.mask.nxv32i8(
<vscale x 32 x i8> %0,
<vscale x 32 x i8> %1,
<vscale x 32 x i1> %2,
i32 %3)
ret <vscale x 32 x i8> %a
}
declare <vscale x 64 x i8> @llvm.riscv.vcompress.mask.nxv64i8(
<vscale x 64 x i8>,
<vscale x 64 x i8>,
<vscale x 64 x i1>,
i32);
define <vscale x 64 x i8> @intrinsic_vcompress_mask_vm_nxv64i8_nxv64i8(<vscale x 64 x i8> %0, <vscale x 64 x i8> %1, <vscale x 64 x i1> %2, i32 %3) nounwind {
; CHECK-LABEL: intrinsic_vcompress_mask_vm_nxv64i8_nxv64i8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli a2, zero, e8,m8,ta,mu
; CHECK-NEXT: vle8.v v8, (a0)
; CHECK-NEXT: vsetvli a0, a1, e8,m8,tu,mu
; CHECK-NEXT: vcompress.vm v16, v8, v0
; CHECK-NEXT: jalr zero, 0(ra)
entry:
%a = call <vscale x 64 x i8> @llvm.riscv.vcompress.mask.nxv64i8(
<vscale x 64 x i8> %0,
<vscale x 64 x i8> %1,
<vscale x 64 x i1> %2,
i32 %3)
ret <vscale x 64 x i8> %a
}
declare <vscale x 1 x i16> @llvm.riscv.vcompress.mask.nxv1i16(
<vscale x 1 x i16>,
<vscale x 1 x i16>,
<vscale x 1 x i1>,
i32);
define <vscale x 1 x i16> @intrinsic_vcompress_mask_vm_nxv1i16_nxv1i16(<vscale x 1 x i16> %0, <vscale x 1 x i16> %1, <vscale x 1 x i1> %2, i32 %3) nounwind {
; CHECK-LABEL: intrinsic_vcompress_mask_vm_nxv1i16_nxv1i16:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli a0, a0, e16,mf4,tu,mu
; CHECK-NEXT: vcompress.vm v16, v17, v0
; CHECK-NEXT: jalr zero, 0(ra)
entry:
%a = call <vscale x 1 x i16> @llvm.riscv.vcompress.mask.nxv1i16(
<vscale x 1 x i16> %0,
<vscale x 1 x i16> %1,
<vscale x 1 x i1> %2,
i32 %3)
ret <vscale x 1 x i16> %a
}
declare <vscale x 2 x i16> @llvm.riscv.vcompress.mask.nxv2i16(
<vscale x 2 x i16>,
<vscale x 2 x i16>,
<vscale x 2 x i1>,
i32);
define <vscale x 2 x i16> @intrinsic_vcompress_mask_vm_nxv2i16_nxv2i16(<vscale x 2 x i16> %0, <vscale x 2 x i16> %1, <vscale x 2 x i1> %2, i32 %3) nounwind {
; CHECK-LABEL: intrinsic_vcompress_mask_vm_nxv2i16_nxv2i16:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli a0, a0, e16,mf2,tu,mu
; CHECK-NEXT: vcompress.vm v16, v17, v0
; CHECK-NEXT: jalr zero, 0(ra)
entry:
%a = call <vscale x 2 x i16> @llvm.riscv.vcompress.mask.nxv2i16(
<vscale x 2 x i16> %0,
<vscale x 2 x i16> %1,
<vscale x 2 x i1> %2,
i32 %3)
ret <vscale x 2 x i16> %a
}
declare <vscale x 4 x i16> @llvm.riscv.vcompress.mask.nxv4i16(
<vscale x 4 x i16>,
<vscale x 4 x i16>,
<vscale x 4 x i1>,
i32);
define <vscale x 4 x i16> @intrinsic_vcompress_mask_vm_nxv4i16_nxv4i16(<vscale x 4 x i16> %0, <vscale x 4 x i16> %1, <vscale x 4 x i1> %2, i32 %3) nounwind {
; CHECK-LABEL: intrinsic_vcompress_mask_vm_nxv4i16_nxv4i16:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli a0, a0, e16,m1,tu,mu
; CHECK-NEXT: vcompress.vm v16, v17, v0
; CHECK-NEXT: jalr zero, 0(ra)
entry:
%a = call <vscale x 4 x i16> @llvm.riscv.vcompress.mask.nxv4i16(
<vscale x 4 x i16> %0,
<vscale x 4 x i16> %1,
<vscale x 4 x i1> %2,
i32 %3)
ret <vscale x 4 x i16> %a
}
declare <vscale x 8 x i16> @llvm.riscv.vcompress.mask.nxv8i16(
<vscale x 8 x i16>,
<vscale x 8 x i16>,
<vscale x 8 x i1>,
i32);
define <vscale x 8 x i16> @intrinsic_vcompress_mask_vm_nxv8i16_nxv8i16(<vscale x 8 x i16> %0, <vscale x 8 x i16> %1, <vscale x 8 x i1> %2, i32 %3) nounwind {
; CHECK-LABEL: intrinsic_vcompress_mask_vm_nxv8i16_nxv8i16:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli a0, a0, e16,m2,tu,mu
; CHECK-NEXT: vcompress.vm v16, v18, v0
; CHECK-NEXT: jalr zero, 0(ra)
entry:
%a = call <vscale x 8 x i16> @llvm.riscv.vcompress.mask.nxv8i16(
<vscale x 8 x i16> %0,
<vscale x 8 x i16> %1,
<vscale x 8 x i1> %2,
i32 %3)
ret <vscale x 8 x i16> %a
}
declare <vscale x 16 x i16> @llvm.riscv.vcompress.mask.nxv16i16(
<vscale x 16 x i16>,
<vscale x 16 x i16>,
<vscale x 16 x i1>,
i32);
define <vscale x 16 x i16> @intrinsic_vcompress_mask_vm_nxv16i16_nxv16i16(<vscale x 16 x i16> %0, <vscale x 16 x i16> %1, <vscale x 16 x i1> %2, i32 %3) nounwind {
; CHECK-LABEL: intrinsic_vcompress_mask_vm_nxv16i16_nxv16i16:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli a0, a0, e16,m4,tu,mu
; CHECK-NEXT: vcompress.vm v16, v20, v0
; CHECK-NEXT: jalr zero, 0(ra)
entry:
%a = call <vscale x 16 x i16> @llvm.riscv.vcompress.mask.nxv16i16(
<vscale x 16 x i16> %0,
<vscale x 16 x i16> %1,
<vscale x 16 x i1> %2,
i32 %3)
ret <vscale x 16 x i16> %a
}
declare <vscale x 32 x i16> @llvm.riscv.vcompress.mask.nxv32i16(
<vscale x 32 x i16>,
<vscale x 32 x i16>,
<vscale x 32 x i1>,
i32);
define <vscale x 32 x i16> @intrinsic_vcompress_mask_vm_nxv32i16_nxv32i16(<vscale x 32 x i16> %0, <vscale x 32 x i16> %1, <vscale x 32 x i1> %2, i32 %3) nounwind {
; CHECK-LABEL: intrinsic_vcompress_mask_vm_nxv32i16_nxv32i16:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli a2, zero, e16,m8,ta,mu
; CHECK-NEXT: vle16.v v8, (a0)
; CHECK-NEXT: vsetvli a0, a1, e16,m8,tu,mu
; CHECK-NEXT: vcompress.vm v16, v8, v0
; CHECK-NEXT: jalr zero, 0(ra)
entry:
%a = call <vscale x 32 x i16> @llvm.riscv.vcompress.mask.nxv32i16(
<vscale x 32 x i16> %0,
<vscale x 32 x i16> %1,
<vscale x 32 x i1> %2,
i32 %3)
ret <vscale x 32 x i16> %a
}
declare <vscale x 1 x i32> @llvm.riscv.vcompress.mask.nxv1i32(
<vscale x 1 x i32>,
<vscale x 1 x i32>,
<vscale x 1 x i1>,
i32);
define <vscale x 1 x i32> @intrinsic_vcompress_mask_vm_nxv1i32_nxv1i32(<vscale x 1 x i32> %0, <vscale x 1 x i32> %1, <vscale x 1 x i1> %2, i32 %3) nounwind {
; CHECK-LABEL: intrinsic_vcompress_mask_vm_nxv1i32_nxv1i32:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli a0, a0, e32,mf2,tu,mu
; CHECK-NEXT: vcompress.vm v16, v17, v0
; CHECK-NEXT: jalr zero, 0(ra)
entry:
%a = call <vscale x 1 x i32> @llvm.riscv.vcompress.mask.nxv1i32(
<vscale x 1 x i32> %0,
<vscale x 1 x i32> %1,
<vscale x 1 x i1> %2,
i32 %3)
ret <vscale x 1 x i32> %a
}
declare <vscale x 2 x i32> @llvm.riscv.vcompress.mask.nxv2i32(
<vscale x 2 x i32>,
<vscale x 2 x i32>,
<vscale x 2 x i1>,
i32);
define <vscale x 2 x i32> @intrinsic_vcompress_mask_vm_nxv2i32_nxv2i32(<vscale x 2 x i32> %0, <vscale x 2 x i32> %1, <vscale x 2 x i1> %2, i32 %3) nounwind {
; CHECK-LABEL: intrinsic_vcompress_mask_vm_nxv2i32_nxv2i32:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli a0, a0, e32,m1,tu,mu
; CHECK-NEXT: vcompress.vm v16, v17, v0
; CHECK-NEXT: jalr zero, 0(ra)
entry:
%a = call <vscale x 2 x i32> @llvm.riscv.vcompress.mask.nxv2i32(
<vscale x 2 x i32> %0,
<vscale x 2 x i32> %1,
<vscale x 2 x i1> %2,
i32 %3)
ret <vscale x 2 x i32> %a
}
declare <vscale x 4 x i32> @llvm.riscv.vcompress.mask.nxv4i32(
<vscale x 4 x i32>,
<vscale x 4 x i32>,
<vscale x 4 x i1>,
i32);
define <vscale x 4 x i32> @intrinsic_vcompress_mask_vm_nxv4i32_nxv4i32(<vscale x 4 x i32> %0, <vscale x 4 x i32> %1, <vscale x 4 x i1> %2, i32 %3) nounwind {
; CHECK-LABEL: intrinsic_vcompress_mask_vm_nxv4i32_nxv4i32:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli a0, a0, e32,m2,tu,mu
; CHECK-NEXT: vcompress.vm v16, v18, v0
; CHECK-NEXT: jalr zero, 0(ra)
entry:
%a = call <vscale x 4 x i32> @llvm.riscv.vcompress.mask.nxv4i32(
<vscale x 4 x i32> %0,
<vscale x 4 x i32> %1,
<vscale x 4 x i1> %2,
i32 %3)
ret <vscale x 4 x i32> %a
}
declare <vscale x 8 x i32> @llvm.riscv.vcompress.mask.nxv8i32(
<vscale x 8 x i32>,
<vscale x 8 x i32>,
<vscale x 8 x i1>,
i32);
define <vscale x 8 x i32> @intrinsic_vcompress_mask_vm_nxv8i32_nxv8i32(<vscale x 8 x i32> %0, <vscale x 8 x i32> %1, <vscale x 8 x i1> %2, i32 %3) nounwind {
; CHECK-LABEL: intrinsic_vcompress_mask_vm_nxv8i32_nxv8i32:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli a0, a0, e32,m4,tu,mu
; CHECK-NEXT: vcompress.vm v16, v20, v0
; CHECK-NEXT: jalr zero, 0(ra)
entry:
%a = call <vscale x 8 x i32> @llvm.riscv.vcompress.mask.nxv8i32(
<vscale x 8 x i32> %0,
<vscale x 8 x i32> %1,
<vscale x 8 x i1> %2,
i32 %3)
ret <vscale x 8 x i32> %a
}
declare <vscale x 16 x i32> @llvm.riscv.vcompress.mask.nxv16i32(
<vscale x 16 x i32>,
<vscale x 16 x i32>,
<vscale x 16 x i1>,
i32);
define <vscale x 16 x i32> @intrinsic_vcompress_mask_vm_nxv16i32_nxv16i32(<vscale x 16 x i32> %0, <vscale x 16 x i32> %1, <vscale x 16 x i1> %2, i32 %3) nounwind {
; CHECK-LABEL: intrinsic_vcompress_mask_vm_nxv16i32_nxv16i32:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli a2, zero, e32,m8,ta,mu
; CHECK-NEXT: vle32.v v8, (a0)
; CHECK-NEXT: vsetvli a0, a1, e32,m8,tu,mu
; CHECK-NEXT: vcompress.vm v16, v8, v0
; CHECK-NEXT: jalr zero, 0(ra)
entry:
%a = call <vscale x 16 x i32> @llvm.riscv.vcompress.mask.nxv16i32(
<vscale x 16 x i32> %0,
<vscale x 16 x i32> %1,
<vscale x 16 x i1> %2,
i32 %3)
ret <vscale x 16 x i32> %a
}
declare <vscale x 1 x half> @llvm.riscv.vcompress.mask.nxv1f16(
<vscale x 1 x half>,
<vscale x 1 x half>,
<vscale x 1 x i1>,
i32);
define <vscale x 1 x half> @intrinsic_vcompress_mask_vm_nxv1f16_nxv1f16(<vscale x 1 x half> %0, <vscale x 1 x half> %1, <vscale x 1 x i1> %2, i32 %3) nounwind {
; CHECK-LABEL: intrinsic_vcompress_mask_vm_nxv1f16_nxv1f16:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli a0, a0, e16,mf4,tu,mu
; CHECK-NEXT: vcompress.vm v16, v17, v0
; CHECK-NEXT: jalr zero, 0(ra)
entry:
%a = call <vscale x 1 x half> @llvm.riscv.vcompress.mask.nxv1f16(
<vscale x 1 x half> %0,
<vscale x 1 x half> %1,
<vscale x 1 x i1> %2,
i32 %3)
ret <vscale x 1 x half> %a
}
declare <vscale x 2 x half> @llvm.riscv.vcompress.mask.nxv2f16(
<vscale x 2 x half>,
<vscale x 2 x half>,
<vscale x 2 x i1>,
i32);
define <vscale x 2 x half> @intrinsic_vcompress_mask_vm_nxv2f16_nxv2f16(<vscale x 2 x half> %0, <vscale x 2 x half> %1, <vscale x 2 x i1> %2, i32 %3) nounwind {
; CHECK-LABEL: intrinsic_vcompress_mask_vm_nxv2f16_nxv2f16:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli a0, a0, e16,mf2,tu,mu
; CHECK-NEXT: vcompress.vm v16, v17, v0
; CHECK-NEXT: jalr zero, 0(ra)
entry:
%a = call <vscale x 2 x half> @llvm.riscv.vcompress.mask.nxv2f16(
<vscale x 2 x half> %0,
<vscale x 2 x half> %1,
<vscale x 2 x i1> %2,
i32 %3)
ret <vscale x 2 x half> %a
}
declare <vscale x 4 x half> @llvm.riscv.vcompress.mask.nxv4f16(
<vscale x 4 x half>,
<vscale x 4 x half>,
<vscale x 4 x i1>,
i32);
define <vscale x 4 x half> @intrinsic_vcompress_mask_vm_nxv4f16_nxv4f16(<vscale x 4 x half> %0, <vscale x 4 x half> %1, <vscale x 4 x i1> %2, i32 %3) nounwind {
; CHECK-LABEL: intrinsic_vcompress_mask_vm_nxv4f16_nxv4f16:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli a0, a0, e16,m1,tu,mu
; CHECK-NEXT: vcompress.vm v16, v17, v0
; CHECK-NEXT: jalr zero, 0(ra)
entry:
%a = call <vscale x 4 x half> @llvm.riscv.vcompress.mask.nxv4f16(
<vscale x 4 x half> %0,
<vscale x 4 x half> %1,
<vscale x 4 x i1> %2,
i32 %3)
ret <vscale x 4 x half> %a
}
declare <vscale x 8 x half> @llvm.riscv.vcompress.mask.nxv8f16(
<vscale x 8 x half>,
<vscale x 8 x half>,
<vscale x 8 x i1>,
i32);
define <vscale x 8 x half> @intrinsic_vcompress_mask_vm_nxv8f16_nxv8f16(<vscale x 8 x half> %0, <vscale x 8 x half> %1, <vscale x 8 x i1> %2, i32 %3) nounwind {
; CHECK-LABEL: intrinsic_vcompress_mask_vm_nxv8f16_nxv8f16:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli a0, a0, e16,m2,tu,mu
; CHECK-NEXT: vcompress.vm v16, v18, v0
; CHECK-NEXT: jalr zero, 0(ra)
entry:
%a = call <vscale x 8 x half> @llvm.riscv.vcompress.mask.nxv8f16(
<vscale x 8 x half> %0,
<vscale x 8 x half> %1,
<vscale x 8 x i1> %2,
i32 %3)
ret <vscale x 8 x half> %a
}
declare <vscale x 16 x half> @llvm.riscv.vcompress.mask.nxv16f16(
<vscale x 16 x half>,
<vscale x 16 x half>,
<vscale x 16 x i1>,
i32);
define <vscale x 16 x half> @intrinsic_vcompress_mask_vm_nxv16f16_nxv16f16(<vscale x 16 x half> %0, <vscale x 16 x half> %1, <vscale x 16 x i1> %2, i32 %3) nounwind {
; CHECK-LABEL: intrinsic_vcompress_mask_vm_nxv16f16_nxv16f16:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli a0, a0, e16,m4,tu,mu
; CHECK-NEXT: vcompress.vm v16, v20, v0
; CHECK-NEXT: jalr zero, 0(ra)
entry:
%a = call <vscale x 16 x half> @llvm.riscv.vcompress.mask.nxv16f16(
<vscale x 16 x half> %0,
<vscale x 16 x half> %1,
<vscale x 16 x i1> %2,
i32 %3)
ret <vscale x 16 x half> %a
}
declare <vscale x 32 x half> @llvm.riscv.vcompress.mask.nxv32f16(
<vscale x 32 x half>,
<vscale x 32 x half>,
<vscale x 32 x i1>,
i32);
define <vscale x 32 x half> @intrinsic_vcompress_mask_vm_nxv32f16_nxv32f16(<vscale x 32 x half> %0, <vscale x 32 x half> %1, <vscale x 32 x i1> %2, i32 %3) nounwind {
; CHECK-LABEL: intrinsic_vcompress_mask_vm_nxv32f16_nxv32f16:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli a2, zero, e16,m8,ta,mu
; CHECK-NEXT: vle16.v v8, (a0)
; CHECK-NEXT: vsetvli a0, a1, e16,m8,tu,mu
; CHECK-NEXT: vcompress.vm v16, v8, v0
; CHECK-NEXT: jalr zero, 0(ra)
entry:
%a = call <vscale x 32 x half> @llvm.riscv.vcompress.mask.nxv32f16(
<vscale x 32 x half> %0,
<vscale x 32 x half> %1,
<vscale x 32 x i1> %2,
i32 %3)
ret <vscale x 32 x half> %a
}
declare <vscale x 1 x float> @llvm.riscv.vcompress.mask.nxv1f32(
<vscale x 1 x float>,
<vscale x 1 x float>,
<vscale x 1 x i1>,
i32);
define <vscale x 1 x float> @intrinsic_vcompress_mask_vm_nxv1f32_nxv1f32(<vscale x 1 x float> %0, <vscale x 1 x float> %1, <vscale x 1 x i1> %2, i32 %3) nounwind {
; CHECK-LABEL: intrinsic_vcompress_mask_vm_nxv1f32_nxv1f32:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli a0, a0, e32,mf2,tu,mu
; CHECK-NEXT: vcompress.vm v16, v17, v0
; CHECK-NEXT: jalr zero, 0(ra)
entry:
%a = call <vscale x 1 x float> @llvm.riscv.vcompress.mask.nxv1f32(
<vscale x 1 x float> %0,
<vscale x 1 x float> %1,
<vscale x 1 x i1> %2,
i32 %3)
ret <vscale x 1 x float> %a
}
declare <vscale x 2 x float> @llvm.riscv.vcompress.mask.nxv2f32(
<vscale x 2 x float>,
<vscale x 2 x float>,
<vscale x 2 x i1>,
i32);
define <vscale x 2 x float> @intrinsic_vcompress_mask_vm_nxv2f32_nxv2f32(<vscale x 2 x float> %0, <vscale x 2 x float> %1, <vscale x 2 x i1> %2, i32 %3) nounwind {
; CHECK-LABEL: intrinsic_vcompress_mask_vm_nxv2f32_nxv2f32:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli a0, a0, e32,m1,tu,mu
; CHECK-NEXT: vcompress.vm v16, v17, v0
; CHECK-NEXT: jalr zero, 0(ra)
entry:
%a = call <vscale x 2 x float> @llvm.riscv.vcompress.mask.nxv2f32(
<vscale x 2 x float> %0,
<vscale x 2 x float> %1,
<vscale x 2 x i1> %2,
i32 %3)
ret <vscale x 2 x float> %a
}
declare <vscale x 4 x float> @llvm.riscv.vcompress.mask.nxv4f32(
<vscale x 4 x float>,
<vscale x 4 x float>,
<vscale x 4 x i1>,
i32);
define <vscale x 4 x float> @intrinsic_vcompress_mask_vm_nxv4f32_nxv4f32(<vscale x 4 x float> %0, <vscale x 4 x float> %1, <vscale x 4 x i1> %2, i32 %3) nounwind {
; CHECK-LABEL: intrinsic_vcompress_mask_vm_nxv4f32_nxv4f32:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli a0, a0, e32,m2,tu,mu
; CHECK-NEXT: vcompress.vm v16, v18, v0
; CHECK-NEXT: jalr zero, 0(ra)
entry:
%a = call <vscale x 4 x float> @llvm.riscv.vcompress.mask.nxv4f32(
<vscale x 4 x float> %0,
<vscale x 4 x float> %1,
<vscale x 4 x i1> %2,
i32 %3)
ret <vscale x 4 x float> %a
}
declare <vscale x 8 x float> @llvm.riscv.vcompress.mask.nxv8f32(
<vscale x 8 x float>,
<vscale x 8 x float>,
<vscale x 8 x i1>,
i32);
define <vscale x 8 x float> @intrinsic_vcompress_mask_vm_nxv8f32_nxv8f32(<vscale x 8 x float> %0, <vscale x 8 x float> %1, <vscale x 8 x i1> %2, i32 %3) nounwind {
; CHECK-LABEL: intrinsic_vcompress_mask_vm_nxv8f32_nxv8f32:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli a0, a0, e32,m4,tu,mu
; CHECK-NEXT: vcompress.vm v16, v20, v0
; CHECK-NEXT: jalr zero, 0(ra)
entry:
%a = call <vscale x 8 x float> @llvm.riscv.vcompress.mask.nxv8f32(
<vscale x 8 x float> %0,
<vscale x 8 x float> %1,
<vscale x 8 x i1> %2,
i32 %3)
ret <vscale x 8 x float> %a
}
declare <vscale x 16 x float> @llvm.riscv.vcompress.mask.nxv16f32(
<vscale x 16 x float>,
<vscale x 16 x float>,
<vscale x 16 x i1>,
i32);
define <vscale x 16 x float> @intrinsic_vcompress_mask_vm_nxv16f32_nxv16f32(<vscale x 16 x float> %0, <vscale x 16 x float> %1, <vscale x 16 x i1> %2, i32 %3) nounwind {
; CHECK-LABEL: intrinsic_vcompress_mask_vm_nxv16f32_nxv16f32:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli a2, zero, e32,m8,ta,mu
; CHECK-NEXT: vle32.v v8, (a0)
; CHECK-NEXT: vsetvli a0, a1, e32,m8,tu,mu
; CHECK-NEXT: vcompress.vm v16, v8, v0
; CHECK-NEXT: jalr zero, 0(ra)
entry:
%a = call <vscale x 16 x float> @llvm.riscv.vcompress.mask.nxv16f32(
<vscale x 16 x float> %0,
<vscale x 16 x float> %1,
<vscale x 16 x i1> %2,
i32 %3)
ret <vscale x 16 x float> %a
}

View File

@ -0,0 +1,830 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=riscv64 -mattr=+experimental-v,+d,+experimental-zfh -verify-machineinstrs \
; RUN: --riscv-no-aliases < %s | FileCheck %s
declare <vscale x 1 x i8> @llvm.riscv.vcompress.mask.nxv1i8(
<vscale x 1 x i8>,
<vscale x 1 x i8>,
<vscale x 1 x i1>,
i64);
define <vscale x 1 x i8> @intrinsic_vcompress_mask_vm_nxv1i8_nxv1i8(<vscale x 1 x i8> %0, <vscale x 1 x i8> %1, <vscale x 1 x i1> %2, i64 %3) nounwind {
; CHECK-LABEL: intrinsic_vcompress_mask_vm_nxv1i8_nxv1i8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli a0, a0, e8,mf8,tu,mu
; CHECK-NEXT: vcompress.vm v16, v17, v0
; CHECK-NEXT: jalr zero, 0(ra)
entry:
%a = call <vscale x 1 x i8> @llvm.riscv.vcompress.mask.nxv1i8(
<vscale x 1 x i8> %0,
<vscale x 1 x i8> %1,
<vscale x 1 x i1> %2,
i64 %3)
ret <vscale x 1 x i8> %a
}
declare <vscale x 2 x i8> @llvm.riscv.vcompress.mask.nxv2i8(
<vscale x 2 x i8>,
<vscale x 2 x i8>,
<vscale x 2 x i1>,
i64);
define <vscale x 2 x i8> @intrinsic_vcompress_mask_vm_nxv2i8_nxv2i8(<vscale x 2 x i8> %0, <vscale x 2 x i8> %1, <vscale x 2 x i1> %2, i64 %3) nounwind {
; CHECK-LABEL: intrinsic_vcompress_mask_vm_nxv2i8_nxv2i8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli a0, a0, e8,mf4,tu,mu
; CHECK-NEXT: vcompress.vm v16, v17, v0
; CHECK-NEXT: jalr zero, 0(ra)
entry:
%a = call <vscale x 2 x i8> @llvm.riscv.vcompress.mask.nxv2i8(
<vscale x 2 x i8> %0,
<vscale x 2 x i8> %1,
<vscale x 2 x i1> %2,
i64 %3)
ret <vscale x 2 x i8> %a
}
declare <vscale x 4 x i8> @llvm.riscv.vcompress.mask.nxv4i8(
<vscale x 4 x i8>,
<vscale x 4 x i8>,
<vscale x 4 x i1>,
i64);
define <vscale x 4 x i8> @intrinsic_vcompress_mask_vm_nxv4i8_nxv4i8(<vscale x 4 x i8> %0, <vscale x 4 x i8> %1, <vscale x 4 x i1> %2, i64 %3) nounwind {
; CHECK-LABEL: intrinsic_vcompress_mask_vm_nxv4i8_nxv4i8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli a0, a0, e8,mf2,tu,mu
; CHECK-NEXT: vcompress.vm v16, v17, v0
; CHECK-NEXT: jalr zero, 0(ra)
entry:
%a = call <vscale x 4 x i8> @llvm.riscv.vcompress.mask.nxv4i8(
<vscale x 4 x i8> %0,
<vscale x 4 x i8> %1,
<vscale x 4 x i1> %2,
i64 %3)
ret <vscale x 4 x i8> %a
}
declare <vscale x 8 x i8> @llvm.riscv.vcompress.mask.nxv8i8(
<vscale x 8 x i8>,
<vscale x 8 x i8>,
<vscale x 8 x i1>,
i64);
define <vscale x 8 x i8> @intrinsic_vcompress_mask_vm_nxv8i8_nxv8i8(<vscale x 8 x i8> %0, <vscale x 8 x i8> %1, <vscale x 8 x i1> %2, i64 %3) nounwind {
; CHECK-LABEL: intrinsic_vcompress_mask_vm_nxv8i8_nxv8i8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli a0, a0, e8,m1,tu,mu
; CHECK-NEXT: vcompress.vm v16, v17, v0
; CHECK-NEXT: jalr zero, 0(ra)
entry:
%a = call <vscale x 8 x i8> @llvm.riscv.vcompress.mask.nxv8i8(
<vscale x 8 x i8> %0,
<vscale x 8 x i8> %1,
<vscale x 8 x i1> %2,
i64 %3)
ret <vscale x 8 x i8> %a
}
declare <vscale x 16 x i8> @llvm.riscv.vcompress.mask.nxv16i8(
<vscale x 16 x i8>,
<vscale x 16 x i8>,
<vscale x 16 x i1>,
i64);
define <vscale x 16 x i8> @intrinsic_vcompress_mask_vm_nxv16i8_nxv16i8(<vscale x 16 x i8> %0, <vscale x 16 x i8> %1, <vscale x 16 x i1> %2, i64 %3) nounwind {
; CHECK-LABEL: intrinsic_vcompress_mask_vm_nxv16i8_nxv16i8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli a0, a0, e8,m2,tu,mu
; CHECK-NEXT: vcompress.vm v16, v18, v0
; CHECK-NEXT: jalr zero, 0(ra)
entry:
%a = call <vscale x 16 x i8> @llvm.riscv.vcompress.mask.nxv16i8(
<vscale x 16 x i8> %0,
<vscale x 16 x i8> %1,
<vscale x 16 x i1> %2,
i64 %3)
ret <vscale x 16 x i8> %a
}
declare <vscale x 32 x i8> @llvm.riscv.vcompress.mask.nxv32i8(
<vscale x 32 x i8>,
<vscale x 32 x i8>,
<vscale x 32 x i1>,
i64);
define <vscale x 32 x i8> @intrinsic_vcompress_mask_vm_nxv32i8_nxv32i8(<vscale x 32 x i8> %0, <vscale x 32 x i8> %1, <vscale x 32 x i1> %2, i64 %3) nounwind {
; CHECK-LABEL: intrinsic_vcompress_mask_vm_nxv32i8_nxv32i8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli a0, a0, e8,m4,tu,mu
; CHECK-NEXT: vcompress.vm v16, v20, v0
; CHECK-NEXT: jalr zero, 0(ra)
entry:
%a = call <vscale x 32 x i8> @llvm.riscv.vcompress.mask.nxv32i8(
<vscale x 32 x i8> %0,
<vscale x 32 x i8> %1,
<vscale x 32 x i1> %2,
i64 %3)
ret <vscale x 32 x i8> %a
}
declare <vscale x 64 x i8> @llvm.riscv.vcompress.mask.nxv64i8(
<vscale x 64 x i8>,
<vscale x 64 x i8>,
<vscale x 64 x i1>,
i64);
define <vscale x 64 x i8> @intrinsic_vcompress_mask_vm_nxv64i8_nxv64i8(<vscale x 64 x i8> %0, <vscale x 64 x i8> %1, <vscale x 64 x i1> %2, i64 %3) nounwind {
; CHECK-LABEL: intrinsic_vcompress_mask_vm_nxv64i8_nxv64i8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli a2, zero, e8,m8,ta,mu
; CHECK-NEXT: vle8.v v8, (a0)
; CHECK-NEXT: vsetvli a0, a1, e8,m8,tu,mu
; CHECK-NEXT: vcompress.vm v16, v8, v0
; CHECK-NEXT: jalr zero, 0(ra)
entry:
%a = call <vscale x 64 x i8> @llvm.riscv.vcompress.mask.nxv64i8(
<vscale x 64 x i8> %0,
<vscale x 64 x i8> %1,
<vscale x 64 x i1> %2,
i64 %3)
ret <vscale x 64 x i8> %a
}
declare <vscale x 1 x i16> @llvm.riscv.vcompress.mask.nxv1i16(
<vscale x 1 x i16>,
<vscale x 1 x i16>,
<vscale x 1 x i1>,
i64);
define <vscale x 1 x i16> @intrinsic_vcompress_mask_vm_nxv1i16_nxv1i16(<vscale x 1 x i16> %0, <vscale x 1 x i16> %1, <vscale x 1 x i1> %2, i64 %3) nounwind {
; CHECK-LABEL: intrinsic_vcompress_mask_vm_nxv1i16_nxv1i16:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli a0, a0, e16,mf4,tu,mu
; CHECK-NEXT: vcompress.vm v16, v17, v0
; CHECK-NEXT: jalr zero, 0(ra)
entry:
%a = call <vscale x 1 x i16> @llvm.riscv.vcompress.mask.nxv1i16(
<vscale x 1 x i16> %0,
<vscale x 1 x i16> %1,
<vscale x 1 x i1> %2,
i64 %3)
ret <vscale x 1 x i16> %a
}
declare <vscale x 2 x i16> @llvm.riscv.vcompress.mask.nxv2i16(
<vscale x 2 x i16>,
<vscale x 2 x i16>,
<vscale x 2 x i1>,
i64);
define <vscale x 2 x i16> @intrinsic_vcompress_mask_vm_nxv2i16_nxv2i16(<vscale x 2 x i16> %0, <vscale x 2 x i16> %1, <vscale x 2 x i1> %2, i64 %3) nounwind {
; CHECK-LABEL: intrinsic_vcompress_mask_vm_nxv2i16_nxv2i16:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli a0, a0, e16,mf2,tu,mu
; CHECK-NEXT: vcompress.vm v16, v17, v0
; CHECK-NEXT: jalr zero, 0(ra)
entry:
%a = call <vscale x 2 x i16> @llvm.riscv.vcompress.mask.nxv2i16(
<vscale x 2 x i16> %0,
<vscale x 2 x i16> %1,
<vscale x 2 x i1> %2,
i64 %3)
ret <vscale x 2 x i16> %a
}
declare <vscale x 4 x i16> @llvm.riscv.vcompress.mask.nxv4i16(
<vscale x 4 x i16>,
<vscale x 4 x i16>,
<vscale x 4 x i1>,
i64);
define <vscale x 4 x i16> @intrinsic_vcompress_mask_vm_nxv4i16_nxv4i16(<vscale x 4 x i16> %0, <vscale x 4 x i16> %1, <vscale x 4 x i1> %2, i64 %3) nounwind {
; CHECK-LABEL: intrinsic_vcompress_mask_vm_nxv4i16_nxv4i16:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli a0, a0, e16,m1,tu,mu
; CHECK-NEXT: vcompress.vm v16, v17, v0
; CHECK-NEXT: jalr zero, 0(ra)
entry:
%a = call <vscale x 4 x i16> @llvm.riscv.vcompress.mask.nxv4i16(
<vscale x 4 x i16> %0,
<vscale x 4 x i16> %1,
<vscale x 4 x i1> %2,
i64 %3)
ret <vscale x 4 x i16> %a
}
declare <vscale x 8 x i16> @llvm.riscv.vcompress.mask.nxv8i16(
<vscale x 8 x i16>,
<vscale x 8 x i16>,
<vscale x 8 x i1>,
i64);
define <vscale x 8 x i16> @intrinsic_vcompress_mask_vm_nxv8i16_nxv8i16(<vscale x 8 x i16> %0, <vscale x 8 x i16> %1, <vscale x 8 x i1> %2, i64 %3) nounwind {
; CHECK-LABEL: intrinsic_vcompress_mask_vm_nxv8i16_nxv8i16:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli a0, a0, e16,m2,tu,mu
; CHECK-NEXT: vcompress.vm v16, v18, v0
; CHECK-NEXT: jalr zero, 0(ra)
entry:
%a = call <vscale x 8 x i16> @llvm.riscv.vcompress.mask.nxv8i16(
<vscale x 8 x i16> %0,
<vscale x 8 x i16> %1,
<vscale x 8 x i1> %2,
i64 %3)
ret <vscale x 8 x i16> %a
}
declare <vscale x 16 x i16> @llvm.riscv.vcompress.mask.nxv16i16(
<vscale x 16 x i16>,
<vscale x 16 x i16>,
<vscale x 16 x i1>,
i64);
define <vscale x 16 x i16> @intrinsic_vcompress_mask_vm_nxv16i16_nxv16i16(<vscale x 16 x i16> %0, <vscale x 16 x i16> %1, <vscale x 16 x i1> %2, i64 %3) nounwind {
; CHECK-LABEL: intrinsic_vcompress_mask_vm_nxv16i16_nxv16i16:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli a0, a0, e16,m4,tu,mu
; CHECK-NEXT: vcompress.vm v16, v20, v0
; CHECK-NEXT: jalr zero, 0(ra)
entry:
%a = call <vscale x 16 x i16> @llvm.riscv.vcompress.mask.nxv16i16(
<vscale x 16 x i16> %0,
<vscale x 16 x i16> %1,
<vscale x 16 x i1> %2,
i64 %3)
ret <vscale x 16 x i16> %a
}
declare <vscale x 32 x i16> @llvm.riscv.vcompress.mask.nxv32i16(
<vscale x 32 x i16>,
<vscale x 32 x i16>,
<vscale x 32 x i1>,
i64);
define <vscale x 32 x i16> @intrinsic_vcompress_mask_vm_nxv32i16_nxv32i16(<vscale x 32 x i16> %0, <vscale x 32 x i16> %1, <vscale x 32 x i1> %2, i64 %3) nounwind {
; CHECK-LABEL: intrinsic_vcompress_mask_vm_nxv32i16_nxv32i16:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli a2, zero, e16,m8,ta,mu
; CHECK-NEXT: vle16.v v8, (a0)
; CHECK-NEXT: vsetvli a0, a1, e16,m8,tu,mu
; CHECK-NEXT: vcompress.vm v16, v8, v0
; CHECK-NEXT: jalr zero, 0(ra)
entry:
%a = call <vscale x 32 x i16> @llvm.riscv.vcompress.mask.nxv32i16(
<vscale x 32 x i16> %0,
<vscale x 32 x i16> %1,
<vscale x 32 x i1> %2,
i64 %3)
ret <vscale x 32 x i16> %a
}
declare <vscale x 1 x i32> @llvm.riscv.vcompress.mask.nxv1i32(
<vscale x 1 x i32>,
<vscale x 1 x i32>,
<vscale x 1 x i1>,
i64);
define <vscale x 1 x i32> @intrinsic_vcompress_mask_vm_nxv1i32_nxv1i32(<vscale x 1 x i32> %0, <vscale x 1 x i32> %1, <vscale x 1 x i1> %2, i64 %3) nounwind {
; CHECK-LABEL: intrinsic_vcompress_mask_vm_nxv1i32_nxv1i32:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli a0, a0, e32,mf2,tu,mu
; CHECK-NEXT: vcompress.vm v16, v17, v0
; CHECK-NEXT: jalr zero, 0(ra)
entry:
%a = call <vscale x 1 x i32> @llvm.riscv.vcompress.mask.nxv1i32(
<vscale x 1 x i32> %0,
<vscale x 1 x i32> %1,
<vscale x 1 x i1> %2,
i64 %3)
ret <vscale x 1 x i32> %a
}
declare <vscale x 2 x i32> @llvm.riscv.vcompress.mask.nxv2i32(
<vscale x 2 x i32>,
<vscale x 2 x i32>,
<vscale x 2 x i1>,
i64);
define <vscale x 2 x i32> @intrinsic_vcompress_mask_vm_nxv2i32_nxv2i32(<vscale x 2 x i32> %0, <vscale x 2 x i32> %1, <vscale x 2 x i1> %2, i64 %3) nounwind {
; CHECK-LABEL: intrinsic_vcompress_mask_vm_nxv2i32_nxv2i32:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli a0, a0, e32,m1,tu,mu
; CHECK-NEXT: vcompress.vm v16, v17, v0
; CHECK-NEXT: jalr zero, 0(ra)
entry:
%a = call <vscale x 2 x i32> @llvm.riscv.vcompress.mask.nxv2i32(
<vscale x 2 x i32> %0,
<vscale x 2 x i32> %1,
<vscale x 2 x i1> %2,
i64 %3)
ret <vscale x 2 x i32> %a
}
declare <vscale x 4 x i32> @llvm.riscv.vcompress.mask.nxv4i32(
<vscale x 4 x i32>,
<vscale x 4 x i32>,
<vscale x 4 x i1>,
i64);
define <vscale x 4 x i32> @intrinsic_vcompress_mask_vm_nxv4i32_nxv4i32(<vscale x 4 x i32> %0, <vscale x 4 x i32> %1, <vscale x 4 x i1> %2, i64 %3) nounwind {
; CHECK-LABEL: intrinsic_vcompress_mask_vm_nxv4i32_nxv4i32:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli a0, a0, e32,m2,tu,mu
; CHECK-NEXT: vcompress.vm v16, v18, v0
; CHECK-NEXT: jalr zero, 0(ra)
entry:
%a = call <vscale x 4 x i32> @llvm.riscv.vcompress.mask.nxv4i32(
<vscale x 4 x i32> %0,
<vscale x 4 x i32> %1,
<vscale x 4 x i1> %2,
i64 %3)
ret <vscale x 4 x i32> %a
}
declare <vscale x 8 x i32> @llvm.riscv.vcompress.mask.nxv8i32(
<vscale x 8 x i32>,
<vscale x 8 x i32>,
<vscale x 8 x i1>,
i64);
define <vscale x 8 x i32> @intrinsic_vcompress_mask_vm_nxv8i32_nxv8i32(<vscale x 8 x i32> %0, <vscale x 8 x i32> %1, <vscale x 8 x i1> %2, i64 %3) nounwind {
; CHECK-LABEL: intrinsic_vcompress_mask_vm_nxv8i32_nxv8i32:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli a0, a0, e32,m4,tu,mu
; CHECK-NEXT: vcompress.vm v16, v20, v0
; CHECK-NEXT: jalr zero, 0(ra)
entry:
%a = call <vscale x 8 x i32> @llvm.riscv.vcompress.mask.nxv8i32(
<vscale x 8 x i32> %0,
<vscale x 8 x i32> %1,
<vscale x 8 x i1> %2,
i64 %3)
ret <vscale x 8 x i32> %a
}
declare <vscale x 16 x i32> @llvm.riscv.vcompress.mask.nxv16i32(
<vscale x 16 x i32>,
<vscale x 16 x i32>,
<vscale x 16 x i1>,
i64);
define <vscale x 16 x i32> @intrinsic_vcompress_mask_vm_nxv16i32_nxv16i32(<vscale x 16 x i32> %0, <vscale x 16 x i32> %1, <vscale x 16 x i1> %2, i64 %3) nounwind {
; CHECK-LABEL: intrinsic_vcompress_mask_vm_nxv16i32_nxv16i32:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli a2, zero, e32,m8,ta,mu
; CHECK-NEXT: vle32.v v8, (a0)
; CHECK-NEXT: vsetvli a0, a1, e32,m8,tu,mu
; CHECK-NEXT: vcompress.vm v16, v8, v0
; CHECK-NEXT: jalr zero, 0(ra)
entry:
%a = call <vscale x 16 x i32> @llvm.riscv.vcompress.mask.nxv16i32(
<vscale x 16 x i32> %0,
<vscale x 16 x i32> %1,
<vscale x 16 x i1> %2,
i64 %3)
ret <vscale x 16 x i32> %a
}
declare <vscale x 1 x i64> @llvm.riscv.vcompress.mask.nxv1i64(
<vscale x 1 x i64>,
<vscale x 1 x i64>,
<vscale x 1 x i1>,
i64);
define <vscale x 1 x i64> @intrinsic_vcompress_mask_vm_nxv1i64_nxv1i64(<vscale x 1 x i64> %0, <vscale x 1 x i64> %1, <vscale x 1 x i1> %2, i64 %3) nounwind {
; CHECK-LABEL: intrinsic_vcompress_mask_vm_nxv1i64_nxv1i64:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli a0, a0, e64,m1,tu,mu
; CHECK-NEXT: vcompress.vm v16, v17, v0
; CHECK-NEXT: jalr zero, 0(ra)
entry:
%a = call <vscale x 1 x i64> @llvm.riscv.vcompress.mask.nxv1i64(
<vscale x 1 x i64> %0,
<vscale x 1 x i64> %1,
<vscale x 1 x i1> %2,
i64 %3)
ret <vscale x 1 x i64> %a
}
declare <vscale x 2 x i64> @llvm.riscv.vcompress.mask.nxv2i64(
<vscale x 2 x i64>,
<vscale x 2 x i64>,
<vscale x 2 x i1>,
i64);
define <vscale x 2 x i64> @intrinsic_vcompress_mask_vm_nxv2i64_nxv2i64(<vscale x 2 x i64> %0, <vscale x 2 x i64> %1, <vscale x 2 x i1> %2, i64 %3) nounwind {
; CHECK-LABEL: intrinsic_vcompress_mask_vm_nxv2i64_nxv2i64:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli a0, a0, e64,m2,tu,mu
; CHECK-NEXT: vcompress.vm v16, v18, v0
; CHECK-NEXT: jalr zero, 0(ra)
entry:
%a = call <vscale x 2 x i64> @llvm.riscv.vcompress.mask.nxv2i64(
<vscale x 2 x i64> %0,
<vscale x 2 x i64> %1,
<vscale x 2 x i1> %2,
i64 %3)
ret <vscale x 2 x i64> %a
}
declare <vscale x 4 x i64> @llvm.riscv.vcompress.mask.nxv4i64(
<vscale x 4 x i64>,
<vscale x 4 x i64>,
<vscale x 4 x i1>,
i64);
define <vscale x 4 x i64> @intrinsic_vcompress_mask_vm_nxv4i64_nxv4i64(<vscale x 4 x i64> %0, <vscale x 4 x i64> %1, <vscale x 4 x i1> %2, i64 %3) nounwind {
; CHECK-LABEL: intrinsic_vcompress_mask_vm_nxv4i64_nxv4i64:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli a0, a0, e64,m4,tu,mu
; CHECK-NEXT: vcompress.vm v16, v20, v0
; CHECK-NEXT: jalr zero, 0(ra)
entry:
%a = call <vscale x 4 x i64> @llvm.riscv.vcompress.mask.nxv4i64(
<vscale x 4 x i64> %0,
<vscale x 4 x i64> %1,
<vscale x 4 x i1> %2,
i64 %3)
ret <vscale x 4 x i64> %a
}
declare <vscale x 8 x i64> @llvm.riscv.vcompress.mask.nxv8i64(
<vscale x 8 x i64>,
<vscale x 8 x i64>,
<vscale x 8 x i1>,
i64);
define <vscale x 8 x i64> @intrinsic_vcompress_mask_vm_nxv8i64_nxv8i64(<vscale x 8 x i64> %0, <vscale x 8 x i64> %1, <vscale x 8 x i1> %2, i64 %3) nounwind {
; CHECK-LABEL: intrinsic_vcompress_mask_vm_nxv8i64_nxv8i64:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli a2, zero, e64,m8,ta,mu
; CHECK-NEXT: vle64.v v8, (a0)
; CHECK-NEXT: vsetvli a0, a1, e64,m8,tu,mu
; CHECK-NEXT: vcompress.vm v16, v8, v0
; CHECK-NEXT: jalr zero, 0(ra)
entry:
%a = call <vscale x 8 x i64> @llvm.riscv.vcompress.mask.nxv8i64(
<vscale x 8 x i64> %0,
<vscale x 8 x i64> %1,
<vscale x 8 x i1> %2,
i64 %3)
ret <vscale x 8 x i64> %a
}
declare <vscale x 1 x half> @llvm.riscv.vcompress.mask.nxv1f16(
<vscale x 1 x half>,
<vscale x 1 x half>,
<vscale x 1 x i1>,
i64);
define <vscale x 1 x half> @intrinsic_vcompress_mask_vm_nxv1f16_nxv1f16(<vscale x 1 x half> %0, <vscale x 1 x half> %1, <vscale x 1 x i1> %2, i64 %3) nounwind {
; CHECK-LABEL: intrinsic_vcompress_mask_vm_nxv1f16_nxv1f16:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli a0, a0, e16,mf4,tu,mu
; CHECK-NEXT: vcompress.vm v16, v17, v0
; CHECK-NEXT: jalr zero, 0(ra)
entry:
%a = call <vscale x 1 x half> @llvm.riscv.vcompress.mask.nxv1f16(
<vscale x 1 x half> %0,
<vscale x 1 x half> %1,
<vscale x 1 x i1> %2,
i64 %3)
ret <vscale x 1 x half> %a
}
declare <vscale x 2 x half> @llvm.riscv.vcompress.mask.nxv2f16(
<vscale x 2 x half>,
<vscale x 2 x half>,
<vscale x 2 x i1>,
i64);
define <vscale x 2 x half> @intrinsic_vcompress_mask_vm_nxv2f16_nxv2f16(<vscale x 2 x half> %0, <vscale x 2 x half> %1, <vscale x 2 x i1> %2, i64 %3) nounwind {
; CHECK-LABEL: intrinsic_vcompress_mask_vm_nxv2f16_nxv2f16:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli a0, a0, e16,mf2,tu,mu
; CHECK-NEXT: vcompress.vm v16, v17, v0
; CHECK-NEXT: jalr zero, 0(ra)
entry:
%a = call <vscale x 2 x half> @llvm.riscv.vcompress.mask.nxv2f16(
<vscale x 2 x half> %0,
<vscale x 2 x half> %1,
<vscale x 2 x i1> %2,
i64 %3)
ret <vscale x 2 x half> %a
}
declare <vscale x 4 x half> @llvm.riscv.vcompress.mask.nxv4f16(
<vscale x 4 x half>,
<vscale x 4 x half>,
<vscale x 4 x i1>,
i64);
define <vscale x 4 x half> @intrinsic_vcompress_mask_vm_nxv4f16_nxv4f16(<vscale x 4 x half> %0, <vscale x 4 x half> %1, <vscale x 4 x i1> %2, i64 %3) nounwind {
; CHECK-LABEL: intrinsic_vcompress_mask_vm_nxv4f16_nxv4f16:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli a0, a0, e16,m1,tu,mu
; CHECK-NEXT: vcompress.vm v16, v17, v0
; CHECK-NEXT: jalr zero, 0(ra)
entry:
%a = call <vscale x 4 x half> @llvm.riscv.vcompress.mask.nxv4f16(
<vscale x 4 x half> %0,
<vscale x 4 x half> %1,
<vscale x 4 x i1> %2,
i64 %3)
ret <vscale x 4 x half> %a
}
declare <vscale x 8 x half> @llvm.riscv.vcompress.mask.nxv8f16(
<vscale x 8 x half>,
<vscale x 8 x half>,
<vscale x 8 x i1>,
i64);
define <vscale x 8 x half> @intrinsic_vcompress_mask_vm_nxv8f16_nxv8f16(<vscale x 8 x half> %0, <vscale x 8 x half> %1, <vscale x 8 x i1> %2, i64 %3) nounwind {
; CHECK-LABEL: intrinsic_vcompress_mask_vm_nxv8f16_nxv8f16:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli a0, a0, e16,m2,tu,mu
; CHECK-NEXT: vcompress.vm v16, v18, v0
; CHECK-NEXT: jalr zero, 0(ra)
entry:
%a = call <vscale x 8 x half> @llvm.riscv.vcompress.mask.nxv8f16(
<vscale x 8 x half> %0,
<vscale x 8 x half> %1,
<vscale x 8 x i1> %2,
i64 %3)
ret <vscale x 8 x half> %a
}
declare <vscale x 16 x half> @llvm.riscv.vcompress.mask.nxv16f16(
<vscale x 16 x half>,
<vscale x 16 x half>,
<vscale x 16 x i1>,
i64);
define <vscale x 16 x half> @intrinsic_vcompress_mask_vm_nxv16f16_nxv16f16(<vscale x 16 x half> %0, <vscale x 16 x half> %1, <vscale x 16 x i1> %2, i64 %3) nounwind {
; CHECK-LABEL: intrinsic_vcompress_mask_vm_nxv16f16_nxv16f16:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli a0, a0, e16,m4,tu,mu
; CHECK-NEXT: vcompress.vm v16, v20, v0
; CHECK-NEXT: jalr zero, 0(ra)
entry:
%a = call <vscale x 16 x half> @llvm.riscv.vcompress.mask.nxv16f16(
<vscale x 16 x half> %0,
<vscale x 16 x half> %1,
<vscale x 16 x i1> %2,
i64 %3)
ret <vscale x 16 x half> %a
}
declare <vscale x 32 x half> @llvm.riscv.vcompress.mask.nxv32f16(
<vscale x 32 x half>,
<vscale x 32 x half>,
<vscale x 32 x i1>,
i64);
define <vscale x 32 x half> @intrinsic_vcompress_mask_vm_nxv32f16_nxv32f16(<vscale x 32 x half> %0, <vscale x 32 x half> %1, <vscale x 32 x i1> %2, i64 %3) nounwind {
; CHECK-LABEL: intrinsic_vcompress_mask_vm_nxv32f16_nxv32f16:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli a2, zero, e16,m8,ta,mu
; CHECK-NEXT: vle16.v v8, (a0)
; CHECK-NEXT: vsetvli a0, a1, e16,m8,tu,mu
; CHECK-NEXT: vcompress.vm v16, v8, v0
; CHECK-NEXT: jalr zero, 0(ra)
entry:
%a = call <vscale x 32 x half> @llvm.riscv.vcompress.mask.nxv32f16(
<vscale x 32 x half> %0,
<vscale x 32 x half> %1,
<vscale x 32 x i1> %2,
i64 %3)
ret <vscale x 32 x half> %a
}
declare <vscale x 1 x float> @llvm.riscv.vcompress.mask.nxv1f32(
<vscale x 1 x float>,
<vscale x 1 x float>,
<vscale x 1 x i1>,
i64);
define <vscale x 1 x float> @intrinsic_vcompress_mask_vm_nxv1f32_nxv1f32(<vscale x 1 x float> %0, <vscale x 1 x float> %1, <vscale x 1 x i1> %2, i64 %3) nounwind {
; CHECK-LABEL: intrinsic_vcompress_mask_vm_nxv1f32_nxv1f32:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli a0, a0, e32,mf2,tu,mu
; CHECK-NEXT: vcompress.vm v16, v17, v0
; CHECK-NEXT: jalr zero, 0(ra)
entry:
%a = call <vscale x 1 x float> @llvm.riscv.vcompress.mask.nxv1f32(
<vscale x 1 x float> %0,
<vscale x 1 x float> %1,
<vscale x 1 x i1> %2,
i64 %3)
ret <vscale x 1 x float> %a
}
declare <vscale x 2 x float> @llvm.riscv.vcompress.mask.nxv2f32(
<vscale x 2 x float>,
<vscale x 2 x float>,
<vscale x 2 x i1>,
i64);
define <vscale x 2 x float> @intrinsic_vcompress_mask_vm_nxv2f32_nxv2f32(<vscale x 2 x float> %0, <vscale x 2 x float> %1, <vscale x 2 x i1> %2, i64 %3) nounwind {
; CHECK-LABEL: intrinsic_vcompress_mask_vm_nxv2f32_nxv2f32:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli a0, a0, e32,m1,tu,mu
; CHECK-NEXT: vcompress.vm v16, v17, v0
; CHECK-NEXT: jalr zero, 0(ra)
entry:
%a = call <vscale x 2 x float> @llvm.riscv.vcompress.mask.nxv2f32(
<vscale x 2 x float> %0,
<vscale x 2 x float> %1,
<vscale x 2 x i1> %2,
i64 %3)
ret <vscale x 2 x float> %a
}
declare <vscale x 4 x float> @llvm.riscv.vcompress.mask.nxv4f32(
<vscale x 4 x float>,
<vscale x 4 x float>,
<vscale x 4 x i1>,
i64);
define <vscale x 4 x float> @intrinsic_vcompress_mask_vm_nxv4f32_nxv4f32(<vscale x 4 x float> %0, <vscale x 4 x float> %1, <vscale x 4 x i1> %2, i64 %3) nounwind {
; CHECK-LABEL: intrinsic_vcompress_mask_vm_nxv4f32_nxv4f32:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli a0, a0, e32,m2,tu,mu
; CHECK-NEXT: vcompress.vm v16, v18, v0
; CHECK-NEXT: jalr zero, 0(ra)
entry:
%a = call <vscale x 4 x float> @llvm.riscv.vcompress.mask.nxv4f32(
<vscale x 4 x float> %0,
<vscale x 4 x float> %1,
<vscale x 4 x i1> %2,
i64 %3)
ret <vscale x 4 x float> %a
}
declare <vscale x 8 x float> @llvm.riscv.vcompress.mask.nxv8f32(
<vscale x 8 x float>,
<vscale x 8 x float>,
<vscale x 8 x i1>,
i64);
define <vscale x 8 x float> @intrinsic_vcompress_mask_vm_nxv8f32_nxv8f32(<vscale x 8 x float> %0, <vscale x 8 x float> %1, <vscale x 8 x i1> %2, i64 %3) nounwind {
; CHECK-LABEL: intrinsic_vcompress_mask_vm_nxv8f32_nxv8f32:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli a0, a0, e32,m4,tu,mu
; CHECK-NEXT: vcompress.vm v16, v20, v0
; CHECK-NEXT: jalr zero, 0(ra)
entry:
%a = call <vscale x 8 x float> @llvm.riscv.vcompress.mask.nxv8f32(
<vscale x 8 x float> %0,
<vscale x 8 x float> %1,
<vscale x 8 x i1> %2,
i64 %3)
ret <vscale x 8 x float> %a
}
declare <vscale x 16 x float> @llvm.riscv.vcompress.mask.nxv16f32(
<vscale x 16 x float>,
<vscale x 16 x float>,
<vscale x 16 x i1>,
i64);
define <vscale x 16 x float> @intrinsic_vcompress_mask_vm_nxv16f32_nxv16f32(<vscale x 16 x float> %0, <vscale x 16 x float> %1, <vscale x 16 x i1> %2, i64 %3) nounwind {
; CHECK-LABEL: intrinsic_vcompress_mask_vm_nxv16f32_nxv16f32:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli a2, zero, e32,m8,ta,mu
; CHECK-NEXT: vle32.v v8, (a0)
; CHECK-NEXT: vsetvli a0, a1, e32,m8,tu,mu
; CHECK-NEXT: vcompress.vm v16, v8, v0
; CHECK-NEXT: jalr zero, 0(ra)
entry:
%a = call <vscale x 16 x float> @llvm.riscv.vcompress.mask.nxv16f32(
<vscale x 16 x float> %0,
<vscale x 16 x float> %1,
<vscale x 16 x i1> %2,
i64 %3)
ret <vscale x 16 x float> %a
}
declare <vscale x 1 x double> @llvm.riscv.vcompress.mask.nxv1f64(
<vscale x 1 x double>,
<vscale x 1 x double>,
<vscale x 1 x i1>,
i64);
define <vscale x 1 x double> @intrinsic_vcompress_mask_vm_nxv1f64_nxv1f64(<vscale x 1 x double> %0, <vscale x 1 x double> %1, <vscale x 1 x i1> %2, i64 %3) nounwind {
; CHECK-LABEL: intrinsic_vcompress_mask_vm_nxv1f64_nxv1f64:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli a0, a0, e64,m1,tu,mu
; CHECK-NEXT: vcompress.vm v16, v17, v0
; CHECK-NEXT: jalr zero, 0(ra)
entry:
%a = call <vscale x 1 x double> @llvm.riscv.vcompress.mask.nxv1f64(
<vscale x 1 x double> %0,
<vscale x 1 x double> %1,
<vscale x 1 x i1> %2,
i64 %3)
ret <vscale x 1 x double> %a
}
declare <vscale x 2 x double> @llvm.riscv.vcompress.mask.nxv2f64(
<vscale x 2 x double>,
<vscale x 2 x double>,
<vscale x 2 x i1>,
i64);
define <vscale x 2 x double> @intrinsic_vcompress_mask_vm_nxv2f64_nxv2f64(<vscale x 2 x double> %0, <vscale x 2 x double> %1, <vscale x 2 x i1> %2, i64 %3) nounwind {
; CHECK-LABEL: intrinsic_vcompress_mask_vm_nxv2f64_nxv2f64:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli a0, a0, e64,m2,tu,mu
; CHECK-NEXT: vcompress.vm v16, v18, v0
; CHECK-NEXT: jalr zero, 0(ra)
entry:
%a = call <vscale x 2 x double> @llvm.riscv.vcompress.mask.nxv2f64(
<vscale x 2 x double> %0,
<vscale x 2 x double> %1,
<vscale x 2 x i1> %2,
i64 %3)
ret <vscale x 2 x double> %a
}
declare <vscale x 4 x double> @llvm.riscv.vcompress.mask.nxv4f64(
<vscale x 4 x double>,
<vscale x 4 x double>,
<vscale x 4 x i1>,
i64);
define <vscale x 4 x double> @intrinsic_vcompress_mask_vm_nxv4f64_nxv4f64(<vscale x 4 x double> %0, <vscale x 4 x double> %1, <vscale x 4 x i1> %2, i64 %3) nounwind {
; CHECK-LABEL: intrinsic_vcompress_mask_vm_nxv4f64_nxv4f64:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli a0, a0, e64,m4,tu,mu
; CHECK-NEXT: vcompress.vm v16, v20, v0
; CHECK-NEXT: jalr zero, 0(ra)
entry:
%a = call <vscale x 4 x double> @llvm.riscv.vcompress.mask.nxv4f64(
<vscale x 4 x double> %0,
<vscale x 4 x double> %1,
<vscale x 4 x i1> %2,
i64 %3)
ret <vscale x 4 x double> %a
}
declare <vscale x 8 x double> @llvm.riscv.vcompress.mask.nxv8f64(
<vscale x 8 x double>,
<vscale x 8 x double>,
<vscale x 8 x i1>,
i64);
define <vscale x 8 x double> @intrinsic_vcompress_mask_vm_nxv8f64_nxv8f64(<vscale x 8 x double> %0, <vscale x 8 x double> %1, <vscale x 8 x i1> %2, i64 %3) nounwind {
; CHECK-LABEL: intrinsic_vcompress_mask_vm_nxv8f64_nxv8f64:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli a2, zero, e64,m8,ta,mu
; CHECK-NEXT: vle64.v v8, (a0)
; CHECK-NEXT: vsetvli a0, a1, e64,m8,tu,mu
; CHECK-NEXT: vcompress.vm v16, v8, v0
; CHECK-NEXT: jalr zero, 0(ra)
entry:
%a = call <vscale x 8 x double> @llvm.riscv.vcompress.mask.nxv8f64(
<vscale x 8 x double> %0,
<vscale x 8 x double> %1,
<vscale x 8 x i1> %2,
i64 %3)
ret <vscale x 8 x double> %a
}