258 lines
9.5 KiB
LLVM
258 lines
9.5 KiB
LLVM
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
|
|
; RUN: llc < %s -mtriple aarch64-none-linux-gnu | FileCheck %s
|
|
|
|
; Supported combines
|
|
|
|
define <8 x i16> @dupsext_v8i8_v8i16(i8 %src, <8 x i8> %b) {
|
|
; CHECK-LABEL: dupsext_v8i8_v8i16:
|
|
; CHECK: // %bb.0: // %entry
|
|
; CHECK-NEXT: dup v1.8b, w0
|
|
; CHECK-NEXT: smull v0.8h, v1.8b, v0.8b
|
|
; CHECK-NEXT: ret
|
|
entry:
|
|
%in = sext i8 %src to i16
|
|
%ext.b = sext <8 x i8> %b to <8 x i16>
|
|
%broadcast.splatinsert = insertelement <8 x i16> undef, i16 %in, i16 0
|
|
%broadcast.splat = shufflevector <8 x i16> %broadcast.splatinsert, <8 x i16> undef, <8 x i32> zeroinitializer
|
|
%out = mul nsw <8 x i16> %broadcast.splat, %ext.b
|
|
ret <8 x i16> %out
|
|
}
|
|
|
|
define <8 x i16> @dupzext_v8i8_v8i16(i8 %src, <8 x i8> %b) {
|
|
; CHECK-LABEL: dupzext_v8i8_v8i16:
|
|
; CHECK: // %bb.0: // %entry
|
|
; CHECK-NEXT: dup v1.8b, w0
|
|
; CHECK-NEXT: umull v0.8h, v1.8b, v0.8b
|
|
; CHECK-NEXT: ret
|
|
entry:
|
|
%in = zext i8 %src to i16
|
|
%ext.b = zext <8 x i8> %b to <8 x i16>
|
|
%broadcast.splatinsert = insertelement <8 x i16> undef, i16 %in, i16 0
|
|
%broadcast.splat = shufflevector <8 x i16> %broadcast.splatinsert, <8 x i16> undef, <8 x i32> zeroinitializer
|
|
%out = mul nuw <8 x i16> %broadcast.splat, %ext.b
|
|
ret <8 x i16> %out
|
|
}
|
|
|
|
define <4 x i32> @dupsext_v4i16_v4i32(i16 %src, <4 x i16> %b) {
|
|
; CHECK-LABEL: dupsext_v4i16_v4i32:
|
|
; CHECK: // %bb.0: // %entry
|
|
; CHECK-NEXT: dup v1.4h, w0
|
|
; CHECK-NEXT: smull v0.4s, v1.4h, v0.4h
|
|
; CHECK-NEXT: ret
|
|
entry:
|
|
%in = sext i16 %src to i32
|
|
%ext.b = sext <4 x i16> %b to <4 x i32>
|
|
%broadcast.splatinsert = insertelement <4 x i32> undef, i32 %in, i32 0
|
|
%broadcast.splat = shufflevector <4 x i32> %broadcast.splatinsert, <4 x i32> undef, <4 x i32> zeroinitializer
|
|
%out = mul nsw <4 x i32> %broadcast.splat, %ext.b
|
|
ret <4 x i32> %out
|
|
}
|
|
|
|
define <4 x i32> @dupzext_v4i16_v4i32(i16 %src, <4 x i16> %b) {
|
|
; CHECK-LABEL: dupzext_v4i16_v4i32:
|
|
; CHECK: // %bb.0: // %entry
|
|
; CHECK-NEXT: dup v1.4h, w0
|
|
; CHECK-NEXT: umull v0.4s, v1.4h, v0.4h
|
|
; CHECK-NEXT: ret
|
|
entry:
|
|
%in = zext i16 %src to i32
|
|
%ext.b = zext <4 x i16> %b to <4 x i32>
|
|
%broadcast.splatinsert = insertelement <4 x i32> undef, i32 %in, i32 0
|
|
%broadcast.splat = shufflevector <4 x i32> %broadcast.splatinsert, <4 x i32> undef, <4 x i32> zeroinitializer
|
|
%out = mul nuw <4 x i32> %broadcast.splat, %ext.b
|
|
ret <4 x i32> %out
|
|
}
|
|
|
|
define <2 x i64> @dupsext_v2i32_v2i64(i32 %src, <2 x i32> %b) {
|
|
; CHECK-LABEL: dupsext_v2i32_v2i64:
|
|
; CHECK: // %bb.0: // %entry
|
|
; CHECK-NEXT: dup v1.2s, w0
|
|
; CHECK-NEXT: smull v0.2d, v1.2s, v0.2s
|
|
; CHECK-NEXT: ret
|
|
entry:
|
|
%in = sext i32 %src to i64
|
|
%ext.b = sext <2 x i32> %b to <2 x i64>
|
|
%broadcast.splatinsert = insertelement <2 x i64> undef, i64 %in, i64 0
|
|
%broadcast.splat = shufflevector <2 x i64> %broadcast.splatinsert, <2 x i64> undef, <2 x i32> zeroinitializer
|
|
%out = mul nsw <2 x i64> %broadcast.splat, %ext.b
|
|
ret <2 x i64> %out
|
|
}
|
|
|
|
define <2 x i64> @dupzext_v2i32_v2i64(i32 %src, <2 x i32> %b) {
|
|
; CHECK-LABEL: dupzext_v2i32_v2i64:
|
|
; CHECK: // %bb.0: // %entry
|
|
; CHECK-NEXT: dup v1.2s, w0
|
|
; CHECK-NEXT: umull v0.2d, v1.2s, v0.2s
|
|
; CHECK-NEXT: ret
|
|
entry:
|
|
%in = zext i32 %src to i64
|
|
%ext.b = zext <2 x i32> %b to <2 x i64>
|
|
%broadcast.splatinsert = insertelement <2 x i64> undef, i64 %in, i64 0
|
|
%broadcast.splat = shufflevector <2 x i64> %broadcast.splatinsert, <2 x i64> undef, <2 x i32> zeroinitializer
|
|
%out = mul nuw <2 x i64> %broadcast.splat, %ext.b
|
|
ret <2 x i64> %out
|
|
}
|
|
|
|
; Unsupported combines
|
|
|
|
define <2 x i16> @dupsext_v2i8_v2i16(i8 %src, <2 x i8> %b) {
|
|
; CHECK-LABEL: dupsext_v2i8_v2i16:
|
|
; CHECK: // %bb.0: // %entry
|
|
; CHECK-NEXT: sxtb w8, w0
|
|
; CHECK-NEXT: shl v0.2s, v0.2s, #24
|
|
; CHECK-NEXT: sshr v0.2s, v0.2s, #24
|
|
; CHECK-NEXT: dup v1.2s, w8
|
|
; CHECK-NEXT: mul v0.2s, v1.2s, v0.2s
|
|
; CHECK-NEXT: ret
|
|
entry:
|
|
%in = sext i8 %src to i16
|
|
%ext.b = sext <2 x i8> %b to <2 x i16>
|
|
%broadcast.splatinsert = insertelement <2 x i16> undef, i16 %in, i16 0
|
|
%broadcast.splat = shufflevector <2 x i16> %broadcast.splatinsert, <2 x i16> undef, <2 x i32> zeroinitializer
|
|
%out = mul nsw <2 x i16> %broadcast.splat, %ext.b
|
|
ret <2 x i16> %out
|
|
}
|
|
|
|
define <2 x i64> @dupzext_v2i16_v2i64(i16 %src, <2 x i16> %b) {
|
|
; CHECK-LABEL: dupzext_v2i16_v2i64:
|
|
; CHECK: // %bb.0: // %entry
|
|
; CHECK-NEXT: movi d1, #0x00ffff0000ffff
|
|
; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0
|
|
; CHECK-NEXT: and x8, x0, #0xffff
|
|
; CHECK-NEXT: and v0.8b, v0.8b, v1.8b
|
|
; CHECK-NEXT: ushll v0.2d, v0.2s, #0
|
|
; CHECK-NEXT: fmov x9, d0
|
|
; CHECK-NEXT: mov x10, v0.d[1]
|
|
; CHECK-NEXT: mul x9, x8, x9
|
|
; CHECK-NEXT: mul x8, x8, x10
|
|
; CHECK-NEXT: fmov d0, x9
|
|
; CHECK-NEXT: mov v0.d[1], x8
|
|
; CHECK-NEXT: ret
|
|
entry:
|
|
%in = zext i16 %src to i64
|
|
%ext.b = zext <2 x i16> %b to <2 x i64>
|
|
%broadcast.splatinsert = insertelement <2 x i64> undef, i64 %in, i64 0
|
|
%broadcast.splat = shufflevector <2 x i64> %broadcast.splatinsert, <2 x i64> undef, <2 x i32> zeroinitializer
|
|
%out = mul nuw <2 x i64> %broadcast.splat, %ext.b
|
|
ret <2 x i64> %out
|
|
}
|
|
|
|
; dupsext_v4i8_v4i16
|
|
; dupsext_v2i8_v2i32
|
|
; dupsext_v4i8_v4i32
|
|
; dupsext_v2i8_v2i64
|
|
; dupsext_v2i16_v2i32
|
|
; dupsext_v2i16_v2i64
|
|
; dupzext_v2i8_v2i16
|
|
; dupzext_v4i8_v4i16
|
|
; dupzext_v2i8_v2i32
|
|
; dupzext_v4i8_v4i32
|
|
; dupzext_v2i8_v2i64
|
|
; dupzext_v2i16_v2i32
|
|
; dupzext_v2i16_v2i64
|
|
|
|
; Unsupported states
|
|
|
|
define <8 x i16> @nonsplat_shuffleinsert(i8 %src, <8 x i8> %b) {
|
|
; CHECK-LABEL: nonsplat_shuffleinsert:
|
|
; CHECK: // %bb.0: // %entry
|
|
; CHECK-NEXT: dup v1.8b, w0
|
|
; CHECK-NEXT: smull v0.8h, v1.8b, v0.8b
|
|
; CHECK-NEXT: ret
|
|
entry:
|
|
%in = sext i8 %src to i16
|
|
%ext.b = sext <8 x i8> %b to <8 x i16>
|
|
%broadcast.splatinsert = insertelement <8 x i16> undef, i16 %in, i16 1
|
|
%broadcast.splat = shufflevector <8 x i16> %broadcast.splatinsert, <8 x i16> undef, <8 x i32> <i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1>
|
|
%out = mul nsw <8 x i16> %broadcast.splat, %ext.b
|
|
ret <8 x i16> %out
|
|
}
|
|
|
|
define <4 x i32> @nonsplat_shuffleinsert2(<4 x i16> %b, i16 %b0, i16 %b1, i16 %b2, i16 %b3) {
|
|
; CHECK-LABEL: nonsplat_shuffleinsert2:
|
|
; CHECK: // %bb.0: // %entry
|
|
; CHECK-NEXT: fmov s1, w0
|
|
; CHECK-NEXT: mov v1.h[1], w1
|
|
; CHECK-NEXT: mov v1.h[2], w2
|
|
; CHECK-NEXT: mov v1.h[3], w3
|
|
; CHECK-NEXT: smull v0.4s, v1.4h, v0.4h
|
|
; CHECK-NEXT: ret
|
|
entry:
|
|
%s0 = sext i16 %b0 to i32
|
|
%s1 = sext i16 %b1 to i32
|
|
%s2 = sext i16 %b2 to i32
|
|
%s3 = sext i16 %b3 to i32
|
|
%ext.b = sext <4 x i16> %b to <4 x i32>
|
|
%v0 = insertelement <4 x i32> undef, i32 %s0, i32 0
|
|
%v1 = insertelement <4 x i32> %v0, i32 %s1, i32 1
|
|
%v2 = insertelement <4 x i32> %v1, i32 %s2, i32 2
|
|
%v3 = insertelement <4 x i32> %v2, i32 %s3, i32 3
|
|
%out = mul nsw <4 x i32> %v3, %ext.b
|
|
ret <4 x i32> %out
|
|
}
|
|
|
|
define void @typei1_orig(i64 %a, i8* %p, <8 x i16>* %q) {
|
|
; CHECK-LABEL: typei1_orig:
|
|
; CHECK: // %bb.0:
|
|
; CHECK-NEXT: cmp x0, #0
|
|
; CHECK-NEXT: ldr q0, [x2]
|
|
; CHECK-NEXT: cset w8, gt
|
|
; CHECK-NEXT: neg v0.8h, v0.8h
|
|
; CHECK-NEXT: dup v1.8h, w8
|
|
; CHECK-NEXT: mul v0.8h, v0.8h, v1.8h
|
|
; CHECK-NEXT: movi v1.2d, #0000000000000000
|
|
; CHECK-NEXT: cmtst v0.8h, v0.8h, v0.8h
|
|
; CHECK-NEXT: xtn v0.8b, v0.8h
|
|
; CHECK-NEXT: mov v0.d[1], v1.d[0]
|
|
; CHECK-NEXT: str q0, [x1]
|
|
; CHECK-NEXT: ret
|
|
%tmp = xor <16 x i1> zeroinitializer, <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>
|
|
%tmp6 = load <8 x i16>, <8 x i16>* %q, align 2
|
|
%tmp7 = sub <8 x i16> zeroinitializer, %tmp6
|
|
%tmp8 = shufflevector <8 x i16> %tmp7, <8 x i16> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
|
|
%tmp9 = icmp slt i64 0, %a
|
|
%tmp10 = zext i1 %tmp9 to i16
|
|
%tmp11 = insertelement <16 x i16> undef, i16 %tmp10, i64 0
|
|
%tmp12 = shufflevector <16 x i16> %tmp11, <16 x i16> undef, <16 x i32> zeroinitializer
|
|
%tmp13 = mul nuw <16 x i16> %tmp8, %tmp12
|
|
%tmp14 = icmp ne <16 x i16> %tmp13, zeroinitializer
|
|
%tmp15 = and <16 x i1> %tmp14, %tmp
|
|
%tmp16 = sext <16 x i1> %tmp15 to <16 x i8>
|
|
%tmp17 = bitcast i8* %p to <16 x i8>*
|
|
store <16 x i8> %tmp16, <16 x i8>* %tmp17, align 1
|
|
ret void
|
|
}
|
|
|
|
define <8 x i16> @typei1_v8i1_v8i16(i1 %src, <8 x i1> %b) {
|
|
; CHECK-LABEL: typei1_v8i1_v8i16:
|
|
; CHECK: // %bb.0: // %entry
|
|
; CHECK-NEXT: movi v1.8b, #1
|
|
; CHECK-NEXT: and w8, w0, #0x1
|
|
; CHECK-NEXT: and v0.8b, v0.8b, v1.8b
|
|
; CHECK-NEXT: dup v1.8h, w8
|
|
; CHECK-NEXT: ushll v0.8h, v0.8b, #0
|
|
; CHECK-NEXT: mul v0.8h, v1.8h, v0.8h
|
|
; CHECK-NEXT: ret
|
|
entry:
|
|
%in = zext i1 %src to i16
|
|
%ext.b = zext <8 x i1> %b to <8 x i16>
|
|
%broadcast.splatinsert = insertelement <8 x i16> undef, i16 %in, i16 0
|
|
%broadcast.splat = shufflevector <8 x i16> %broadcast.splatinsert, <8 x i16> undef, <8 x i32> zeroinitializer
|
|
%out = mul nsw <8 x i16> %broadcast.splat, %ext.b
|
|
ret <8 x i16> %out
|
|
}
|
|
|
|
define <8 x i16> @missing_insert(<8 x i8> %b) {
|
|
; CHECK-LABEL: missing_insert:
|
|
; CHECK: // %bb.0: // %entry
|
|
; CHECK-NEXT: sshll v0.8h, v0.8b, #0
|
|
; CHECK-NEXT: ext v1.16b, v0.16b, v0.16b, #4
|
|
; CHECK-NEXT: mul v0.8h, v1.8h, v0.8h
|
|
; CHECK-NEXT: ret
|
|
entry:
|
|
%ext.b = sext <8 x i8> %b to <8 x i16>
|
|
%broadcast.splat = shufflevector <8 x i16> %ext.b, <8 x i16> undef, <8 x i32> <i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1>
|
|
%out = mul nsw <8 x i16> %broadcast.splat, %ext.b
|
|
ret <8 x i16> %out
|
|
}
|