[AArch64] Add tests for shuffle (tbl2, tbl2) -> tbl4 fold.

Add extra tests where shuffle (tbl2, tbl2) can be folded to tbl4.
Regenerate check lines automatically as well.
This commit is contained in:
Florian Hahn 2022-09-08 14:01:11 +01:00
parent 52f7eb3151
commit 7d4ee32662
No known key found for this signature in database
GPG Key ID: CF59919C6547A668
1 changed file with 205 additions and 32 deletions

View File

@ -1,61 +1,200 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=arm64-eabi -aarch64-neon-syntax=apple | FileCheck %s
; tbl1 intrinsic with an <8 x i8> index vector: one table register (%A) and
; indices in %B. Expected to select a single tbl.8b with a one-register list.
define <8 x i8> @tbl1_8b(<16 x i8> %A, <8 x i8> %B) nounwind {
; CHECK-LABEL: tbl1_8b:
; CHECK: // %bb.0:
; CHECK-NEXT: tbl.8b v0, { v0 }, v1
; CHECK-NEXT: ret
  %tmp3 = call <8 x i8> @llvm.aarch64.neon.tbl1.v8i8(<16 x i8> %A, <8 x i8> %B)
  ret <8 x i8> %tmp3
}
; tbl1 intrinsic with a <16 x i8> index vector: one table register (%A) and
; indices in %B. Expected to select a single tbl.16b with a one-register list.
define <16 x i8> @tbl1_16b(<16 x i8> %A, <16 x i8> %B) nounwind {
; CHECK-LABEL: tbl1_16b:
; CHECK: // %bb.0:
; CHECK-NEXT: tbl.16b v0, { v0 }, v1
; CHECK-NEXT: ret
  %tmp3 = call <16 x i8> @llvm.aarch64.neon.tbl1.v16i8(<16 x i8> %A, <16 x i8> %B)
  ret <16 x i8> %tmp3
}
; tbl2 intrinsic with an <8 x i8> index vector: table registers %A and %B,
; indices in %C. The expected output uses the register pair { v0, v1 } as the
; table and v2 as the index operand of one tbl.8b.
define <8 x i8> @tbl2_8b(<16 x i8> %A, <16 x i8> %B, <8 x i8> %C) {
; CHECK-LABEL: tbl2_8b:
; CHECK: // %bb.0:
; CHECK-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
; CHECK-NEXT: tbl.8b v0, { v0, v1 }, v2
; CHECK-NEXT: ret
  %tmp3 = call <8 x i8> @llvm.aarch64.neon.tbl2.v8i8(<16 x i8> %A, <16 x i8> %B, <8 x i8> %C)
  ret <8 x i8> %tmp3
}
; tbl2 intrinsic with a <16 x i8> index vector: table registers %A and %B,
; indices in %C. The expected output uses the register pair { v0, v1 } as the
; table and v2 as the index operand of one tbl.16b.
define <16 x i8> @tbl2_16b(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C) {
; CHECK-LABEL: tbl2_16b:
; CHECK: // %bb.0:
; CHECK-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
; CHECK-NEXT: tbl.16b v0, { v0, v1 }, v2
; CHECK-NEXT: ret
  %tmp3 = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C)
  ret <16 x i8> %tmp3
}
; tbl3 intrinsic with an <8 x i8> index vector: table registers %A, %B and %C,
; indices in %D. The expected output uses the register triple { v0, v1, v2 }
; as the table and v3 as the index operand of one tbl.8b.
define <8 x i8> @tbl3_8b(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <8 x i8> %D) {
; CHECK-LABEL: tbl3_8b:
; CHECK: // %bb.0:
; CHECK-NEXT: // kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-NEXT: // kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-NEXT: tbl.8b v0, { v0, v1, v2 }, v3
; CHECK-NEXT: ret
  %tmp3 = call <8 x i8> @llvm.aarch64.neon.tbl3.v8i8(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <8 x i8> %D)
  ret <8 x i8> %tmp3
}
; tbl3 intrinsic with a <16 x i8> index vector: table registers %A, %B and %C,
; indices in %D. The expected output uses the register triple { v0, v1, v2 }
; as the table and v3 as the index operand of one tbl.16b.
define <16 x i8> @tbl3_16b(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D) {
; CHECK-LABEL: tbl3_16b:
; CHECK: // %bb.0:
; CHECK-NEXT: // kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-NEXT: // kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-NEXT: tbl.16b v0, { v0, v1, v2 }, v3
; CHECK-NEXT: ret
  %tmp3 = call <16 x i8> @llvm.aarch64.neon.tbl3.v16i8(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D)
  ret <16 x i8> %tmp3
}
; tbl4 intrinsic with an <8 x i8> index vector: table registers %A through %D,
; indices in %E. The expected output uses the register quad { v0, v1, v2, v3 }
; as the table and v4 as the index operand of one tbl.8b.
define <8 x i8> @tbl4_8b(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <8 x i8> %E) {
; CHECK-LABEL: tbl4_8b:
; CHECK: // %bb.0:
; CHECK-NEXT: // kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-NEXT: // kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-NEXT: // kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-NEXT: tbl.8b v0, { v0, v1, v2, v3 }, v4
; CHECK-NEXT: ret
  %tmp3 = call <8 x i8> @llvm.aarch64.neon.tbl4.v8i8(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <8 x i8> %E)
  ret <8 x i8> %tmp3
}
; tbl4 intrinsic with a <16 x i8> index vector: table registers %A through %D,
; indices in %E. The expected output uses the register quad { v0, v1, v2, v3 }
; as the table and v4 as the index operand of one tbl.16b.
define <16 x i8> @tbl4_16b(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E) {
; CHECK-LABEL: tbl4_16b:
; CHECK: // %bb.0:
; CHECK-NEXT: // kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-NEXT: // kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-NEXT: // kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-NEXT: tbl.16b v0, { v0, v1, v2, v3 }, v4
; CHECK-NEXT: ret
  %tmp3 = call <16 x i8> @llvm.aarch64.neon.tbl4.v16i8(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E)
  ret <16 x i8> %tmp3
}
; Expected constant-pool entry for the index vector used by both tbl2 calls in
; @shuffled_tbl2_to_tbl4: bytes 0, 4, ..., 28 followed by eight 0xff bytes,
; matching the IR mask <0, 4, ..., 28, -1 x 8>.
; CHECK-LABEL: .LCPI8_0:
; CHECK-NEXT: .byte 0 // 0x0
; CHECK-NEXT: .byte 4 // 0x4
; CHECK-NEXT: .byte 8 // 0x8
; CHECK-NEXT: .byte 12 // 0xc
; CHECK-NEXT: .byte 16 // 0x10
; CHECK-NEXT: .byte 20 // 0x14
; CHECK-NEXT: .byte 24 // 0x18
; CHECK-NEXT: .byte 28 // 0x1c
; CHECK-NEXT: .byte 255 // 0xff
; CHECK-NEXT: .byte 255 // 0xff
; CHECK-NEXT: .byte 255 // 0xff
; CHECK-NEXT: .byte 255 // 0xff
; CHECK-NEXT: .byte 255 // 0xff
; CHECK-NEXT: .byte 255 // 0xff
; CHECK-NEXT: .byte 255 // 0xff
; CHECK-NEXT: .byte 255 // 0xff
; Two tbl2 lookups (tables %a/%b and %c/%d) that share one constant mask, joined
; by a shufflevector that takes lanes 0-7 of %t1 followed by lanes 0-7 of %t2
; (shuffle indices 16-23). The assertions below record the present lowering:
; one mask load, two tbl.16b instructions and a mov.d that inserts the low half
; of the second result into the high half of the first.
; NOTE(review): the function name suggests this pattern is a candidate for a
; single tbl4; that fold is not reflected in the expected output here.
define <16 x i8> @shuffled_tbl2_to_tbl4(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c, <16 x i8> %d) {
; CHECK-LABEL: shuffled_tbl2_to_tbl4:
; CHECK: // %bb.0:
; CHECK-NEXT: adrp x8, .LCPI8_0
; CHECK-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
; CHECK-NEXT: // kill: def $q3 killed $q3 killed $q2_q3 def $q2_q3
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
; CHECK-NEXT: // kill: def $q2 killed $q2 killed $q2_q3 def $q2_q3
; CHECK-NEXT: ldr q4, [x8, :lo12:.LCPI8_0]
; CHECK-NEXT: tbl.16b v0, { v0, v1 }, v4
; CHECK-NEXT: tbl.16b v1, { v2, v3 }, v4
; CHECK-NEXT: mov.d v0[1], v1[0]
; CHECK-NEXT: ret
%t1 = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> <i8 0, i8 4, i8 8, i8 12, i8 16, i8 20, i8 24, i8 28, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>)
%t2 = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> %c, <16 x i8> %d, <16 x i8> <i8 0, i8 4, i8 8, i8 12, i8 16, i8 20, i8 24, i8 28, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>)
%s = shufflevector <16 x i8> %t1, <16 x i8> %t2, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23>
ret <16 x i8> %s
}
; Two tbl2 lookups with identical constant masks, but the shufflevector mask has
; index 21 in lane 2, so lane 5 of %t2 is placed among the lanes otherwise taken
; from the low half of %t1. The expected output keeps both tbl.16b lookups and
; performs the final shuffle with a third tbl.16b whose mask is loaded from
; .LCPI9_1.
; NOTE(review): "incompatible" in the name presumably refers to this lane-2
; index; confirm against the fold's matching rules.
define <16 x i8> @shuffled_tbl2_to_tbl4_incompatible_shuffle(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c, <16 x i8> %d) {
; CHECK-LABEL: shuffled_tbl2_to_tbl4_incompatible_shuffle:
; CHECK: // %bb.0:
; CHECK-NEXT: adrp x8, .LCPI9_0
; CHECK-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
; CHECK-NEXT: // kill: def $q3 killed $q3 killed $q2_q3 def $q2_q3
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
; CHECK-NEXT: // kill: def $q2 killed $q2 killed $q2_q3 def $q2_q3
; CHECK-NEXT: ldr q4, [x8, :lo12:.LCPI9_0]
; CHECK-NEXT: adrp x8, .LCPI9_1
; CHECK-NEXT: tbl.16b v0, { v0, v1 }, v4
; CHECK-NEXT: tbl.16b v1, { v2, v3 }, v4
; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI9_1]
; CHECK-NEXT: tbl.16b v0, { v0, v1 }, v2
; CHECK-NEXT: ret
%t1 = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> <i8 0, i8 4, i8 8, i8 12, i8 16, i8 20, i8 24, i8 28, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>)
%t2 = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> %c, <16 x i8> %d, <16 x i8> <i8 0, i8 4, i8 8, i8 12, i8 16, i8 20, i8 24, i8 28, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>)
%s = shufflevector <16 x i8> %t1, <16 x i8> %t2, <16 x i32> <i32 0, i32 1, i32 21, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23>
ret <16 x i8> %s
}
; The FIRST tbl2 call's mask has 0 (rather than -1) in lane 8, so the two tbl2
; calls use different constant masks; the shufflevector mask also has index 21
; in lane 2. The expected output loads two distinct lookup masks (q4 from
; .LCPI10_0, q5 from .LCPI10_1), runs both tbl.16b lookups, then performs the
; final shuffle with a third tbl.16b using the mask from .LCPI10_2.
define <16 x i8> @shuffled_tbl2_to_tbl4_incompatible_tbl2_mask1(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c, <16 x i8> %d) {
; CHECK-LABEL: shuffled_tbl2_to_tbl4_incompatible_tbl2_mask1:
; CHECK: // %bb.0:
; CHECK-NEXT: adrp x8, .LCPI10_0
; CHECK-NEXT: adrp x9, .LCPI10_1
; CHECK-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
; CHECK-NEXT: // kill: def $q3 killed $q3 killed $q2_q3 def $q2_q3
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
; CHECK-NEXT: // kill: def $q2 killed $q2 killed $q2_q3 def $q2_q3
; CHECK-NEXT: ldr q4, [x8, :lo12:.LCPI10_0]
; CHECK-NEXT: adrp x8, .LCPI10_2
; CHECK-NEXT: ldr q5, [x9, :lo12:.LCPI10_1]
; CHECK-NEXT: tbl.16b v0, { v0, v1 }, v4
; CHECK-NEXT: tbl.16b v1, { v2, v3 }, v5
; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI10_2]
; CHECK-NEXT: tbl.16b v0, { v0, v1 }, v2
; CHECK-NEXT: ret
%t1 = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> <i8 0, i8 4, i8 8, i8 12, i8 16, i8 20, i8 24, i8 28, i8 0, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>)
%t2 = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> %c, <16 x i8> %d, <16 x i8> <i8 0, i8 4, i8 8, i8 12, i8 16, i8 20, i8 24, i8 28, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>)
%s = shufflevector <16 x i8> %t1, <16 x i8> %t2, <16 x i32> <i32 0, i32 1, i32 21, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23>
ret <16 x i8> %s
}
; The SECOND tbl2 call's mask has 0 (rather than -1) in lane 8, so the two tbl2
; calls use different constant masks; the shufflevector mask also has index 21
; in lane 2. The expected output loads two distinct lookup masks (q4 from
; .LCPI11_0, q5 from .LCPI11_1), runs both tbl.16b lookups, then performs the
; final shuffle with a third tbl.16b using the mask from .LCPI11_2.
define <16 x i8> @shuffled_tbl2_to_tbl4_incompatible_tbl2_mask2(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c, <16 x i8> %d) {
; CHECK-LABEL: shuffled_tbl2_to_tbl4_incompatible_tbl2_mask2:
; CHECK: // %bb.0:
; CHECK-NEXT: adrp x8, .LCPI11_0
; CHECK-NEXT: adrp x9, .LCPI11_1
; CHECK-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
; CHECK-NEXT: // kill: def $q3 killed $q3 killed $q2_q3 def $q2_q3
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
; CHECK-NEXT: // kill: def $q2 killed $q2 killed $q2_q3 def $q2_q3
; CHECK-NEXT: ldr q4, [x8, :lo12:.LCPI11_0]
; CHECK-NEXT: adrp x8, .LCPI11_2
; CHECK-NEXT: ldr q5, [x9, :lo12:.LCPI11_1]
; CHECK-NEXT: tbl.16b v0, { v0, v1 }, v4
; CHECK-NEXT: tbl.16b v1, { v2, v3 }, v5
; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI11_2]
; CHECK-NEXT: tbl.16b v0, { v0, v1 }, v2
; CHECK-NEXT: ret
%t1 = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> <i8 0, i8 4, i8 8, i8 12, i8 16, i8 20, i8 24, i8 28, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>)
%t2 = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> %c, <16 x i8> %d, <16 x i8> <i8 0, i8 4, i8 8, i8 12, i8 16, i8 20, i8 24, i8 28, i8 0, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>)
%s = shufflevector <16 x i8> %t1, <16 x i8> %t2, <16 x i32> <i32 0, i32 1, i32 21, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23>
ret <16 x i8> %s
}
; Declarations of the NEON tbl intrinsics called by the tests above. Only part
; of the list is visible here: the '@ -66,57 ...' line below is a diff hunk
; header marking lines that are not shown.
declare <8 x i8> @llvm.aarch64.neon.tbl1.v8i8(<16 x i8>, <8 x i8>) nounwind readnone
declare <16 x i8> @llvm.aarch64.neon.tbl1.v16i8(<16 x i8>, <16 x i8>) nounwind readnone
declare <8 x i8> @llvm.aarch64.neon.tbl2.v8i8(<16 x i8>, <16 x i8>, <8 x i8>) nounwind readnone
@ -66,57 +205,91 @@ declare <8 x i8> @llvm.aarch64.neon.tbl4.v8i8(<16 x i8>, <16 x i8>, <16 x i8>, <
declare <16 x i8> @llvm.aarch64.neon.tbl4.v16i8(<16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>) nounwind readnone
; tbx1 intrinsic with an <8 x i8> index vector. %A arrives in v0 and is the
; destination operand of the expected tbx.8b; %B (v1) is the single table
; register and %C (v2) holds the indices.
define <8 x i8> @tbx1_8b(<8 x i8> %A, <16 x i8> %B, <8 x i8> %C) nounwind {
; CHECK-LABEL: tbx1_8b:
; CHECK: // %bb.0:
; CHECK-NEXT: tbx.8b v0, { v1 }, v2
; CHECK-NEXT: ret
  %tmp3 = call <8 x i8> @llvm.aarch64.neon.tbx1.v8i8(<8 x i8> %A, <16 x i8> %B, <8 x i8> %C)
  ret <8 x i8> %tmp3
}
; tbx1 intrinsic with a <16 x i8> index vector. %A arrives in v0 and is the
; destination operand of the expected tbx.16b; %B (v1) is the single table
; register and %C (v2) holds the indices.
define <16 x i8> @tbx1_16b(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C) nounwind {
; CHECK-LABEL: tbx1_16b:
; CHECK: // %bb.0:
; CHECK-NEXT: tbx.16b v0, { v1 }, v2
; CHECK-NEXT: ret
  %tmp3 = call <16 x i8> @llvm.aarch64.neon.tbx1.v16i8(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C)
  ret <16 x i8> %tmp3
}
; tbx2 intrinsic with an <8 x i8> index vector. %A (v0) is the destination
; operand of the expected tbx.8b, the table is the register pair { v1, v2 }
; (%B, %C) and %D (v3) holds the indices.
define <8 x i8> @tbx2_8b(<8 x i8> %A, <16 x i8> %B, <16 x i8> %C, <8 x i8> %D) {
; CHECK-LABEL: tbx2_8b:
; CHECK: // %bb.0:
; CHECK-NEXT: // kill: def $q2 killed $q2 killed $q1_q2 def $q1_q2
; CHECK-NEXT: // kill: def $q1 killed $q1 killed $q1_q2 def $q1_q2
; CHECK-NEXT: tbx.8b v0, { v1, v2 }, v3
; CHECK-NEXT: ret
  %tmp3 = call <8 x i8> @llvm.aarch64.neon.tbx2.v8i8(<8 x i8> %A, <16 x i8> %B, <16 x i8> %C, <8 x i8> %D)
  ret <8 x i8> %tmp3
}
; tbx2 intrinsic with a <16 x i8> index vector. %A (v0) is the destination
; operand of the expected tbx.16b, the table is the register pair { v1, v2 }
; (%B, %C) and %D (v3) holds the indices.
define <16 x i8> @tbx2_16b(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D) {
; CHECK-LABEL: tbx2_16b:
; CHECK: // %bb.0:
; CHECK-NEXT: // kill: def $q2 killed $q2 killed $q1_q2 def $q1_q2
; CHECK-NEXT: // kill: def $q1 killed $q1 killed $q1_q2 def $q1_q2
; CHECK-NEXT: tbx.16b v0, { v1, v2 }, v3
; CHECK-NEXT: ret
  %tmp3 = call <16 x i8> @llvm.aarch64.neon.tbx2.v16i8(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D)
  ret <16 x i8> %tmp3
}
; tbx3 intrinsic with an <8 x i8> index vector. %A (v0) is the destination
; operand of the expected tbx.8b, the table is the register triple
; { v1, v2, v3 } (%B, %C, %D) and %E (v4) holds the indices.
define <8 x i8> @tbx3_8b(<8 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <8 x i8> %E) {
; CHECK-LABEL: tbx3_8b:
; CHECK: // %bb.0:
; CHECK-NEXT: // kill: def $q3 killed $q3 killed $q1_q2_q3 def $q1_q2_q3
; CHECK-NEXT: // kill: def $q2 killed $q2 killed $q1_q2_q3 def $q1_q2_q3
; CHECK-NEXT: // kill: def $q1 killed $q1 killed $q1_q2_q3 def $q1_q2_q3
; CHECK-NEXT: tbx.8b v0, { v1, v2, v3 }, v4
; CHECK-NEXT: ret
  %tmp3 = call <8 x i8> @llvm.aarch64.neon.tbx3.v8i8(<8 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <8 x i8> %E)
  ret <8 x i8> %tmp3
}
; tbx3 intrinsic with a <16 x i8> index vector. %A (v0) is the destination
; operand of the expected tbx.16b, the table is the register triple
; { v1, v2, v3 } (%B, %C, %D) and %E (v4) holds the indices.
define <16 x i8> @tbx3_16b(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E) {
; CHECK-LABEL: tbx3_16b:
; CHECK: // %bb.0:
; CHECK-NEXT: // kill: def $q3 killed $q3 killed $q1_q2_q3 def $q1_q2_q3
; CHECK-NEXT: // kill: def $q2 killed $q2 killed $q1_q2_q3 def $q1_q2_q3
; CHECK-NEXT: // kill: def $q1 killed $q1 killed $q1_q2_q3 def $q1_q2_q3
; CHECK-NEXT: tbx.16b v0, { v1, v2, v3 }, v4
; CHECK-NEXT: ret
  %tmp3 = call <16 x i8> @llvm.aarch64.neon.tbx3.v16i8(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E)
  ret <16 x i8> %tmp3
}
; tbx4 intrinsic with an <8 x i8> index vector. %A (v0) is the destination
; operand of the expected tbx.8b, the table is the register quad
; { v1, v2, v3, v4 } (%B through %E) and %F (v5) holds the indices.
define <8 x i8> @tbx4_8b(<8 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E, <8 x i8> %F) {
; CHECK-LABEL: tbx4_8b:
; CHECK: // %bb.0:
; CHECK-NEXT: // kill: def $q4 killed $q4 killed $q1_q2_q3_q4 def $q1_q2_q3_q4
; CHECK-NEXT: // kill: def $q3 killed $q3 killed $q1_q2_q3_q4 def $q1_q2_q3_q4
; CHECK-NEXT: // kill: def $q2 killed $q2 killed $q1_q2_q3_q4 def $q1_q2_q3_q4
; CHECK-NEXT: // kill: def $q1 killed $q1 killed $q1_q2_q3_q4 def $q1_q2_q3_q4
; CHECK-NEXT: tbx.8b v0, { v1, v2, v3, v4 }, v5
; CHECK-NEXT: ret
  %tmp3 = call <8 x i8> @llvm.aarch64.neon.tbx4.v8i8(<8 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E, <8 x i8> %F)
  ret <8 x i8> %tmp3
}
; tbx4 intrinsic with a <16 x i8> index vector. %A (v0) is the destination
; operand of the expected tbx.16b, the table is the register quad
; { v1, v2, v3, v4 } (%B through %E) and %F (v5) holds the indices.
define <16 x i8> @tbx4_16b(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E, <16 x i8> %F) {
; CHECK-LABEL: tbx4_16b:
; CHECK: // %bb.0:
; CHECK-NEXT: // kill: def $q4 killed $q4 killed $q1_q2_q3_q4 def $q1_q2_q3_q4
; CHECK-NEXT: // kill: def $q3 killed $q3 killed $q1_q2_q3_q4 def $q1_q2_q3_q4
; CHECK-NEXT: // kill: def $q2 killed $q2 killed $q1_q2_q3_q4 def $q1_q2_q3_q4
; CHECK-NEXT: // kill: def $q1 killed $q1 killed $q1_q2_q3_q4 def $q1_q2_q3_q4
; CHECK-NEXT: tbx.16b v0, { v1, v2, v3, v4 }, v5
; CHECK-NEXT: ret
  %tmp3 = call <16 x i8> @llvm.aarch64.neon.tbx4.v16i8(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E, <16 x i8> %F)
  ret <16 x i8> %tmp3
}