; NOTE(review): the next values look like file-viewer metadata rather than test content - 3026 lines, 193 KiB, LLVM.
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
|
|
; RUN: llc < %s -mtriple=i686-apple-darwin -mattr=avx2 | FileCheck %s --check-prefix=AVX2 --check-prefix=X86 --check-prefix=X86-AVX
|
|
; RUN: llc < %s -mtriple=i686-apple-darwin -mattr=+avx512f,+avx512bw,+avx512vl,+avx512dq -show-mc-encoding | FileCheck %s --check-prefix=AVX512VL --check-prefix=X86 --check-prefix=X86-AVX512VL
|
|
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=avx2 | FileCheck %s --check-prefix=AVX2 --check-prefix=X64 --check-prefix=X64-AVX
|
|
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+avx512f,+avx512bw,+avx512vl,+avx512dq -show-mc-encoding | FileCheck %s --check-prefix=AVX512VL --check-prefix=X64 --check-prefix=X64-AVX512VL
|
|
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl -mattr=+avx512bw | FileCheck %s --check-prefix=ALL --check-prefix=AVX512BW
|
|
; RUN: llc < %s -mtriple=i386-unknown-linux-gnu -mcpu=knl -mattr=+avx512bw | FileCheck %s --check-prefix=ALL --check-prefix=AVX512F-32
|
|
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl -mattr=+avx512bw -mattr=+avx512vl --show-mc-encoding | FileCheck %s
|
|
; RUN: llc < %s -disable-peephole -mtriple=i386-apple-darwin -mattr=-avx,+sse2 | FileCheck %s --check-prefix=SSE
|
|
|
|
define <32 x i8> @test_x86_avx2_padds_b(<32 x i8> %a0, <32 x i8> %a1) {
|
|
; AVX2-LABEL: test_x86_avx2_padds_b:
|
|
; AVX2: ## %bb.0:
|
|
; AVX2-NEXT: vpaddsb %ymm1, %ymm0, %ymm0
|
|
; AVX2-NEXT: ret{{[l|q]}}
|
|
;
|
|
; AVX512VL-LABEL: test_x86_avx2_padds_b:
|
|
; AVX512VL: ## %bb.0:
|
|
; AVX512VL-NEXT: vpaddsb %ymm1, %ymm0, %ymm0
|
|
; AVX512VL-NEXT: ret{{[l|q]}}
|
|
%1 = sext <32 x i8> %a0 to <32 x i16>
|
|
%2 = sext <32 x i8> %a1 to <32 x i16>
|
|
%3 = add nsw <32 x i16> %1, %2
|
|
%4 = icmp slt <32 x i16> %3, <i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127>
|
|
%5 = select <32 x i1> %4, <32 x i16> %3, <32 x i16> <i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127>
|
|
%6 = icmp sgt <32 x i16> %5, <i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128>
|
|
%7 = select <32 x i1> %6, <32 x i16> %5, <32 x i16> <i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128>
|
|
%8 = trunc <32 x i16> %7 to <32 x i8>
|
|
ret <32 x i8> %8
|
|
}
|
|
|
|
|
|
; Signed saturating word add spelled out in generic IR: sign-extend both
; inputs to i32, add, clamp to [-32768, 32767] with two icmp/select pairs,
; truncate back to i16. The check lines expect one vpaddsw on ymm registers.
define <16 x i16> @test_x86_avx2_padds_w(<16 x i16> %a0, <16 x i16> %a1) {
; AVX2-LABEL: test_x86_avx2_padds_w:
; AVX2: ## %bb.0:
; AVX2-NEXT: vpaddsw %ymm1, %ymm0, %ymm0
; AVX2-NEXT: ret{{[l|q]}}
;
; AVX512VL-LABEL: test_x86_avx2_padds_w:
; AVX512VL: ## %bb.0:
; AVX512VL-NEXT: vpaddsw %ymm1, %ymm0, %ymm0
; AVX512VL-NEXT: ret{{[l|q]}}
  ; Widen so the intermediate sum cannot wrap.
  %lhs.wide = sext <16 x i16> %a0 to <16 x i32>
  %rhs.wide = sext <16 x i16> %a1 to <16 x i32>
  %sum = add nsw <16 x i32> %lhs.wide, %rhs.wide
  ; Upper clamp: min(sum, 32767).
  %below.max = icmp slt <16 x i32> %sum, <i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767>
  %capped = select <16 x i1> %below.max, <16 x i32> %sum, <16 x i32> <i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767>
  ; Lower clamp: max(capped, -32768).
  %above.min = icmp sgt <16 x i32> %capped, <i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768>
  %clamped = select <16 x i1> %above.min, <16 x i32> %capped, <16 x i32> <i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768>
  %narrow = trunc <16 x i32> %clamped to <16 x i16>
  ret <16 x i16> %narrow
}
|
|
|
|
|
|
define <32 x i16> @test_mask_adds_epi16_rr_512(<32 x i16> %a, <32 x i16> %b) {
|
|
; X86-AVX-LABEL: test_mask_adds_epi16_rr_512:
|
|
; X86-AVX: ## %bb.0:
|
|
; X86-AVX-NEXT: vpaddsw %ymm2, %ymm0, %ymm0
|
|
; X86-AVX-NEXT: vpaddsw %ymm3, %ymm1, %ymm1
|
|
; X86-AVX-NEXT: retl
|
|
;
|
|
; X86-AVX512VL-LABEL: test_mask_adds_epi16_rr_512:
|
|
; X86-AVX512VL: ## %bb.0:
|
|
; X86-AVX512VL-NEXT: vpaddsw %zmm1, %zmm0, %zmm0
|
|
; X86-AVX512VL-NEXT: retl
|
|
;
|
|
; X64-AVX-LABEL: test_mask_adds_epi16_rr_512:
|
|
; X64-AVX: ## %bb.0:
|
|
; X64-AVX-NEXT: vpaddsw %ymm2, %ymm0, %ymm0
|
|
; X64-AVX-NEXT: vpaddsw %ymm3, %ymm1, %ymm1
|
|
; X64-AVX-NEXT: retq
|
|
;
|
|
; X64-AVX512VL-LABEL: test_mask_adds_epi16_rr_512:
|
|
; X64-AVX512VL: ## %bb.0:
|
|
; X64-AVX512VL-NEXT: vpaddsw %zmm1, %zmm0, %zmm0
|
|
; X64-AVX512VL-NEXT: retq
|
|
%1 = sext <32 x i16> %a to <32 x i32>
|
|
%2 = sext <32 x i16> %b to <32 x i32>
|
|
%3 = add nsw <32 x i32> %1, %2
|
|
%4 = icmp slt <32 x i32> %3, <i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767>
|
|
%5 = select <32 x i1> %4, <32 x i32> %3, <32 x i32> <i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767>
|
|
%6 = icmp sgt <32 x i32> %5, <i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768>
|
|
%7 = select <32 x i1> %6, <32 x i32> %5, <32 x i32> <i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768>
|
|
%8 = trunc <32 x i32> %7 to <32 x i16>
|
|
ret <32 x i16> %8
|
|
}
|
|
|
|
|
|
; Unsigned saturating byte add spelled out in generic IR: zero-extend both
; inputs to i16, add, cap the sum at 255 with one icmp/select pair, truncate
; back to i8. The check lines expect one vpaddusb on ymm registers.
define <32 x i8> @test_x86_avx2_paddus_b(<32 x i8> %a0, <32 x i8> %a1) {
; AVX2-LABEL: test_x86_avx2_paddus_b:
; AVX2: ## %bb.0:
; AVX2-NEXT: vpaddusb %ymm1, %ymm0, %ymm0
; AVX2-NEXT: ret{{[l|q]}}
;
; AVX512VL-LABEL: test_x86_avx2_paddus_b:
; AVX512VL: ## %bb.0:
; AVX512VL-NEXT: vpaddusb %ymm1, %ymm0, %ymm0
; AVX512VL-NEXT: ret{{[l|q]}}
  ; Widen so the intermediate sum cannot wrap.
  %lhs.wide = zext <32 x i8> %a0 to <32 x i16>
  %rhs.wide = zext <32 x i8> %a1 to <32 x i16>
  %sum = add nsw <32 x i16> %lhs.wide, %rhs.wide
  ; Unsigned cap: min(sum, 255).
  %below.max = icmp ult <32 x i16> %sum, <i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255>
  %capped = select <32 x i1> %below.max, <32 x i16> %sum, <32 x i16> <i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255>
  %narrow = trunc <32 x i16> %capped to <32 x i8>
  ret <32 x i8> %narrow
}
|
|
|
|
|
|
; Unsigned saturating word add spelled out in generic IR: zero-extend both
; inputs to i32, add, cap the sum at 65535 with one icmp/select pair, truncate
; back to i16. The check lines expect one vpaddusw on ymm registers.
define <16 x i16> @test_x86_avx2_paddus_w(<16 x i16> %a0, <16 x i16> %a1) {
; AVX2-LABEL: test_x86_avx2_paddus_w:
; AVX2: ## %bb.0:
; AVX2-NEXT: vpaddusw %ymm1, %ymm0, %ymm0
; AVX2-NEXT: ret{{[l|q]}}
;
; AVX512VL-LABEL: test_x86_avx2_paddus_w:
; AVX512VL: ## %bb.0:
; AVX512VL-NEXT: vpaddusw %ymm1, %ymm0, %ymm0
; AVX512VL-NEXT: ret{{[l|q]}}
  ; Widen so the intermediate sum cannot wrap.
  %lhs.wide = zext <16 x i16> %a0 to <16 x i32>
  %rhs.wide = zext <16 x i16> %a1 to <16 x i32>
  %sum = add nsw <16 x i32> %lhs.wide, %rhs.wide
  ; Unsigned cap: min(sum, 65535).
  %below.max = icmp ult <16 x i32> %sum, <i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535>
  %capped = select <16 x i1> %below.max, <16 x i32> %sum, <16 x i32> <i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535>
  %narrow = trunc <16 x i32> %capped to <16 x i16>
  ret <16 x i16> %narrow
}
|
|
|
|
|
|
define <32 x i16> @test_mask_adds_epu16_rr_512(<32 x i16> %a, <32 x i16> %b) {
|
|
; X86-AVX-LABEL: test_mask_adds_epu16_rr_512:
|
|
; X86-AVX: ## %bb.0:
|
|
; X86-AVX-NEXT: vpaddusw %ymm2, %ymm0, %ymm0
|
|
; X86-AVX-NEXT: vpaddusw %ymm3, %ymm1, %ymm1
|
|
; X86-AVX-NEXT: retl
|
|
;
|
|
; X86-AVX512VL-LABEL: test_mask_adds_epu16_rr_512:
|
|
; X86-AVX512VL: ## %bb.0:
|
|
; X86-AVX512VL-NEXT: vpaddusw %zmm1, %zmm0, %zmm0
|
|
; X86-AVX512VL-NEXT: retl
|
|
;
|
|
; X64-AVX-LABEL: test_mask_adds_epu16_rr_512:
|
|
; X64-AVX: ## %bb.0:
|
|
; X64-AVX-NEXT: vpaddusw %ymm2, %ymm0, %ymm0
|
|
; X64-AVX-NEXT: vpaddusw %ymm3, %ymm1, %ymm1
|
|
; X64-AVX-NEXT: retq
|
|
;
|
|
; X64-AVX512VL-LABEL: test_mask_adds_epu16_rr_512:
|
|
; X64-AVX512VL: ## %bb.0:
|
|
; X64-AVX512VL-NEXT: vpaddusw %zmm1, %zmm0, %zmm0
|
|
; X64-AVX512VL-NEXT: retq
|
|
%1 = zext <32 x i16> %a to <32 x i32>
|
|
%2 = zext <32 x i16> %b to <32 x i32>
|
|
%3 = add nsw <32 x i32> %1, %2
|
|
%4 = icmp ult <32 x i32> %3, <i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535>
|
|
%5 = select <32 x i1> %4, <32 x i32> %3, <32 x i32> <i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535>
|
|
%6 = trunc <32 x i32> %5 to <32 x i16>
|
|
ret <32 x i16> %6
|
|
}
|
|
|
|
define <32 x i8> @test_x86_avx2_psubs_b(<32 x i8> %a0, <32 x i8> %a1) {
|
|
; AVX2-LABEL: test_x86_avx2_psubs_b:
|
|
; AVX2: ## %bb.0:
|
|
; AVX2-NEXT: vpsubsb %ymm1, %ymm0, %ymm0
|
|
; AVX2-NEXT: ret{{[l|q]}}
|
|
;
|
|
; AVX512VL-LABEL: test_x86_avx2_psubs_b:
|
|
; AVX512VL: ## %bb.0:
|
|
; AVX512VL-NEXT: vpsubsb %ymm1, %ymm0, %ymm0
|
|
; AVX512VL-NEXT: ret{{[l|q]}}
|
|
%1 = sext <32 x i8> %a0 to <32 x i16>
|
|
%2 = sext <32 x i8> %a1 to <32 x i16>
|
|
%3 = sub nsw <32 x i16> %1, %2
|
|
%4 = icmp slt <32 x i16> %3, <i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127>
|
|
%5 = select <32 x i1> %4, <32 x i16> %3, <32 x i16> <i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127>
|
|
%6 = icmp sgt <32 x i16> %5, <i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128>
|
|
%7 = select <32 x i1> %6, <32 x i16> %5, <32 x i16> <i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128>
|
|
%8 = trunc <32 x i16> %7 to <32 x i8>
|
|
ret <32 x i8> %8
|
|
}
|
|
|
|
|
|
; Signed saturating word subtract spelled out in generic IR: sign-extend both
; inputs to i32, subtract, clamp to [-32768, 32767] with two icmp/select
; pairs, truncate back to i16. The check lines expect one vpsubsw on ymm
; registers.
define <16 x i16> @test_x86_avx2_psubs_w(<16 x i16> %a0, <16 x i16> %a1) {
; AVX2-LABEL: test_x86_avx2_psubs_w:
; AVX2: ## %bb.0:
; AVX2-NEXT: vpsubsw %ymm1, %ymm0, %ymm0
; AVX2-NEXT: ret{{[l|q]}}
;
; AVX512VL-LABEL: test_x86_avx2_psubs_w:
; AVX512VL: ## %bb.0:
; AVX512VL-NEXT: vpsubsw %ymm1, %ymm0, %ymm0
; AVX512VL-NEXT: ret{{[l|q]}}
  ; Widen so the intermediate difference cannot wrap.
  %lhs.wide = sext <16 x i16> %a0 to <16 x i32>
  %rhs.wide = sext <16 x i16> %a1 to <16 x i32>
  %diff = sub nsw <16 x i32> %lhs.wide, %rhs.wide
  ; Upper clamp: min(diff, 32767).
  %below.max = icmp slt <16 x i32> %diff, <i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767>
  %capped = select <16 x i1> %below.max, <16 x i32> %diff, <16 x i32> <i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767>
  ; Lower clamp: max(capped, -32768).
  %above.min = icmp sgt <16 x i32> %capped, <i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768>
  %clamped = select <16 x i1> %above.min, <16 x i32> %capped, <16 x i32> <i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768>
  %narrow = trunc <16 x i32> %clamped to <16 x i16>
  ret <16 x i16> %narrow
}
|
|
|
|
|
|
define <32 x i16> @test_mask_subs_epi16_rr_512(<32 x i16> %a, <32 x i16> %b) {
|
|
; X86-AVX-LABEL: test_mask_subs_epi16_rr_512:
|
|
; X86-AVX: ## %bb.0:
|
|
; X86-AVX-NEXT: vpsubsw %ymm2, %ymm0, %ymm0
|
|
; X86-AVX-NEXT: vpsubsw %ymm3, %ymm1, %ymm1
|
|
; X86-AVX-NEXT: retl
|
|
;
|
|
; X86-AVX512VL-LABEL: test_mask_subs_epi16_rr_512:
|
|
; X86-AVX512VL: ## %bb.0:
|
|
; X86-AVX512VL-NEXT: vpsubsw %zmm1, %zmm0, %zmm0
|
|
; X86-AVX512VL-NEXT: retl
|
|
;
|
|
; X64-AVX-LABEL: test_mask_subs_epi16_rr_512:
|
|
; X64-AVX: ## %bb.0:
|
|
; X64-AVX-NEXT: vpsubsw %ymm2, %ymm0, %ymm0
|
|
; X64-AVX-NEXT: vpsubsw %ymm3, %ymm1, %ymm1
|
|
; X64-AVX-NEXT: retq
|
|
;
|
|
; X64-AVX512VL-LABEL: test_mask_subs_epi16_rr_512:
|
|
; X64-AVX512VL: ## %bb.0:
|
|
; X64-AVX512VL-NEXT: vpsubsw %zmm1, %zmm0, %zmm0
|
|
; X64-AVX512VL-NEXT: retq
|
|
%1 = sext <32 x i16> %a to <32 x i32>
|
|
%2 = sext <32 x i16> %b to <32 x i32>
|
|
%3 = sub nsw <32 x i32> %1, %2
|
|
%4 = icmp slt <32 x i32> %3, <i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767>
|
|
%5 = select <32 x i1> %4, <32 x i32> %3, <32 x i32> <i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767>
|
|
%6 = icmp sgt <32 x i32> %5, <i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768>
|
|
%7 = select <32 x i1> %6, <32 x i32> %5, <32 x i32> <i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768>
|
|
%8 = trunc <32 x i32> %7 to <32 x i16>
|
|
ret <32 x i16> %8
|
|
}
|
|
|
|
|
|
; Unsigned saturating byte subtract written as umax(%a0, %a1) - %a1, which is
; %a0 - %a1 when %a0 is the larger value and 0 otherwise. The check lines
; expect the icmp/select/sub trio to be selected as one vpsubusb on ymm
; registers.
define <32 x i8> @test_x86_avx2_psubus_b(<32 x i8> %a0, <32 x i8> %a1) {
; AVX2-LABEL: test_x86_avx2_psubus_b:
; AVX2: ## %bb.0:
; AVX2-NEXT: vpsubusb %ymm1, %ymm0, %ymm0
; AVX2-NEXT: ret{{[l|q]}}
;
; AVX512VL-LABEL: test_x86_avx2_psubus_b:
; AVX512VL: ## %bb.0:
; AVX512VL-NEXT: vpsubusb %ymm1, %ymm0, %ymm0
; AVX512VL-NEXT: ret{{[l|q]}}
  %lhs.bigger = icmp ugt <32 x i8> %a0, %a1
  ; Per-lane unsigned maximum of the two inputs.
  %umax = select <32 x i1> %lhs.bigger, <32 x i8> %a0, <32 x i8> %a1
  %diff = sub <32 x i8> %umax, %a1
  ret <32 x i8> %diff
}
|
|
|
|
|
|
; Unsigned saturating word subtract written as umax(%a0, %a1) - %a1, which is
; %a0 - %a1 when %a0 is the larger value and 0 otherwise. The check lines
; expect the icmp/select/sub trio to be selected as one vpsubusw on ymm
; registers.
define <16 x i16> @test_x86_avx2_psubus_w(<16 x i16> %a0, <16 x i16> %a1) {
; AVX2-LABEL: test_x86_avx2_psubus_w:
; AVX2: ## %bb.0:
; AVX2-NEXT: vpsubusw %ymm1, %ymm0, %ymm0
; AVX2-NEXT: ret{{[l|q]}}
;
; AVX512VL-LABEL: test_x86_avx2_psubus_w:
; AVX512VL: ## %bb.0:
; AVX512VL-NEXT: vpsubusw %ymm1, %ymm0, %ymm0
; AVX512VL-NEXT: ret{{[l|q]}}
  %lhs.bigger = icmp ugt <16 x i16> %a0, %a1
  ; Per-lane unsigned maximum of the two inputs.
  %umax = select <16 x i1> %lhs.bigger, <16 x i16> %a0, <16 x i16> %a1
  %diff = sub <16 x i16> %umax, %a1
  ret <16 x i16> %diff
}
|
|
|
|
|
|
; 32-lane unsigned saturating word subtract written as umax(%a, %b) - %b.
; Two of the check groups expect a pair of ymm vpsubusw instructions; the
; other two expect a single zmm vpsubusw.
define <32 x i16> @test_mask_subs_epu16_rr_512(<32 x i16> %a, <32 x i16> %b) {
; X86-AVX-LABEL: test_mask_subs_epu16_rr_512:
; X86-AVX: ## %bb.0:
; X86-AVX-NEXT: vpsubusw %ymm2, %ymm0, %ymm0
; X86-AVX-NEXT: vpsubusw %ymm3, %ymm1, %ymm1
; X86-AVX-NEXT: retl
;
; X86-AVX512VL-LABEL: test_mask_subs_epu16_rr_512:
; X86-AVX512VL: ## %bb.0:
; X86-AVX512VL-NEXT: vpsubusw %zmm1, %zmm0, %zmm0
; X86-AVX512VL-NEXT: retl
;
; X64-AVX-LABEL: test_mask_subs_epu16_rr_512:
; X64-AVX: ## %bb.0:
; X64-AVX-NEXT: vpsubusw %ymm2, %ymm0, %ymm0
; X64-AVX-NEXT: vpsubusw %ymm3, %ymm1, %ymm1
; X64-AVX-NEXT: retq
;
; X64-AVX512VL-LABEL: test_mask_subs_epu16_rr_512:
; X64-AVX512VL: ## %bb.0:
; X64-AVX512VL-NEXT: vpsubusw %zmm1, %zmm0, %zmm0
; X64-AVX512VL-NEXT: retq
  %lhs.bigger = icmp ugt <32 x i16> %a, %b
  ; Per-lane unsigned maximum of the two inputs.
  %umax = select <32 x i1> %lhs.bigger, <32 x i16> %a, <32 x i16> %b
  %diff = sub <32 x i16> %umax, %b
  ret <32 x i16> %diff
}
|
|
|
|
define <32 x i16> @test_mask_adds_epi16_rr_512_avx512(<32 x i16> %a, <32 x i16> %b) {
|
|
; AVX512BW-LABEL: test_mask_adds_epi16_rr_512_avx512:
|
|
; AVX512BW: ## %bb.0:
|
|
; AVX512BW-NEXT: vpaddsw %zmm1, %zmm0, %zmm0
|
|
; AVX512BW-NEXT: retq
|
|
;
|
|
; AVX512F-32-LABEL: test_mask_adds_epi16_rr_512_avx512:
|
|
; AVX512F-32: # %bb.0:
|
|
; AVX512F-32-NEXT: vpaddsw %zmm1, %zmm0, %zmm0
|
|
; AVX512F-32-NEXT: retl
|
|
%1 = sext <32 x i16> %a to <32 x i32>
|
|
%2 = sext <32 x i16> %b to <32 x i32>
|
|
%3 = add nsw <32 x i32> %1, %2
|
|
%4 = icmp slt <32 x i32> %3, <i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767>
|
|
%5 = select <32 x i1> %4, <32 x i32> %3, <32 x i32> <i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767>
|
|
%6 = icmp sgt <32 x i32> %5, <i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768>
|
|
%7 = select <32 x i1> %6, <32 x i32> %5, <32 x i32> <i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768>
|
|
%8 = trunc <32 x i32> %7 to <32 x i16>
|
|
ret <32 x i16> %8
|
|
}
|
|
|
|
define <32 x i16> @test_mask_adds_epi16_rrk_512(<32 x i16> %a, <32 x i16> %b, <32 x i16> %passThru, i32 %mask) {
|
|
; AVX512BW-LABEL: test_mask_adds_epi16_rrk_512:
|
|
; AVX512BW: ## %bb.0:
|
|
; AVX512BW-NEXT: kmovd %edi, %k1
|
|
; AVX512BW-NEXT: vpaddsw %zmm1, %zmm0, %zmm2 {%k1}
|
|
; AVX512BW-NEXT: vmovdqa64 %zmm2, %zmm0
|
|
; AVX512BW-NEXT: retq
|
|
;
|
|
; AVX512F-32-LABEL: test_mask_adds_epi16_rrk_512:
|
|
; AVX512F-32: # %bb.0:
|
|
; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
|
|
; AVX512F-32-NEXT: vpaddsw %zmm1, %zmm0, %zmm2 {%k1}
|
|
; AVX512F-32-NEXT: vmovdqa64 %zmm2, %zmm0
|
|
; AVX512F-32-NEXT: retl
|
|
%1 = sext <32 x i16> %a to <32 x i32>
|
|
%2 = sext <32 x i16> %b to <32 x i32>
|
|
%3 = add nsw <32 x i32> %1, %2
|
|
%4 = icmp slt <32 x i32> %3, <i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767>
|
|
%5 = select <32 x i1> %4, <32 x i32> %3, <32 x i32> <i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767>
|
|
%6 = icmp sgt <32 x i32> %5, <i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768>
|
|
%7 = select <32 x i1> %6, <32 x i32> %5, <32 x i32> <i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768>
|
|
%8 = trunc <32 x i32> %7 to <32 x i16>
|
|
%9 = bitcast i32 %mask to <32 x i1>
|
|
%10 = select <32 x i1> %9, <32 x i16> %8, <32 x i16> %passThru
|
|
ret <32 x i16> %10
|
|
}
|
|
|
|
define <32 x i16> @test_mask_adds_epi16_rrkz_512(<32 x i16> %a, <32 x i16> %b, i32 %mask) {
|
|
; AVX512BW-LABEL: test_mask_adds_epi16_rrkz_512:
|
|
; AVX512BW: ## %bb.0:
|
|
; AVX512BW-NEXT: kmovd %edi, %k1
|
|
; AVX512BW-NEXT: vpaddsw %zmm1, %zmm0, %zmm0 {%k1} {z}
|
|
; AVX512BW-NEXT: retq
|
|
;
|
|
; AVX512F-32-LABEL: test_mask_adds_epi16_rrkz_512:
|
|
; AVX512F-32: # %bb.0:
|
|
; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
|
|
; AVX512F-32-NEXT: vpaddsw %zmm1, %zmm0, %zmm0 {%k1} {z}
|
|
; AVX512F-32-NEXT: retl
|
|
%1 = sext <32 x i16> %a to <32 x i32>
|
|
%2 = sext <32 x i16> %b to <32 x i32>
|
|
%3 = add nsw <32 x i32> %1, %2
|
|
%4 = icmp slt <32 x i32> %3, <i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767>
|
|
%5 = select <32 x i1> %4, <32 x i32> %3, <32 x i32> <i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767>
|
|
%6 = icmp sgt <32 x i32> %5, <i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768>
|
|
%7 = select <32 x i1> %6, <32 x i32> %5, <32 x i32> <i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768>
|
|
%8 = trunc <32 x i32> %7 to <32 x i16>
|
|
%9 = bitcast i32 %mask to <32 x i1>
|
|
%10 = select <32 x i1> %9, <32 x i16> %8, <32 x i16> zeroinitializer
|
|
ret <32 x i16> %10
|
|
}
|
|
|
|
define <32 x i16> @test_mask_adds_epi16_rm_512(<32 x i16> %a, <32 x i16>* %ptr_b) {
|
|
; AVX512BW-LABEL: test_mask_adds_epi16_rm_512:
|
|
; AVX512BW: ## %bb.0:
|
|
; AVX512BW-NEXT: vpaddsw (%rdi), %zmm0, %zmm0
|
|
; AVX512BW-NEXT: retq
|
|
;
|
|
; AVX512F-32-LABEL: test_mask_adds_epi16_rm_512:
|
|
; AVX512F-32: # %bb.0:
|
|
; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %eax
|
|
; AVX512F-32-NEXT: vpaddsw (%eax), %zmm0, %zmm0
|
|
; AVX512F-32-NEXT: retl
|
|
%b = load <32 x i16>, <32 x i16>* %ptr_b
|
|
%1 = sext <32 x i16> %a to <32 x i32>
|
|
%2 = sext <32 x i16> %b to <32 x i32>
|
|
%3 = add nsw <32 x i32> %1, %2
|
|
%4 = icmp slt <32 x i32> %3, <i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767>
|
|
%5 = select <32 x i1> %4, <32 x i32> %3, <32 x i32> <i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767>
|
|
%6 = icmp sgt <32 x i32> %5, <i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768>
|
|
%7 = select <32 x i1> %6, <32 x i32> %5, <32 x i32> <i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768>
|
|
%8 = trunc <32 x i32> %7 to <32 x i16>
|
|
ret <32 x i16> %8
|
|
}
|
|
|
|
define <32 x i16> @test_mask_adds_epi16_rmk_512(<32 x i16> %a, <32 x i16>* %ptr_b, <32 x i16> %passThru, i32 %mask) {
|
|
; AVX512BW-LABEL: test_mask_adds_epi16_rmk_512:
|
|
; AVX512BW: ## %bb.0:
|
|
; AVX512BW-NEXT: kmovd %esi, %k1
|
|
; AVX512BW-NEXT: vpaddsw (%rdi), %zmm0, %zmm1 {%k1}
|
|
; AVX512BW-NEXT: vmovdqa64 %zmm1, %zmm0
|
|
; AVX512BW-NEXT: retq
|
|
;
|
|
; AVX512F-32-LABEL: test_mask_adds_epi16_rmk_512:
|
|
; AVX512F-32: # %bb.0:
|
|
; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %eax
|
|
; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
|
|
; AVX512F-32-NEXT: vpaddsw (%eax), %zmm0, %zmm1 {%k1}
|
|
; AVX512F-32-NEXT: vmovdqa64 %zmm1, %zmm0
|
|
; AVX512F-32-NEXT: retl
|
|
%b = load <32 x i16>, <32 x i16>* %ptr_b
|
|
%1 = sext <32 x i16> %a to <32 x i32>
|
|
%2 = sext <32 x i16> %b to <32 x i32>
|
|
%3 = add nsw <32 x i32> %1, %2
|
|
%4 = icmp slt <32 x i32> %3, <i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767>
|
|
%5 = select <32 x i1> %4, <32 x i32> %3, <32 x i32> <i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767>
|
|
%6 = icmp sgt <32 x i32> %5, <i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768>
|
|
%7 = select <32 x i1> %6, <32 x i32> %5, <32 x i32> <i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768>
|
|
%8 = trunc <32 x i32> %7 to <32 x i16>
|
|
%9 = bitcast i32 %mask to <32 x i1>
|
|
%10 = select <32 x i1> %9, <32 x i16> %8, <32 x i16> %passThru
|
|
ret <32 x i16> %10
|
|
}
|
|
|
|
define <32 x i16> @test_mask_adds_epi16_rmkz_512(<32 x i16> %a, <32 x i16>* %ptr_b, i32 %mask) {
|
|
; AVX512BW-LABEL: test_mask_adds_epi16_rmkz_512:
|
|
; AVX512BW: ## %bb.0:
|
|
; AVX512BW-NEXT: kmovd %esi, %k1
|
|
; AVX512BW-NEXT: vpaddsw (%rdi), %zmm0, %zmm0 {%k1} {z}
|
|
; AVX512BW-NEXT: retq
|
|
;
|
|
; AVX512F-32-LABEL: test_mask_adds_epi16_rmkz_512:
|
|
; AVX512F-32: # %bb.0:
|
|
; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %eax
|
|
; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
|
|
; AVX512F-32-NEXT: vpaddsw (%eax), %zmm0, %zmm0 {%k1} {z}
|
|
; AVX512F-32-NEXT: retl
|
|
%b = load <32 x i16>, <32 x i16>* %ptr_b
|
|
%1 = sext <32 x i16> %a to <32 x i32>
|
|
%2 = sext <32 x i16> %b to <32 x i32>
|
|
%3 = add nsw <32 x i32> %1, %2
|
|
%4 = icmp slt <32 x i32> %3, <i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767>
|
|
%5 = select <32 x i1> %4, <32 x i32> %3, <32 x i32> <i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767>
|
|
%6 = icmp sgt <32 x i32> %5, <i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768>
|
|
%7 = select <32 x i1> %6, <32 x i32> %5, <32 x i32> <i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768>
|
|
%8 = trunc <32 x i32> %7 to <32 x i16>
|
|
%9 = bitcast i32 %mask to <32 x i1>
|
|
%10 = select <32 x i1> %9, <32 x i16> %8, <32 x i16> zeroinitializer
|
|
ret <32 x i16> %10
|
|
}
|
|
|
|
define <64 x i16> @test_mask_adds_epi16_rr_1024(<64 x i16> %a, <64 x i16> %b) {
|
|
; AVX512BW-LABEL: test_mask_adds_epi16_rr_1024:
|
|
; AVX512BW: ## %bb.0:
|
|
; AVX512BW-NEXT: vpaddsw %zmm2, %zmm0, %zmm0
|
|
; AVX512BW-NEXT: vpaddsw %zmm3, %zmm1, %zmm1
|
|
; AVX512BW-NEXT: retq
|
|
;
|
|
; AVX512F-32-LABEL: test_mask_adds_epi16_rr_1024:
|
|
; AVX512F-32: # %bb.0:
|
|
; AVX512F-32-NEXT: pushl %ebp
|
|
; AVX512F-32-NEXT: .cfi_def_cfa_offset 8
|
|
; AVX512F-32-NEXT: .cfi_offset %ebp, -8
|
|
; AVX512F-32-NEXT: movl %esp, %ebp
|
|
; AVX512F-32-NEXT: .cfi_def_cfa_register %ebp
|
|
; AVX512F-32-NEXT: andl $-64, %esp
|
|
; AVX512F-32-NEXT: subl $64, %esp
|
|
; AVX512F-32-NEXT: vpaddsw %zmm2, %zmm0, %zmm0
|
|
; AVX512F-32-NEXT: vpaddsw 8(%ebp), %zmm1, %zmm1
|
|
; AVX512F-32-NEXT: movl %ebp, %esp
|
|
; AVX512F-32-NEXT: popl %ebp
|
|
; AVX512F-32-NEXT: .cfi_def_cfa %esp, 4
|
|
; AVX512F-32-NEXT: retl
|
|
%1 = sext <64 x i16> %a to <64 x i32>
|
|
%2 = sext <64 x i16> %b to <64 x i32>
|
|
%3 = add nsw <64 x i32> %1, %2
|
|
%4 = icmp slt <64 x i32> %3, <i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767>
|
|
%5 = select <64 x i1> %4, <64 x i32> %3, <64 x i32> <i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767>
|
|
%6 = icmp sgt <64 x i32> %5, <i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768>
|
|
%7 = select <64 x i1> %6, <64 x i32> %5, <64 x i32> <i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768>
|
|
%8 = trunc <64 x i32> %7 to <64 x i16>
|
|
ret <64 x i16> %8
|
|
}
|
|
|
|
define <32 x i16> @test_mask_subs_epi16_rr_512_avx512(<32 x i16> %a, <32 x i16> %b) {
|
|
; AVX512BW-LABEL: test_mask_subs_epi16_rr_512_avx512:
|
|
; AVX512BW: ## %bb.0:
|
|
; AVX512BW-NEXT: vpsubsw %zmm1, %zmm0, %zmm0
|
|
; AVX512BW-NEXT: retq
|
|
;
|
|
; AVX512F-32-LABEL: test_mask_subs_epi16_rr_512_avx512:
|
|
; AVX512F-32: # %bb.0:
|
|
; AVX512F-32-NEXT: vpsubsw %zmm1, %zmm0, %zmm0
|
|
; AVX512F-32-NEXT: retl
|
|
%1 = sext <32 x i16> %a to <32 x i32>
|
|
%2 = sext <32 x i16> %b to <32 x i32>
|
|
%3 = sub nsw <32 x i32> %1, %2
|
|
%4 = icmp slt <32 x i32> %3, <i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767>
|
|
%5 = select <32 x i1> %4, <32 x i32> %3, <32 x i32> <i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767>
|
|
%6 = icmp sgt <32 x i32> %5, <i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768>
|
|
%7 = select <32 x i1> %6, <32 x i32> %5, <32 x i32> <i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768>
|
|
%8 = trunc <32 x i32> %7 to <32 x i16>
|
|
ret <32 x i16> %8
|
|
}
|
|
|
|
define <32 x i16> @test_mask_subs_epi16_rrk_512(<32 x i16> %a, <32 x i16> %b, <32 x i16> %passThru, i32 %mask) {
|
|
; AVX512BW-LABEL: test_mask_subs_epi16_rrk_512:
|
|
; AVX512BW: ## %bb.0:
|
|
; AVX512BW-NEXT: kmovd %edi, %k1
|
|
; AVX512BW-NEXT: vpsubsw %zmm1, %zmm0, %zmm2 {%k1}
|
|
; AVX512BW-NEXT: vmovdqa64 %zmm2, %zmm0
|
|
; AVX512BW-NEXT: retq
|
|
;
|
|
; AVX512F-32-LABEL: test_mask_subs_epi16_rrk_512:
|
|
; AVX512F-32: # %bb.0:
|
|
; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
|
|
; AVX512F-32-NEXT: vpsubsw %zmm1, %zmm0, %zmm2 {%k1}
|
|
; AVX512F-32-NEXT: vmovdqa64 %zmm2, %zmm0
|
|
; AVX512F-32-NEXT: retl
|
|
%1 = sext <32 x i16> %a to <32 x i32>
|
|
%2 = sext <32 x i16> %b to <32 x i32>
|
|
%3 = sub nsw <32 x i32> %1, %2
|
|
%4 = icmp slt <32 x i32> %3, <i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767>
|
|
%5 = select <32 x i1> %4, <32 x i32> %3, <32 x i32> <i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767>
|
|
%6 = icmp sgt <32 x i32> %5, <i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768>
|
|
%7 = select <32 x i1> %6, <32 x i32> %5, <32 x i32> <i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768>
|
|
%8 = trunc <32 x i32> %7 to <32 x i16>
|
|
%9 = bitcast i32 %mask to <32 x i1>
|
|
%10 = select <32 x i1> %9, <32 x i16> %8, <32 x i16> %passThru
|
|
ret <32 x i16> %10
|
|
}
|
|
|
|
define <32 x i16> @test_mask_subs_epi16_rrkz_512(<32 x i16> %a, <32 x i16> %b, i32 %mask) {
|
|
; AVX512BW-LABEL: test_mask_subs_epi16_rrkz_512:
|
|
; AVX512BW: ## %bb.0:
|
|
; AVX512BW-NEXT: kmovd %edi, %k1
|
|
; AVX512BW-NEXT: vpsubsw %zmm1, %zmm0, %zmm0 {%k1} {z}
|
|
; AVX512BW-NEXT: retq
|
|
;
|
|
; AVX512F-32-LABEL: test_mask_subs_epi16_rrkz_512:
|
|
; AVX512F-32: # %bb.0:
|
|
; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
|
|
; AVX512F-32-NEXT: vpsubsw %zmm1, %zmm0, %zmm0 {%k1} {z}
|
|
; AVX512F-32-NEXT: retl
|
|
%1 = sext <32 x i16> %a to <32 x i32>
|
|
%2 = sext <32 x i16> %b to <32 x i32>
|
|
%3 = sub nsw <32 x i32> %1, %2
|
|
%4 = icmp slt <32 x i32> %3, <i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767>
|
|
%5 = select <32 x i1> %4, <32 x i32> %3, <32 x i32> <i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767>
|
|
%6 = icmp sgt <32 x i32> %5, <i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768>
|
|
%7 = select <32 x i1> %6, <32 x i32> %5, <32 x i32> <i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768>
|
|
%8 = trunc <32 x i32> %7 to <32 x i16>
|
|
%9 = bitcast i32 %mask to <32 x i1>
|
|
%10 = select <32 x i1> %9, <32 x i16> %8, <32 x i16> zeroinitializer
|
|
ret <32 x i16> %10
|
|
}
|
|
|
|
define <32 x i16> @test_mask_subs_epi16_rm_512(<32 x i16> %a, <32 x i16>* %ptr_b) {
|
|
; AVX512BW-LABEL: test_mask_subs_epi16_rm_512:
|
|
; AVX512BW: ## %bb.0:
|
|
; AVX512BW-NEXT: vpsubsw (%rdi), %zmm0, %zmm0
|
|
; AVX512BW-NEXT: retq
|
|
;
|
|
; AVX512F-32-LABEL: test_mask_subs_epi16_rm_512:
|
|
; AVX512F-32: # %bb.0:
|
|
; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %eax
|
|
; AVX512F-32-NEXT: vpsubsw (%eax), %zmm0, %zmm0
|
|
; AVX512F-32-NEXT: retl
|
|
%b = load <32 x i16>, <32 x i16>* %ptr_b
|
|
%1 = sext <32 x i16> %a to <32 x i32>
|
|
%2 = sext <32 x i16> %b to <32 x i32>
|
|
%3 = sub nsw <32 x i32> %1, %2
|
|
%4 = icmp slt <32 x i32> %3, <i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767>
|
|
%5 = select <32 x i1> %4, <32 x i32> %3, <32 x i32> <i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767>
|
|
%6 = icmp sgt <32 x i32> %5, <i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768>
|
|
%7 = select <32 x i1> %6, <32 x i32> %5, <32 x i32> <i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768>
|
|
%8 = trunc <32 x i32> %7 to <32 x i16>
|
|
ret <32 x i16> %8
|
|
}
|
|
|
|
define <32 x i16> @test_mask_subs_epi16_rmk_512(<32 x i16> %a, <32 x i16>* %ptr_b, <32 x i16> %passThru, i32 %mask) {
|
|
; AVX512BW-LABEL: test_mask_subs_epi16_rmk_512:
|
|
; AVX512BW: ## %bb.0:
|
|
; AVX512BW-NEXT: kmovd %esi, %k1
|
|
; AVX512BW-NEXT: vpsubsw (%rdi), %zmm0, %zmm1 {%k1}
|
|
; AVX512BW-NEXT: vmovdqa64 %zmm1, %zmm0
|
|
; AVX512BW-NEXT: retq
|
|
;
|
|
; AVX512F-32-LABEL: test_mask_subs_epi16_rmk_512:
|
|
; AVX512F-32: # %bb.0:
|
|
; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %eax
|
|
; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
|
|
; AVX512F-32-NEXT: vpsubsw (%eax), %zmm0, %zmm1 {%k1}
|
|
; AVX512F-32-NEXT: vmovdqa64 %zmm1, %zmm0
|
|
; AVX512F-32-NEXT: retl
|
|
%b = load <32 x i16>, <32 x i16>* %ptr_b
|
|
%1 = sext <32 x i16> %a to <32 x i32>
|
|
%2 = sext <32 x i16> %b to <32 x i32>
|
|
%3 = sub nsw <32 x i32> %1, %2
|
|
%4 = icmp slt <32 x i32> %3, <i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767>
|
|
%5 = select <32 x i1> %4, <32 x i32> %3, <32 x i32> <i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767>
|
|
%6 = icmp sgt <32 x i32> %5, <i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768>
|
|
%7 = select <32 x i1> %6, <32 x i32> %5, <32 x i32> <i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768>
|
|
%8 = trunc <32 x i32> %7 to <32 x i16>
|
|
%9 = bitcast i32 %mask to <32 x i1>
|
|
%10 = select <32 x i1> %9, <32 x i16> %8, <32 x i16> %passThru
|
|
ret <32 x i16> %10
|
|
}
|
|
|
|
define <32 x i16> @test_mask_subs_epi16_rmkz_512(<32 x i16> %a, <32 x i16>* %ptr_b, i32 %mask) {
|
|
; AVX512BW-LABEL: test_mask_subs_epi16_rmkz_512:
|
|
; AVX512BW: ## %bb.0:
|
|
; AVX512BW-NEXT: kmovd %esi, %k1
|
|
; AVX512BW-NEXT: vpsubsw (%rdi), %zmm0, %zmm0 {%k1} {z}
|
|
; AVX512BW-NEXT: retq
|
|
;
|
|
; AVX512F-32-LABEL: test_mask_subs_epi16_rmkz_512:
|
|
; AVX512F-32: # %bb.0:
|
|
; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %eax
|
|
; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
|
|
; AVX512F-32-NEXT: vpsubsw (%eax), %zmm0, %zmm0 {%k1} {z}
|
|
; AVX512F-32-NEXT: retl
|
|
%b = load <32 x i16>, <32 x i16>* %ptr_b
|
|
%1 = sext <32 x i16> %a to <32 x i32>
|
|
%2 = sext <32 x i16> %b to <32 x i32>
|
|
%3 = sub nsw <32 x i32> %1, %2
|
|
%4 = icmp slt <32 x i32> %3, <i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767>
|
|
%5 = select <32 x i1> %4, <32 x i32> %3, <32 x i32> <i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767>
|
|
%6 = icmp sgt <32 x i32> %5, <i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768>
|
|
%7 = select <32 x i1> %6, <32 x i32> %5, <32 x i32> <i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768>
|
|
%8 = trunc <32 x i32> %7 to <32 x i16>
|
|
%9 = bitcast i32 %mask to <32 x i1>
|
|
%10 = select <32 x i1> %9, <32 x i16> %8, <32 x i16> zeroinitializer
|
|
ret <32 x i16> %10
|
|
}
|
|
|
|
define <64 x i16> @test_mask_subs_epi16_rr_1024(<64 x i16> %a, <64 x i16> %b) {
|
|
; AVX512BW-LABEL: test_mask_subs_epi16_rr_1024:
|
|
; AVX512BW: ## %bb.0:
|
|
; AVX512BW-NEXT: vpsubsw %zmm2, %zmm0, %zmm0
|
|
; AVX512BW-NEXT: vpsubsw %zmm3, %zmm1, %zmm1
|
|
; AVX512BW-NEXT: retq
|
|
;
|
|
; AVX512F-32-LABEL: test_mask_subs_epi16_rr_1024:
|
|
; AVX512F-32: # %bb.0:
|
|
; AVX512F-32-NEXT: pushl %ebp
|
|
; AVX512F-32-NEXT: .cfi_def_cfa_offset 8
|
|
; AVX512F-32-NEXT: .cfi_offset %ebp, -8
|
|
; AVX512F-32-NEXT: movl %esp, %ebp
|
|
; AVX512F-32-NEXT: .cfi_def_cfa_register %ebp
|
|
; AVX512F-32-NEXT: andl $-64, %esp
|
|
; AVX512F-32-NEXT: subl $64, %esp
|
|
; AVX512F-32-NEXT: vpsubsw %zmm2, %zmm0, %zmm0
|
|
; AVX512F-32-NEXT: vpsubsw 8(%ebp), %zmm1, %zmm1
|
|
; AVX512F-32-NEXT: movl %ebp, %esp
|
|
; AVX512F-32-NEXT: popl %ebp
|
|
; AVX512F-32-NEXT: .cfi_def_cfa %esp, 4
|
|
; AVX512F-32-NEXT: retl
|
|
%1 = sext <64 x i16> %a to <64 x i32>
|
|
%2 = sext <64 x i16> %b to <64 x i32>
|
|
%3 = sub nsw <64 x i32> %1, %2
|
|
%4 = icmp slt <64 x i32> %3, <i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767>
|
|
%5 = select <64 x i1> %4, <64 x i32> %3, <64 x i32> <i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767>
|
|
%6 = icmp sgt <64 x i32> %5, <i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768>
|
|
%7 = select <64 x i1> %6, <64 x i32> %5, <64 x i32> <i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768>
|
|
%8 = trunc <64 x i32> %7 to <64 x i16>
|
|
ret <64 x i16> %8
|
|
}
|
|
|
|
define <32 x i16> @test_mask_adds_epu16_rr_512_avx512(<32 x i16> %a, <32 x i16> %b) {
|
|
; AVX512BW-LABEL: test_mask_adds_epu16_rr_512_avx512:
|
|
; AVX512BW: ## %bb.0:
|
|
; AVX512BW-NEXT: vpaddusw %zmm1, %zmm0, %zmm0
|
|
; AVX512BW-NEXT: retq
|
|
;
|
|
; AVX512F-32-LABEL: test_mask_adds_epu16_rr_512_avx512:
|
|
; AVX512F-32: # %bb.0:
|
|
; AVX512F-32-NEXT: vpaddusw %zmm1, %zmm0, %zmm0
|
|
; AVX512F-32-NEXT: retl
|
|
%1 = zext <32 x i16> %a to <32 x i32>
|
|
%2 = zext <32 x i16> %b to <32 x i32>
|
|
%3 = add nsw <32 x i32> %1, %2
|
|
%4 = icmp ult <32 x i32> %3, <i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535>
|
|
%5 = select <32 x i1> %4, <32 x i32> %3, <32 x i32> <i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535>
|
|
%6 = trunc <32 x i32> %5 to <32 x i16>
|
|
ret <32 x i16> %6
|
|
}
|
|
|
|
define <32 x i16> @test_mask_adds_epu16_rrk_512(<32 x i16> %a, <32 x i16> %b, <32 x i16> %passThru, i32 %mask) {
|
|
; AVX512BW-LABEL: test_mask_adds_epu16_rrk_512:
|
|
; AVX512BW: ## %bb.0:
|
|
; AVX512BW-NEXT: kmovd %edi, %k1
|
|
; AVX512BW-NEXT: vpaddusw %zmm1, %zmm0, %zmm2 {%k1}
|
|
; AVX512BW-NEXT: vmovdqa64 %zmm2, %zmm0
|
|
; AVX512BW-NEXT: retq
|
|
;
|
|
; AVX512F-32-LABEL: test_mask_adds_epu16_rrk_512:
|
|
; AVX512F-32: # %bb.0:
|
|
; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
|
|
; AVX512F-32-NEXT: vpaddusw %zmm1, %zmm0, %zmm2 {%k1}
|
|
; AVX512F-32-NEXT: vmovdqa64 %zmm2, %zmm0
|
|
; AVX512F-32-NEXT: retl
|
|
%1 = zext <32 x i16> %a to <32 x i32>
|
|
%2 = zext <32 x i16> %b to <32 x i32>
|
|
%3 = add nsw <32 x i32> %1, %2
|
|
%4 = icmp ult <32 x i32> %3, <i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535>
|
|
%5 = select <32 x i1> %4, <32 x i32> %3, <32 x i32> <i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535>
|
|
%6 = trunc <32 x i32> %5 to <32 x i16>
|
|
%7 = bitcast i32 %mask to <32 x i1>
|
|
%8 = select <32 x i1> %7, <32 x i16> %6, <32 x i16> %passThru
|
|
ret <32 x i16> %8
|
|
}
|
|
|
|
define <32 x i16> @test_mask_adds_epu16_rrkz_512(<32 x i16> %a, <32 x i16> %b, i32 %mask) {
|
|
; AVX512BW-LABEL: test_mask_adds_epu16_rrkz_512:
|
|
; AVX512BW: ## %bb.0:
|
|
; AVX512BW-NEXT: kmovd %edi, %k1
|
|
; AVX512BW-NEXT: vpaddusw %zmm1, %zmm0, %zmm0 {%k1} {z}
|
|
; AVX512BW-NEXT: retq
|
|
;
|
|
; AVX512F-32-LABEL: test_mask_adds_epu16_rrkz_512:
|
|
; AVX512F-32: # %bb.0:
|
|
; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
|
|
; AVX512F-32-NEXT: vpaddusw %zmm1, %zmm0, %zmm0 {%k1} {z}
|
|
; AVX512F-32-NEXT: retl
|
|
%1 = zext <32 x i16> %a to <32 x i32>
|
|
%2 = zext <32 x i16> %b to <32 x i32>
|
|
%3 = add nsw <32 x i32> %1, %2
|
|
%4 = icmp ult <32 x i32> %3, <i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535>
|
|
%5 = select <32 x i1> %4, <32 x i32> %3, <32 x i32> <i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535>
|
|
%6 = trunc <32 x i32> %5 to <32 x i16>
|
|
%7 = bitcast i32 %mask to <32 x i1>
|
|
%8 = select <32 x i1> %7, <32 x i16> %6, <32 x i16> zeroinitializer
|
|
ret <32 x i16> %8
|
|
}
|
|
|
|
define <32 x i16> @test_mask_adds_epu16_rm_512(<32 x i16> %a, <32 x i16>* %ptr_b) {
|
|
; AVX512BW-LABEL: test_mask_adds_epu16_rm_512:
|
|
; AVX512BW: ## %bb.0:
|
|
; AVX512BW-NEXT: vpaddusw (%rdi), %zmm0, %zmm0
|
|
; AVX512BW-NEXT: retq
|
|
;
|
|
; AVX512F-32-LABEL: test_mask_adds_epu16_rm_512:
|
|
; AVX512F-32: # %bb.0:
|
|
; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %eax
|
|
; AVX512F-32-NEXT: vpaddusw (%eax), %zmm0, %zmm0
|
|
; AVX512F-32-NEXT: retl
|
|
%b = load <32 x i16>, <32 x i16>* %ptr_b
|
|
%1 = zext <32 x i16> %a to <32 x i32>
|
|
%2 = zext <32 x i16> %b to <32 x i32>
|
|
%3 = add nsw <32 x i32> %1, %2
|
|
%4 = icmp ult <32 x i32> %3, <i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535>
|
|
%5 = select <32 x i1> %4, <32 x i32> %3, <32 x i32> <i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535>
|
|
%6 = trunc <32 x i32> %5 to <32 x i16>
|
|
ret <32 x i16> %6
|
|
}
|
|
|
|
define <32 x i16> @test_mask_adds_epu16_rmk_512(<32 x i16> %a, <32 x i16>* %ptr_b, <32 x i16> %passThru, i32 %mask) {
|
|
; AVX512BW-LABEL: test_mask_adds_epu16_rmk_512:
|
|
; AVX512BW: ## %bb.0:
|
|
; AVX512BW-NEXT: kmovd %esi, %k1
|
|
; AVX512BW-NEXT: vpaddusw (%rdi), %zmm0, %zmm1 {%k1}
|
|
; AVX512BW-NEXT: vmovdqa64 %zmm1, %zmm0
|
|
; AVX512BW-NEXT: retq
|
|
;
|
|
; AVX512F-32-LABEL: test_mask_adds_epu16_rmk_512:
|
|
; AVX512F-32: # %bb.0:
|
|
; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %eax
|
|
; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
|
|
; AVX512F-32-NEXT: vpaddusw (%eax), %zmm0, %zmm1 {%k1}
|
|
; AVX512F-32-NEXT: vmovdqa64 %zmm1, %zmm0
|
|
; AVX512F-32-NEXT: retl
|
|
%b = load <32 x i16>, <32 x i16>* %ptr_b
|
|
%1 = zext <32 x i16> %a to <32 x i32>
|
|
%2 = zext <32 x i16> %b to <32 x i32>
|
|
%3 = add nsw <32 x i32> %1, %2
|
|
%4 = icmp ult <32 x i32> %3, <i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535>
|
|
%5 = select <32 x i1> %4, <32 x i32> %3, <32 x i32> <i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535>
|
|
%6 = trunc <32 x i32> %5 to <32 x i16>
|
|
%7 = bitcast i32 %mask to <32 x i1>
|
|
%8 = select <32 x i1> %7, <32 x i16> %6, <32 x i16> %passThru
|
|
ret <32 x i16> %8
|
|
}
|
|
|
|
define <32 x i16> @test_mask_adds_epu16_rmkz_512(<32 x i16> %a, <32 x i16>* %ptr_b, i32 %mask) {
|
|
; AVX512BW-LABEL: test_mask_adds_epu16_rmkz_512:
|
|
; AVX512BW: ## %bb.0:
|
|
; AVX512BW-NEXT: kmovd %esi, %k1
|
|
; AVX512BW-NEXT: vpaddusw (%rdi), %zmm0, %zmm0 {%k1} {z}
|
|
; AVX512BW-NEXT: retq
|
|
;
|
|
; AVX512F-32-LABEL: test_mask_adds_epu16_rmkz_512:
|
|
; AVX512F-32: # %bb.0:
|
|
; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %eax
|
|
; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
|
|
; AVX512F-32-NEXT: vpaddusw (%eax), %zmm0, %zmm0 {%k1} {z}
|
|
; AVX512F-32-NEXT: retl
|
|
%b = load <32 x i16>, <32 x i16>* %ptr_b
|
|
%1 = zext <32 x i16> %a to <32 x i32>
|
|
%2 = zext <32 x i16> %b to <32 x i32>
|
|
%3 = add nsw <32 x i32> %1, %2
|
|
%4 = icmp ult <32 x i32> %3, <i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535>
|
|
%5 = select <32 x i1> %4, <32 x i32> %3, <32 x i32> <i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535>
|
|
%6 = trunc <32 x i32> %5 to <32 x i16>
|
|
%7 = bitcast i32 %mask to <32 x i1>
|
|
%8 = select <32 x i1> %7, <32 x i16> %6, <32 x i16> zeroinitializer
|
|
ret <32 x i16> %8
|
|
}
|
|
|
|
define <64 x i16> @test_mask_adds_epu16_rr_1024(<64 x i16> %a, <64 x i16> %b) {
|
|
; AVX512BW-LABEL: test_mask_adds_epu16_rr_1024:
|
|
; AVX512BW: ## %bb.0:
|
|
; AVX512BW-NEXT: vpaddusw %zmm2, %zmm0, %zmm0
|
|
; AVX512BW-NEXT: vpaddusw %zmm3, %zmm1, %zmm1
|
|
; AVX512BW-NEXT: retq
|
|
;
|
|
; AVX512F-32-LABEL: test_mask_adds_epu16_rr_1024:
|
|
; AVX512F-32: # %bb.0:
|
|
; AVX512F-32-NEXT: pushl %ebp
|
|
; AVX512F-32-NEXT: .cfi_def_cfa_offset 8
|
|
; AVX512F-32-NEXT: .cfi_offset %ebp, -8
|
|
; AVX512F-32-NEXT: movl %esp, %ebp
|
|
; AVX512F-32-NEXT: .cfi_def_cfa_register %ebp
|
|
; AVX512F-32-NEXT: andl $-64, %esp
|
|
; AVX512F-32-NEXT: subl $64, %esp
|
|
; AVX512F-32-NEXT: vpaddusw %zmm2, %zmm0, %zmm0
|
|
; AVX512F-32-NEXT: vpaddusw 8(%ebp), %zmm1, %zmm1
|
|
; AVX512F-32-NEXT: movl %ebp, %esp
|
|
; AVX512F-32-NEXT: popl %ebp
|
|
; AVX512F-32-NEXT: .cfi_def_cfa %esp, 4
|
|
; AVX512F-32-NEXT: retl
|
|
%1 = zext <64 x i16> %a to <64 x i32>
|
|
%2 = zext <64 x i16> %b to <64 x i32>
|
|
%3 = add nsw <64 x i32> %1, %2
|
|
%4 = icmp ult <64 x i32> %3, <i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535>
|
|
%5 = select <64 x i1> %4, <64 x i32> %3, <64 x i32> <i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535>
|
|
%6 = trunc <64 x i32> %5 to <64 x i16>
|
|
ret <64 x i16> %6
|
|
}
|
|
|
|
; Unsigned saturating i16 subtract written as (unsigned max of %a and %b)
; minus %b, so lanes where %a is not above %b yield zero. The assertions
; expect a single register-register vpsubusw.
define <32 x i16> @test_mask_subs_epu16_rr_512_avx512(<32 x i16> %a, <32 x i16> %b) {
; AVX512BW-LABEL: test_mask_subs_epu16_rr_512_avx512:
; AVX512BW: ## %bb.0:
; AVX512BW-NEXT: vpsubusw %zmm1, %zmm0, %zmm0
; AVX512BW-NEXT: retq
;
; AVX512F-32-LABEL: test_mask_subs_epu16_rr_512_avx512:
; AVX512F-32: # %bb.0:
; AVX512F-32-NEXT: vpsubusw %zmm1, %zmm0, %zmm0
; AVX512F-32-NEXT: retl
  %a.above.b = icmp ugt <32 x i16> %a, %b
  %umax = select <32 x i1> %a.above.b, <32 x i16> %a, <32 x i16> %b
  %diff = sub <32 x i16> %umax, %b
  ret <32 x i16> %diff
}
|
|
|
|
; Unsigned saturating i16 subtract (unsigned max of %a and %b, minus %b)
; followed by a per-lane select on the bits of %mask: set bits take the
; difference, clear bits take %passThru. The assertions expect vpsubusw
; writing under {%k1} into the pass-through register.
define <32 x i16> @test_mask_subs_epu16_rrk_512(<32 x i16> %a, <32 x i16> %b, <32 x i16> %passThru, i32 %mask) {
; AVX512BW-LABEL: test_mask_subs_epu16_rrk_512:
; AVX512BW: ## %bb.0:
; AVX512BW-NEXT: kmovd %edi, %k1
; AVX512BW-NEXT: vpsubusw %zmm1, %zmm0, %zmm2 {%k1}
; AVX512BW-NEXT: vmovdqa64 %zmm2, %zmm0
; AVX512BW-NEXT: retq
;
; AVX512F-32-LABEL: test_mask_subs_epu16_rrk_512:
; AVX512F-32: # %bb.0:
; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
; AVX512F-32-NEXT: vpsubusw %zmm1, %zmm0, %zmm2 {%k1}
; AVX512F-32-NEXT: vmovdqa64 %zmm2, %zmm0
; AVX512F-32-NEXT: retl
  %a.above.b = icmp ugt <32 x i16> %a, %b
  %umax = select <32 x i1> %a.above.b, <32 x i16> %a, <32 x i16> %b
  %diff = sub <32 x i16> %umax, %b
  %k = bitcast i32 %mask to <32 x i1>
  %merged = select <32 x i1> %k, <32 x i16> %diff, <32 x i16> %passThru
  ret <32 x i16> %merged
}
|
|
|
|
; Unsigned saturating i16 subtract (unsigned max of %a and %b, minus %b)
; followed by a per-lane select on the bits of %mask: set bits take the
; difference, clear bits become zero. The assertions expect vpsubusw with
; {%k1} {z}.
define <32 x i16> @test_mask_subs_epu16_rrkz_512(<32 x i16> %a, <32 x i16> %b, i32 %mask) {
; AVX512BW-LABEL: test_mask_subs_epu16_rrkz_512:
; AVX512BW: ## %bb.0:
; AVX512BW-NEXT: kmovd %edi, %k1
; AVX512BW-NEXT: vpsubusw %zmm1, %zmm0, %zmm0 {%k1} {z}
; AVX512BW-NEXT: retq
;
; AVX512F-32-LABEL: test_mask_subs_epu16_rrkz_512:
; AVX512F-32: # %bb.0:
; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
; AVX512F-32-NEXT: vpsubusw %zmm1, %zmm0, %zmm0 {%k1} {z}
; AVX512F-32-NEXT: retl
  %a.above.b = icmp ugt <32 x i16> %a, %b
  %umax = select <32 x i1> %a.above.b, <32 x i16> %a, <32 x i16> %b
  %diff = sub <32 x i16> %umax, %b
  %k = bitcast i32 %mask to <32 x i1>
  %zeroed = select <32 x i1> %k, <32 x i16> %diff, <32 x i16> zeroinitializer
  ret <32 x i16> %zeroed
}
|
|
|
|
; Unsigned saturating i16 subtract whose second operand is loaded from
; %ptr_b, written as (unsigned max of %a and the loaded value) minus the
; loaded value. The assertions expect one vpsubusw that folds the load.
define <32 x i16> @test_mask_subs_epu16_rm_512(<32 x i16> %a, <32 x i16>* %ptr_b) {
; AVX512BW-LABEL: test_mask_subs_epu16_rm_512:
; AVX512BW: ## %bb.0:
; AVX512BW-NEXT: vpsubusw (%rdi), %zmm0, %zmm0
; AVX512BW-NEXT: retq
;
; AVX512F-32-LABEL: test_mask_subs_epu16_rm_512:
; AVX512F-32: # %bb.0:
; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %eax
; AVX512F-32-NEXT: vpsubusw (%eax), %zmm0, %zmm0
; AVX512F-32-NEXT: retl
  %b.mem = load <32 x i16>, <32 x i16>* %ptr_b
  %a.above.b = icmp ugt <32 x i16> %a, %b.mem
  %umax = select <32 x i1> %a.above.b, <32 x i16> %a, <32 x i16> %b.mem
  %diff = sub <32 x i16> %umax, %b.mem
  ret <32 x i16> %diff
}
|
|
|
|
; Unsigned saturating i16 subtract with a loaded second operand, followed by a
; per-lane select on the bits of %mask: set bits take the difference, clear
; bits take %passThru. The assertions expect vpsubusw with a folded load
; writing under {%k1} into the pass-through register.
define <32 x i16> @test_mask_subs_epu16_rmk_512(<32 x i16> %a, <32 x i16>* %ptr_b, <32 x i16> %passThru, i32 %mask) {
; AVX512BW-LABEL: test_mask_subs_epu16_rmk_512:
; AVX512BW: ## %bb.0:
; AVX512BW-NEXT: kmovd %esi, %k1
; AVX512BW-NEXT: vpsubusw (%rdi), %zmm0, %zmm1 {%k1}
; AVX512BW-NEXT: vmovdqa64 %zmm1, %zmm0
; AVX512BW-NEXT: retq
;
; AVX512F-32-LABEL: test_mask_subs_epu16_rmk_512:
; AVX512F-32: # %bb.0:
; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %eax
; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
; AVX512F-32-NEXT: vpsubusw (%eax), %zmm0, %zmm1 {%k1}
; AVX512F-32-NEXT: vmovdqa64 %zmm1, %zmm0
; AVX512F-32-NEXT: retl
  %b.mem = load <32 x i16>, <32 x i16>* %ptr_b
  %a.above.b = icmp ugt <32 x i16> %a, %b.mem
  %umax = select <32 x i1> %a.above.b, <32 x i16> %a, <32 x i16> %b.mem
  %diff = sub <32 x i16> %umax, %b.mem
  %k = bitcast i32 %mask to <32 x i1>
  %merged = select <32 x i1> %k, <32 x i16> %diff, <32 x i16> %passThru
  ret <32 x i16> %merged
}
|
|
|
|
; Unsigned saturating i16 subtract with a loaded second operand, followed by a
; per-lane select on the bits of %mask: set bits take the difference, clear
; bits become zero. The assertions expect vpsubusw with a folded load using
; {%k1} {z}.
define <32 x i16> @test_mask_subs_epu16_rmkz_512(<32 x i16> %a, <32 x i16>* %ptr_b, i32 %mask) {
; AVX512BW-LABEL: test_mask_subs_epu16_rmkz_512:
; AVX512BW: ## %bb.0:
; AVX512BW-NEXT: kmovd %esi, %k1
; AVX512BW-NEXT: vpsubusw (%rdi), %zmm0, %zmm0 {%k1} {z}
; AVX512BW-NEXT: retq
;
; AVX512F-32-LABEL: test_mask_subs_epu16_rmkz_512:
; AVX512F-32: # %bb.0:
; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %eax
; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
; AVX512F-32-NEXT: vpsubusw (%eax), %zmm0, %zmm0 {%k1} {z}
; AVX512F-32-NEXT: retl
  %b.mem = load <32 x i16>, <32 x i16>* %ptr_b
  %a.above.b = icmp ugt <32 x i16> %a, %b.mem
  %umax = select <32 x i1> %a.above.b, <32 x i16> %a, <32 x i16> %b.mem
  %diff = sub <32 x i16> %umax, %b.mem
  %k = bitcast i32 %mask to <32 x i1>
  %zeroed = select <32 x i1> %k, <32 x i16> %diff, <32 x i16> zeroinitializer
  ret <32 x i16> %zeroed
}
|
|
|
|
; Unsigned saturating i16 subtract on a 64-element vector, written as
; (unsigned max of %a and %b) minus %b. The assertions expect two vpsubusw
; instructions on zmm registers; in the 32-bit run the second one reads its
; operand from 8(%ebp) after the stack has been realigned to 64 bytes.
define <64 x i16> @test_mask_subs_epu16_rr_1024(<64 x i16> %a, <64 x i16> %b) {
; AVX512BW-LABEL: test_mask_subs_epu16_rr_1024:
; AVX512BW: ## %bb.0:
; AVX512BW-NEXT: vpsubusw %zmm2, %zmm0, %zmm0
; AVX512BW-NEXT: vpsubusw %zmm3, %zmm1, %zmm1
; AVX512BW-NEXT: retq
;
; AVX512F-32-LABEL: test_mask_subs_epu16_rr_1024:
; AVX512F-32: # %bb.0:
; AVX512F-32-NEXT: pushl %ebp
; AVX512F-32-NEXT: .cfi_def_cfa_offset 8
; AVX512F-32-NEXT: .cfi_offset %ebp, -8
; AVX512F-32-NEXT: movl %esp, %ebp
; AVX512F-32-NEXT: .cfi_def_cfa_register %ebp
; AVX512F-32-NEXT: andl $-64, %esp
; AVX512F-32-NEXT: subl $64, %esp
; AVX512F-32-NEXT: vpsubusw %zmm2, %zmm0, %zmm0
; AVX512F-32-NEXT: vpsubusw 8(%ebp), %zmm1, %zmm1
; AVX512F-32-NEXT: movl %ebp, %esp
; AVX512F-32-NEXT: popl %ebp
; AVX512F-32-NEXT: .cfi_def_cfa %esp, 4
; AVX512F-32-NEXT: retl
  %a.above.b = icmp ugt <64 x i16> %a, %b
  %umax = select <64 x i1> %a.above.b, <64 x i16> %a, <64 x i16> %b
  %diff = sub <64 x i16> %umax, %b
  ret <64 x i16> %diff
}
|
|
|
|
; Signed saturating i16 add on 8 lanes: sign-extend both inputs to i32, add,
; clamp the sum to the range -32768..32767 with two compare/select pairs, then
; truncate. The assertions expect a single register-register vpaddsw.
define <8 x i16> @test_mask_adds_epi16_rr_128(<8 x i16> %a, <8 x i16> %b) {
; CHECK-LABEL: test_mask_adds_epi16_rr_128:
; CHECK: ## %bb.0:
; CHECK-NEXT: vpaddsw %xmm1, %xmm0, %xmm0
; CHECK-NEXT: retq
  %a.wide = sext <8 x i16> %a to <8 x i32>
  %b.wide = sext <8 x i16> %b to <8 x i32>
  %sum = add nsw <8 x i32> %a.wide, %b.wide
  %below.max = icmp slt <8 x i32> %sum, <i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767>
  %clamp.hi = select <8 x i1> %below.max, <8 x i32> %sum, <8 x i32> <i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767>
  %above.min = icmp sgt <8 x i32> %clamp.hi, <i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768>
  %clamp.lo = select <8 x i1> %above.min, <8 x i32> %clamp.hi, <8 x i32> <i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768>
  %sat = trunc <8 x i32> %clamp.lo to <8 x i16>
  ret <8 x i16> %sat
}
|
|
|
|
; Signed saturating i16 add on 8 lanes (widen, add, clamp to -32768..32767,
; truncate) followed by a per-lane select on the bits of %mask: set bits take
; the saturated sum, clear bits take %passThru. The assertions expect vpaddsw
; writing under {%k1} into the pass-through register.
define <8 x i16> @test_mask_adds_epi16_rrk_128(<8 x i16> %a, <8 x i16> %b, <8 x i16> %passThru, i8 %mask) {
; CHECK-LABEL: test_mask_adds_epi16_rrk_128:
; CHECK: ## %bb.0:
; CHECK-NEXT: kmovd %edi, %k1
; CHECK-NEXT: vpaddsw %xmm1, %xmm0, %xmm2 {%k1}
; CHECK-NEXT: vmovdqa %xmm2, %xmm0
; CHECK-NEXT: retq
  %a.wide = sext <8 x i16> %a to <8 x i32>
  %b.wide = sext <8 x i16> %b to <8 x i32>
  %sum = add nsw <8 x i32> %a.wide, %b.wide
  %below.max = icmp slt <8 x i32> %sum, <i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767>
  %clamp.hi = select <8 x i1> %below.max, <8 x i32> %sum, <8 x i32> <i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767>
  %above.min = icmp sgt <8 x i32> %clamp.hi, <i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768>
  %clamp.lo = select <8 x i1> %above.min, <8 x i32> %clamp.hi, <8 x i32> <i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768>
  %sat = trunc <8 x i32> %clamp.lo to <8 x i16>
  %k = bitcast i8 %mask to <8 x i1>
  %merged = select <8 x i1> %k, <8 x i16> %sat, <8 x i16> %passThru
  ret <8 x i16> %merged
}
|
|
|
|
; Signed saturating i16 add on 8 lanes (widen, add, clamp to -32768..32767,
; truncate) followed by a per-lane select on the bits of %mask: set bits take
; the saturated sum, clear bits become zero. The assertions expect vpaddsw
; with {%k1} {z}.
define <8 x i16> @test_mask_adds_epi16_rrkz_128(<8 x i16> %a, <8 x i16> %b, i8 %mask) {
; CHECK-LABEL: test_mask_adds_epi16_rrkz_128:
; CHECK: ## %bb.0:
; CHECK-NEXT: kmovd %edi, %k1
; CHECK-NEXT: vpaddsw %xmm1, %xmm0, %xmm0 {%k1} {z}
; CHECK-NEXT: retq
  %a.wide = sext <8 x i16> %a to <8 x i32>
  %b.wide = sext <8 x i16> %b to <8 x i32>
  %sum = add nsw <8 x i32> %a.wide, %b.wide
  %below.max = icmp slt <8 x i32> %sum, <i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767>
  %clamp.hi = select <8 x i1> %below.max, <8 x i32> %sum, <8 x i32> <i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767>
  %above.min = icmp sgt <8 x i32> %clamp.hi, <i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768>
  %clamp.lo = select <8 x i1> %above.min, <8 x i32> %clamp.hi, <8 x i32> <i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768>
  %sat = trunc <8 x i32> %clamp.lo to <8 x i16>
  %k = bitcast i8 %mask to <8 x i1>
  %zeroed = select <8 x i1> %k, <8 x i16> %sat, <8 x i16> zeroinitializer
  ret <8 x i16> %zeroed
}
|
|
|
|
; Signed saturating i16 add on 8 lanes whose second operand is loaded from
; %ptr_b: widen, add, clamp to -32768..32767, truncate. The assertions expect
; one vpaddsw that folds the load.
define <8 x i16> @test_mask_adds_epi16_rm_128(<8 x i16> %a, <8 x i16>* %ptr_b) {
; CHECK-LABEL: test_mask_adds_epi16_rm_128:
; CHECK: ## %bb.0:
; CHECK-NEXT: vpaddsw (%rdi), %xmm0, %xmm0
; CHECK-NEXT: retq
  %b.mem = load <8 x i16>, <8 x i16>* %ptr_b
  %a.wide = sext <8 x i16> %a to <8 x i32>
  %b.wide = sext <8 x i16> %b.mem to <8 x i32>
  %sum = add nsw <8 x i32> %a.wide, %b.wide
  %below.max = icmp slt <8 x i32> %sum, <i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767>
  %clamp.hi = select <8 x i1> %below.max, <8 x i32> %sum, <8 x i32> <i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767>
  %above.min = icmp sgt <8 x i32> %clamp.hi, <i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768>
  %clamp.lo = select <8 x i1> %above.min, <8 x i32> %clamp.hi, <8 x i32> <i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768>
  %sat = trunc <8 x i32> %clamp.lo to <8 x i16>
  ret <8 x i16> %sat
}
|
|
|
|
; Signed saturating i16 add on 8 lanes with a loaded second operand, followed
; by a per-lane select on the bits of %mask: set bits take the saturated sum,
; clear bits take %passThru. The assertions expect vpaddsw with a folded load
; writing under {%k1} into the pass-through register.
define <8 x i16> @test_mask_adds_epi16_rmk_128(<8 x i16> %a, <8 x i16>* %ptr_b, <8 x i16> %passThru, i8 %mask) {
; CHECK-LABEL: test_mask_adds_epi16_rmk_128:
; CHECK: ## %bb.0:
; CHECK-NEXT: kmovd %esi, %k1
; CHECK-NEXT: vpaddsw (%rdi), %xmm0, %xmm1 {%k1}
; CHECK-NEXT: vmovdqa %xmm1, %xmm0
; CHECK-NEXT: retq
  %b.mem = load <8 x i16>, <8 x i16>* %ptr_b
  %a.wide = sext <8 x i16> %a to <8 x i32>
  %b.wide = sext <8 x i16> %b.mem to <8 x i32>
  %sum = add nsw <8 x i32> %a.wide, %b.wide
  %below.max = icmp slt <8 x i32> %sum, <i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767>
  %clamp.hi = select <8 x i1> %below.max, <8 x i32> %sum, <8 x i32> <i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767>
  %above.min = icmp sgt <8 x i32> %clamp.hi, <i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768>
  %clamp.lo = select <8 x i1> %above.min, <8 x i32> %clamp.hi, <8 x i32> <i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768>
  %sat = trunc <8 x i32> %clamp.lo to <8 x i16>
  %k = bitcast i8 %mask to <8 x i1>
  %merged = select <8 x i1> %k, <8 x i16> %sat, <8 x i16> %passThru
  ret <8 x i16> %merged
}
|
|
|
|
; Signed saturating i16 add on 8 lanes with a loaded second operand, followed
; by a per-lane select on the bits of %mask: set bits take the saturated sum,
; clear bits become zero. The assertions expect vpaddsw with a folded load
; using {%k1} {z}.
define <8 x i16> @test_mask_adds_epi16_rmkz_128(<8 x i16> %a, <8 x i16>* %ptr_b, i8 %mask) {
; CHECK-LABEL: test_mask_adds_epi16_rmkz_128:
; CHECK: ## %bb.0:
; CHECK-NEXT: kmovd %esi, %k1
; CHECK-NEXT: vpaddsw (%rdi), %xmm0, %xmm0 {%k1} {z}
; CHECK-NEXT: retq
  %b.mem = load <8 x i16>, <8 x i16>* %ptr_b
  %a.wide = sext <8 x i16> %a to <8 x i32>
  %b.wide = sext <8 x i16> %b.mem to <8 x i32>
  %sum = add nsw <8 x i32> %a.wide, %b.wide
  %below.max = icmp slt <8 x i32> %sum, <i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767>
  %clamp.hi = select <8 x i1> %below.max, <8 x i32> %sum, <8 x i32> <i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767>
  %above.min = icmp sgt <8 x i32> %clamp.hi, <i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768>
  %clamp.lo = select <8 x i1> %above.min, <8 x i32> %clamp.hi, <8 x i32> <i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768>
  %sat = trunc <8 x i32> %clamp.lo to <8 x i16>
  %k = bitcast i8 %mask to <8 x i1>
  %zeroed = select <8 x i1> %k, <8 x i16> %sat, <8 x i16> zeroinitializer
  ret <8 x i16> %zeroed
}
|
|
|
|
|
|
; Signed saturating i16 add on 16 lanes: sign-extend both inputs to i32, add,
; clamp the sum to -32768..32767 with two compare/select pairs, then truncate.
; The assertions expect a single register-register vpaddsw on ymm registers.
define <16 x i16> @test_mask_adds_epi16_rr_256(<16 x i16> %a, <16 x i16> %b) {
; CHECK-LABEL: test_mask_adds_epi16_rr_256:
; CHECK: ## %bb.0:
; CHECK-NEXT: vpaddsw %ymm1, %ymm0, %ymm0
; CHECK-NEXT: retq
  %a.wide = sext <16 x i16> %a to <16 x i32>
  %b.wide = sext <16 x i16> %b to <16 x i32>
  %sum = add nsw <16 x i32> %a.wide, %b.wide
  %below.max = icmp slt <16 x i32> %sum, <i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767>
  %clamp.hi = select <16 x i1> %below.max, <16 x i32> %sum, <16 x i32> <i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767>
  %above.min = icmp sgt <16 x i32> %clamp.hi, <i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768>
  %clamp.lo = select <16 x i1> %above.min, <16 x i32> %clamp.hi, <16 x i32> <i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768>
  %sat = trunc <16 x i32> %clamp.lo to <16 x i16>
  ret <16 x i16> %sat
}
|
|
|
|
; Signed saturating i16 add on 16 lanes (widen, add, clamp to -32768..32767,
; truncate) followed by a per-lane select on the bits of %mask: set bits take
; the saturated sum, clear bits take %passThru. The assertions expect vpaddsw
; writing under {%k1} into the pass-through register.
define <16 x i16> @test_mask_adds_epi16_rrk_256(<16 x i16> %a, <16 x i16> %b, <16 x i16> %passThru, i16 %mask) {
; CHECK-LABEL: test_mask_adds_epi16_rrk_256:
; CHECK: ## %bb.0:
; CHECK-NEXT: kmovd %edi, %k1
; CHECK-NEXT: vpaddsw %ymm1, %ymm0, %ymm2 {%k1}
; CHECK-NEXT: vmovdqa %ymm2, %ymm0
; CHECK-NEXT: retq
  %a.wide = sext <16 x i16> %a to <16 x i32>
  %b.wide = sext <16 x i16> %b to <16 x i32>
  %sum = add nsw <16 x i32> %a.wide, %b.wide
  %below.max = icmp slt <16 x i32> %sum, <i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767>
  %clamp.hi = select <16 x i1> %below.max, <16 x i32> %sum, <16 x i32> <i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767>
  %above.min = icmp sgt <16 x i32> %clamp.hi, <i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768>
  %clamp.lo = select <16 x i1> %above.min, <16 x i32> %clamp.hi, <16 x i32> <i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768>
  %sat = trunc <16 x i32> %clamp.lo to <16 x i16>
  %k = bitcast i16 %mask to <16 x i1>
  %merged = select <16 x i1> %k, <16 x i16> %sat, <16 x i16> %passThru
  ret <16 x i16> %merged
}
|
|
|
|
; Signed saturating i16 add on 16 lanes (widen, add, clamp to -32768..32767,
; truncate) followed by a per-lane select on the bits of %mask: set bits take
; the saturated sum, clear bits become zero. The assertions expect vpaddsw
; with {%k1} {z}.
define <16 x i16> @test_mask_adds_epi16_rrkz_256(<16 x i16> %a, <16 x i16> %b, i16 %mask) {
; CHECK-LABEL: test_mask_adds_epi16_rrkz_256:
; CHECK: ## %bb.0:
; CHECK-NEXT: kmovd %edi, %k1
; CHECK-NEXT: vpaddsw %ymm1, %ymm0, %ymm0 {%k1} {z}
; CHECK-NEXT: retq
  %a.wide = sext <16 x i16> %a to <16 x i32>
  %b.wide = sext <16 x i16> %b to <16 x i32>
  %sum = add nsw <16 x i32> %a.wide, %b.wide
  %below.max = icmp slt <16 x i32> %sum, <i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767>
  %clamp.hi = select <16 x i1> %below.max, <16 x i32> %sum, <16 x i32> <i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767>
  %above.min = icmp sgt <16 x i32> %clamp.hi, <i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768>
  %clamp.lo = select <16 x i1> %above.min, <16 x i32> %clamp.hi, <16 x i32> <i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768>
  %sat = trunc <16 x i32> %clamp.lo to <16 x i16>
  %k = bitcast i16 %mask to <16 x i1>
  %zeroed = select <16 x i1> %k, <16 x i16> %sat, <16 x i16> zeroinitializer
  ret <16 x i16> %zeroed
}
|
|
|
|
; Signed saturating i16 add on 16 lanes whose second operand is loaded from
; %ptr_b: widen, add, clamp to -32768..32767, truncate. The assertions expect
; one vpaddsw on ymm registers that folds the load.
define <16 x i16> @test_mask_adds_epi16_rm_256(<16 x i16> %a, <16 x i16>* %ptr_b) {
; CHECK-LABEL: test_mask_adds_epi16_rm_256:
; CHECK: ## %bb.0:
; CHECK-NEXT: vpaddsw (%rdi), %ymm0, %ymm0
; CHECK-NEXT: retq
  %b.mem = load <16 x i16>, <16 x i16>* %ptr_b
  %a.wide = sext <16 x i16> %a to <16 x i32>
  %b.wide = sext <16 x i16> %b.mem to <16 x i32>
  %sum = add nsw <16 x i32> %a.wide, %b.wide
  %below.max = icmp slt <16 x i32> %sum, <i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767>
  %clamp.hi = select <16 x i1> %below.max, <16 x i32> %sum, <16 x i32> <i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767>
  %above.min = icmp sgt <16 x i32> %clamp.hi, <i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768>
  %clamp.lo = select <16 x i1> %above.min, <16 x i32> %clamp.hi, <16 x i32> <i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768>
  %sat = trunc <16 x i32> %clamp.lo to <16 x i16>
  ret <16 x i16> %sat
}
|
|
|
|
; Signed saturating i16 add on 16 lanes with a loaded second operand, followed
; by a per-lane select on the bits of %mask: set bits take the saturated sum,
; clear bits take %passThru. The assertions expect vpaddsw with a folded load
; writing under {%k1} into the pass-through register.
define <16 x i16> @test_mask_adds_epi16_rmk_256(<16 x i16> %a, <16 x i16>* %ptr_b, <16 x i16> %passThru, i16 %mask) {
; CHECK-LABEL: test_mask_adds_epi16_rmk_256:
; CHECK: ## %bb.0:
; CHECK-NEXT: kmovd %esi, %k1
; CHECK-NEXT: vpaddsw (%rdi), %ymm0, %ymm1 {%k1}
; CHECK-NEXT: vmovdqa %ymm1, %ymm0
; CHECK-NEXT: retq
  %b.mem = load <16 x i16>, <16 x i16>* %ptr_b
  %a.wide = sext <16 x i16> %a to <16 x i32>
  %b.wide = sext <16 x i16> %b.mem to <16 x i32>
  %sum = add nsw <16 x i32> %a.wide, %b.wide
  %below.max = icmp slt <16 x i32> %sum, <i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767>
  %clamp.hi = select <16 x i1> %below.max, <16 x i32> %sum, <16 x i32> <i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767>
  %above.min = icmp sgt <16 x i32> %clamp.hi, <i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768>
  %clamp.lo = select <16 x i1> %above.min, <16 x i32> %clamp.hi, <16 x i32> <i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768>
  %sat = trunc <16 x i32> %clamp.lo to <16 x i16>
  %k = bitcast i16 %mask to <16 x i1>
  %merged = select <16 x i1> %k, <16 x i16> %sat, <16 x i16> %passThru
  ret <16 x i16> %merged
}
|
|
|
|
; Signed saturating i16 add on 16 lanes with a loaded second operand, followed
; by a per-lane select on the bits of %mask: set bits take the saturated sum,
; clear bits become zero. The assertions expect vpaddsw with a folded load
; using {%k1} {z}.
define <16 x i16> @test_mask_adds_epi16_rmkz_256(<16 x i16> %a, <16 x i16>* %ptr_b, i16 %mask) {
; CHECK-LABEL: test_mask_adds_epi16_rmkz_256:
; CHECK: ## %bb.0:
; CHECK-NEXT: kmovd %esi, %k1
; CHECK-NEXT: vpaddsw (%rdi), %ymm0, %ymm0 {%k1} {z}
; CHECK-NEXT: retq
  %b.mem = load <16 x i16>, <16 x i16>* %ptr_b
  %a.wide = sext <16 x i16> %a to <16 x i32>
  %b.wide = sext <16 x i16> %b.mem to <16 x i32>
  %sum = add nsw <16 x i32> %a.wide, %b.wide
  %below.max = icmp slt <16 x i32> %sum, <i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767>
  %clamp.hi = select <16 x i1> %below.max, <16 x i32> %sum, <16 x i32> <i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767>
  %above.min = icmp sgt <16 x i32> %clamp.hi, <i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768>
  %clamp.lo = select <16 x i1> %above.min, <16 x i32> %clamp.hi, <16 x i32> <i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768>
  %sat = trunc <16 x i32> %clamp.lo to <16 x i16>
  %k = bitcast i16 %mask to <16 x i1>
  %zeroed = select <16 x i1> %k, <16 x i16> %sat, <16 x i16> zeroinitializer
  ret <16 x i16> %zeroed
}
|
|
|
|
; Signed saturating i16 subtract on 8 lanes: sign-extend both inputs to i32,
; subtract, clamp the difference to -32768..32767 with two compare/select
; pairs, then truncate. The assertions expect a single register-register
; vpsubsw.
define <8 x i16> @test_mask_subs_epi16_rr_128(<8 x i16> %a, <8 x i16> %b) {
; CHECK-LABEL: test_mask_subs_epi16_rr_128:
; CHECK: ## %bb.0:
; CHECK-NEXT: vpsubsw %xmm1, %xmm0, %xmm0
; CHECK-NEXT: retq
  %a.wide = sext <8 x i16> %a to <8 x i32>
  %b.wide = sext <8 x i16> %b to <8 x i32>
  %diff = sub nsw <8 x i32> %a.wide, %b.wide
  %below.max = icmp slt <8 x i32> %diff, <i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767>
  %clamp.hi = select <8 x i1> %below.max, <8 x i32> %diff, <8 x i32> <i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767>
  %above.min = icmp sgt <8 x i32> %clamp.hi, <i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768>
  %clamp.lo = select <8 x i1> %above.min, <8 x i32> %clamp.hi, <8 x i32> <i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768>
  %sat = trunc <8 x i32> %clamp.lo to <8 x i16>
  ret <8 x i16> %sat
}
|
|
|
|
; Signed saturating i16 subtract on 8 lanes (widen, subtract, clamp to
; -32768..32767, truncate) followed by a per-lane select on the bits of %mask:
; set bits take the saturated difference, clear bits take %passThru. The
; assertions expect vpsubsw writing under {%k1} into the pass-through register.
define <8 x i16> @test_mask_subs_epi16_rrk_128(<8 x i16> %a, <8 x i16> %b, <8 x i16> %passThru, i8 %mask) {
; CHECK-LABEL: test_mask_subs_epi16_rrk_128:
; CHECK: ## %bb.0:
; CHECK-NEXT: kmovd %edi, %k1
; CHECK-NEXT: vpsubsw %xmm1, %xmm0, %xmm2 {%k1}
; CHECK-NEXT: vmovdqa %xmm2, %xmm0
; CHECK-NEXT: retq
  %a.wide = sext <8 x i16> %a to <8 x i32>
  %b.wide = sext <8 x i16> %b to <8 x i32>
  %diff = sub nsw <8 x i32> %a.wide, %b.wide
  %below.max = icmp slt <8 x i32> %diff, <i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767>
  %clamp.hi = select <8 x i1> %below.max, <8 x i32> %diff, <8 x i32> <i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767>
  %above.min = icmp sgt <8 x i32> %clamp.hi, <i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768>
  %clamp.lo = select <8 x i1> %above.min, <8 x i32> %clamp.hi, <8 x i32> <i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768>
  %sat = trunc <8 x i32> %clamp.lo to <8 x i16>
  %k = bitcast i8 %mask to <8 x i1>
  %merged = select <8 x i1> %k, <8 x i16> %sat, <8 x i16> %passThru
  ret <8 x i16> %merged
}
|
|
|
|
; Signed saturating i16 subtract on 8 lanes (widen, subtract, clamp to
; -32768..32767, truncate) followed by a per-lane select on the bits of %mask:
; set bits take the saturated difference, clear bits become zero. The
; assertions expect vpsubsw with {%k1} {z}.
define <8 x i16> @test_mask_subs_epi16_rrkz_128(<8 x i16> %a, <8 x i16> %b, i8 %mask) {
; CHECK-LABEL: test_mask_subs_epi16_rrkz_128:
; CHECK: ## %bb.0:
; CHECK-NEXT: kmovd %edi, %k1
; CHECK-NEXT: vpsubsw %xmm1, %xmm0, %xmm0 {%k1} {z}
; CHECK-NEXT: retq
  %a.wide = sext <8 x i16> %a to <8 x i32>
  %b.wide = sext <8 x i16> %b to <8 x i32>
  %diff = sub nsw <8 x i32> %a.wide, %b.wide
  %below.max = icmp slt <8 x i32> %diff, <i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767>
  %clamp.hi = select <8 x i1> %below.max, <8 x i32> %diff, <8 x i32> <i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767>
  %above.min = icmp sgt <8 x i32> %clamp.hi, <i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768>
  %clamp.lo = select <8 x i1> %above.min, <8 x i32> %clamp.hi, <8 x i32> <i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768>
  %sat = trunc <8 x i32> %clamp.lo to <8 x i16>
  %k = bitcast i8 %mask to <8 x i1>
  %zeroed = select <8 x i1> %k, <8 x i16> %sat, <8 x i16> zeroinitializer
  ret <8 x i16> %zeroed
}
|
|
|
|
; Signed saturating i16 subtract on 8 lanes whose second operand is loaded
; from %ptr_b: widen, subtract, clamp to -32768..32767, truncate. The
; assertions expect one vpsubsw that folds the load.
define <8 x i16> @test_mask_subs_epi16_rm_128(<8 x i16> %a, <8 x i16>* %ptr_b) {
; CHECK-LABEL: test_mask_subs_epi16_rm_128:
; CHECK: ## %bb.0:
; CHECK-NEXT: vpsubsw (%rdi), %xmm0, %xmm0
; CHECK-NEXT: retq
  %b.mem = load <8 x i16>, <8 x i16>* %ptr_b
  %a.wide = sext <8 x i16> %a to <8 x i32>
  %b.wide = sext <8 x i16> %b.mem to <8 x i32>
  %diff = sub nsw <8 x i32> %a.wide, %b.wide
  %below.max = icmp slt <8 x i32> %diff, <i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767>
  %clamp.hi = select <8 x i1> %below.max, <8 x i32> %diff, <8 x i32> <i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767>
  %above.min = icmp sgt <8 x i32> %clamp.hi, <i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768>
  %clamp.lo = select <8 x i1> %above.min, <8 x i32> %clamp.hi, <8 x i32> <i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768>
  %sat = trunc <8 x i32> %clamp.lo to <8 x i16>
  ret <8 x i16> %sat
}
|
|
|
|
; Signed saturating i16 subtract, 128-bit, second operand loaded from memory,
; merge-masked: lanes whose bit in %mask is clear take %passThru. The
; assertions below expect a masked vpsubsw into the pass-through register
; followed by a register move.
define <8 x i16> @test_mask_subs_epi16_rmk_128(<8 x i16> %a, <8 x i16>* %ptr_b, <8 x i16> %passThru, i8 %mask) {
; CHECK-LABEL: test_mask_subs_epi16_rmk_128:
; CHECK: ## %bb.0:
; CHECK-NEXT: kmovd %esi, %k1
; CHECK-NEXT: vpsubsw (%rdi), %xmm0, %xmm1 {%k1}
; CHECK-NEXT: vmovdqa %xmm1, %xmm0
; CHECK-NEXT: retq
  %b = load <8 x i16>, <8 x i16>* %ptr_b
  %a.ext = sext <8 x i16> %a to <8 x i32>
  %b.ext = sext <8 x i16> %b to <8 x i32>
  %wide.diff = sub nsw <8 x i32> %a.ext, %b.ext
  ; min(diff, 32767)
  %lt.max = icmp slt <8 x i32> %wide.diff, <i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767>
  %clamp.hi = select <8 x i1> %lt.max, <8 x i32> %wide.diff, <8 x i32> <i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767>
  ; max(..., -32768)
  %gt.min = icmp sgt <8 x i32> %clamp.hi, <i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768>
  %clamp.lo = select <8 x i1> %gt.min, <8 x i32> %clamp.hi, <8 x i32> <i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768>
  %sat = trunc <8 x i32> %clamp.lo to <8 x i16>
  ; One mask bit per lane selects between the saturated result and %passThru.
  %mask.vec = bitcast i8 %mask to <8 x i1>
  %res = select <8 x i1> %mask.vec, <8 x i16> %sat, <8 x i16> %passThru
  ret <8 x i16> %res
}
|
|
|
|
; Signed saturating i16 subtract, 128-bit, second operand loaded from memory,
; zero-masked: lanes whose bit in %mask is clear become zero. The assertions
; below expect a single vpsubsw with {%k1} {z}.
define <8 x i16> @test_mask_subs_epi16_rmkz_128(<8 x i16> %a, <8 x i16>* %ptr_b, i8 %mask) {
; CHECK-LABEL: test_mask_subs_epi16_rmkz_128:
; CHECK: ## %bb.0:
; CHECK-NEXT: kmovd %esi, %k1
; CHECK-NEXT: vpsubsw (%rdi), %xmm0, %xmm0 {%k1} {z}
; CHECK-NEXT: retq
  %b = load <8 x i16>, <8 x i16>* %ptr_b
  %a.ext = sext <8 x i16> %a to <8 x i32>
  %b.ext = sext <8 x i16> %b to <8 x i32>
  %wide.diff = sub nsw <8 x i32> %a.ext, %b.ext
  ; min(diff, 32767)
  %lt.max = icmp slt <8 x i32> %wide.diff, <i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767>
  %clamp.hi = select <8 x i1> %lt.max, <8 x i32> %wide.diff, <8 x i32> <i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767>
  ; max(..., -32768)
  %gt.min = icmp sgt <8 x i32> %clamp.hi, <i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768>
  %clamp.lo = select <8 x i1> %gt.min, <8 x i32> %clamp.hi, <8 x i32> <i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768>
  %sat = trunc <8 x i32> %clamp.lo to <8 x i16>
  ; One mask bit per lane selects between the saturated result and zero.
  %mask.vec = bitcast i8 %mask to <8 x i1>
  %res = select <8 x i1> %mask.vec, <8 x i16> %sat, <8 x i16> zeroinitializer
  ret <8 x i16> %res
}
|
|
|
|
; Signed saturating i16 subtract, 256-bit, register operands, no masking.
; The IR widens both inputs to i32, subtracts, clamps to [-32768, 32767] and
; truncates back to i16; the assertions below expect a single ymm vpsubsw.
define <16 x i16> @test_mask_subs_epi16_rr_256(<16 x i16> %a, <16 x i16> %b) {
; CHECK-LABEL: test_mask_subs_epi16_rr_256:
; CHECK: ## %bb.0:
; CHECK-NEXT: vpsubsw %ymm1, %ymm0, %ymm0
; CHECK-NEXT: retq
  %a.ext = sext <16 x i16> %a to <16 x i32>
  %b.ext = sext <16 x i16> %b to <16 x i32>
  %wide.diff = sub nsw <16 x i32> %a.ext, %b.ext
  ; min(diff, 32767)
  %lt.max = icmp slt <16 x i32> %wide.diff, <i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767>
  %clamp.hi = select <16 x i1> %lt.max, <16 x i32> %wide.diff, <16 x i32> <i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767>
  ; max(..., -32768)
  %gt.min = icmp sgt <16 x i32> %clamp.hi, <i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768>
  %clamp.lo = select <16 x i1> %gt.min, <16 x i32> %clamp.hi, <16 x i32> <i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768>
  %sat = trunc <16 x i32> %clamp.lo to <16 x i16>
  ret <16 x i16> %sat
}
|
|
|
|
; Signed saturating i16 subtract, 256-bit, register operands, merge-masked:
; lanes whose bit in %mask is clear take %passThru. The assertions below
; expect a masked vpsubsw into the pass-through register followed by a move.
define <16 x i16> @test_mask_subs_epi16_rrk_256(<16 x i16> %a, <16 x i16> %b, <16 x i16> %passThru, i16 %mask) {
; CHECK-LABEL: test_mask_subs_epi16_rrk_256:
; CHECK: ## %bb.0:
; CHECK-NEXT: kmovd %edi, %k1
; CHECK-NEXT: vpsubsw %ymm1, %ymm0, %ymm2 {%k1}
; CHECK-NEXT: vmovdqa %ymm2, %ymm0
; CHECK-NEXT: retq
  %a.ext = sext <16 x i16> %a to <16 x i32>
  %b.ext = sext <16 x i16> %b to <16 x i32>
  %wide.diff = sub nsw <16 x i32> %a.ext, %b.ext
  ; min(diff, 32767)
  %lt.max = icmp slt <16 x i32> %wide.diff, <i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767>
  %clamp.hi = select <16 x i1> %lt.max, <16 x i32> %wide.diff, <16 x i32> <i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767>
  ; max(..., -32768)
  %gt.min = icmp sgt <16 x i32> %clamp.hi, <i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768>
  %clamp.lo = select <16 x i1> %gt.min, <16 x i32> %clamp.hi, <16 x i32> <i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768>
  %sat = trunc <16 x i32> %clamp.lo to <16 x i16>
  ; One mask bit per lane selects between the saturated result and %passThru.
  %mask.vec = bitcast i16 %mask to <16 x i1>
  %res = select <16 x i1> %mask.vec, <16 x i16> %sat, <16 x i16> %passThru
  ret <16 x i16> %res
}
|
|
|
|
; Signed saturating i16 subtract, 256-bit, register operands, zero-masked:
; lanes whose bit in %mask is clear become zero. The assertions below expect
; a single vpsubsw with {%k1} {z}.
define <16 x i16> @test_mask_subs_epi16_rrkz_256(<16 x i16> %a, <16 x i16> %b, i16 %mask) {
; CHECK-LABEL: test_mask_subs_epi16_rrkz_256:
; CHECK: ## %bb.0:
; CHECK-NEXT: kmovd %edi, %k1
; CHECK-NEXT: vpsubsw %ymm1, %ymm0, %ymm0 {%k1} {z}
; CHECK-NEXT: retq
  %a.ext = sext <16 x i16> %a to <16 x i32>
  %b.ext = sext <16 x i16> %b to <16 x i32>
  %wide.diff = sub nsw <16 x i32> %a.ext, %b.ext
  ; min(diff, 32767)
  %lt.max = icmp slt <16 x i32> %wide.diff, <i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767>
  %clamp.hi = select <16 x i1> %lt.max, <16 x i32> %wide.diff, <16 x i32> <i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767>
  ; max(..., -32768)
  %gt.min = icmp sgt <16 x i32> %clamp.hi, <i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768>
  %clamp.lo = select <16 x i1> %gt.min, <16 x i32> %clamp.hi, <16 x i32> <i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768>
  %sat = trunc <16 x i32> %clamp.lo to <16 x i16>
  ; One mask bit per lane selects between the saturated result and zero.
  %mask.vec = bitcast i16 %mask to <16 x i1>
  %res = select <16 x i1> %mask.vec, <16 x i16> %sat, <16 x i16> zeroinitializer
  ret <16 x i16> %res
}
|
|
|
|
; Signed saturating i16 subtract, 256-bit, second operand loaded from memory,
; no masking. The assertions below expect a single ymm vpsubsw with the load
; folded in.
define <16 x i16> @test_mask_subs_epi16_rm_256(<16 x i16> %a, <16 x i16>* %ptr_b) {
; CHECK-LABEL: test_mask_subs_epi16_rm_256:
; CHECK: ## %bb.0:
; CHECK-NEXT: vpsubsw (%rdi), %ymm0, %ymm0
; CHECK-NEXT: retq
  %b = load <16 x i16>, <16 x i16>* %ptr_b
  %a.ext = sext <16 x i16> %a to <16 x i32>
  %b.ext = sext <16 x i16> %b to <16 x i32>
  %wide.diff = sub nsw <16 x i32> %a.ext, %b.ext
  ; min(diff, 32767)
  %lt.max = icmp slt <16 x i32> %wide.diff, <i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767>
  %clamp.hi = select <16 x i1> %lt.max, <16 x i32> %wide.diff, <16 x i32> <i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767>
  ; max(..., -32768)
  %gt.min = icmp sgt <16 x i32> %clamp.hi, <i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768>
  %clamp.lo = select <16 x i1> %gt.min, <16 x i32> %clamp.hi, <16 x i32> <i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768>
  %sat = trunc <16 x i32> %clamp.lo to <16 x i16>
  ret <16 x i16> %sat
}
|
|
|
|
; Signed saturating i16 subtract, 256-bit, second operand loaded from memory,
; merge-masked: lanes whose bit in %mask is clear take %passThru. The
; assertions below expect a masked vpsubsw into the pass-through register
; followed by a register move.
define <16 x i16> @test_mask_subs_epi16_rmk_256(<16 x i16> %a, <16 x i16>* %ptr_b, <16 x i16> %passThru, i16 %mask) {
; CHECK-LABEL: test_mask_subs_epi16_rmk_256:
; CHECK: ## %bb.0:
; CHECK-NEXT: kmovd %esi, %k1
; CHECK-NEXT: vpsubsw (%rdi), %ymm0, %ymm1 {%k1}
; CHECK-NEXT: vmovdqa %ymm1, %ymm0
; CHECK-NEXT: retq
  %b = load <16 x i16>, <16 x i16>* %ptr_b
  %a.ext = sext <16 x i16> %a to <16 x i32>
  %b.ext = sext <16 x i16> %b to <16 x i32>
  %wide.diff = sub nsw <16 x i32> %a.ext, %b.ext
  ; min(diff, 32767)
  %lt.max = icmp slt <16 x i32> %wide.diff, <i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767>
  %clamp.hi = select <16 x i1> %lt.max, <16 x i32> %wide.diff, <16 x i32> <i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767>
  ; max(..., -32768)
  %gt.min = icmp sgt <16 x i32> %clamp.hi, <i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768>
  %clamp.lo = select <16 x i1> %gt.min, <16 x i32> %clamp.hi, <16 x i32> <i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768>
  %sat = trunc <16 x i32> %clamp.lo to <16 x i16>
  ; One mask bit per lane selects between the saturated result and %passThru.
  %mask.vec = bitcast i16 %mask to <16 x i1>
  %res = select <16 x i1> %mask.vec, <16 x i16> %sat, <16 x i16> %passThru
  ret <16 x i16> %res
}
|
|
|
|
; Signed saturating i16 subtract, 256-bit, second operand loaded from memory,
; zero-masked: lanes whose bit in %mask is clear become zero. The assertions
; below expect a single vpsubsw with {%k1} {z}.
define <16 x i16> @test_mask_subs_epi16_rmkz_256(<16 x i16> %a, <16 x i16>* %ptr_b, i16 %mask) {
; CHECK-LABEL: test_mask_subs_epi16_rmkz_256:
; CHECK: ## %bb.0:
; CHECK-NEXT: kmovd %esi, %k1
; CHECK-NEXT: vpsubsw (%rdi), %ymm0, %ymm0 {%k1} {z}
; CHECK-NEXT: retq
  %b = load <16 x i16>, <16 x i16>* %ptr_b
  %a.ext = sext <16 x i16> %a to <16 x i32>
  %b.ext = sext <16 x i16> %b to <16 x i32>
  %wide.diff = sub nsw <16 x i32> %a.ext, %b.ext
  ; min(diff, 32767)
  %lt.max = icmp slt <16 x i32> %wide.diff, <i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767>
  %clamp.hi = select <16 x i1> %lt.max, <16 x i32> %wide.diff, <16 x i32> <i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767>
  ; max(..., -32768)
  %gt.min = icmp sgt <16 x i32> %clamp.hi, <i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768>
  %clamp.lo = select <16 x i1> %gt.min, <16 x i32> %clamp.hi, <16 x i32> <i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768>
  %sat = trunc <16 x i32> %clamp.lo to <16 x i16>
  ; One mask bit per lane selects between the saturated result and zero.
  %mask.vec = bitcast i16 %mask to <16 x i1>
  %res = select <16 x i1> %mask.vec, <16 x i16> %sat, <16 x i16> zeroinitializer
  ret <16 x i16> %res
}
|
|
|
|
; Unsigned saturating i16 add, 128-bit, register operands, no masking.
; The IR zero-extends both inputs to i32, adds, caps the sum at 65535 and
; truncates back to i16; the assertions below expect a single vpaddusw.
define <8 x i16> @test_mask_adds_epu16_rr_128(<8 x i16> %a, <8 x i16> %b) {
; CHECK-LABEL: test_mask_adds_epu16_rr_128:
; CHECK: ## %bb.0:
; CHECK-NEXT: vpaddusw %xmm1, %xmm0, %xmm0
; CHECK-NEXT: retq
  %a.ext = zext <8 x i16> %a to <8 x i32>
  %b.ext = zext <8 x i16> %b to <8 x i32>
  %wide.sum = add nsw <8 x i32> %a.ext, %b.ext
  ; umin(sum, 65535)
  %lt.max = icmp ult <8 x i32> %wide.sum, <i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535>
  %clamp = select <8 x i1> %lt.max, <8 x i32> %wide.sum, <8 x i32> <i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535>
  %sat = trunc <8 x i32> %clamp to <8 x i16>
  ret <8 x i16> %sat
}
|
|
|
|
; Unsigned saturating i16 add, 128-bit, register operands, merge-masked:
; lanes whose bit in %mask is clear take %passThru. The assertions below
; expect a masked vpaddusw into the pass-through register followed by a move.
define <8 x i16> @test_mask_adds_epu16_rrk_128(<8 x i16> %a, <8 x i16> %b, <8 x i16> %passThru, i8 %mask) {
; CHECK-LABEL: test_mask_adds_epu16_rrk_128:
; CHECK: ## %bb.0:
; CHECK-NEXT: kmovd %edi, %k1
; CHECK-NEXT: vpaddusw %xmm1, %xmm0, %xmm2 {%k1}
; CHECK-NEXT: vmovdqa %xmm2, %xmm0
; CHECK-NEXT: retq
  %a.ext = zext <8 x i16> %a to <8 x i32>
  %b.ext = zext <8 x i16> %b to <8 x i32>
  %wide.sum = add nsw <8 x i32> %a.ext, %b.ext
  ; umin(sum, 65535)
  %lt.max = icmp ult <8 x i32> %wide.sum, <i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535>
  %clamp = select <8 x i1> %lt.max, <8 x i32> %wide.sum, <8 x i32> <i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535>
  %sat = trunc <8 x i32> %clamp to <8 x i16>
  ; One mask bit per lane selects between the saturated result and %passThru.
  %mask.vec = bitcast i8 %mask to <8 x i1>
  %res = select <8 x i1> %mask.vec, <8 x i16> %sat, <8 x i16> %passThru
  ret <8 x i16> %res
}
|
|
|
|
; Unsigned saturating i16 add, 128-bit, register operands, zero-masked:
; lanes whose bit in %mask is clear become zero. The assertions below expect
; a single vpaddusw with {%k1} {z}.
define <8 x i16> @test_mask_adds_epu16_rrkz_128(<8 x i16> %a, <8 x i16> %b, i8 %mask) {
; CHECK-LABEL: test_mask_adds_epu16_rrkz_128:
; CHECK: ## %bb.0:
; CHECK-NEXT: kmovd %edi, %k1
; CHECK-NEXT: vpaddusw %xmm1, %xmm0, %xmm0 {%k1} {z}
; CHECK-NEXT: retq
  %a.ext = zext <8 x i16> %a to <8 x i32>
  %b.ext = zext <8 x i16> %b to <8 x i32>
  %wide.sum = add nsw <8 x i32> %a.ext, %b.ext
  ; umin(sum, 65535)
  %lt.max = icmp ult <8 x i32> %wide.sum, <i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535>
  %clamp = select <8 x i1> %lt.max, <8 x i32> %wide.sum, <8 x i32> <i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535>
  %sat = trunc <8 x i32> %clamp to <8 x i16>
  ; One mask bit per lane selects between the saturated result and zero.
  %mask.vec = bitcast i8 %mask to <8 x i1>
  %res = select <8 x i1> %mask.vec, <8 x i16> %sat, <8 x i16> zeroinitializer
  ret <8 x i16> %res
}
|
|
|
|
; Unsigned saturating i16 add, 128-bit, second operand loaded from memory,
; no masking. The assertions below expect a single vpaddusw with the load
; folded in.
define <8 x i16> @test_mask_adds_epu16_rm_128(<8 x i16> %a, <8 x i16>* %ptr_b) {
; CHECK-LABEL: test_mask_adds_epu16_rm_128:
; CHECK: ## %bb.0:
; CHECK-NEXT: vpaddusw (%rdi), %xmm0, %xmm0
; CHECK-NEXT: retq
  %b = load <8 x i16>, <8 x i16>* %ptr_b
  %a.ext = zext <8 x i16> %a to <8 x i32>
  %b.ext = zext <8 x i16> %b to <8 x i32>
  %wide.sum = add nsw <8 x i32> %a.ext, %b.ext
  ; umin(sum, 65535)
  %lt.max = icmp ult <8 x i32> %wide.sum, <i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535>
  %clamp = select <8 x i1> %lt.max, <8 x i32> %wide.sum, <8 x i32> <i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535>
  %sat = trunc <8 x i32> %clamp to <8 x i16>
  ret <8 x i16> %sat
}
|
|
|
|
; Unsigned saturating i16 add, 128-bit, second operand loaded from memory,
; merge-masked: lanes whose bit in %mask is clear take %passThru. The
; assertions below expect a masked vpaddusw into the pass-through register
; followed by a register move.
define <8 x i16> @test_mask_adds_epu16_rmk_128(<8 x i16> %a, <8 x i16>* %ptr_b, <8 x i16> %passThru, i8 %mask) {
; CHECK-LABEL: test_mask_adds_epu16_rmk_128:
; CHECK: ## %bb.0:
; CHECK-NEXT: kmovd %esi, %k1
; CHECK-NEXT: vpaddusw (%rdi), %xmm0, %xmm1 {%k1}
; CHECK-NEXT: vmovdqa %xmm1, %xmm0
; CHECK-NEXT: retq
  %b = load <8 x i16>, <8 x i16>* %ptr_b
  %a.ext = zext <8 x i16> %a to <8 x i32>
  %b.ext = zext <8 x i16> %b to <8 x i32>
  %wide.sum = add nsw <8 x i32> %a.ext, %b.ext
  ; umin(sum, 65535)
  %lt.max = icmp ult <8 x i32> %wide.sum, <i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535>
  %clamp = select <8 x i1> %lt.max, <8 x i32> %wide.sum, <8 x i32> <i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535>
  %sat = trunc <8 x i32> %clamp to <8 x i16>
  ; One mask bit per lane selects between the saturated result and %passThru.
  %mask.vec = bitcast i8 %mask to <8 x i1>
  %res = select <8 x i1> %mask.vec, <8 x i16> %sat, <8 x i16> %passThru
  ret <8 x i16> %res
}
|
|
|
|
; Unsigned saturating i16 add, 128-bit, second operand loaded from memory,
; zero-masked: lanes whose bit in %mask is clear become zero. The assertions
; below expect a single vpaddusw with {%k1} {z}.
define <8 x i16> @test_mask_adds_epu16_rmkz_128(<8 x i16> %a, <8 x i16>* %ptr_b, i8 %mask) {
; CHECK-LABEL: test_mask_adds_epu16_rmkz_128:
; CHECK: ## %bb.0:
; CHECK-NEXT: kmovd %esi, %k1
; CHECK-NEXT: vpaddusw (%rdi), %xmm0, %xmm0 {%k1} {z}
; CHECK-NEXT: retq
  %b = load <8 x i16>, <8 x i16>* %ptr_b
  %a.ext = zext <8 x i16> %a to <8 x i32>
  %b.ext = zext <8 x i16> %b to <8 x i32>
  %wide.sum = add nsw <8 x i32> %a.ext, %b.ext
  ; umin(sum, 65535)
  %lt.max = icmp ult <8 x i32> %wide.sum, <i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535>
  %clamp = select <8 x i1> %lt.max, <8 x i32> %wide.sum, <8 x i32> <i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535>
  %sat = trunc <8 x i32> %clamp to <8 x i16>
  ; One mask bit per lane selects between the saturated result and zero.
  %mask.vec = bitcast i8 %mask to <8 x i1>
  %res = select <8 x i1> %mask.vec, <8 x i16> %sat, <8 x i16> zeroinitializer
  ret <8 x i16> %res
}
|
|
|
|
; Unsigned saturating i16 add, 256-bit, register operands, no masking.
; The IR zero-extends both inputs to i32, adds, caps the sum at 65535 and
; truncates back to i16; the assertions below expect a single ymm vpaddusw.
define <16 x i16> @test_mask_adds_epu16_rr_256(<16 x i16> %a, <16 x i16> %b) {
; CHECK-LABEL: test_mask_adds_epu16_rr_256:
; CHECK: ## %bb.0:
; CHECK-NEXT: vpaddusw %ymm1, %ymm0, %ymm0
; CHECK-NEXT: retq
  %a.ext = zext <16 x i16> %a to <16 x i32>
  %b.ext = zext <16 x i16> %b to <16 x i32>
  %wide.sum = add nsw <16 x i32> %a.ext, %b.ext
  ; umin(sum, 65535)
  %lt.max = icmp ult <16 x i32> %wide.sum, <i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535>
  %clamp = select <16 x i1> %lt.max, <16 x i32> %wide.sum, <16 x i32> <i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535>
  %sat = trunc <16 x i32> %clamp to <16 x i16>
  ret <16 x i16> %sat
}
|
|
|
|
; Unsigned saturating i16 add, 256-bit, register operands, merge-masked:
; lanes whose bit in %mask is clear take %passThru. The assertions below
; expect a masked vpaddusw into the pass-through register followed by a move.
define <16 x i16> @test_mask_adds_epu16_rrk_256(<16 x i16> %a, <16 x i16> %b, <16 x i16> %passThru, i16 %mask) {
; CHECK-LABEL: test_mask_adds_epu16_rrk_256:
; CHECK: ## %bb.0:
; CHECK-NEXT: kmovd %edi, %k1
; CHECK-NEXT: vpaddusw %ymm1, %ymm0, %ymm2 {%k1}
; CHECK-NEXT: vmovdqa %ymm2, %ymm0
; CHECK-NEXT: retq
  %a.ext = zext <16 x i16> %a to <16 x i32>
  %b.ext = zext <16 x i16> %b to <16 x i32>
  %wide.sum = add nsw <16 x i32> %a.ext, %b.ext
  ; umin(sum, 65535)
  %lt.max = icmp ult <16 x i32> %wide.sum, <i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535>
  %clamp = select <16 x i1> %lt.max, <16 x i32> %wide.sum, <16 x i32> <i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535>
  %sat = trunc <16 x i32> %clamp to <16 x i16>
  ; One mask bit per lane selects between the saturated result and %passThru.
  %mask.vec = bitcast i16 %mask to <16 x i1>
  %res = select <16 x i1> %mask.vec, <16 x i16> %sat, <16 x i16> %passThru
  ret <16 x i16> %res
}
|
|
|
|
; Unsigned saturating i16 add, 256-bit, register operands, zero-masked:
; lanes whose bit in %mask is clear become zero. The assertions below expect
; a single vpaddusw with {%k1} {z}.
define <16 x i16> @test_mask_adds_epu16_rrkz_256(<16 x i16> %a, <16 x i16> %b, i16 %mask) {
; CHECK-LABEL: test_mask_adds_epu16_rrkz_256:
; CHECK: ## %bb.0:
; CHECK-NEXT: kmovd %edi, %k1
; CHECK-NEXT: vpaddusw %ymm1, %ymm0, %ymm0 {%k1} {z}
; CHECK-NEXT: retq
  %a.ext = zext <16 x i16> %a to <16 x i32>
  %b.ext = zext <16 x i16> %b to <16 x i32>
  %wide.sum = add nsw <16 x i32> %a.ext, %b.ext
  ; umin(sum, 65535)
  %lt.max = icmp ult <16 x i32> %wide.sum, <i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535>
  %clamp = select <16 x i1> %lt.max, <16 x i32> %wide.sum, <16 x i32> <i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535>
  %sat = trunc <16 x i32> %clamp to <16 x i16>
  ; One mask bit per lane selects between the saturated result and zero.
  %mask.vec = bitcast i16 %mask to <16 x i1>
  %res = select <16 x i1> %mask.vec, <16 x i16> %sat, <16 x i16> zeroinitializer
  ret <16 x i16> %res
}
|
|
|
|
; Unsigned saturating i16 add, 256-bit, second operand loaded from memory,
; no masking. The assertions below expect a single ymm vpaddusw with the
; load folded in.
define <16 x i16> @test_mask_adds_epu16_rm_256(<16 x i16> %a, <16 x i16>* %ptr_b) {
; CHECK-LABEL: test_mask_adds_epu16_rm_256:
; CHECK: ## %bb.0:
; CHECK-NEXT: vpaddusw (%rdi), %ymm0, %ymm0
; CHECK-NEXT: retq
  %b = load <16 x i16>, <16 x i16>* %ptr_b
  %a.ext = zext <16 x i16> %a to <16 x i32>
  %b.ext = zext <16 x i16> %b to <16 x i32>
  %wide.sum = add nsw <16 x i32> %a.ext, %b.ext
  ; umin(sum, 65535)
  %lt.max = icmp ult <16 x i32> %wide.sum, <i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535>
  %clamp = select <16 x i1> %lt.max, <16 x i32> %wide.sum, <16 x i32> <i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535>
  %sat = trunc <16 x i32> %clamp to <16 x i16>
  ret <16 x i16> %sat
}
|
|
|
|
; Unsigned saturating i16 add, 256-bit, second operand loaded from memory,
; merge-masked: lanes whose bit in %mask is clear take %passThru. The
; assertions below expect a masked vpaddusw into the pass-through register
; followed by a register move.
define <16 x i16> @test_mask_adds_epu16_rmk_256(<16 x i16> %a, <16 x i16>* %ptr_b, <16 x i16> %passThru, i16 %mask) {
; CHECK-LABEL: test_mask_adds_epu16_rmk_256:
; CHECK: ## %bb.0:
; CHECK-NEXT: kmovd %esi, %k1
; CHECK-NEXT: vpaddusw (%rdi), %ymm0, %ymm1 {%k1}
; CHECK-NEXT: vmovdqa %ymm1, %ymm0
; CHECK-NEXT: retq
  %b = load <16 x i16>, <16 x i16>* %ptr_b
  %a.ext = zext <16 x i16> %a to <16 x i32>
  %b.ext = zext <16 x i16> %b to <16 x i32>
  %wide.sum = add nsw <16 x i32> %a.ext, %b.ext
  ; umin(sum, 65535)
  %lt.max = icmp ult <16 x i32> %wide.sum, <i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535>
  %clamp = select <16 x i1> %lt.max, <16 x i32> %wide.sum, <16 x i32> <i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535>
  %sat = trunc <16 x i32> %clamp to <16 x i16>
  ; One mask bit per lane selects between the saturated result and %passThru.
  %mask.vec = bitcast i16 %mask to <16 x i1>
  %res = select <16 x i1> %mask.vec, <16 x i16> %sat, <16 x i16> %passThru
  ret <16 x i16> %res
}
|
|
|
|
; Unsigned saturating i16 add, 256-bit, second operand loaded from memory,
; zero-masked: lanes whose bit in %mask is clear become zero. The assertions
; below expect a single vpaddusw with {%k1} {z}.
define <16 x i16> @test_mask_adds_epu16_rmkz_256(<16 x i16> %a, <16 x i16>* %ptr_b, i16 %mask) {
; CHECK-LABEL: test_mask_adds_epu16_rmkz_256:
; CHECK: ## %bb.0:
; CHECK-NEXT: kmovd %esi, %k1
; CHECK-NEXT: vpaddusw (%rdi), %ymm0, %ymm0 {%k1} {z}
; CHECK-NEXT: retq
  %b = load <16 x i16>, <16 x i16>* %ptr_b
  %a.ext = zext <16 x i16> %a to <16 x i32>
  %b.ext = zext <16 x i16> %b to <16 x i32>
  %wide.sum = add nsw <16 x i32> %a.ext, %b.ext
  ; umin(sum, 65535)
  %lt.max = icmp ult <16 x i32> %wide.sum, <i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535>
  %clamp = select <16 x i1> %lt.max, <16 x i32> %wide.sum, <16 x i32> <i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535>
  %sat = trunc <16 x i32> %clamp to <16 x i16>
  ; One mask bit per lane selects between the saturated result and zero.
  %mask.vec = bitcast i16 %mask to <16 x i1>
  %res = select <16 x i1> %mask.vec, <16 x i16> %sat, <16 x i16> zeroinitializer
  ret <16 x i16> %res
}
|
|
|
|
; Unsigned saturating i16 subtract, 128-bit, register operands, no masking.
; Expressed as umax(a, b) - b, which is a - b when a > b and 0 otherwise;
; the assertions below expect a single vpsubusw.
define <8 x i16> @test_mask_subs_epu16_rr_128(<8 x i16> %a, <8 x i16> %b) {
; CHECK-LABEL: test_mask_subs_epu16_rr_128:
; CHECK: ## %bb.0:
; CHECK-NEXT: vpsubusw %xmm1, %xmm0, %xmm0
; CHECK-NEXT: retq
  %a.gt.b = icmp ugt <8 x i16> %a, %b
  %umax = select <8 x i1> %a.gt.b, <8 x i16> %a, <8 x i16> %b
  %sat = sub <8 x i16> %umax, %b
  ret <8 x i16> %sat
}
|
|
|
|
; Unsigned saturating i16 subtract, 128-bit, register operands, merge-masked:
; lanes whose bit in %mask is clear take %passThru. The assertions below
; expect a masked vpsubusw into the pass-through register followed by a move.
define <8 x i16> @test_mask_subs_epu16_rrk_128(<8 x i16> %a, <8 x i16> %b, <8 x i16> %passThru, i8 %mask) {
; CHECK-LABEL: test_mask_subs_epu16_rrk_128:
; CHECK: ## %bb.0:
; CHECK-NEXT: kmovd %edi, %k1
; CHECK-NEXT: vpsubusw %xmm1, %xmm0, %xmm2 {%k1}
; CHECK-NEXT: vmovdqa %xmm2, %xmm0
; CHECK-NEXT: retq
  ; umax(a, b) - b
  %a.gt.b = icmp ugt <8 x i16> %a, %b
  %umax = select <8 x i1> %a.gt.b, <8 x i16> %a, <8 x i16> %b
  %sat = sub <8 x i16> %umax, %b
  ; One mask bit per lane selects between the saturated result and %passThru.
  %mask.vec = bitcast i8 %mask to <8 x i1>
  %masked = select <8 x i1> %mask.vec, <8 x i16> %sat, <8 x i16> %passThru
  ret <8 x i16> %masked
}
|
|
|
|
; Unsigned saturating i16 subtract, 128-bit, register operands, zero-masked:
; lanes whose bit in %mask is clear become zero. The assertions below expect
; a single vpsubusw with {%k1} {z}.
define <8 x i16> @test_mask_subs_epu16_rrkz_128(<8 x i16> %a, <8 x i16> %b, i8 %mask) {
; CHECK-LABEL: test_mask_subs_epu16_rrkz_128:
; CHECK: ## %bb.0:
; CHECK-NEXT: kmovd %edi, %k1
; CHECK-NEXT: vpsubusw %xmm1, %xmm0, %xmm0 {%k1} {z}
; CHECK-NEXT: retq
  ; umax(a, b) - b
  %a.gt.b = icmp ugt <8 x i16> %a, %b
  %umax = select <8 x i1> %a.gt.b, <8 x i16> %a, <8 x i16> %b
  %sat = sub <8 x i16> %umax, %b
  ; One mask bit per lane selects between the saturated result and zero.
  %mask.vec = bitcast i8 %mask to <8 x i1>
  %masked = select <8 x i1> %mask.vec, <8 x i16> %sat, <8 x i16> zeroinitializer
  ret <8 x i16> %masked
}
|
|
|
|
; Unsigned saturating i16 subtract, 128-bit, second operand loaded from
; memory, no masking. Expressed as umax(a, b) - b; the assertions below
; expect a single vpsubusw with the load folded in.
define <8 x i16> @test_mask_subs_epu16_rm_128(<8 x i16> %a, <8 x i16>* %ptr_b) {
; CHECK-LABEL: test_mask_subs_epu16_rm_128:
; CHECK: ## %bb.0:
; CHECK-NEXT: vpsubusw (%rdi), %xmm0, %xmm0
; CHECK-NEXT: retq
  %b = load <8 x i16>, <8 x i16>* %ptr_b
  %a.gt.b = icmp ugt <8 x i16> %a, %b
  %umax = select <8 x i1> %a.gt.b, <8 x i16> %a, <8 x i16> %b
  %sat = sub <8 x i16> %umax, %b
  ret <8 x i16> %sat
}
|
|
|
|
; Unsigned saturating i16 subtract, 128-bit, second operand loaded from
; memory, merge-masked: lanes whose bit in %mask is clear take %passThru.
; The assertions below expect a masked vpsubusw into the pass-through
; register followed by a register move.
define <8 x i16> @test_mask_subs_epu16_rmk_128(<8 x i16> %a, <8 x i16>* %ptr_b, <8 x i16> %passThru, i8 %mask) {
; CHECK-LABEL: test_mask_subs_epu16_rmk_128:
; CHECK: ## %bb.0:
; CHECK-NEXT: kmovd %esi, %k1
; CHECK-NEXT: vpsubusw (%rdi), %xmm0, %xmm1 {%k1}
; CHECK-NEXT: vmovdqa %xmm1, %xmm0
; CHECK-NEXT: retq
  %b = load <8 x i16>, <8 x i16>* %ptr_b
  ; umax(a, b) - b
  %a.gt.b = icmp ugt <8 x i16> %a, %b
  %umax = select <8 x i1> %a.gt.b, <8 x i16> %a, <8 x i16> %b
  %sat = sub <8 x i16> %umax, %b
  ; One mask bit per lane selects between the saturated result and %passThru.
  %mask.vec = bitcast i8 %mask to <8 x i1>
  %masked = select <8 x i1> %mask.vec, <8 x i16> %sat, <8 x i16> %passThru
  ret <8 x i16> %masked
}
|
|
|
|
; Unsigned saturating i16 subtract, 128-bit, second operand loaded from
; memory, zero-masked: lanes whose bit in %mask is clear become zero. The
; assertions below expect a single vpsubusw with {%k1} {z}.
define <8 x i16> @test_mask_subs_epu16_rmkz_128(<8 x i16> %a, <8 x i16>* %ptr_b, i8 %mask) {
; CHECK-LABEL: test_mask_subs_epu16_rmkz_128:
; CHECK: ## %bb.0:
; CHECK-NEXT: kmovd %esi, %k1
; CHECK-NEXT: vpsubusw (%rdi), %xmm0, %xmm0 {%k1} {z}
; CHECK-NEXT: retq
  %b = load <8 x i16>, <8 x i16>* %ptr_b
  ; umax(a, b) - b
  %a.gt.b = icmp ugt <8 x i16> %a, %b
  %umax = select <8 x i1> %a.gt.b, <8 x i16> %a, <8 x i16> %b
  %sat = sub <8 x i16> %umax, %b
  ; One mask bit per lane selects between the saturated result and zero.
  %mask.vec = bitcast i8 %mask to <8 x i1>
  %masked = select <8 x i1> %mask.vec, <8 x i16> %sat, <8 x i16> zeroinitializer
  ret <8 x i16> %masked
}
|
|
|
|
; Unsigned saturating i16 subtract, 256-bit, register operands, no masking.
; Expressed as umax(a, b) - b, which is a - b when a > b and 0 otherwise;
; the assertions below expect a single ymm vpsubusw.
define <16 x i16> @test_mask_subs_epu16_rr_256(<16 x i16> %a, <16 x i16> %b) {
; CHECK-LABEL: test_mask_subs_epu16_rr_256:
; CHECK: ## %bb.0:
; CHECK-NEXT: vpsubusw %ymm1, %ymm0, %ymm0
; CHECK-NEXT: retq
  %a.gt.b = icmp ugt <16 x i16> %a, %b
  %umax = select <16 x i1> %a.gt.b, <16 x i16> %a, <16 x i16> %b
  %sat = sub <16 x i16> %umax, %b
  ret <16 x i16> %sat
}
|
|
|
|
; Unsigned saturating i16 subtract, 256-bit, register operands, merge-masked:
; lanes whose bit in %mask is clear take %passThru. The assertions below
; expect a masked vpsubusw into the pass-through register followed by a move.
define <16 x i16> @test_mask_subs_epu16_rrk_256(<16 x i16> %a, <16 x i16> %b, <16 x i16> %passThru, i16 %mask) {
; CHECK-LABEL: test_mask_subs_epu16_rrk_256:
; CHECK: ## %bb.0:
; CHECK-NEXT: kmovd %edi, %k1
; CHECK-NEXT: vpsubusw %ymm1, %ymm0, %ymm2 {%k1}
; CHECK-NEXT: vmovdqa %ymm2, %ymm0
; CHECK-NEXT: retq
  ; umax(a, b) - b
  %a.gt.b = icmp ugt <16 x i16> %a, %b
  %umax = select <16 x i1> %a.gt.b, <16 x i16> %a, <16 x i16> %b
  %sat = sub <16 x i16> %umax, %b
  ; One mask bit per lane selects between the saturated result and %passThru.
  %mask.vec = bitcast i16 %mask to <16 x i1>
  %masked = select <16 x i1> %mask.vec, <16 x i16> %sat, <16 x i16> %passThru
  ret <16 x i16> %masked
}
|
|
|
|
; Unsigned saturating i16 subtract, 256-bit, register operands, zero-masked:
; lanes whose bit in %mask is clear become zero. The assertions below expect
; a single vpsubusw with {%k1} {z}.
define <16 x i16> @test_mask_subs_epu16_rrkz_256(<16 x i16> %a, <16 x i16> %b, i16 %mask) {
; CHECK-LABEL: test_mask_subs_epu16_rrkz_256:
; CHECK: ## %bb.0:
; CHECK-NEXT: kmovd %edi, %k1
; CHECK-NEXT: vpsubusw %ymm1, %ymm0, %ymm0 {%k1} {z}
; CHECK-NEXT: retq
  ; umax(a, b) - b
  %a.gt.b = icmp ugt <16 x i16> %a, %b
  %umax = select <16 x i1> %a.gt.b, <16 x i16> %a, <16 x i16> %b
  %sat = sub <16 x i16> %umax, %b
  ; One mask bit per lane selects between the saturated result and zero.
  %mask.vec = bitcast i16 %mask to <16 x i1>
  %masked = select <16 x i1> %mask.vec, <16 x i16> %sat, <16 x i16> zeroinitializer
  ret <16 x i16> %masked
}
|
|
|
|
; Unsigned saturating i16 subtract, 256-bit, second operand loaded from
; memory, no masking. Expressed as umax(a, b) - b; the assertions below
; expect a single ymm vpsubusw with the load folded in.
define <16 x i16> @test_mask_subs_epu16_rm_256(<16 x i16> %a, <16 x i16>* %ptr_b) {
; CHECK-LABEL: test_mask_subs_epu16_rm_256:
; CHECK: ## %bb.0:
; CHECK-NEXT: vpsubusw (%rdi), %ymm0, %ymm0
; CHECK-NEXT: retq
  %b = load <16 x i16>, <16 x i16>* %ptr_b
  %a.gt.b = icmp ugt <16 x i16> %a, %b
  %umax = select <16 x i1> %a.gt.b, <16 x i16> %a, <16 x i16> %b
  %sat = sub <16 x i16> %umax, %b
  ret <16 x i16> %sat
}
|
|
|
|
; Unsigned saturating i16 subtract, 256-bit, second operand loaded from
; memory, merge-masked: lanes whose bit in %mask is clear take %passThru.
; The assertions below expect a masked vpsubusw into the pass-through
; register followed by a register move.
define <16 x i16> @test_mask_subs_epu16_rmk_256(<16 x i16> %a, <16 x i16>* %ptr_b, <16 x i16> %passThru, i16 %mask) {
; CHECK-LABEL: test_mask_subs_epu16_rmk_256:
; CHECK: ## %bb.0:
; CHECK-NEXT: kmovd %esi, %k1
; CHECK-NEXT: vpsubusw (%rdi), %ymm0, %ymm1 {%k1}
; CHECK-NEXT: vmovdqa %ymm1, %ymm0
; CHECK-NEXT: retq
  %b = load <16 x i16>, <16 x i16>* %ptr_b
  ; umax(a, b) - b
  %a.gt.b = icmp ugt <16 x i16> %a, %b
  %umax = select <16 x i1> %a.gt.b, <16 x i16> %a, <16 x i16> %b
  %sat = sub <16 x i16> %umax, %b
  ; One mask bit per lane selects between the saturated result and %passThru.
  %mask.vec = bitcast i16 %mask to <16 x i1>
  %masked = select <16 x i1> %mask.vec, <16 x i16> %sat, <16 x i16> %passThru
  ret <16 x i16> %masked
}
|
|
|
|
; Unsigned saturating i16 subtract, 256-bit, second operand loaded from
; memory, zero-masked: lanes whose bit in %mask is clear become zero. The
; assertions below expect a single vpsubusw with {%k1} {z}.
define <16 x i16> @test_mask_subs_epu16_rmkz_256(<16 x i16> %a, <16 x i16>* %ptr_b, i16 %mask) {
; CHECK-LABEL: test_mask_subs_epu16_rmkz_256:
; CHECK: ## %bb.0:
; CHECK-NEXT: kmovd %esi, %k1
; CHECK-NEXT: vpsubusw (%rdi), %ymm0, %ymm0 {%k1} {z}
; CHECK-NEXT: retq
  %b = load <16 x i16>, <16 x i16>* %ptr_b
  ; umax(a, b) - b
  %a.gt.b = icmp ugt <16 x i16> %a, %b
  %umax = select <16 x i1> %a.gt.b, <16 x i16> %a, <16 x i16> %b
  %sat = sub <16 x i16> %umax, %b
  ; One mask bit per lane selects between the saturated result and zero.
  %mask.vec = bitcast i16 %mask to <16 x i1>
  %masked = select <16 x i1> %mask.vec, <16 x i16> %sat, <16 x i16> zeroinitializer
  ret <16 x i16> %masked
}
|
|
|
|
; Signed saturating i8 add, 128-bit, register operands, no masking.
; The IR widens both inputs to i16, adds, clamps the sum to [-128, 127] and
; truncates back to i8; the assertions below expect a single vpaddsb.
define <16 x i8> @test_mask_adds_epi8_rr_128(<16 x i8> %a, <16 x i8> %b) {
; CHECK-LABEL: test_mask_adds_epi8_rr_128:
; CHECK: ## %bb.0:
; CHECK-NEXT: vpaddsb %xmm1, %xmm0, %xmm0
; CHECK-NEXT: retq
  %a.ext = sext <16 x i8> %a to <16 x i16>
  %b.ext = sext <16 x i8> %b to <16 x i16>
  %wide.sum = add nsw <16 x i16> %a.ext, %b.ext
  ; min(sum, 127)
  %lt.max = icmp slt <16 x i16> %wide.sum, <i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127>
  %clamp.hi = select <16 x i1> %lt.max, <16 x i16> %wide.sum, <16 x i16> <i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127>
  ; max(..., -128)
  %gt.min = icmp sgt <16 x i16> %clamp.hi, <i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128>
  %clamp.lo = select <16 x i1> %gt.min, <16 x i16> %clamp.hi, <16 x i16> <i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128>
  %sat = trunc <16 x i16> %clamp.lo to <16 x i8>
  ret <16 x i8> %sat
}
|
|
|
|
; Signed saturating i8 add, 128-bit, register operands, merge-masked:
; lanes whose bit in %mask is clear take %passThru. The assertions below
; expect a masked vpaddsb into the pass-through register followed by a move.
define <16 x i8> @test_mask_adds_epi8_rrk_128(<16 x i8> %a, <16 x i8> %b, <16 x i8> %passThru, i16 %mask) {
; CHECK-LABEL: test_mask_adds_epi8_rrk_128:
; CHECK: ## %bb.0:
; CHECK-NEXT: kmovd %edi, %k1
; CHECK-NEXT: vpaddsb %xmm1, %xmm0, %xmm2 {%k1}
; CHECK-NEXT: vmovdqa %xmm2, %xmm0
; CHECK-NEXT: retq
  %a.ext = sext <16 x i8> %a to <16 x i16>
  %b.ext = sext <16 x i8> %b to <16 x i16>
  %wide.sum = add nsw <16 x i16> %a.ext, %b.ext
  ; min(sum, 127)
  %lt.max = icmp slt <16 x i16> %wide.sum, <i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127>
  %clamp.hi = select <16 x i1> %lt.max, <16 x i16> %wide.sum, <16 x i16> <i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127>
  ; max(..., -128)
  %gt.min = icmp sgt <16 x i16> %clamp.hi, <i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128>
  %clamp.lo = select <16 x i1> %gt.min, <16 x i16> %clamp.hi, <16 x i16> <i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128>
  %sat = trunc <16 x i16> %clamp.lo to <16 x i8>
  ; One mask bit per lane selects between the saturated result and %passThru.
  %mask.vec = bitcast i16 %mask to <16 x i1>
  %res = select <16 x i1> %mask.vec, <16 x i8> %sat, <16 x i8> %passThru
  ret <16 x i8> %res
}
|
|
|
|
; Signed saturating i8 add, 128-bit, register operands, zero-masked:
; lanes whose bit in %mask is clear become zero. The assertions below expect
; a single vpaddsb with {%k1} {z}.
define <16 x i8> @test_mask_adds_epi8_rrkz_128(<16 x i8> %a, <16 x i8> %b, i16 %mask) {
; CHECK-LABEL: test_mask_adds_epi8_rrkz_128:
; CHECK: ## %bb.0:
; CHECK-NEXT: kmovd %edi, %k1
; CHECK-NEXT: vpaddsb %xmm1, %xmm0, %xmm0 {%k1} {z}
; CHECK-NEXT: retq
  %a.ext = sext <16 x i8> %a to <16 x i16>
  %b.ext = sext <16 x i8> %b to <16 x i16>
  %wide.sum = add nsw <16 x i16> %a.ext, %b.ext
  ; min(sum, 127)
  %lt.max = icmp slt <16 x i16> %wide.sum, <i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127>
  %clamp.hi = select <16 x i1> %lt.max, <16 x i16> %wide.sum, <16 x i16> <i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127>
  ; max(..., -128)
  %gt.min = icmp sgt <16 x i16> %clamp.hi, <i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128>
  %clamp.lo = select <16 x i1> %gt.min, <16 x i16> %clamp.hi, <16 x i16> <i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128>
  %sat = trunc <16 x i16> %clamp.lo to <16 x i8>
  ; One mask bit per lane selects between the saturated result and zero.
  %mask.vec = bitcast i16 %mask to <16 x i1>
  %res = select <16 x i1> %mask.vec, <16 x i8> %sat, <16 x i8> zeroinitializer
  ret <16 x i8> %res
}
|
|
|
|
; Signed saturating add of a <16 x i8> register operand and a <16 x i8> value
; loaded from %ptr_b, spelled as generic IR: sign-extend to i16, add, clamp to
; [-128, 127], truncate to i8. Despite "mask" in the name, this variant takes
; no mask argument.
; The FileCheck assertions below expect a single vpaddsb with the (%rdi)
; memory operand, i.e. the load is folded into the instruction.
define <16 x i8> @test_mask_adds_epi8_rm_128(<16 x i8> %a, <16 x i8>* %ptr_b) {
|
|
; CHECK-LABEL: test_mask_adds_epi8_rm_128:
|
|
; CHECK: ## %bb.0:
|
|
; CHECK-NEXT: vpaddsb (%rdi), %xmm0, %xmm0
|
|
; CHECK-NEXT: retq
|
|
%b = load <16 x i8>, <16 x i8>* %ptr_b
|
|
; Widen both operands to i16 so the sum of two i8 values cannot wrap.
%1 = sext <16 x i8> %a to <16 x i16>
|
|
%2 = sext <16 x i8> %b to <16 x i16>
|
|
%3 = add nsw <16 x i16> %1, %2
|
|
; Clamp from above: min(%3, 127) written as icmp slt + select.
%4 = icmp slt <16 x i16> %3, <i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127>
|
|
%5 = select <16 x i1> %4, <16 x i16> %3, <16 x i16> <i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127>
|
|
; Clamp from below: max(%5, -128) written as icmp sgt + select.
%6 = icmp sgt <16 x i16> %5, <i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128>
|
|
%7 = select <16 x i1> %6, <16 x i16> %5, <16 x i16> <i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128>
|
|
%8 = trunc <16 x i16> %7 to <16 x i8>
|
|
ret <16 x i8> %8
|
|
}
|
|
|
|
; Merge-masked signed saturating add of a <16 x i8> register operand and a
; <16 x i8> value loaded from %ptr_b, spelled as generic IR: sign-extend to
; i16, add, clamp to [-128, 127], truncate to i8, then select between the
; result and %passThru under %mask.
; The FileCheck assertions below expect a vpaddsb with the (%rdi) memory
; operand writing %xmm1 under {%k1}, followed by a vmovdqa of %xmm1 into %xmm0.
define <16 x i8> @test_mask_adds_epi8_rmk_128(<16 x i8> %a, <16 x i8>* %ptr_b, <16 x i8> %passThru, i16 %mask) {
|
|
; CHECK-LABEL: test_mask_adds_epi8_rmk_128:
|
|
; CHECK: ## %bb.0:
|
|
; CHECK-NEXT: kmovd %esi, %k1
|
|
; CHECK-NEXT: vpaddsb (%rdi), %xmm0, %xmm1 {%k1}
|
|
; CHECK-NEXT: vmovdqa %xmm1, %xmm0
|
|
; CHECK-NEXT: retq
|
|
%b = load <16 x i8>, <16 x i8>* %ptr_b
|
|
; Widen both operands to i16 so the sum of two i8 values cannot wrap.
%1 = sext <16 x i8> %a to <16 x i16>
|
|
%2 = sext <16 x i8> %b to <16 x i16>
|
|
%3 = add nsw <16 x i16> %1, %2
|
|
; Clamp from above: min(%3, 127) written as icmp slt + select.
%4 = icmp slt <16 x i16> %3, <i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127>
|
|
%5 = select <16 x i1> %4, <16 x i16> %3, <16 x i16> <i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127>
|
|
; Clamp from below: max(%5, -128) written as icmp sgt + select.
%6 = icmp sgt <16 x i16> %5, <i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128>
|
|
%7 = select <16 x i1> %6, <16 x i16> %5, <16 x i16> <i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128>
|
|
%8 = trunc <16 x i16> %7 to <16 x i8>
|
|
; One mask bit per lane: set bits take the computed lane, clear bits take %passThru.
%9 = bitcast i16 %mask to <16 x i1>
|
|
%10 = select <16 x i1> %9, <16 x i8> %8, <16 x i8> %passThru
|
|
ret <16 x i8> %10
|
|
}
|
|
|
|
; Zero-masked signed saturating add of a <16 x i8> register operand and a
; <16 x i8> value loaded from %ptr_b, spelled as generic IR: sign-extend to
; i16, add, clamp to [-128, 127], truncate to i8, then select between the
; result and zero under %mask.
; The FileCheck assertions below expect a single vpaddsb with the (%rdi)
; memory operand and {%k1} {z}.
define <16 x i8> @test_mask_adds_epi8_rmkz_128(<16 x i8> %a, <16 x i8>* %ptr_b, i16 %mask) {
|
|
; CHECK-LABEL: test_mask_adds_epi8_rmkz_128:
|
|
; CHECK: ## %bb.0:
|
|
; CHECK-NEXT: kmovd %esi, %k1
|
|
; CHECK-NEXT: vpaddsb (%rdi), %xmm0, %xmm0 {%k1} {z}
|
|
; CHECK-NEXT: retq
|
|
%b = load <16 x i8>, <16 x i8>* %ptr_b
|
|
; Widen both operands to i16 so the sum of two i8 values cannot wrap.
%1 = sext <16 x i8> %a to <16 x i16>
|
|
%2 = sext <16 x i8> %b to <16 x i16>
|
|
%3 = add nsw <16 x i16> %1, %2
|
|
; Clamp from above: min(%3, 127) written as icmp slt + select.
%4 = icmp slt <16 x i16> %3, <i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127>
|
|
%5 = select <16 x i1> %4, <16 x i16> %3, <16 x i16> <i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127>
|
|
; Clamp from below: max(%5, -128) written as icmp sgt + select.
%6 = icmp sgt <16 x i16> %5, <i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128>
|
|
%7 = select <16 x i1> %6, <16 x i16> %5, <16 x i16> <i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128>
|
|
%8 = trunc <16 x i16> %7 to <16 x i8>
|
|
; One mask bit per lane: set bits take the computed lane, clear bits become zero.
%9 = bitcast i16 %mask to <16 x i1>
|
|
%10 = select <16 x i1> %9, <16 x i8> %8, <16 x i8> zeroinitializer
|
|
ret <16 x i8> %10
|
|
}
|
|
|
|
define <32 x i8> @test_mask_adds_epi8_rr_256(<32 x i8> %a, <32 x i8> %b) {
|
|
; CHECK-LABEL: test_mask_adds_epi8_rr_256:
|
|
; CHECK: ## %bb.0:
|
|
; CHECK-NEXT: vpaddsb %ymm1, %ymm0, %ymm0
|
|
; CHECK-NEXT: retq
|
|
%1 = sext <32 x i8> %a to <32 x i16>
|
|
%2 = sext <32 x i8> %b to <32 x i16>
|
|
%3 = add nsw <32 x i16> %1, %2
|
|
%4 = icmp slt <32 x i16> %3, <i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127>
|
|
%5 = select <32 x i1> %4, <32 x i16> %3, <32 x i16> <i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127>
|
|
%6 = icmp sgt <32 x i16> %5, <i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128>
|
|
%7 = select <32 x i1> %6, <32 x i16> %5, <32 x i16> <i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128>
|
|
%8 = trunc <32 x i16> %7 to <32 x i8>
|
|
ret <32 x i8> %8
|
|
}
|
|
|
|
define <32 x i8> @test_mask_adds_epi8_rrk_256(<32 x i8> %a, <32 x i8> %b, <32 x i8> %passThru, i32 %mask) {
|
|
; CHECK-LABEL: test_mask_adds_epi8_rrk_256:
|
|
; CHECK: ## %bb.0:
|
|
; CHECK-NEXT: kmovd %edi, %k1
|
|
; CHECK-NEXT: vpaddsb %ymm1, %ymm0, %ymm2 {%k1}
|
|
; CHECK-NEXT: vmovdqa %ymm2, %ymm0
|
|
; CHECK-NEXT: retq
|
|
%1 = sext <32 x i8> %a to <32 x i16>
|
|
%2 = sext <32 x i8> %b to <32 x i16>
|
|
%3 = add nsw <32 x i16> %1, %2
|
|
%4 = icmp slt <32 x i16> %3, <i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127>
|
|
%5 = select <32 x i1> %4, <32 x i16> %3, <32 x i16> <i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127>
|
|
%6 = icmp sgt <32 x i16> %5, <i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128>
|
|
%7 = select <32 x i1> %6, <32 x i16> %5, <32 x i16> <i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128>
|
|
%8 = trunc <32 x i16> %7 to <32 x i8>
|
|
%9 = bitcast i32 %mask to <32 x i1>
|
|
%10 = select <32 x i1> %9, <32 x i8> %8, <32 x i8> %passThru
|
|
ret <32 x i8> %10
|
|
}
|
|
|
|
define <32 x i8> @test_mask_adds_epi8_rrkz_256(<32 x i8> %a, <32 x i8> %b, i32 %mask) {
|
|
; CHECK-LABEL: test_mask_adds_epi8_rrkz_256:
|
|
; CHECK: ## %bb.0:
|
|
; CHECK-NEXT: kmovd %edi, %k1
|
|
; CHECK-NEXT: vpaddsb %ymm1, %ymm0, %ymm0 {%k1} {z}
|
|
; CHECK-NEXT: retq
|
|
%1 = sext <32 x i8> %a to <32 x i16>
|
|
%2 = sext <32 x i8> %b to <32 x i16>
|
|
%3 = add nsw <32 x i16> %1, %2
|
|
%4 = icmp slt <32 x i16> %3, <i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127>
|
|
%5 = select <32 x i1> %4, <32 x i16> %3, <32 x i16> <i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127>
|
|
%6 = icmp sgt <32 x i16> %5, <i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128>
|
|
%7 = select <32 x i1> %6, <32 x i16> %5, <32 x i16> <i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128>
|
|
%8 = trunc <32 x i16> %7 to <32 x i8>
|
|
%9 = bitcast i32 %mask to <32 x i1>
|
|
%10 = select <32 x i1> %9, <32 x i8> %8, <32 x i8> zeroinitializer
|
|
ret <32 x i8> %10
|
|
}
|
|
|
|
define <32 x i8> @test_mask_adds_epi8_rm_256(<32 x i8> %a, <32 x i8>* %ptr_b) {
|
|
; CHECK-LABEL: test_mask_adds_epi8_rm_256:
|
|
; CHECK: ## %bb.0:
|
|
; CHECK-NEXT: vpaddsb (%rdi), %ymm0, %ymm0
|
|
; CHECK-NEXT: retq
|
|
%b = load <32 x i8>, <32 x i8>* %ptr_b
|
|
%1 = sext <32 x i8> %a to <32 x i16>
|
|
%2 = sext <32 x i8> %b to <32 x i16>
|
|
%3 = add nsw <32 x i16> %1, %2
|
|
%4 = icmp slt <32 x i16> %3, <i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127>
|
|
%5 = select <32 x i1> %4, <32 x i16> %3, <32 x i16> <i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127>
|
|
%6 = icmp sgt <32 x i16> %5, <i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128>
|
|
%7 = select <32 x i1> %6, <32 x i16> %5, <32 x i16> <i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128>
|
|
%8 = trunc <32 x i16> %7 to <32 x i8>
|
|
ret <32 x i8> %8
|
|
}
|
|
|
|
define <32 x i8> @test_mask_adds_epi8_rmk_256(<32 x i8> %a, <32 x i8>* %ptr_b, <32 x i8> %passThru, i32 %mask) {
|
|
; CHECK-LABEL: test_mask_adds_epi8_rmk_256:
|
|
; CHECK: ## %bb.0:
|
|
; CHECK-NEXT: kmovd %esi, %k1
|
|
; CHECK-NEXT: vpaddsb (%rdi), %ymm0, %ymm1 {%k1}
|
|
; CHECK-NEXT: vmovdqa %ymm1, %ymm0
|
|
; CHECK-NEXT: retq
|
|
%b = load <32 x i8>, <32 x i8>* %ptr_b
|
|
%1 = sext <32 x i8> %a to <32 x i16>
|
|
%2 = sext <32 x i8> %b to <32 x i16>
|
|
%3 = add nsw <32 x i16> %1, %2
|
|
%4 = icmp slt <32 x i16> %3, <i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127>
|
|
%5 = select <32 x i1> %4, <32 x i16> %3, <32 x i16> <i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127>
|
|
%6 = icmp sgt <32 x i16> %5, <i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128>
|
|
%7 = select <32 x i1> %6, <32 x i16> %5, <32 x i16> <i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128>
|
|
%8 = trunc <32 x i16> %7 to <32 x i8>
|
|
%9 = bitcast i32 %mask to <32 x i1>
|
|
%10 = select <32 x i1> %9, <32 x i8> %8, <32 x i8> %passThru
|
|
ret <32 x i8> %10
|
|
}
|
|
|
|
define <32 x i8> @test_mask_adds_epi8_rmkz_256(<32 x i8> %a, <32 x i8>* %ptr_b, i32 %mask) {
|
|
; CHECK-LABEL: test_mask_adds_epi8_rmkz_256:
|
|
; CHECK: ## %bb.0:
|
|
; CHECK-NEXT: kmovd %esi, %k1
|
|
; CHECK-NEXT: vpaddsb (%rdi), %ymm0, %ymm0 {%k1} {z}
|
|
; CHECK-NEXT: retq
|
|
%b = load <32 x i8>, <32 x i8>* %ptr_b
|
|
%1 = sext <32 x i8> %a to <32 x i16>
|
|
%2 = sext <32 x i8> %b to <32 x i16>
|
|
%3 = add nsw <32 x i16> %1, %2
|
|
%4 = icmp slt <32 x i16> %3, <i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127>
|
|
%5 = select <32 x i1> %4, <32 x i16> %3, <32 x i16> <i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127>
|
|
%6 = icmp sgt <32 x i16> %5, <i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128>
|
|
%7 = select <32 x i1> %6, <32 x i16> %5, <32 x i16> <i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128>
|
|
%8 = trunc <32 x i16> %7 to <32 x i8>
|
|
%9 = bitcast i32 %mask to <32 x i1>
|
|
%10 = select <32 x i1> %9, <32 x i8> %8, <32 x i8> zeroinitializer
|
|
ret <32 x i8> %10
|
|
}
|
|
|
|
; Signed saturating subtract (%a - %b) of two <16 x i8> vectors, spelled as
; generic IR: sign-extend to i16, subtract, clamp to [-128, 127], truncate to
; i8. Despite "mask" in the name, this variant takes no mask argument.
; The FileCheck assertions below expect the whole sequence to become a single
; vpsubsb on xmm registers.
define <16 x i8> @test_mask_subs_epi8_rr_128(<16 x i8> %a, <16 x i8> %b) {
|
|
; CHECK-LABEL: test_mask_subs_epi8_rr_128:
|
|
; CHECK: ## %bb.0:
|
|
; CHECK-NEXT: vpsubsb %xmm1, %xmm0, %xmm0
|
|
; CHECK-NEXT: retq
|
|
; Widen both operands to i16 so the difference of two i8 values cannot wrap.
%1 = sext <16 x i8> %a to <16 x i16>
|
|
%2 = sext <16 x i8> %b to <16 x i16>
|
|
%3 = sub nsw <16 x i16> %1, %2
|
|
; Clamp from above: min(%3, 127) written as icmp slt + select.
%4 = icmp slt <16 x i16> %3, <i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127>
|
|
%5 = select <16 x i1> %4, <16 x i16> %3, <16 x i16> <i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127>
|
|
; Clamp from below: max(%5, -128) written as icmp sgt + select.
%6 = icmp sgt <16 x i16> %5, <i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128>
|
|
%7 = select <16 x i1> %6, <16 x i16> %5, <16 x i16> <i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128>
|
|
%8 = trunc <16 x i16> %7 to <16 x i8>
|
|
ret <16 x i8> %8
|
|
}
|
|
|
|
; Merge-masked signed saturating subtract (%a - %b) of two <16 x i8> vectors,
; spelled as generic IR: sign-extend to i16, subtract, clamp to [-128, 127],
; truncate to i8, then select between the result and %passThru under %mask.
; The FileCheck assertions below expect a single vpsubsb writing %xmm2 under
; {%k1}, followed by a vmovdqa of %xmm2 into %xmm0.
define <16 x i8> @test_mask_subs_epi8_rrk_128(<16 x i8> %a, <16 x i8> %b, <16 x i8> %passThru, i16 %mask) {
|
|
; CHECK-LABEL: test_mask_subs_epi8_rrk_128:
|
|
; CHECK: ## %bb.0:
|
|
; CHECK-NEXT: kmovd %edi, %k1
|
|
; CHECK-NEXT: vpsubsb %xmm1, %xmm0, %xmm2 {%k1}
|
|
; CHECK-NEXT: vmovdqa %xmm2, %xmm0
|
|
; CHECK-NEXT: retq
|
|
; Widen both operands to i16 so the difference of two i8 values cannot wrap.
%1 = sext <16 x i8> %a to <16 x i16>
|
|
%2 = sext <16 x i8> %b to <16 x i16>
|
|
%3 = sub nsw <16 x i16> %1, %2
|
|
; Clamp from above: min(%3, 127) written as icmp slt + select.
%4 = icmp slt <16 x i16> %3, <i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127>
|
|
%5 = select <16 x i1> %4, <16 x i16> %3, <16 x i16> <i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127>
|
|
; Clamp from below: max(%5, -128) written as icmp sgt + select.
%6 = icmp sgt <16 x i16> %5, <i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128>
|
|
%7 = select <16 x i1> %6, <16 x i16> %5, <16 x i16> <i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128>
|
|
%8 = trunc <16 x i16> %7 to <16 x i8>
|
|
; One mask bit per lane: set bits take the computed lane, clear bits take %passThru.
%9 = bitcast i16 %mask to <16 x i1>
|
|
%10 = select <16 x i1> %9, <16 x i8> %8, <16 x i8> %passThru
|
|
ret <16 x i8> %10
|
|
}
|
|
|
|
; Zero-masked signed saturating subtract (%a - %b) of two <16 x i8> vectors,
; spelled as generic IR: sign-extend to i16, subtract, clamp to [-128, 127],
; truncate to i8, then select between the result and zero under %mask.
; The FileCheck assertions below expect a single vpsubsb with {%k1} {z}.
define <16 x i8> @test_mask_subs_epi8_rrkz_128(<16 x i8> %a, <16 x i8> %b, i16 %mask) {
|
|
; CHECK-LABEL: test_mask_subs_epi8_rrkz_128:
|
|
; CHECK: ## %bb.0:
|
|
; CHECK-NEXT: kmovd %edi, %k1
|
|
; CHECK-NEXT: vpsubsb %xmm1, %xmm0, %xmm0 {%k1} {z}
|
|
; CHECK-NEXT: retq
|
|
; Widen both operands to i16 so the difference of two i8 values cannot wrap.
%1 = sext <16 x i8> %a to <16 x i16>
|
|
%2 = sext <16 x i8> %b to <16 x i16>
|
|
%3 = sub nsw <16 x i16> %1, %2
|
|
; Clamp from above: min(%3, 127) written as icmp slt + select.
%4 = icmp slt <16 x i16> %3, <i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127>
|
|
%5 = select <16 x i1> %4, <16 x i16> %3, <16 x i16> <i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127>
|
|
; Clamp from below: max(%5, -128) written as icmp sgt + select.
%6 = icmp sgt <16 x i16> %5, <i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128>
|
|
%7 = select <16 x i1> %6, <16 x i16> %5, <16 x i16> <i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128>
|
|
%8 = trunc <16 x i16> %7 to <16 x i8>
|
|
; One mask bit per lane: set bits take the computed lane, clear bits become zero.
%9 = bitcast i16 %mask to <16 x i1>
|
|
%10 = select <16 x i1> %9, <16 x i8> %8, <16 x i8> zeroinitializer
|
|
ret <16 x i8> %10
|
|
}
|
|
|
|
; Signed saturating subtract of a <16 x i8> value loaded from %ptr_b from the
; <16 x i8> register operand %a, spelled as generic IR: sign-extend to i16,
; subtract, clamp to [-128, 127], truncate to i8. Despite "mask" in the name,
; this variant takes no mask argument.
; The FileCheck assertions below expect a single vpsubsb with the (%rdi)
; memory operand, i.e. the load is folded into the instruction.
define <16 x i8> @test_mask_subs_epi8_rm_128(<16 x i8> %a, <16 x i8>* %ptr_b) {
|
|
; CHECK-LABEL: test_mask_subs_epi8_rm_128:
|
|
; CHECK: ## %bb.0:
|
|
; CHECK-NEXT: vpsubsb (%rdi), %xmm0, %xmm0
|
|
; CHECK-NEXT: retq
|
|
%b = load <16 x i8>, <16 x i8>* %ptr_b
|
|
; Widen both operands to i16 so the difference of two i8 values cannot wrap.
%1 = sext <16 x i8> %a to <16 x i16>
|
|
%2 = sext <16 x i8> %b to <16 x i16>
|
|
%3 = sub nsw <16 x i16> %1, %2
|
|
; Clamp from above: min(%3, 127) written as icmp slt + select.
%4 = icmp slt <16 x i16> %3, <i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127>
|
|
%5 = select <16 x i1> %4, <16 x i16> %3, <16 x i16> <i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127>
|
|
; Clamp from below: max(%5, -128) written as icmp sgt + select.
%6 = icmp sgt <16 x i16> %5, <i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128>
|
|
%7 = select <16 x i1> %6, <16 x i16> %5, <16 x i16> <i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128>
|
|
%8 = trunc <16 x i16> %7 to <16 x i8>
|
|
ret <16 x i8> %8
|
|
}
|
|
|
|
; Merge-masked signed saturating subtract of a <16 x i8> value loaded from
; %ptr_b from the <16 x i8> register operand %a, spelled as generic IR:
; sign-extend to i16, subtract, clamp to [-128, 127], truncate to i8, then
; select between the result and %passThru under %mask.
; The FileCheck assertions below expect a vpsubsb with the (%rdi) memory
; operand writing %xmm1 under {%k1}, followed by a vmovdqa of %xmm1 into %xmm0.
define <16 x i8> @test_mask_subs_epi8_rmk_128(<16 x i8> %a, <16 x i8>* %ptr_b, <16 x i8> %passThru, i16 %mask) {
|
|
; CHECK-LABEL: test_mask_subs_epi8_rmk_128:
|
|
; CHECK: ## %bb.0:
|
|
; CHECK-NEXT: kmovd %esi, %k1
|
|
; CHECK-NEXT: vpsubsb (%rdi), %xmm0, %xmm1 {%k1}
|
|
; CHECK-NEXT: vmovdqa %xmm1, %xmm0
|
|
; CHECK-NEXT: retq
|
|
%b = load <16 x i8>, <16 x i8>* %ptr_b
|
|
; Widen both operands to i16 so the difference of two i8 values cannot wrap.
%1 = sext <16 x i8> %a to <16 x i16>
|
|
%2 = sext <16 x i8> %b to <16 x i16>
|
|
%3 = sub nsw <16 x i16> %1, %2
|
|
; Clamp from above: min(%3, 127) written as icmp slt + select.
%4 = icmp slt <16 x i16> %3, <i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127>
|
|
%5 = select <16 x i1> %4, <16 x i16> %3, <16 x i16> <i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127>
|
|
; Clamp from below: max(%5, -128) written as icmp sgt + select.
%6 = icmp sgt <16 x i16> %5, <i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128>
|
|
%7 = select <16 x i1> %6, <16 x i16> %5, <16 x i16> <i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128>
|
|
%8 = trunc <16 x i16> %7 to <16 x i8>
|
|
; One mask bit per lane: set bits take the computed lane, clear bits take %passThru.
%9 = bitcast i16 %mask to <16 x i1>
|
|
%10 = select <16 x i1> %9, <16 x i8> %8, <16 x i8> %passThru
|
|
ret <16 x i8> %10
|
|
}
|
|
|
|
; Zero-masked signed saturating subtract of a <16 x i8> value loaded from
; %ptr_b from the <16 x i8> register operand %a, spelled as generic IR:
; sign-extend to i16, subtract, clamp to [-128, 127], truncate to i8, then
; select between the result and zero under %mask.
; The FileCheck assertions below expect a single vpsubsb with the (%rdi)
; memory operand and {%k1} {z}.
define <16 x i8> @test_mask_subs_epi8_rmkz_128(<16 x i8> %a, <16 x i8>* %ptr_b, i16 %mask) {
|
|
; CHECK-LABEL: test_mask_subs_epi8_rmkz_128:
|
|
; CHECK: ## %bb.0:
|
|
; CHECK-NEXT: kmovd %esi, %k1
|
|
; CHECK-NEXT: vpsubsb (%rdi), %xmm0, %xmm0 {%k1} {z}
|
|
; CHECK-NEXT: retq
|
|
%b = load <16 x i8>, <16 x i8>* %ptr_b
|
|
; Widen both operands to i16 so the difference of two i8 values cannot wrap.
%1 = sext <16 x i8> %a to <16 x i16>
|
|
%2 = sext <16 x i8> %b to <16 x i16>
|
|
%3 = sub nsw <16 x i16> %1, %2
|
|
; Clamp from above: min(%3, 127) written as icmp slt + select.
%4 = icmp slt <16 x i16> %3, <i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127>
|
|
%5 = select <16 x i1> %4, <16 x i16> %3, <16 x i16> <i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127>
|
|
; Clamp from below: max(%5, -128) written as icmp sgt + select.
%6 = icmp sgt <16 x i16> %5, <i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128>
|
|
%7 = select <16 x i1> %6, <16 x i16> %5, <16 x i16> <i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128>
|
|
%8 = trunc <16 x i16> %7 to <16 x i8>
|
|
; One mask bit per lane: set bits take the computed lane, clear bits become zero.
%9 = bitcast i16 %mask to <16 x i1>
|
|
%10 = select <16 x i1> %9, <16 x i8> %8, <16 x i8> zeroinitializer
|
|
ret <16 x i8> %10
|
|
}
|
|
|
|
define <32 x i8> @test_mask_subs_epi8_rr_256(<32 x i8> %a, <32 x i8> %b) {
|
|
; CHECK-LABEL: test_mask_subs_epi8_rr_256:
|
|
; CHECK: ## %bb.0:
|
|
; CHECK-NEXT: vpsubsb %ymm1, %ymm0, %ymm0
|
|
; CHECK-NEXT: retq
|
|
%1 = sext <32 x i8> %a to <32 x i16>
|
|
%2 = sext <32 x i8> %b to <32 x i16>
|
|
%3 = sub nsw <32 x i16> %1, %2
|
|
%4 = icmp slt <32 x i16> %3, <i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127>
|
|
%5 = select <32 x i1> %4, <32 x i16> %3, <32 x i16> <i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127>
|
|
%6 = icmp sgt <32 x i16> %5, <i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128>
|
|
%7 = select <32 x i1> %6, <32 x i16> %5, <32 x i16> <i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128>
|
|
%8 = trunc <32 x i16> %7 to <32 x i8>
|
|
ret <32 x i8> %8
|
|
}
|
|
|
|
define <32 x i8> @test_mask_subs_epi8_rrk_256(<32 x i8> %a, <32 x i8> %b, <32 x i8> %passThru, i32 %mask) {
|
|
; CHECK-LABEL: test_mask_subs_epi8_rrk_256:
|
|
; CHECK: ## %bb.0:
|
|
; CHECK-NEXT: kmovd %edi, %k1
|
|
; CHECK-NEXT: vpsubsb %ymm1, %ymm0, %ymm2 {%k1}
|
|
; CHECK-NEXT: vmovdqa %ymm2, %ymm0
|
|
; CHECK-NEXT: retq
|
|
%1 = sext <32 x i8> %a to <32 x i16>
|
|
%2 = sext <32 x i8> %b to <32 x i16>
|
|
%3 = sub nsw <32 x i16> %1, %2
|
|
%4 = icmp slt <32 x i16> %3, <i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127>
|
|
%5 = select <32 x i1> %4, <32 x i16> %3, <32 x i16> <i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127>
|
|
%6 = icmp sgt <32 x i16> %5, <i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128>
|
|
%7 = select <32 x i1> %6, <32 x i16> %5, <32 x i16> <i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128>
|
|
%8 = trunc <32 x i16> %7 to <32 x i8>
|
|
%9 = bitcast i32 %mask to <32 x i1>
|
|
%10 = select <32 x i1> %9, <32 x i8> %8, <32 x i8> %passThru
|
|
ret <32 x i8> %10
|
|
}
|
|
|
|
define <32 x i8> @test_mask_subs_epi8_rrkz_256(<32 x i8> %a, <32 x i8> %b, i32 %mask) {
|
|
; CHECK-LABEL: test_mask_subs_epi8_rrkz_256:
|
|
; CHECK: ## %bb.0:
|
|
; CHECK-NEXT: kmovd %edi, %k1
|
|
; CHECK-NEXT: vpsubsb %ymm1, %ymm0, %ymm0 {%k1} {z}
|
|
; CHECK-NEXT: retq
|
|
%1 = sext <32 x i8> %a to <32 x i16>
|
|
%2 = sext <32 x i8> %b to <32 x i16>
|
|
%3 = sub nsw <32 x i16> %1, %2
|
|
%4 = icmp slt <32 x i16> %3, <i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127>
|
|
%5 = select <32 x i1> %4, <32 x i16> %3, <32 x i16> <i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127>
|
|
%6 = icmp sgt <32 x i16> %5, <i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128>
|
|
%7 = select <32 x i1> %6, <32 x i16> %5, <32 x i16> <i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128>
|
|
%8 = trunc <32 x i16> %7 to <32 x i8>
|
|
%9 = bitcast i32 %mask to <32 x i1>
|
|
%10 = select <32 x i1> %9, <32 x i8> %8, <32 x i8> zeroinitializer
|
|
ret <32 x i8> %10
|
|
}
|
|
|
|
define <32 x i8> @test_mask_subs_epi8_rm_256(<32 x i8> %a, <32 x i8>* %ptr_b) {
|
|
; CHECK-LABEL: test_mask_subs_epi8_rm_256:
|
|
; CHECK: ## %bb.0:
|
|
; CHECK-NEXT: vpsubsb (%rdi), %ymm0, %ymm0
|
|
; CHECK-NEXT: retq
|
|
%b = load <32 x i8>, <32 x i8>* %ptr_b
|
|
%1 = sext <32 x i8> %a to <32 x i16>
|
|
%2 = sext <32 x i8> %b to <32 x i16>
|
|
%3 = sub nsw <32 x i16> %1, %2
|
|
%4 = icmp slt <32 x i16> %3, <i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127>
|
|
%5 = select <32 x i1> %4, <32 x i16> %3, <32 x i16> <i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127>
|
|
%6 = icmp sgt <32 x i16> %5, <i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128>
|
|
%7 = select <32 x i1> %6, <32 x i16> %5, <32 x i16> <i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128>
|
|
%8 = trunc <32 x i16> %7 to <32 x i8>
|
|
ret <32 x i8> %8
|
|
}
|
|
|
|
define <32 x i8> @test_mask_subs_epi8_rmk_256(<32 x i8> %a, <32 x i8>* %ptr_b, <32 x i8> %passThru, i32 %mask) {
|
|
; CHECK-LABEL: test_mask_subs_epi8_rmk_256:
|
|
; CHECK: ## %bb.0:
|
|
; CHECK-NEXT: kmovd %esi, %k1
|
|
; CHECK-NEXT: vpsubsb (%rdi), %ymm0, %ymm1 {%k1}
|
|
; CHECK-NEXT: vmovdqa %ymm1, %ymm0
|
|
; CHECK-NEXT: retq
|
|
%b = load <32 x i8>, <32 x i8>* %ptr_b
|
|
%1 = sext <32 x i8> %a to <32 x i16>
|
|
%2 = sext <32 x i8> %b to <32 x i16>
|
|
%3 = sub nsw <32 x i16> %1, %2
|
|
%4 = icmp slt <32 x i16> %3, <i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127>
|
|
%5 = select <32 x i1> %4, <32 x i16> %3, <32 x i16> <i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127>
|
|
%6 = icmp sgt <32 x i16> %5, <i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128>
|
|
%7 = select <32 x i1> %6, <32 x i16> %5, <32 x i16> <i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128>
|
|
%8 = trunc <32 x i16> %7 to <32 x i8>
|
|
%9 = bitcast i32 %mask to <32 x i1>
|
|
%10 = select <32 x i1> %9, <32 x i8> %8, <32 x i8> %passThru
|
|
ret <32 x i8> %10
|
|
}
|
|
|
|
define <32 x i8> @test_mask_subs_epi8_rmkz_256(<32 x i8> %a, <32 x i8>* %ptr_b, i32 %mask) {
|
|
; CHECK-LABEL: test_mask_subs_epi8_rmkz_256:
|
|
; CHECK: ## %bb.0:
|
|
; CHECK-NEXT: kmovd %esi, %k1
|
|
; CHECK-NEXT: vpsubsb (%rdi), %ymm0, %ymm0 {%k1} {z}
|
|
; CHECK-NEXT: retq
|
|
%b = load <32 x i8>, <32 x i8>* %ptr_b
|
|
%1 = sext <32 x i8> %a to <32 x i16>
|
|
%2 = sext <32 x i8> %b to <32 x i16>
|
|
%3 = sub nsw <32 x i16> %1, %2
|
|
%4 = icmp slt <32 x i16> %3, <i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127>
|
|
%5 = select <32 x i1> %4, <32 x i16> %3, <32 x i16> <i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127>
|
|
%6 = icmp sgt <32 x i16> %5, <i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128>
|
|
%7 = select <32 x i1> %6, <32 x i16> %5, <32 x i16> <i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128>
|
|
%8 = trunc <32 x i16> %7 to <32 x i8>
|
|
%9 = bitcast i32 %mask to <32 x i1>
|
|
%10 = select <32 x i1> %9, <32 x i8> %8, <32 x i8> zeroinitializer
|
|
ret <32 x i8> %10
|
|
}
|
|
|
|
; Unsigned saturating add of two <16 x i8> register operands, unmasked.
; The IR zero-extends both inputs to i16, adds, clamps the sum to 255 with an
; unsigned compare/select and truncates back to i8; the checks expect a single
; vpaddusb on xmm registers.
define <16 x i8> @test_mask_adds_epu8_rr_128(<16 x i8> %a, <16 x i8> %b) {
|
|
; CHECK-LABEL: test_mask_adds_epu8_rr_128:
|
|
; CHECK: ## %bb.0:
|
|
; CHECK-NEXT: vpaddusb %xmm1, %xmm0, %xmm0
|
|
; CHECK-NEXT: retq
|
|
%1 = zext <16 x i8> %a to <16 x i16>
|
|
%2 = zext <16 x i8> %b to <16 x i16>
|
|
%3 = add nsw <16 x i16> %1, %2
|
|
; Unsigned clamp: keep %3 where it is below 255, otherwise use 255.
%4 = icmp ult <16 x i16> %3, <i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255>
|
|
%5 = select <16 x i1> %4, <16 x i16> %3, <16 x i16> <i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255>
|
|
%6 = trunc <16 x i16> %5 to <16 x i8>
|
|
ret <16 x i8> %6
|
|
}
|
|
|
|
; Unsigned saturating add of two <16 x i8> register operands, blended against
; %passThru under the 16-bit %mask. The checks expect the mask moved into %k1,
; a vpaddusb with {%k1} that writes %xmm2, and a copy of %xmm2 into %xmm0.
define <16 x i8> @test_mask_adds_epu8_rrk_128(<16 x i8> %a, <16 x i8> %b, <16 x i8> %passThru, i16 %mask) {
|
|
; CHECK-LABEL: test_mask_adds_epu8_rrk_128:
|
|
; CHECK: ## %bb.0:
|
|
; CHECK-NEXT: kmovd %edi, %k1
|
|
; CHECK-NEXT: vpaddusb %xmm1, %xmm0, %xmm2 {%k1}
|
|
; CHECK-NEXT: vmovdqa %xmm2, %xmm0
|
|
; CHECK-NEXT: retq
|
|
%1 = zext <16 x i8> %a to <16 x i16>
|
|
%2 = zext <16 x i8> %b to <16 x i16>
|
|
%3 = add nsw <16 x i16> %1, %2
|
|
; Unsigned clamp: keep %3 where it is below 255, otherwise use 255.
%4 = icmp ult <16 x i16> %3, <i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255>
|
|
%5 = select <16 x i1> %4, <16 x i16> %3, <16 x i16> <i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255>
|
|
%6 = trunc <16 x i16> %5 to <16 x i8>
|
|
; One mask bit per result byte: set bits take the saturated value, clear bits
; take the corresponding byte of %passThru.
%7 = bitcast i16 %mask to <16 x i1>
|
|
%8 = select <16 x i1> %7, <16 x i8> %6, <16 x i8> %passThru
|
|
ret <16 x i8> %8
|
|
}
|
|
|
|
; Unsigned saturating add of two <16 x i8> register operands, where bytes whose
; %mask bit is clear are forced to zero. The checks expect the mask moved into
; %k1 and a single vpaddusb with {%k1} {z}.
define <16 x i8> @test_mask_adds_epu8_rrkz_128(<16 x i8> %a, <16 x i8> %b, i16 %mask) {
|
|
; CHECK-LABEL: test_mask_adds_epu8_rrkz_128:
|
|
; CHECK: ## %bb.0:
|
|
; CHECK-NEXT: kmovd %edi, %k1
|
|
; CHECK-NEXT: vpaddusb %xmm1, %xmm0, %xmm0 {%k1} {z}
|
|
; CHECK-NEXT: retq
|
|
%1 = zext <16 x i8> %a to <16 x i16>
|
|
%2 = zext <16 x i8> %b to <16 x i16>
|
|
%3 = add nsw <16 x i16> %1, %2
|
|
; Unsigned clamp: keep %3 where it is below 255, otherwise use 255.
%4 = icmp ult <16 x i16> %3, <i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255>
|
|
%5 = select <16 x i1> %4, <16 x i16> %3, <16 x i16> <i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255>
|
|
%6 = trunc <16 x i16> %5 to <16 x i8>
|
|
; One mask bit per result byte: set bits take the saturated value, clear bits
; produce zero.
%7 = bitcast i16 %mask to <16 x i1>
|
|
%8 = select <16 x i1> %7, <16 x i8> %6, <16 x i8> zeroinitializer
|
|
ret <16 x i8> %8
|
|
}
|
|
|
|
; Unsigned saturating add of <16 x i8>, unmasked, with the second operand
; loaded from %ptr_b. The checks expect a single vpaddusb that takes the load
; as its memory operand.
define <16 x i8> @test_mask_adds_epu8_rm_128(<16 x i8> %a, <16 x i8>* %ptr_b) {
|
|
; CHECK-LABEL: test_mask_adds_epu8_rm_128:
|
|
; CHECK: ## %bb.0:
|
|
; CHECK-NEXT: vpaddusb (%rdi), %xmm0, %xmm0
|
|
; CHECK-NEXT: retq
|
|
%b = load <16 x i8>, <16 x i8>* %ptr_b
|
|
%1 = zext <16 x i8> %a to <16 x i16>
|
|
%2 = zext <16 x i8> %b to <16 x i16>
|
|
%3 = add nsw <16 x i16> %1, %2
|
|
; Unsigned clamp: keep %3 where it is below 255, otherwise use 255.
%4 = icmp ult <16 x i16> %3, <i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255>
|
|
%5 = select <16 x i1> %4, <16 x i16> %3, <16 x i16> <i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255>
|
|
%6 = trunc <16 x i16> %5 to <16 x i8>
|
|
ret <16 x i8> %6
|
|
}
|
|
|
|
; Unsigned saturating add of <16 x i8> with %b loaded from %ptr_b, blended
; against %passThru under the 16-bit %mask. The checks expect the mask moved
; into %k1, a vpaddusb with a memory operand and {%k1} that writes %xmm1, and a
; copy of %xmm1 into %xmm0.
define <16 x i8> @test_mask_adds_epu8_rmk_128(<16 x i8> %a, <16 x i8>* %ptr_b, <16 x i8> %passThru, i16 %mask) {
|
|
; CHECK-LABEL: test_mask_adds_epu8_rmk_128:
|
|
; CHECK: ## %bb.0:
|
|
; CHECK-NEXT: kmovd %esi, %k1
|
|
; CHECK-NEXT: vpaddusb (%rdi), %xmm0, %xmm1 {%k1}
|
|
; CHECK-NEXT: vmovdqa %xmm1, %xmm0
|
|
; CHECK-NEXT: retq
|
|
%b = load <16 x i8>, <16 x i8>* %ptr_b
|
|
%1 = zext <16 x i8> %a to <16 x i16>
|
|
%2 = zext <16 x i8> %b to <16 x i16>
|
|
%3 = add nsw <16 x i16> %1, %2
|
|
; Unsigned clamp: keep %3 where it is below 255, otherwise use 255.
%4 = icmp ult <16 x i16> %3, <i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255>
|
|
%5 = select <16 x i1> %4, <16 x i16> %3, <16 x i16> <i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255>
|
|
%6 = trunc <16 x i16> %5 to <16 x i8>
|
|
; One mask bit per result byte: set bits take the saturated value, clear bits
; take the corresponding byte of %passThru.
%7 = bitcast i16 %mask to <16 x i1>
|
|
%8 = select <16 x i1> %7, <16 x i8> %6, <16 x i8> %passThru
|
|
ret <16 x i8> %8
|
|
}
|
|
|
|
; Unsigned saturating add of <16 x i8> with %b loaded from %ptr_b, where bytes
; whose %mask bit is clear are forced to zero. The checks expect the mask moved
; into %k1 and a single vpaddusb with a memory operand and {%k1} {z}.
define <16 x i8> @test_mask_adds_epu8_rmkz_128(<16 x i8> %a, <16 x i8>* %ptr_b, i16 %mask) {
|
|
; CHECK-LABEL: test_mask_adds_epu8_rmkz_128:
|
|
; CHECK: ## %bb.0:
|
|
; CHECK-NEXT: kmovd %esi, %k1
|
|
; CHECK-NEXT: vpaddusb (%rdi), %xmm0, %xmm0 {%k1} {z}
|
|
; CHECK-NEXT: retq
|
|
%b = load <16 x i8>, <16 x i8>* %ptr_b
|
|
%1 = zext <16 x i8> %a to <16 x i16>
|
|
%2 = zext <16 x i8> %b to <16 x i16>
|
|
%3 = add nsw <16 x i16> %1, %2
|
|
; Unsigned clamp: keep %3 where it is below 255, otherwise use 255.
%4 = icmp ult <16 x i16> %3, <i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255>
|
|
%5 = select <16 x i1> %4, <16 x i16> %3, <16 x i16> <i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255>
|
|
%6 = trunc <16 x i16> %5 to <16 x i8>
|
|
; One mask bit per result byte: set bits take the saturated value, clear bits
; produce zero.
%7 = bitcast i16 %mask to <16 x i1>
|
|
%8 = select <16 x i1> %7, <16 x i8> %6, <16 x i8> zeroinitializer
|
|
ret <16 x i8> %8
|
|
}
|
|
|
|
; Unsigned saturating add of two <32 x i8> register operands, unmasked.
; The IR zero-extends both inputs to i16, adds, clamps the sum to 255 with an
; unsigned compare/select and truncates back to i8; the checks expect a single
; vpaddusb on ymm registers.
define <32 x i8> @test_mask_adds_epu8_rr_256(<32 x i8> %a, <32 x i8> %b) {
|
|
; CHECK-LABEL: test_mask_adds_epu8_rr_256:
|
|
; CHECK: ## %bb.0:
|
|
; CHECK-NEXT: vpaddusb %ymm1, %ymm0, %ymm0
|
|
; CHECK-NEXT: retq
|
|
%1 = zext <32 x i8> %a to <32 x i16>
|
|
%2 = zext <32 x i8> %b to <32 x i16>
|
|
%3 = add nsw <32 x i16> %1, %2
|
|
; Unsigned clamp: keep %3 where it is below 255, otherwise use 255.
%4 = icmp ult <32 x i16> %3, <i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255>
|
|
%5 = select <32 x i1> %4, <32 x i16> %3, <32 x i16> <i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255>
|
|
%6 = trunc <32 x i16> %5 to <32 x i8>
|
|
ret <32 x i8> %6
|
|
}
|
|
|
|
; Unsigned saturating add of two <32 x i8> register operands, blended against
; %passThru under the 32-bit %mask. The checks expect the mask moved into %k1,
; a vpaddusb with {%k1} that writes %ymm2, and a copy of %ymm2 into %ymm0.
define <32 x i8> @test_mask_adds_epu8_rrk_256(<32 x i8> %a, <32 x i8> %b, <32 x i8> %passThru, i32 %mask) {
|
|
; CHECK-LABEL: test_mask_adds_epu8_rrk_256:
|
|
; CHECK: ## %bb.0:
|
|
; CHECK-NEXT: kmovd %edi, %k1
|
|
; CHECK-NEXT: vpaddusb %ymm1, %ymm0, %ymm2 {%k1}
|
|
; CHECK-NEXT: vmovdqa %ymm2, %ymm0
|
|
; CHECK-NEXT: retq
|
|
%1 = zext <32 x i8> %a to <32 x i16>
|
|
%2 = zext <32 x i8> %b to <32 x i16>
|
|
%3 = add nsw <32 x i16> %1, %2
|
|
; Unsigned clamp: keep %3 where it is below 255, otherwise use 255.
%4 = icmp ult <32 x i16> %3, <i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255>
|
|
%5 = select <32 x i1> %4, <32 x i16> %3, <32 x i16> <i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255>
|
|
%6 = trunc <32 x i16> %5 to <32 x i8>
|
|
; One mask bit per result byte: set bits take the saturated value, clear bits
; take the corresponding byte of %passThru.
%7 = bitcast i32 %mask to <32 x i1>
|
|
%8 = select <32 x i1> %7, <32 x i8> %6, <32 x i8> %passThru
|
|
ret <32 x i8> %8
|
|
}
|
|
|
|
; Unsigned saturating add of two <32 x i8> register operands, where bytes whose
; %mask bit is clear are forced to zero. The checks expect the mask moved into
; %k1 and a single vpaddusb with {%k1} {z}.
define <32 x i8> @test_mask_adds_epu8_rrkz_256(<32 x i8> %a, <32 x i8> %b, i32 %mask) {
|
|
; CHECK-LABEL: test_mask_adds_epu8_rrkz_256:
|
|
; CHECK: ## %bb.0:
|
|
; CHECK-NEXT: kmovd %edi, %k1
|
|
; CHECK-NEXT: vpaddusb %ymm1, %ymm0, %ymm0 {%k1} {z}
|
|
; CHECK-NEXT: retq
|
|
%1 = zext <32 x i8> %a to <32 x i16>
|
|
%2 = zext <32 x i8> %b to <32 x i16>
|
|
%3 = add nsw <32 x i16> %1, %2
|
|
; Unsigned clamp: keep %3 where it is below 255, otherwise use 255.
%4 = icmp ult <32 x i16> %3, <i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255>
|
|
%5 = select <32 x i1> %4, <32 x i16> %3, <32 x i16> <i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255>
|
|
%6 = trunc <32 x i16> %5 to <32 x i8>
|
|
; One mask bit per result byte: set bits take the saturated value, clear bits
; produce zero.
%7 = bitcast i32 %mask to <32 x i1>
|
|
%8 = select <32 x i1> %7, <32 x i8> %6, <32 x i8> zeroinitializer
|
|
ret <32 x i8> %8
|
|
}
|
|
|
|
; Unsigned saturating add of <32 x i8>, unmasked, with the second operand
; loaded from %ptr_b. The checks expect a single vpaddusb that takes the load
; as its memory operand.
define <32 x i8> @test_mask_adds_epu8_rm_256(<32 x i8> %a, <32 x i8>* %ptr_b) {
|
|
; CHECK-LABEL: test_mask_adds_epu8_rm_256:
|
|
; CHECK: ## %bb.0:
|
|
; CHECK-NEXT: vpaddusb (%rdi), %ymm0, %ymm0
|
|
; CHECK-NEXT: retq
|
|
%b = load <32 x i8>, <32 x i8>* %ptr_b
|
|
%1 = zext <32 x i8> %a to <32 x i16>
|
|
%2 = zext <32 x i8> %b to <32 x i16>
|
|
%3 = add nsw <32 x i16> %1, %2
|
|
; Unsigned clamp: keep %3 where it is below 255, otherwise use 255.
%4 = icmp ult <32 x i16> %3, <i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255>
|
|
%5 = select <32 x i1> %4, <32 x i16> %3, <32 x i16> <i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255>
|
|
%6 = trunc <32 x i16> %5 to <32 x i8>
|
|
ret <32 x i8> %6
|
|
}
|
|
|
|
; Unsigned saturating add of <32 x i8> with %b loaded from %ptr_b, blended
; against %passThru under the 32-bit %mask. The checks expect the mask moved
; into %k1, a vpaddusb with a memory operand and {%k1} that writes %ymm1, and a
; copy of %ymm1 into %ymm0.
define <32 x i8> @test_mask_adds_epu8_rmk_256(<32 x i8> %a, <32 x i8>* %ptr_b, <32 x i8> %passThru, i32 %mask) {
|
|
; CHECK-LABEL: test_mask_adds_epu8_rmk_256:
|
|
; CHECK: ## %bb.0:
|
|
; CHECK-NEXT: kmovd %esi, %k1
|
|
; CHECK-NEXT: vpaddusb (%rdi), %ymm0, %ymm1 {%k1}
|
|
; CHECK-NEXT: vmovdqa %ymm1, %ymm0
|
|
; CHECK-NEXT: retq
|
|
%b = load <32 x i8>, <32 x i8>* %ptr_b
|
|
%1 = zext <32 x i8> %a to <32 x i16>
|
|
%2 = zext <32 x i8> %b to <32 x i16>
|
|
%3 = add nsw <32 x i16> %1, %2
|
|
; Unsigned clamp: keep %3 where it is below 255, otherwise use 255.
%4 = icmp ult <32 x i16> %3, <i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255>
|
|
%5 = select <32 x i1> %4, <32 x i16> %3, <32 x i16> <i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255>
|
|
%6 = trunc <32 x i16> %5 to <32 x i8>
|
|
; One mask bit per result byte: set bits take the saturated value, clear bits
; take the corresponding byte of %passThru.
%7 = bitcast i32 %mask to <32 x i1>
|
|
%8 = select <32 x i1> %7, <32 x i8> %6, <32 x i8> %passThru
|
|
ret <32 x i8> %8
|
|
}
|
|
|
|
; Unsigned saturating add of <32 x i8> with %b loaded from %ptr_b, where bytes
; whose %mask bit is clear are forced to zero. The checks expect the mask moved
; into %k1 and a single vpaddusb with a memory operand and {%k1} {z}.
define <32 x i8> @test_mask_adds_epu8_rmkz_256(<32 x i8> %a, <32 x i8>* %ptr_b, i32 %mask) {
|
|
; CHECK-LABEL: test_mask_adds_epu8_rmkz_256:
|
|
; CHECK: ## %bb.0:
|
|
; CHECK-NEXT: kmovd %esi, %k1
|
|
; CHECK-NEXT: vpaddusb (%rdi), %ymm0, %ymm0 {%k1} {z}
|
|
; CHECK-NEXT: retq
|
|
%b = load <32 x i8>, <32 x i8>* %ptr_b
|
|
%1 = zext <32 x i8> %a to <32 x i16>
|
|
%2 = zext <32 x i8> %b to <32 x i16>
|
|
%3 = add nsw <32 x i16> %1, %2
|
|
; Unsigned clamp: keep %3 where it is below 255, otherwise use 255.
%4 = icmp ult <32 x i16> %3, <i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255>
|
|
%5 = select <32 x i1> %4, <32 x i16> %3, <32 x i16> <i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255>
|
|
%6 = trunc <32 x i16> %5 to <32 x i8>
|
|
; One mask bit per result byte: set bits take the saturated value, clear bits
; produce zero.
%7 = bitcast i32 %mask to <32 x i1>
|
|
%8 = select <32 x i1> %7, <32 x i8> %6, <32 x i8> zeroinitializer
|
|
ret <32 x i8> %8
|
|
}
|
|
|
|
; Unsigned saturating subtract of two <16 x i8> register operands, unmasked.
; Written as max(%a, %b) - %b (icmp ugt / select / sub), which yields %a - %b
; where %a is larger and 0 elsewhere; the checks expect a single vpsubusb.
define <16 x i8> @test_mask_subs_epu8_rr_128(<16 x i8> %a, <16 x i8> %b) {
|
|
; CHECK-LABEL: test_mask_subs_epu8_rr_128:
|
|
; CHECK: ## %bb.0:
|
|
; CHECK-NEXT: vpsubusb %xmm1, %xmm0, %xmm0
|
|
; CHECK-NEXT: retq
|
|
%cmp = icmp ugt <16 x i8> %a, %b
|
|
; %sel is the unsigned maximum of %a and %b, so the subtraction below cannot wrap.
%sel = select <16 x i1> %cmp, <16 x i8> %a, <16 x i8> %b
|
|
%sub = sub <16 x i8> %sel, %b
|
|
ret <16 x i8> %sub
|
|
}
|
|
|
|
; Unsigned saturating subtract of two <16 x i8> register operands, written as
; max(%a, %b) - %b and blended against %passThru under the 16-bit %mask.
; The checks expect the mask moved into %k1, a vpsubusb with {%k1} that writes
; %xmm2, and a copy of %xmm2 into %xmm0.
define <16 x i8> @test_mask_subs_epu8_rrk_128(<16 x i8> %a, <16 x i8> %b, <16 x i8> %passThru, i16 %mask) {
|
|
; CHECK-LABEL: test_mask_subs_epu8_rrk_128:
|
|
; CHECK: ## %bb.0:
|
|
; CHECK-NEXT: kmovd %edi, %k1
|
|
; CHECK-NEXT: vpsubusb %xmm1, %xmm0, %xmm2 {%k1}
|
|
; CHECK-NEXT: vmovdqa %xmm2, %xmm0
|
|
; CHECK-NEXT: retq
|
|
%cmp = icmp ugt <16 x i8> %a, %b
|
|
; %sel is the unsigned maximum of %a and %b, so the subtraction below cannot wrap.
%sel = select <16 x i1> %cmp, <16 x i8> %a, <16 x i8> %b
|
|
%sub = sub <16 x i8> %sel, %b
|
|
; One mask bit per result byte: set bits take %sub, clear bits take %passThru.
%bc = bitcast i16 %mask to <16 x i1>
|
|
%res = select <16 x i1> %bc, <16 x i8> %sub, <16 x i8> %passThru
|
|
ret <16 x i8> %res
|
|
}
|
|
|
|
; Unsigned saturating subtract of two <16 x i8> register operands, written as
; max(%a, %b) - %b, where bytes whose %mask bit is clear are forced to zero.
; The checks expect the mask moved into %k1 and a single vpsubusb with
; {%k1} {z}.
define <16 x i8> @test_mask_subs_epu8_rrkz_128(<16 x i8> %a, <16 x i8> %b, i16 %mask) {
|
|
; CHECK-LABEL: test_mask_subs_epu8_rrkz_128:
|
|
; CHECK: ## %bb.0:
|
|
; CHECK-NEXT: kmovd %edi, %k1
|
|
; CHECK-NEXT: vpsubusb %xmm1, %xmm0, %xmm0 {%k1} {z}
|
|
; CHECK-NEXT: retq
|
|
%cmp = icmp ugt <16 x i8> %a, %b
|
|
; %sel is the unsigned maximum of %a and %b, so the subtraction below cannot wrap.
%sel = select <16 x i1> %cmp, <16 x i8> %a, <16 x i8> %b
|
|
%sub = sub <16 x i8> %sel, %b
|
|
; One mask bit per result byte: set bits take %sub, clear bits produce zero.
%bc = bitcast i16 %mask to <16 x i1>
|
|
%res = select <16 x i1> %bc, <16 x i8> %sub, <16 x i8> zeroinitializer
|
|
ret <16 x i8> %res
|
|
}
|
|
|
|
; Unsigned saturating subtract of <16 x i8>, unmasked, with the subtrahend
; loaded from %ptr_b. Written as max(%a, %b) - %b; the checks expect a single
; vpsubusb that takes the load as its memory operand.
define <16 x i8> @test_mask_subs_epu8_rm_128(<16 x i8> %a, <16 x i8>* %ptr_b) {
|
|
; CHECK-LABEL: test_mask_subs_epu8_rm_128:
|
|
; CHECK: ## %bb.0:
|
|
; CHECK-NEXT: vpsubusb (%rdi), %xmm0, %xmm0
|
|
; CHECK-NEXT: retq
|
|
%b = load <16 x i8>, <16 x i8>* %ptr_b
|
|
%cmp = icmp ugt <16 x i8> %a, %b
|
|
; %sel is the unsigned maximum of %a and %b, so the subtraction below cannot wrap.
%sel = select <16 x i1> %cmp, <16 x i8> %a, <16 x i8> %b
|
|
%sub = sub <16 x i8> %sel, %b
|
|
ret <16 x i8> %sub
|
|
}
|
|
|
|
; Unsigned saturating subtract of <16 x i8> with the subtrahend loaded from
; %ptr_b, written as max(%a, %b) - %b and blended against %passThru under the
; 16-bit %mask. The checks expect the mask moved into %k1, a vpsubusb with a
; memory operand and {%k1} that writes %xmm1, and a copy of %xmm1 into %xmm0.
define <16 x i8> @test_mask_subs_epu8_rmk_128(<16 x i8> %a, <16 x i8>* %ptr_b, <16 x i8> %passThru, i16 %mask) {
|
|
; CHECK-LABEL: test_mask_subs_epu8_rmk_128:
|
|
; CHECK: ## %bb.0:
|
|
; CHECK-NEXT: kmovd %esi, %k1
|
|
; CHECK-NEXT: vpsubusb (%rdi), %xmm0, %xmm1 {%k1}
|
|
; CHECK-NEXT: vmovdqa %xmm1, %xmm0
|
|
; CHECK-NEXT: retq
|
|
%b = load <16 x i8>, <16 x i8>* %ptr_b
|
|
%cmp = icmp ugt <16 x i8> %a, %b
|
|
; %sel is the unsigned maximum of %a and %b, so the subtraction below cannot wrap.
%sel = select <16 x i1> %cmp, <16 x i8> %a, <16 x i8> %b
|
|
%sub = sub <16 x i8> %sel, %b
|
|
; One mask bit per result byte: set bits take %sub, clear bits take %passThru.
%bc = bitcast i16 %mask to <16 x i1>
|
|
%res = select <16 x i1> %bc, <16 x i8> %sub, <16 x i8> %passThru
|
|
ret <16 x i8> %res
|
|
}
|
|
|
|
; Unsigned saturating subtract of <16 x i8> with the subtrahend loaded from
; %ptr_b, written as max(%a, %b) - %b, where bytes whose %mask bit is clear are
; forced to zero. The checks expect the mask moved into %k1 and a single
; vpsubusb with a memory operand and {%k1} {z}.
define <16 x i8> @test_mask_subs_epu8_rmkz_128(<16 x i8> %a, <16 x i8>* %ptr_b, i16 %mask) {
|
|
; CHECK-LABEL: test_mask_subs_epu8_rmkz_128:
|
|
; CHECK: ## %bb.0:
|
|
; CHECK-NEXT: kmovd %esi, %k1
|
|
; CHECK-NEXT: vpsubusb (%rdi), %xmm0, %xmm0 {%k1} {z}
|
|
; CHECK-NEXT: retq
|
|
%b = load <16 x i8>, <16 x i8>* %ptr_b
|
|
%cmp = icmp ugt <16 x i8> %a, %b
|
|
; %sel is the unsigned maximum of %a and %b, so the subtraction below cannot wrap.
%sel = select <16 x i1> %cmp, <16 x i8> %a, <16 x i8> %b
|
|
%sub = sub <16 x i8> %sel, %b
|
|
; One mask bit per result byte: set bits take %sub, clear bits produce zero.
%bc = bitcast i16 %mask to <16 x i1>
|
|
%res = select <16 x i1> %bc, <16 x i8> %sub, <16 x i8> zeroinitializer
|
|
ret <16 x i8> %res
|
|
}
|
|
|
|
; Unsigned saturating subtract of two <32 x i8> register operands, unmasked.
; Written as max(%a, %b) - %b (icmp ugt / select / sub), which yields %a - %b
; where %a is larger and 0 elsewhere; the checks expect a single vpsubusb.
define <32 x i8> @test_mask_subs_epu8_rr_256(<32 x i8> %a, <32 x i8> %b) {
|
|
; CHECK-LABEL: test_mask_subs_epu8_rr_256:
|
|
; CHECK: ## %bb.0:
|
|
; CHECK-NEXT: vpsubusb %ymm1, %ymm0, %ymm0
|
|
; CHECK-NEXT: retq
|
|
%cmp = icmp ugt <32 x i8> %a, %b
|
|
; %sel is the unsigned maximum of %a and %b, so the subtraction below cannot wrap.
%sel = select <32 x i1> %cmp, <32 x i8> %a, <32 x i8> %b
|
|
%sub = sub <32 x i8> %sel, %b
|
|
ret <32 x i8> %sub
|
|
}
|
|
|
|
; Unsigned saturating subtract of two <32 x i8> register operands, written as
; max(%a, %b) - %b and blended against %passThru under the 32-bit %mask.
; The checks expect the mask moved into %k1, a vpsubusb with {%k1} that writes
; %ymm2, and a copy of %ymm2 into %ymm0.
define <32 x i8> @test_mask_subs_epu8_rrk_256(<32 x i8> %a, <32 x i8> %b, <32 x i8> %passThru, i32 %mask) {
|
|
; CHECK-LABEL: test_mask_subs_epu8_rrk_256:
|
|
; CHECK: ## %bb.0:
|
|
; CHECK-NEXT: kmovd %edi, %k1
|
|
; CHECK-NEXT: vpsubusb %ymm1, %ymm0, %ymm2 {%k1}
|
|
; CHECK-NEXT: vmovdqa %ymm2, %ymm0
|
|
; CHECK-NEXT: retq
|
|
%cmp = icmp ugt <32 x i8> %a, %b
|
|
; %sel is the unsigned maximum of %a and %b, so the subtraction below cannot wrap.
%sel = select <32 x i1> %cmp, <32 x i8> %a, <32 x i8> %b
|
|
%sub = sub <32 x i8> %sel, %b
|
|
; One mask bit per result byte: set bits take %sub, clear bits take %passThru.
%bc = bitcast i32 %mask to <32 x i1>
|
|
%res = select <32 x i1> %bc, <32 x i8> %sub, <32 x i8> %passThru
|
|
ret <32 x i8> %res
|
|
}
|
|
|
|
; Unsigned saturating subtract of two <32 x i8> register operands, written as
; max(%a, %b) - %b, where bytes whose %mask bit is clear are forced to zero.
; The checks expect the mask moved into %k1 and a single vpsubusb with
; {%k1} {z}.
define <32 x i8> @test_mask_subs_epu8_rrkz_256(<32 x i8> %a, <32 x i8> %b, i32 %mask) {
|
|
; CHECK-LABEL: test_mask_subs_epu8_rrkz_256:
|
|
; CHECK: ## %bb.0:
|
|
; CHECK-NEXT: kmovd %edi, %k1
|
|
; CHECK-NEXT: vpsubusb %ymm1, %ymm0, %ymm0 {%k1} {z}
|
|
; CHECK-NEXT: retq
|
|
%cmp = icmp ugt <32 x i8> %a, %b
|
|
; %sel is the unsigned maximum of %a and %b, so the subtraction below cannot wrap.
%sel = select <32 x i1> %cmp, <32 x i8> %a, <32 x i8> %b
|
|
%sub = sub <32 x i8> %sel, %b
|
|
; One mask bit per result byte: set bits take %sub, clear bits produce zero.
%bc = bitcast i32 %mask to <32 x i1>
|
|
%res = select <32 x i1> %bc, <32 x i8> %sub, <32 x i8> zeroinitializer
|
|
ret <32 x i8> %res
|
|
}
|
|
|
|
; Unsigned saturating subtract of <32 x i8>, unmasked, with the subtrahend
; loaded from %ptr_b. Written as max(%a, %b) - %b; the checks expect a single
; vpsubusb that takes the load as its memory operand.
define <32 x i8> @test_mask_subs_epu8_rm_256(<32 x i8> %a, <32 x i8>* %ptr_b) {
|
|
; CHECK-LABEL: test_mask_subs_epu8_rm_256:
|
|
; CHECK: ## %bb.0:
|
|
; CHECK-NEXT: vpsubusb (%rdi), %ymm0, %ymm0
|
|
; CHECK-NEXT: retq
|
|
%b = load <32 x i8>, <32 x i8>* %ptr_b
|
|
%cmp = icmp ugt <32 x i8> %a, %b
|
|
; %sel is the unsigned maximum of %a and %b, so the subtraction below cannot wrap.
%sel = select <32 x i1> %cmp, <32 x i8> %a, <32 x i8> %b
|
|
%sub = sub <32 x i8> %sel, %b
|
|
ret <32 x i8> %sub
|
|
}
|
|
|
|
; Unsigned saturating subtract of <32 x i8> with the subtrahend loaded from
; %ptr_b, written as max(%a, %b) - %b and blended against %passThru under the
; 32-bit %mask. The checks expect the mask moved into %k1, a vpsubusb with a
; memory operand and {%k1} that writes %ymm1, and a copy of %ymm1 into %ymm0.
define <32 x i8> @test_mask_subs_epu8_rmk_256(<32 x i8> %a, <32 x i8>* %ptr_b, <32 x i8> %passThru, i32 %mask) {
|
|
; CHECK-LABEL: test_mask_subs_epu8_rmk_256:
|
|
; CHECK: ## %bb.0:
|
|
; CHECK-NEXT: kmovd %esi, %k1
|
|
; CHECK-NEXT: vpsubusb (%rdi), %ymm0, %ymm1 {%k1}
|
|
; CHECK-NEXT: vmovdqa %ymm1, %ymm0
|
|
; CHECK-NEXT: retq
|
|
%b = load <32 x i8>, <32 x i8>* %ptr_b
|
|
%cmp = icmp ugt <32 x i8> %a, %b
|
|
; %sel is the unsigned maximum of %a and %b, so the subtraction below cannot wrap.
%sel = select <32 x i1> %cmp, <32 x i8> %a, <32 x i8> %b
|
|
%sub = sub <32 x i8> %sel, %b
|
|
; One mask bit per result byte: set bits take %sub, clear bits take %passThru.
%bc = bitcast i32 %mask to <32 x i1>
|
|
%res = select <32 x i1> %bc, <32 x i8> %sub, <32 x i8> %passThru
|
|
ret <32 x i8> %res
|
|
}
|
|
|
|
; Unsigned saturating subtract of <32 x i8> with the subtrahend loaded from
; %ptr_b, written as max(%a, %b) - %b, where bytes whose %mask bit is clear are
; forced to zero. The checks expect the mask moved into %k1 and a single
; vpsubusb with a memory operand and {%k1} {z}.
define <32 x i8> @test_mask_subs_epu8_rmkz_256(<32 x i8> %a, <32 x i8>* %ptr_b, i32 %mask) {
|
|
; CHECK-LABEL: test_mask_subs_epu8_rmkz_256:
|
|
; CHECK: ## %bb.0:
|
|
; CHECK-NEXT: kmovd %esi, %k1
|
|
; CHECK-NEXT: vpsubusb (%rdi), %ymm0, %ymm0 {%k1} {z}
|
|
; CHECK-NEXT: retq
|
|
%b = load <32 x i8>, <32 x i8>* %ptr_b
|
|
%cmp = icmp ugt <32 x i8> %a, %b
|
|
; %sel is the unsigned maximum of %a and %b, so the subtraction below cannot wrap.
%sel = select <32 x i1> %cmp, <32 x i8> %a, <32 x i8> %b
|
|
%sub = sub <32 x i8> %sel, %b
|
|
; One mask bit per result byte: set bits take %sub, clear bits produce zero.
%bc = bitcast i32 %mask to <32 x i1>
|
|
%res = select <32 x i1> %bc, <32 x i8> %sub, <32 x i8> zeroinitializer
|
|
ret <32 x i8> %res
|
|
}
|
|
|
|
; Signed saturating add of two <16 x i8> operands: widen to i16, add, clamp
; the sum to [-128, 127], truncate back to i8. The only checks in this
; function use the SSE prefix and expect a single paddsb followed by retl.
define <16 x i8> @test_x86_sse2_padds_b(<16 x i8> %a0, <16 x i8> %a1) {
|
|
; SSE-LABEL: test_x86_sse2_padds_b:
|
|
; SSE: ## %bb.0:
|
|
; SSE-NEXT: paddsb %xmm1, %xmm0
|
|
; SSE-NEXT: retl
|
|
%1 = sext <16 x i8> %a0 to <16 x i16>
|
|
%2 = sext <16 x i8> %a1 to <16 x i16>
|
|
%3 = add nsw <16 x i16> %1, %2
|
|
; Upper clamp: keep %3 where it is below 127, otherwise use 127.
%4 = icmp slt <16 x i16> %3, <i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127>
|
|
%5 = select <16 x i1> %4, <16 x i16> %3, <16 x i16> <i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127>
|
|
; Lower clamp: keep %5 where it is above -128, otherwise use -128.
%6 = icmp sgt <16 x i16> %5, <i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128>
|
|
%7 = select <16 x i1> %6, <16 x i16> %5, <16 x i16> <i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128>
|
|
%8 = trunc <16 x i16> %7 to <16 x i8>
|
|
ret <16 x i8> %8
|
|
}
|
|
|
|
|
|
; Signed saturating add of two <8 x i16> operands: widen to i32, add, clamp
; the sum to [-32768, 32767], truncate back to i16. The only checks in this
; function use the SSE prefix and expect a single paddsw followed by retl.
define <8 x i16> @test_x86_sse2_padds_w(<8 x i16> %a0, <8 x i16> %a1) {
|
|
; SSE-LABEL: test_x86_sse2_padds_w:
|
|
; SSE: ## %bb.0:
|
|
; SSE-NEXT: paddsw %xmm1, %xmm0
|
|
; SSE-NEXT: retl
|
|
%1 = sext <8 x i16> %a0 to <8 x i32>
|
|
%2 = sext <8 x i16> %a1 to <8 x i32>
|
|
%3 = add nsw <8 x i32> %1, %2
|
|
; Upper clamp: keep %3 where it is below 32767, otherwise use 32767.
%4 = icmp slt <8 x i32> %3, <i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767>
|
|
%5 = select <8 x i1> %4, <8 x i32> %3, <8 x i32> <i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767>
|
|
; Lower clamp: keep %5 where it is above -32768, otherwise use -32768.
%6 = icmp sgt <8 x i32> %5, <i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768>
|
|
%7 = select <8 x i1> %6, <8 x i32> %5, <8 x i32> <i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768>
|
|
%8 = trunc <8 x i32> %7 to <8 x i16>
|
|
ret <8 x i16> %8
|
|
}
|
|
|
|
|
|
; Unsigned saturating add of two <16 x i8> operands: zero-extend to i16, add,
; clamp the sum to 255 with an unsigned compare/select, truncate back to i8.
; The only checks in this function use the SSE prefix and expect a single
; paddusb followed by retl.
define <16 x i8> @test_x86_sse2_paddus_b(<16 x i8> %a0, <16 x i8> %a1) {
|
|
; SSE-LABEL: test_x86_sse2_paddus_b:
|
|
; SSE: ## %bb.0:
|
|
; SSE-NEXT: paddusb %xmm1, %xmm0
|
|
; SSE-NEXT: retl
|
|
%1 = zext <16 x i8> %a0 to <16 x i16>
|
|
%2 = zext <16 x i8> %a1 to <16 x i16>
|
|
%3 = add nsw <16 x i16> %1, %2
|
|
; Unsigned clamp: keep %3 where it is below 255, otherwise use 255.
%4 = icmp ult <16 x i16> %3, <i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255>
|
|
%5 = select <16 x i1> %4, <16 x i16> %3, <16 x i16> <i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255>
|
|
%6 = trunc <16 x i16> %5 to <16 x i8>
|
|
ret <16 x i8> %6
|
|
}
|
|
|
|
|
|
; Unsigned saturating add of two <8 x i16> operands: zero-extend to i32, add,
; clamp the sum to 65535 with an unsigned compare/select, truncate back to i16.
; The only checks in this function use the SSE prefix and expect a single
; paddusw followed by retl.
define <8 x i16> @test_x86_sse2_paddus_w(<8 x i16> %a0, <8 x i16> %a1) {
|
|
; SSE-LABEL: test_x86_sse2_paddus_w:
|
|
; SSE: ## %bb.0:
|
|
; SSE-NEXT: paddusw %xmm1, %xmm0
|
|
; SSE-NEXT: retl
|
|
%1 = zext <8 x i16> %a0 to <8 x i32>
|
|
%2 = zext <8 x i16> %a1 to <8 x i32>
|
|
%3 = add nsw <8 x i32> %1, %2
|
|
; Unsigned clamp: keep %3 where it is below 65535, otherwise use 65535.
%4 = icmp ult <8 x i32> %3, <i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535>
|
|
%5 = select <8 x i1> %4, <8 x i32> %3, <8 x i32> <i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535>
|
|
%6 = trunc <8 x i32> %5 to <8 x i16>
|
|
ret <8 x i16> %6
|
|
}
|
|
|
|
; Signed saturating subtract of two <16 x i8> operands: widen to i16, subtract,
; clamp the difference to [-128, 127], truncate back to i8. The only checks in
; this function use the SSE prefix and expect a single psubsb followed by retl.
define <16 x i8> @test_x86_sse2_psubs_b(<16 x i8> %a0, <16 x i8> %a1) {
|
|
; SSE-LABEL: test_x86_sse2_psubs_b:
|
|
; SSE: ## %bb.0:
|
|
; SSE-NEXT: psubsb %xmm1, %xmm0
|
|
; SSE-NEXT: retl
|
|
%1 = sext <16 x i8> %a0 to <16 x i16>
|
|
%2 = sext <16 x i8> %a1 to <16 x i16>
|
|
%3 = sub nsw <16 x i16> %1, %2
|
|
; Upper clamp: keep %3 where it is below 127, otherwise use 127.
%4 = icmp slt <16 x i16> %3, <i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127>
|
|
%5 = select <16 x i1> %4, <16 x i16> %3, <16 x i16> <i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127>
|
|
; Lower clamp: keep %5 where it is above -128, otherwise use -128.
%6 = icmp sgt <16 x i16> %5, <i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128>
|
|
%7 = select <16 x i1> %6, <16 x i16> %5, <16 x i16> <i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128>
|
|
%8 = trunc <16 x i16> %7 to <16 x i8>
|
|
ret <16 x i8> %8
|
|
}
|
|
|
|
|
|
; Signed saturating subtract of two <8 x i16> operands: widen to i32, subtract,
; clamp the difference to [-32768, 32767], truncate back to i16. The only
; checks in this function use the SSE prefix and expect a single psubsw
; followed by retl.
define <8 x i16> @test_x86_sse2_psubs_w(<8 x i16> %a0, <8 x i16> %a1) {
|
|
; SSE-LABEL: test_x86_sse2_psubs_w:
|
|
; SSE: ## %bb.0:
|
|
; SSE-NEXT: psubsw %xmm1, %xmm0
|
|
; SSE-NEXT: retl
|
|
%1 = sext <8 x i16> %a0 to <8 x i32>
|
|
%2 = sext <8 x i16> %a1 to <8 x i32>
|
|
%3 = sub nsw <8 x i32> %1, %2
|
|
; Upper clamp: keep %3 where it is below 32767, otherwise use 32767.
%4 = icmp slt <8 x i32> %3, <i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767>
|
|
%5 = select <8 x i1> %4, <8 x i32> %3, <8 x i32> <i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767>
|
|
; Lower clamp: keep %5 where it is above -32768, otherwise use -32768.
%6 = icmp sgt <8 x i32> %5, <i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768>
|
|
%7 = select <8 x i1> %6, <8 x i32> %5, <8 x i32> <i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768>
|
|
%8 = trunc <8 x i32> %7 to <8 x i16>
|
|
ret <8 x i16> %8
|
|
}
|
|
|
|
|
|
; Unsigned saturating subtract of two <16 x i8> operands, written as
; max(%a0, %a1) - %a1 (icmp ugt / select / sub), which yields %a0 - %a1 where
; %a0 is larger and 0 elsewhere. The only checks in this function use the SSE
; prefix and expect a single psubusb followed by retl.
define <16 x i8> @test_x86_sse2_psubus_b(<16 x i8> %a0, <16 x i8> %a1) {
|
|
; SSE-LABEL: test_x86_sse2_psubus_b:
|
|
; SSE: ## %bb.0:
|
|
; SSE-NEXT: psubusb %xmm1, %xmm0
|
|
; SSE-NEXT: retl
|
|
%cmp = icmp ugt <16 x i8> %a0, %a1
|
|
; %sel is the unsigned maximum of %a0 and %a1, so the subtraction below cannot wrap.
%sel = select <16 x i1> %cmp, <16 x i8> %a0, <16 x i8> %a1
|
|
%sub = sub <16 x i8> %sel, %a1
|
|
ret <16 x i8> %sub
|
|
}
|
|
|
|
|
|
; Unsigned saturating subtract of two <8 x i16> operands, written as
; max(%a0, %a1) - %a1 (icmp ugt / select / sub), which yields %a0 - %a1 where
; %a0 is larger and 0 elsewhere. The only checks in this function use the SSE
; prefix and expect a single psubusw followed by retl.
define <8 x i16> @test_x86_sse2_psubus_w(<8 x i16> %a0, <8 x i16> %a1) {
|
|
; SSE-LABEL: test_x86_sse2_psubus_w:
|
|
; SSE: ## %bb.0:
|
|
; SSE-NEXT: psubusw %xmm1, %xmm0
|
|
; SSE-NEXT: retl
|
|
%cmp = icmp ugt <8 x i16> %a0, %a1
|
|
; %sel is the unsigned maximum of %a0 and %a1, so the subtraction below cannot wrap.
%sel = select <8 x i1> %cmp, <8 x i16> %a0, <8 x i16> %a1
|
|
%sub = sub <8 x i16> %sel, %a1
|
|
ret <8 x i16> %sub
|
|
}
|
|
|
|
; <8 x i8> (64-bit vector) form of the signed saturating add idiom: widen to
; i16, add, clamp to [-128, 127], truncate. Neither set of checks below expects
; a saturating-add instruction; both record an expanded sequence instead: a
; shift-left/arithmetic-shift-right by 8 on each operand, a 16-bit add, and
; pminsw/pmaxsw against memory constants.
; NOTE(review): the shift pairs presumably re-create the sign extension because
; the <8 x i8> values are carried in 16-bit lanes - confirm against the
; target's type legalization.
define <8 x i8> @test_x86_sse2_padds_b_64(<8 x i8> %a0, <8 x i8> %a1) {
|
|
; AVX512BW-LABEL: test_x86_sse2_padds_b_64:
|
|
; AVX512BW: ## %bb.0:
|
|
; AVX512BW-NEXT: vpsllw $8, %xmm0, %xmm0
|
|
; AVX512BW-NEXT: vpsraw $8, %xmm0, %xmm0
|
|
; AVX512BW-NEXT: vpsllw $8, %xmm1, %xmm1
|
|
; AVX512BW-NEXT: vpsraw $8, %xmm1, %xmm1
|
|
; AVX512BW-NEXT: vpaddw %xmm1, %xmm0, %xmm0
|
|
; AVX512BW-NEXT: vpminsw {{.*}}(%rip), %xmm0, %xmm0
|
|
; AVX512BW-NEXT: vpmaxsw {{.*}}(%rip), %xmm0, %xmm0
|
|
; AVX512BW-NEXT: retq
|
|
;
|
|
; SSE-LABEL: test_x86_sse2_padds_b_64:
|
|
; SSE: ## %bb.0:
|
|
; SSE-NEXT: psllw $8, %xmm0
|
|
; SSE-NEXT: psraw $8, %xmm0
|
|
; SSE-NEXT: psllw $8, %xmm1
|
|
; SSE-NEXT: psraw $8, %xmm1
|
|
; SSE-NEXT: paddw %xmm1, %xmm0
|
|
; SSE-NEXT: pminsw LCPI144_0, %xmm0
|
|
; SSE-NEXT: pmaxsw LCPI144_1, %xmm0
|
|
; SSE-NEXT: retl
|
|
%1 = sext <8 x i8> %a0 to <8 x i16>
|
|
%2 = sext <8 x i8> %a1 to <8 x i16>
|
|
%3 = add nsw <8 x i16> %1, %2
|
|
; Upper clamp: keep %3 where it is below 127, otherwise use 127.
%4 = icmp slt <8 x i16> %3, <i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127>
|
|
%5 = select <8 x i1> %4, <8 x i16> %3, <8 x i16> <i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127>
|
|
; Lower clamp: keep %5 where it is above -128, otherwise use -128.
%6 = icmp sgt <8 x i16> %5, <i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128>
|
|
%7 = select <8 x i1> %6, <8 x i16> %5, <8 x i16> <i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128>
|
|
%8 = trunc <8 x i16> %7 to <8 x i8>
|
|
ret <8 x i8> %8
|
|
}
|
|
|
|
; <4 x i16> (64-bit vector) form of the signed saturating add idiom: widen to
; i32, add, clamp to [-32768, 32767], truncate. Neither set of checks below
; expects a saturating-add instruction. Both start with a shift-left /
; arithmetic-shift-right by 16 on each operand and a 32-bit add; the AVX512BW
; checks then clamp with vpminsd/vpmaxsd against broadcast constants, while the
; SSE checks build each clamp from pcmpgtd + pand/pandn/por.
; The constant 4294934528 in the checks is -32768 printed as an unsigned
; 32-bit value (2^32 - 32768).
define <4 x i16> @test_x86_sse2_padds_w_64(<4 x i16> %a0, <4 x i16> %a1) {
|
|
; AVX512BW-LABEL: test_x86_sse2_padds_w_64:
|
|
; AVX512BW: ## %bb.0:
|
|
; AVX512BW-NEXT: vpslld $16, %xmm0, %xmm0
|
|
; AVX512BW-NEXT: vpsrad $16, %xmm0, %xmm0
|
|
; AVX512BW-NEXT: vpslld $16, %xmm1, %xmm1
|
|
; AVX512BW-NEXT: vpsrad $16, %xmm1, %xmm1
|
|
; AVX512BW-NEXT: vpaddd %xmm1, %xmm0, %xmm0
|
|
; AVX512BW-NEXT: vpbroadcastd {{.*#+}} xmm1 = [32767,32767,32767,32767]
|
|
; AVX512BW-NEXT: vpminsd %xmm1, %xmm0, %xmm0
|
|
; AVX512BW-NEXT: vpbroadcastd {{.*#+}} xmm1 = [4294934528,4294934528,4294934528,4294934528]
|
|
; AVX512BW-NEXT: vpmaxsd %xmm1, %xmm0, %xmm0
|
|
; AVX512BW-NEXT: retq
|
|
;
|
|
; SSE-LABEL: test_x86_sse2_padds_w_64:
|
|
; SSE: ## %bb.0:
|
|
; SSE-NEXT: pslld $16, %xmm0
|
|
; SSE-NEXT: psrad $16, %xmm0
|
|
; SSE-NEXT: pslld $16, %xmm1
|
|
; SSE-NEXT: psrad $16, %xmm1
|
|
; SSE-NEXT: paddd %xmm0, %xmm1
|
|
; SSE-NEXT: movdqa {{.*#+}} xmm0 = [32767,32767,32767,32767]
|
|
; SSE-NEXT: movdqa %xmm0, %xmm2
|
|
; SSE-NEXT: pcmpgtd %xmm1, %xmm2
|
|
; SSE-NEXT: pand %xmm2, %xmm1
|
|
; SSE-NEXT: pandn %xmm0, %xmm2
|
|
; SSE-NEXT: por %xmm1, %xmm2
|
|
; SSE-NEXT: movdqa {{.*#+}} xmm1 = [4294934528,4294934528,4294934528,4294934528]
|
|
; SSE-NEXT: movdqa %xmm2, %xmm0
|
|
; SSE-NEXT: pcmpgtd %xmm1, %xmm0
|
|
; SSE-NEXT: pand %xmm0, %xmm2
|
|
; SSE-NEXT: pandn %xmm1, %xmm0
|
|
; SSE-NEXT: por %xmm2, %xmm0
|
|
; SSE-NEXT: retl
|
|
%1 = sext <4 x i16> %a0 to <4 x i32>
|
|
%2 = sext <4 x i16> %a1 to <4 x i32>
|
|
%3 = add nsw <4 x i32> %1, %2
|
|
; Upper clamp: keep %3 where it is below 32767, otherwise use 32767.
%4 = icmp slt <4 x i32> %3, <i32 32767, i32 32767, i32 32767, i32 32767>
|
|
%5 = select <4 x i1> %4, <4 x i32> %3, <4 x i32> <i32 32767, i32 32767, i32 32767, i32 32767>
|
|
; Lower clamp: keep %5 where it is above -32768, otherwise use -32768.
%6 = icmp sgt <4 x i32> %5, <i32 -32768, i32 -32768, i32 -32768, i32 -32768>
|
|
%7 = select <4 x i1> %6, <4 x i32> %5, <4 x i32> <i32 -32768, i32 -32768, i32 -32768, i32 -32768>
|
|
%8 = trunc <4 x i32> %7 to <4 x i16>
|
|
ret <4 x i16> %8
|
|
}
|
|
|
|
|
|
; Unsigned saturating add on a 64-bit <8 x i8> vector, spelled as generic IR:
; zero-extend to i16, add, take the unsigned min with 255, truncate to i8.
; Both runs expect the operands to be masked to their low byte, added as
; 16-bit lanes, and clamped with a min against a constant-pool operand.
; The SSE2-only run uses the signed pminsw for the clamp; the sum of two
; zero-extended bytes is at most 510, so signed and unsigned min agree.
define <8 x i8> @test_x86_sse2_paddus_b_64(<8 x i8> %a0, <8 x i8> %a1) {
; AVX512BW-LABEL: test_x86_sse2_paddus_b_64:
; AVX512BW: ## %bb.0:
; AVX512BW-NEXT: vmovdqa {{.*#+}} xmm2 = [255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0]
; AVX512BW-NEXT: vpand %xmm2, %xmm0, %xmm0
; AVX512BW-NEXT: vpand %xmm2, %xmm1, %xmm1
; AVX512BW-NEXT: vpaddw %xmm1, %xmm0, %xmm0
; AVX512BW-NEXT: vpminuw {{.*}}(%rip), %xmm0, %xmm0
; AVX512BW-NEXT: retq
;
; SSE-LABEL: test_x86_sse2_paddus_b_64:
; SSE: ## %bb.0:
; SSE-NEXT: movdqa {{.*#+}} xmm2 = [255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0]
; SSE-NEXT: pand %xmm2, %xmm0
; SSE-NEXT: pand %xmm2, %xmm1
; SSE-NEXT: paddw %xmm1, %xmm0
; SSE-NEXT: pminsw LCPI146_0, %xmm0
; SSE-NEXT: retl
  ; Widen both operands so the sum cannot overflow.
  %1 = zext <8 x i8> %a0 to <8 x i16>
  %2 = zext <8 x i8> %a1 to <8 x i16>
  %3 = add nsw <8 x i16> %1, %2
  ; Clamp from above: unsigned min(sum, 255).
  %4 = icmp ult <8 x i16> %3, <i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255>
  %5 = select <8 x i1> %4, <8 x i16> %3, <8 x i16> <i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255>
  %6 = trunc <8 x i16> %5 to <8 x i8>
  ret <8 x i8> %6
}

; Unsigned saturating add on a 64-bit <4 x i16> vector, spelled as generic IR:
; zero-extend to i32, add, take the unsigned min with 65535, truncate to i16.
; The AVX-512 run expects the zero extension as a blend with a zeroed
; register, then vpaddd and vpminud against a broadcast 65535.
; The SSE2-only run expects an and-mask for the extension and the min
; expanded into a pcmpgtd + pand/pandn/por select, reusing the mask register
; as the 65535 clamp value.
define <4 x i16> @test_x86_sse2_paddus_w_64(<4 x i16> %a0, <4 x i16> %a1) {
; AVX512BW-LABEL: test_x86_sse2_paddus_w_64:
; AVX512BW: ## %bb.0:
; AVX512BW-NEXT: vpxor %xmm2, %xmm2, %xmm2
; AVX512BW-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],xmm2[1],xmm0[2],xmm2[3],xmm0[4],xmm2[5],xmm0[6],xmm2[7]
; AVX512BW-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0],xmm2[1],xmm1[2],xmm2[3],xmm1[4],xmm2[5],xmm1[6],xmm2[7]
; AVX512BW-NEXT: vpaddd %xmm1, %xmm0, %xmm0
; AVX512BW-NEXT: vpbroadcastd {{.*#+}} xmm1 = [65535,65535,65535,65535]
; AVX512BW-NEXT: vpminud %xmm1, %xmm0, %xmm0
; AVX512BW-NEXT: retq
;
; SSE-LABEL: test_x86_sse2_paddus_w_64:
; SSE: ## %bb.0:
; SSE-NEXT: movdqa {{.*#+}} xmm2 = [65535,0,65535,0,65535,0,65535,0]
; SSE-NEXT: pand %xmm2, %xmm0
; SSE-NEXT: pand %xmm2, %xmm1
; SSE-NEXT: paddd %xmm0, %xmm1
; SSE-NEXT: movdqa %xmm2, %xmm0
; SSE-NEXT: pcmpgtd %xmm1, %xmm0
; SSE-NEXT: pand %xmm0, %xmm1
; SSE-NEXT: pandn %xmm2, %xmm0
; SSE-NEXT: por %xmm1, %xmm0
; SSE-NEXT: retl
  ; Widen both operands so the sum cannot overflow.
  %1 = zext <4 x i16> %a0 to <4 x i32>
  %2 = zext <4 x i16> %a1 to <4 x i32>
  %3 = add nsw <4 x i32> %1, %2
  ; Clamp from above: unsigned min(sum, 65535).
  %4 = icmp ult <4 x i32> %3, <i32 65535, i32 65535, i32 65535, i32 65535>
  %5 = select <4 x i1> %4, <4 x i32> %3, <4 x i32> <i32 65535, i32 65535, i32 65535, i32 65535>
  %6 = trunc <4 x i32> %5 to <4 x i16>
  ret <4 x i16> %6
}

; Signed saturating subtract on a 64-bit <8 x i8> vector, spelled as generic
; IR: sign-extend to i16, subtract, clamp to [-128, 127], truncate to i8.
; Both runs expect the widened form (in-register sign extension by
; shift-left/arithmetic-shift-right of 8, a 16-bit subtract, then signed
; min/max against constant-pool operands) rather than a byte saturating
; subtract.
define <8 x i8> @test_x86_sse2_psubs_b_64(<8 x i8> %a0, <8 x i8> %a1) {
; AVX512BW-LABEL: test_x86_sse2_psubs_b_64:
; AVX512BW: ## %bb.0:
; AVX512BW-NEXT: vpsllw $8, %xmm0, %xmm0
; AVX512BW-NEXT: vpsraw $8, %xmm0, %xmm0
; AVX512BW-NEXT: vpsllw $8, %xmm1, %xmm1
; AVX512BW-NEXT: vpsraw $8, %xmm1, %xmm1
; AVX512BW-NEXT: vpsubw %xmm1, %xmm0, %xmm0
; AVX512BW-NEXT: vpminsw {{.*}}(%rip), %xmm0, %xmm0
; AVX512BW-NEXT: vpmaxsw {{.*}}(%rip), %xmm0, %xmm0
; AVX512BW-NEXT: retq
;
; SSE-LABEL: test_x86_sse2_psubs_b_64:
; SSE: ## %bb.0:
; SSE-NEXT: psllw $8, %xmm0
; SSE-NEXT: psraw $8, %xmm0
; SSE-NEXT: psllw $8, %xmm1
; SSE-NEXT: psraw $8, %xmm1
; SSE-NEXT: psubw %xmm1, %xmm0
; SSE-NEXT: pminsw LCPI148_0, %xmm0
; SSE-NEXT: pmaxsw LCPI148_1, %xmm0
; SSE-NEXT: retl
  ; Widen both operands so the difference cannot overflow.
  %1 = sext <8 x i8> %a0 to <8 x i16>
  %2 = sext <8 x i8> %a1 to <8 x i16>
  %3 = sub nsw <8 x i16> %1, %2
  ; Clamp from above: min(difference, 127).
  %4 = icmp slt <8 x i16> %3, <i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127>
  %5 = select <8 x i1> %4, <8 x i16> %3, <8 x i16> <i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127>
  ; Clamp from below: max(result, -128).
  %6 = icmp sgt <8 x i16> %5, <i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128>
  %7 = select <8 x i1> %6, <8 x i16> %5, <8 x i16> <i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128>
  %8 = trunc <8 x i16> %7 to <8 x i8>
  ret <8 x i8> %8
}

; Signed saturating subtract on a 64-bit <4 x i16> vector, spelled as generic
; IR: sign-extend to i32, subtract, clamp to [-32768, 32767], truncate to i16.
; The AVX-512 run expects a 32-bit subtract followed by vpminsd/vpmaxsd
; against broadcast constants. The SSE2-only run expects each clamp to be
; expanded into a pcmpgtd + pand/pandn/por select sequence.
; 4294934528 in the expected constants is -32768 printed as unsigned 32-bit.
define <4 x i16> @test_x86_sse2_psubs_w_64(<4 x i16> %a0, <4 x i16> %a1) {
; AVX512BW-LABEL: test_x86_sse2_psubs_w_64:
; AVX512BW: ## %bb.0:
; AVX512BW-NEXT: vpslld $16, %xmm0, %xmm0
; AVX512BW-NEXT: vpsrad $16, %xmm0, %xmm0
; AVX512BW-NEXT: vpslld $16, %xmm1, %xmm1
; AVX512BW-NEXT: vpsrad $16, %xmm1, %xmm1
; AVX512BW-NEXT: vpsubd %xmm1, %xmm0, %xmm0
; AVX512BW-NEXT: vpbroadcastd {{.*#+}} xmm1 = [32767,32767,32767,32767]
; AVX512BW-NEXT: vpminsd %xmm1, %xmm0, %xmm0
; AVX512BW-NEXT: vpbroadcastd {{.*#+}} xmm1 = [4294934528,4294934528,4294934528,4294934528]
; AVX512BW-NEXT: vpmaxsd %xmm1, %xmm0, %xmm0
; AVX512BW-NEXT: retq
;
; SSE-LABEL: test_x86_sse2_psubs_w_64:
; SSE: ## %bb.0:
; SSE-NEXT: pslld $16, %xmm0
; SSE-NEXT: psrad $16, %xmm0
; SSE-NEXT: pslld $16, %xmm1
; SSE-NEXT: psrad $16, %xmm1
; SSE-NEXT: psubd %xmm1, %xmm0
; SSE-NEXT: movdqa {{.*#+}} xmm1 = [32767,32767,32767,32767]
; SSE-NEXT: movdqa %xmm1, %xmm2
; SSE-NEXT: pcmpgtd %xmm0, %xmm2
; SSE-NEXT: pand %xmm2, %xmm0
; SSE-NEXT: pandn %xmm1, %xmm2
; SSE-NEXT: por %xmm0, %xmm2
; SSE-NEXT: movdqa {{.*#+}} xmm1 = [4294934528,4294934528,4294934528,4294934528]
; SSE-NEXT: movdqa %xmm2, %xmm0
; SSE-NEXT: pcmpgtd %xmm1, %xmm0
; SSE-NEXT: pand %xmm0, %xmm2
; SSE-NEXT: pandn %xmm1, %xmm0
; SSE-NEXT: por %xmm2, %xmm0
; SSE-NEXT: retl
  ; Widen both operands so the difference cannot overflow.
  %1 = sext <4 x i16> %a0 to <4 x i32>
  %2 = sext <4 x i16> %a1 to <4 x i32>
  %3 = sub nsw <4 x i32> %1, %2
  ; Clamp from above: min(difference, 32767).
  %4 = icmp slt <4 x i32> %3, <i32 32767, i32 32767, i32 32767, i32 32767>
  %5 = select <4 x i1> %4, <4 x i32> %3, <4 x i32> <i32 32767, i32 32767, i32 32767, i32 32767>
  ; Clamp from below: max(result, -32768).
  %6 = icmp sgt <4 x i32> %5, <i32 -32768, i32 -32768, i32 -32768, i32 -32768>
  %7 = select <4 x i1> %6, <4 x i32> %5, <4 x i32> <i32 -32768, i32 -32768, i32 -32768, i32 -32768>
  %8 = trunc <4 x i32> %7 to <4 x i16>
  ret <4 x i16> %8
}

; Unsigned saturating subtract on a 64-bit <8 x i8> vector, spelled as
; generic IR: unsigned max(a0, a1) minus a1.
; Both runs expect the operands to be masked to their low byte, a 16-bit max
; of the masked values, and a 16-bit subtract of the unmasked second operand.
; NOTE(review): subtracting the unmasked register presumably relies on only
; the low byte of each 16-bit lane being significant -- confirm.
define <8 x i8> @test_x86_sse2_psubus_b_64(<8 x i8> %a0, <8 x i8> %a1) {
; AVX512BW-LABEL: test_x86_sse2_psubus_b_64:
; AVX512BW: ## %bb.0:
; AVX512BW-NEXT: vmovdqa {{.*#+}} xmm2 = [255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0]
; AVX512BW-NEXT: vpand %xmm2, %xmm1, %xmm3
; AVX512BW-NEXT: vpand %xmm2, %xmm0, %xmm0
; AVX512BW-NEXT: vpmaxuw %xmm3, %xmm0, %xmm0
; AVX512BW-NEXT: vpsubw %xmm1, %xmm0, %xmm0
; AVX512BW-NEXT: retq
;
; SSE-LABEL: test_x86_sse2_psubus_b_64:
; SSE: ## %bb.0:
; SSE-NEXT: movdqa {{.*#+}} xmm2 = [255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0]
; SSE-NEXT: movdqa %xmm1, %xmm3
; SSE-NEXT: pand %xmm2, %xmm3
; SSE-NEXT: pand %xmm2, %xmm0
; SSE-NEXT: pmaxsw %xmm3, %xmm0
; SSE-NEXT: psubw %xmm1, %xmm0
; SSE-NEXT: retl
  ; Lane-wise unsigned max(a0, a1).
  %cmp = icmp ugt <8 x i8> %a0, %a1
  %sel = select <8 x i1> %cmp, <8 x i8> %a0, <8 x i8> %a1
  ; The max is never below a1, so this difference cannot wrap.
  %sub = sub <8 x i8> %sel, %a1
  ret <8 x i8> %sub
}

; Unsigned saturating subtract on a 64-bit <4 x i16> vector, spelled as
; generic IR: unsigned max(a0, a1) minus a1.
; The AVX-512 run expects both operands zero-extended by blending with a
; zeroed register, then vpmaxud and a 32-bit subtract of the unmasked second
; operand. The SSE2-only run expects an and-mask for the extension and the
; max expanded into a pcmpgtd + pand/pandn/por select before the subtract.
define <4 x i16> @test_x86_sse2_psubus_w_64(<4 x i16> %a0, <4 x i16> %a1) {
; AVX512BW-LABEL: test_x86_sse2_psubus_w_64:
; AVX512BW: ## %bb.0:
; AVX512BW-NEXT: vpxor %xmm2, %xmm2, %xmm2
; AVX512BW-NEXT: vpblendw {{.*#+}} xmm3 = xmm1[0],xmm2[1],xmm1[2],xmm2[3],xmm1[4],xmm2[5],xmm1[6],xmm2[7]
; AVX512BW-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],xmm2[1],xmm0[2],xmm2[3],xmm0[4],xmm2[5],xmm0[6],xmm2[7]
; AVX512BW-NEXT: vpmaxud %xmm3, %xmm0, %xmm0
; AVX512BW-NEXT: vpsubd %xmm1, %xmm0, %xmm0
; AVX512BW-NEXT: retq
;
; SSE-LABEL: test_x86_sse2_psubus_w_64:
; SSE: ## %bb.0:
; SSE-NEXT: movdqa {{.*#+}} xmm2 = [65535,0,65535,0,65535,0,65535,0]
; SSE-NEXT: movdqa %xmm1, %xmm3
; SSE-NEXT: pand %xmm2, %xmm3
; SSE-NEXT: pand %xmm2, %xmm0
; SSE-NEXT: movdqa %xmm0, %xmm2
; SSE-NEXT: pcmpgtd %xmm3, %xmm2
; SSE-NEXT: pand %xmm2, %xmm0
; SSE-NEXT: pandn %xmm3, %xmm2
; SSE-NEXT: por %xmm0, %xmm2
; SSE-NEXT: psubd %xmm1, %xmm2
; SSE-NEXT: movdqa %xmm2, %xmm0
; SSE-NEXT: retl
  ; Lane-wise unsigned max(a0, a1).
  %cmp = icmp ugt <4 x i16> %a0, %a1
  %sel = select <4 x i1> %cmp, <4 x i16> %a0, <4 x i16> %a1
  ; The max is never below a1, so this difference cannot wrap.
  %sub = sub <4 x i16> %sel, %a1
  ret <4 x i16> %sub
}