; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt < %s -vector-combine -S -mtriple=x86_64-- | FileCheck %s

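; The tests below exercise the extract/extract/binop fold in the
; -vector-combine pass. The rewrite is roughly:
;   binop (extractelement X, C), (extractelement Y, C)
;     --> extractelement (binop X, Y), C
; It fires only when the target cost model says the vector op plus one
; extract is no more expensive than two extracts plus the scalar op.
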
; Eliminating extract is profitable.

define i8 @ext0_ext0_add(<16 x i8> %x, <16 x i8> %y) {
; CHECK-LABEL: @ext0_ext0_add(
; CHECK-NEXT:    [[TMP1:%.*]] = add <16 x i8> [[X:%.*]], [[Y:%.*]]
; CHECK-NEXT:    [[TMP2:%.*]] = extractelement <16 x i8> [[TMP1]], i32 0
; CHECK-NEXT:    ret i8 [[TMP2]]
;
  %e0 = extractelement <16 x i8> %x, i32 0
  %e1 = extractelement <16 x i8> %y, i32 0
  %r = add i8 %e0, %e1
  ret i8 %r
}

; Eliminating extract is still profitable. Flags propagate.

define i8 @ext1_ext1_add_flags(<16 x i8> %x, <16 x i8> %y) {
; CHECK-LABEL: @ext1_ext1_add_flags(
; CHECK-NEXT:    [[TMP1:%.*]] = add nuw nsw <16 x i8> [[X:%.*]], [[Y:%.*]]
; CHECK-NEXT:    [[TMP2:%.*]] = extractelement <16 x i8> [[TMP1]], i32 1
; CHECK-NEXT:    ret i8 [[TMP2]]
;
  %e0 = extractelement <16 x i8> %x, i32 1
  %e1 = extractelement <16 x i8> %y, i32 1
  %r = add nsw nuw i8 %e0, %e1
  ret i8 %r
}

; Negative test - eliminating extract is profitable, but vector shift is expensive.
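; (x86 has no per-element variable shift instruction for i8 vectors, so a
; <16 x i8> shl by a vector amount must be emulated with many instructions;
; the lone scalar shift is far cheaper.)
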
define i8 @ext1_ext1_shl(<16 x i8> %x, <16 x i8> %y) {
; CHECK-LABEL: @ext1_ext1_shl(
; CHECK-NEXT:    [[E0:%.*]] = extractelement <16 x i8> [[X:%.*]], i32 1
; CHECK-NEXT:    [[E1:%.*]] = extractelement <16 x i8> [[Y:%.*]], i32 1
; CHECK-NEXT:    [[R:%.*]] = shl i8 [[E0]], [[E1]]
; CHECK-NEXT:    ret i8 [[R]]
;
  %e0 = extractelement <16 x i8> %x, i32 1
  %e1 = extractelement <16 x i8> %y, i32 1
  %r = shl i8 %e0, %e1
  ret i8 %r
}

; Negative test - eliminating extract is profitable, but vector multiply is expensive.
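; (x86 has no <16 x i8> multiply instruction; a vector i8 multiply is
; emulated by widening to i16 and repacking, so the scalar mul wins.)
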
define i8 @ext13_ext13_mul(<16 x i8> %x, <16 x i8> %y) {
; CHECK-LABEL: @ext13_ext13_mul(
; CHECK-NEXT:    [[E0:%.*]] = extractelement <16 x i8> [[X:%.*]], i32 13
; CHECK-NEXT:    [[E1:%.*]] = extractelement <16 x i8> [[Y:%.*]], i32 13
; CHECK-NEXT:    [[R:%.*]] = mul i8 [[E0]], [[E1]]
; CHECK-NEXT:    ret i8 [[R]]
;
  %e0 = extractelement <16 x i8> %x, i32 13
  %e1 = extractelement <16 x i8> %y, i32 13
  %r = mul i8 %e0, %e1
  ret i8 %r
}

; Negative test - cost is irrelevant because sdiv has potential UB.
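; Forming the vector sdiv would speculatively divide in all 16 lanes, and
; any lane whose divisor is 0 (or INT_MIN / -1) is immediate UB, so the
; fold must be rejected no matter what the cost model says.
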
define i8 @ext0_ext0_sdiv(<16 x i8> %x, <16 x i8> %y) {
; CHECK-LABEL: @ext0_ext0_sdiv(
; CHECK-NEXT:    [[E0:%.*]] = extractelement <16 x i8> [[X:%.*]], i32 0
; CHECK-NEXT:    [[E1:%.*]] = extractelement <16 x i8> [[Y:%.*]], i32 0
; CHECK-NEXT:    [[R:%.*]] = sdiv i8 [[E0]], [[E1]]
; CHECK-NEXT:    ret i8 [[R]]
;
  %e0 = extractelement <16 x i8> %x, i32 0
  %e1 = extractelement <16 x i8> %y, i32 0
  %r = sdiv i8 %e0, %e1
  ret i8 %r
}

; Extracts are free and the vector op has the same cost as the scalar op,
; but we speculatively transform to vector to create more optimization
; opportunities.
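; (Extracting lane 0 of a <2 x double> is free on x86: the scalar value
; already lives in the low half of the XMM register.)
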
define double @ext0_ext0_fadd(<2 x double> %x, <2 x double> %y) {
; CHECK-LABEL: @ext0_ext0_fadd(
; CHECK-NEXT:    [[TMP1:%.*]] = fadd <2 x double> [[X:%.*]], [[Y:%.*]]
; CHECK-NEXT:    [[TMP2:%.*]] = extractelement <2 x double> [[TMP1]], i32 0
; CHECK-NEXT:    ret double [[TMP2]]
;
  %e0 = extractelement <2 x double> %x, i32 0
  %e1 = extractelement <2 x double> %y, i32 0
  %r = fadd double %e0, %e1
  ret double %r
}

; Eliminating extract is profitable. Flags propagate.

define double @ext1_ext1_fsub(<2 x double> %x, <2 x double> %y) {
; CHECK-LABEL: @ext1_ext1_fsub(
; CHECK-NEXT:    [[TMP1:%.*]] = fsub fast <2 x double> [[X:%.*]], [[Y:%.*]]
; CHECK-NEXT:    [[TMP2:%.*]] = extractelement <2 x double> [[TMP1]], i32 1
; CHECK-NEXT:    ret double [[TMP2]]
;
  %e0 = extractelement <2 x double> %x, i32 1
  %e1 = extractelement <2 x double> %y, i32 1
  %r = fsub fast double %e0, %e1
  ret double %r
}

; Negative test - type mismatch.

define double @ext1_ext1_fadd_different_types(<2 x double> %x, <4 x double> %y) {
; CHECK-LABEL: @ext1_ext1_fadd_different_types(
; CHECK-NEXT:    [[E0:%.*]] = extractelement <2 x double> [[X:%.*]], i32 1
; CHECK-NEXT:    [[E1:%.*]] = extractelement <4 x double> [[Y:%.*]], i32 1
; CHECK-NEXT:    [[R:%.*]] = fadd fast double [[E0]], [[E1]]
; CHECK-NEXT:    ret double [[R]]
;
  %e0 = extractelement <2 x double> %x, i32 1
  %e1 = extractelement <4 x double> %y, i32 1
  %r = fadd fast double %e0, %e1
  ret double %r
}

; Disguised same vector operand; scalar code is not cheaper (with the
; default x86 target), so aggressively form the vector binop.

define i32 @ext1_ext1_add_same_vec(<4 x i32> %x) {
; CHECK-LABEL: @ext1_ext1_add_same_vec(
; CHECK-NEXT:    [[TMP1:%.*]] = add <4 x i32> [[X:%.*]], [[X]]
; CHECK-NEXT:    [[TMP2:%.*]] = extractelement <4 x i32> [[TMP1]], i32 1
; CHECK-NEXT:    ret i32 [[TMP2]]
;
  %e0 = extractelement <4 x i32> %x, i32 1
  %e1 = extractelement <4 x i32> %x, i32 1
  %r = add i32 %e0, %e1
  ret i32 %r
}

; Functionally equivalent to the test above; should transform the same way.

define i32 @ext1_ext1_add_same_vec_cse(<4 x i32> %x) {
; CHECK-LABEL: @ext1_ext1_add_same_vec_cse(
; CHECK-NEXT:    [[TMP1:%.*]] = add <4 x i32> [[X:%.*]], [[X]]
; CHECK-NEXT:    [[TMP2:%.*]] = extractelement <4 x i32> [[TMP1]], i32 1
; CHECK-NEXT:    ret i32 [[TMP2]]
;
  %e0 = extractelement <4 x i32> %x, i32 1
  %r = add i32 %e0, %e0
  ret i32 %r
}

; Don't assert if extract indices have different types.
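; Both extracts select element 1; only the index operand types differ
; (i32 vs. i64), which the index-matching logic must tolerate.
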
define i32 @ext1_ext1_add_same_vec_diff_idx_ty(<4 x i32> %x) {
; CHECK-LABEL: @ext1_ext1_add_same_vec_diff_idx_ty(
; CHECK-NEXT:    [[TMP1:%.*]] = add <4 x i32> [[X:%.*]], [[X]]
; CHECK-NEXT:    [[TMP2:%.*]] = extractelement <4 x i32> [[TMP1]], i32 1
; CHECK-NEXT:    ret i32 [[TMP2]]
;
  %e0 = extractelement <4 x i32> %x, i32 1
  %e1 = extractelement <4 x i32> %x, i64 1
  %r = add i32 %e0, %e1
  ret i32 %r
}

declare void @use_i8(i8)

; Negative test - same vector operand; scalar code is cheaper than the
; general case, and vector code would be more expensive still.
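; Because the extracted scalar has another use, its extract must stay even
; if a vector add were formed, so the transform could only add instructions.
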
define i8 @ext1_ext1_add_same_vec_extra_use0(<16 x i8> %x) {
; CHECK-LABEL: @ext1_ext1_add_same_vec_extra_use0(
; CHECK-NEXT:    [[E0:%.*]] = extractelement <16 x i8> [[X:%.*]], i32 0
; CHECK-NEXT:    call void @use_i8(i8 [[E0]])
; CHECK-NEXT:    [[E1:%.*]] = extractelement <16 x i8> [[X]], i32 0
; CHECK-NEXT:    [[R:%.*]] = add i8 [[E0]], [[E1]]
; CHECK-NEXT:    ret i8 [[R]]
;
  %e0 = extractelement <16 x i8> %x, i32 0
  call void @use_i8(i8 %e0)
  %e1 = extractelement <16 x i8> %x, i32 0
  %r = add i8 %e0, %e1
  ret i8 %r
}

; Negative test - same vector operand; scalar code is cheaper than the
; general case, and vector code would be more expensive still.

define i8 @ext1_ext1_add_same_vec_extra_use1(<16 x i8> %x) {
; CHECK-LABEL: @ext1_ext1_add_same_vec_extra_use1(
; CHECK-NEXT:    [[E0:%.*]] = extractelement <16 x i8> [[X:%.*]], i32 0
; CHECK-NEXT:    [[E1:%.*]] = extractelement <16 x i8> [[X]], i32 0
; CHECK-NEXT:    call void @use_i8(i8 [[E1]])
; CHECK-NEXT:    [[R:%.*]] = add i8 [[E0]], [[E1]]
; CHECK-NEXT:    ret i8 [[R]]
;
  %e0 = extractelement <16 x i8> %x, i32 0
  %e1 = extractelement <16 x i8> %x, i32 0
  call void @use_i8(i8 %e1)
  %r = add i8 %e0, %e1
  ret i8 %r
}

; Negative test - same vector operand; scalar code is cheaper than the
; general case, and vector code would be more expensive still.

define i8 @ext1_ext1_add_same_vec_cse_extra_use(<16 x i8> %x) {
; CHECK-LABEL: @ext1_ext1_add_same_vec_cse_extra_use(
; CHECK-NEXT:    [[E:%.*]] = extractelement <16 x i8> [[X:%.*]], i32 0
; CHECK-NEXT:    call void @use_i8(i8 [[E]])
; CHECK-NEXT:    [[R:%.*]] = add i8 [[E]], [[E]]
; CHECK-NEXT:    ret i8 [[R]]
;
  %e = extractelement <16 x i8> %x, i32 0
  call void @use_i8(i8 %e)
  %r = add i8 %e, %e
  ret i8 %r
}

; Vector code costs the same as scalar, so aggressively form the vector op.
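; Here only one of the two extracts has an extra use; the other extract is
; still eliminated, so the vector add plus one extract breaks even with the
; scalar form.
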
define i8 @ext1_ext1_add_uses1(<16 x i8> %x, <16 x i8> %y) {
; CHECK-LABEL: @ext1_ext1_add_uses1(
; CHECK-NEXT:    [[E0:%.*]] = extractelement <16 x i8> [[X:%.*]], i32 0
; CHECK-NEXT:    call void @use_i8(i8 [[E0]])
; CHECK-NEXT:    [[TMP1:%.*]] = add <16 x i8> [[X]], [[Y:%.*]]
; CHECK-NEXT:    [[TMP2:%.*]] = extractelement <16 x i8> [[TMP1]], i32 0
; CHECK-NEXT:    ret i8 [[TMP2]]
;
  %e0 = extractelement <16 x i8> %x, i32 0
  call void @use_i8(i8 %e0)
  %e1 = extractelement <16 x i8> %y, i32 0
  %r = add i8 %e0, %e1
  ret i8 %r
}

; Vector code costs the same as scalar, so aggressively form the vector op.

define i8 @ext1_ext1_add_uses2(<16 x i8> %x, <16 x i8> %y) {
; CHECK-LABEL: @ext1_ext1_add_uses2(
; CHECK-NEXT:    [[E1:%.*]] = extractelement <16 x i8> [[Y:%.*]], i32 0
; CHECK-NEXT:    call void @use_i8(i8 [[E1]])
; CHECK-NEXT:    [[TMP1:%.*]] = add <16 x i8> [[X:%.*]], [[Y]]
; CHECK-NEXT:    [[TMP2:%.*]] = extractelement <16 x i8> [[TMP1]], i32 0
; CHECK-NEXT:    ret i8 [[TMP2]]
;
  %e0 = extractelement <16 x i8> %x, i32 0
  %e1 = extractelement <16 x i8> %y, i32 0
  call void @use_i8(i8 %e1)
  %r = add i8 %e0, %e1
  ret i8 %r
}

; TODO: Different extract indexes require a shuffle.
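; One hypothetical way to extend the fold (not what the pass currently
; emits) is to shuffle lane 1 of %y into lane 0 before forming the vector op:
;   %s = shufflevector <16 x i8> %y, <16 x i8> undef, <16 x i32> <i32 1, i32 undef, ...>
;   %v = add <16 x i8> %x, %s
;   %r = extractelement <16 x i8> %v, i32 0
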
define i8 @ext0_ext1_add(<16 x i8> %x, <16 x i8> %y) {
; CHECK-LABEL: @ext0_ext1_add(
; CHECK-NEXT:    [[E0:%.*]] = extractelement <16 x i8> [[X:%.*]], i32 0
; CHECK-NEXT:    [[E1:%.*]] = extractelement <16 x i8> [[Y:%.*]], i32 1
; CHECK-NEXT:    [[R:%.*]] = add i8 [[E0]], [[E1]]
; CHECK-NEXT:    ret i8 [[R]]
;
  %e0 = extractelement <16 x i8> %x, i32 0
  %e1 = extractelement <16 x i8> %y, i32 1
  %r = add i8 %e0, %e1
  ret i8 %r
}

define i8 @ext5_ext0_add(<16 x i8> %x, <16 x i8> %y) {
; CHECK-LABEL: @ext5_ext0_add(
; CHECK-NEXT:    [[E0:%.*]] = extractelement <16 x i8> [[X:%.*]], i32 5
; CHECK-NEXT:    [[E1:%.*]] = extractelement <16 x i8> [[Y:%.*]], i32 0
; CHECK-NEXT:    [[R:%.*]] = sub i8 [[E0]], [[E1]]
; CHECK-NEXT:    ret i8 [[R]]
;
  %e0 = extractelement <16 x i8> %x, i32 5
  %e1 = extractelement <16 x i8> %y, i32 0
  %r = sub i8 %e0, %e1
  ret i8 %r
}

define i8 @ext5_ext6_add(<16 x i8> %x, <16 x i8> %y) {
; CHECK-LABEL: @ext5_ext6_add(
; CHECK-NEXT:    [[E0:%.*]] = extractelement <16 x i8> [[X:%.*]], i32 5
; CHECK-NEXT:    [[E1:%.*]] = extractelement <16 x i8> [[Y:%.*]], i32 6
; CHECK-NEXT:    [[R:%.*]] = and i8 [[E0]], [[E1]]
; CHECK-NEXT:    ret i8 [[R]]
;
  %e0 = extractelement <16 x i8> %x, i32 5
  %e1 = extractelement <16 x i8> %y, i32 6
  %r = and i8 %e0, %e1
  ret i8 %r
}