Re-trying r344082 because it unintentionally included extra diffs.

Original commit message:

icmp ne (and X, 1), 0 --> trunc X to N x i1

Ideally, we'd do the same for scalars, but there will likely be regressions unless we add more trunc folds as we're doing here for vectors.

The motivating vector case is from PR37549: https://bugs.llvm.org/show_bug.cgi?id=37549

```llvm
define <4 x float> @bitwise_select(<4 x float> %x, <4 x float> %y, <4 x float> %z, <4 x float> %w) {
  %c = fcmp ole <4 x float> %x, %y
  %s = sext <4 x i1> %c to <4 x i32>
  %s1 = shufflevector <4 x i32> %s, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 1, i32 1>
  %s2 = shufflevector <4 x i32> %s, <4 x i32> undef, <4 x i32> <i32 2, i32 2, i32 3, i32 3>
  %cond = or <4 x i32> %s1, %s2
  %condtr = trunc <4 x i32> %cond to <4 x i1>
  %r = select <4 x i1> %condtr, <4 x float> %z, <4 x float> %w
  ret <4 x float> %r
}
```

Here's a sampling of the vector codegen for that case using mask+icmp (current behavior) vs. trunc (with this patch):

AVX before:

```
vcmpleps %xmm1, %xmm0, %xmm0
vpermilps $80, %xmm0, %xmm1 ## xmm1 = xmm0[0,0,1,1]
vpermilps $250, %xmm0, %xmm0 ## xmm0 = xmm0[2,2,3,3]
vorps %xmm0, %xmm1, %xmm0
vandps LCPI0_0(%rip), %xmm0, %xmm0
vxorps %xmm1, %xmm1, %xmm1
vpcmpeqd %xmm1, %xmm0, %xmm0
vblendvps %xmm0, %xmm3, %xmm2, %xmm0
```

AVX after:

```
vcmpleps %xmm1, %xmm0, %xmm0
vpermilps $80, %xmm0, %xmm1 ## xmm1 = xmm0[0,0,1,1]
vpermilps $250, %xmm0, %xmm0 ## xmm0 = xmm0[2,2,3,3]
vorps %xmm0, %xmm1, %xmm0
vblendvps %xmm0, %xmm2, %xmm3, %xmm0
```

AVX512f before:

```
vcmpleps %xmm1, %xmm0, %xmm0
vpermilps $80, %xmm0, %xmm1 ## xmm1 = xmm0[0,0,1,1]
vpermilps $250, %xmm0, %xmm0 ## xmm0 = xmm0[2,2,3,3]
vorps %xmm0, %xmm1, %xmm0
vpbroadcastd LCPI0_0(%rip), %xmm1 ## xmm1 = [1,1,1,1]
vptestnmd %zmm1, %zmm0, %k1
vblendmps %zmm3, %zmm2, %zmm0 {%k1}
```

AVX512f after:

```
vcmpleps %xmm1, %xmm0, %xmm0
vpermilps $80, %xmm0, %xmm1 ## xmm1 = xmm0[0,0,1,1]
vpermilps $250, %xmm0, %xmm0 ## xmm0 = xmm0[2,2,3,3]
vorps %xmm0, %xmm1, %xmm0
vpslld $31, %xmm0, %xmm0
vptestmd %zmm0, %zmm0, %k1
vblendmps %zmm2, %zmm3, %zmm0 {%k1}
```

AArch64 before:

```
fcmge v0.4s, v1.4s, v0.4s
zip1 v1.4s, v0.4s, v0.4s
zip2 v0.4s, v0.4s, v0.4s
orr v0.16b, v1.16b, v0.16b
movi v1.4s, #1
and v0.16b, v0.16b, v1.16b
cmeq v0.4s, v0.4s, #0
bsl v0.16b, v3.16b, v2.16b
```

AArch64 after:

```
fcmge v0.4s, v1.4s, v0.4s
zip1 v1.4s, v0.4s, v0.4s
zip2 v0.4s, v0.4s, v0.4s
orr v0.16b, v1.16b, v0.16b
bsl v0.16b, v2.16b, v3.16b
```

PowerPC-le before:

```
xvcmpgesp 34, 35, 34
vspltisw 0, 1
vmrglw 3, 2, 2
vmrghw 2, 2, 2
xxlor 0, 35, 34
xxlxor 35, 35, 35
xxland 34, 0, 32
vcmpequw 2, 2, 3
xxsel 34, 36, 37, 34
```

PowerPC-le after:

```
xvcmpgesp 34, 35, 34
vmrglw 3, 2, 2
vmrghw 2, 2, 2
xxlor 0, 35, 34
xxsel 34, 37, 36, 0
```

Differential Revision: https://reviews.llvm.org/D52747

llvm-svn: 344181

| | | |
|---|---|---|
| .. | ||
| AArch64 | ||
| AMDGPU | ||
| ARM | ||
| Hexagon | ||
| PowerPC | ||
| SystemZ | ||
| X86 | ||
| XCore | ||
| 12-12-11-if-conv.ll | ||
| 2012-10-20-infloop.ll | ||
| 2012-10-22-isconsec.ll | ||
| 2016-07-27-loop-vec.ll | ||
| align.ll | ||
| bsd_regex.ll | ||
| bzip_reverse_loops.ll | ||
| calloc.ll | ||
| cast-induction.ll | ||
| conditional-assignment.ll | ||
| consec_no_gep.ll | ||
| consecutive-ptr-uniforms.ll | ||
| control-flow.ll | ||
| cpp-new-array.ll | ||
| dbg.value.ll | ||
| dead_instructions.ll | ||
| debugloc.ll | ||
| diag-missing-instr-debug-loc.ll | ||
| diag-with-hotness-info-2.ll | ||
| diag-with-hotness-info.ll | ||
| discriminator.ll | ||
| ee-crash.ll | ||
| exact.ll | ||
| explicit_outer_detection.ll | ||
| explicit_outer_nonuniform_inner.ll | ||
| explicit_outer_uniform_diverg_branch.ll | ||
| fcmp-vectorize.ll | ||
| first-order-recurrence.ll | ||
| flags.ll | ||
| float-induction.ll | ||
| float-reduction.ll | ||
| funcall.ll | ||
| gcc-examples.ll | ||
| gep_with_bitcast.ll | ||
| global_alias.ll | ||
| hints-trans.ll | ||
| hoist-loads.ll | ||
| i8-induction.ll | ||
| icmp-uniforms.ll | ||
| if-conv-crash.ll | ||
| if-conversion-edgemasks.ll | ||
| if-conversion-nest.ll | ||
| if-conversion-reduction.ll | ||
| if-conversion.ll | ||
| if-pred-non-void.ll | ||
| if-pred-not-when-safe.ll | ||
| if-pred-stores.ll | ||
| if-reduction.ll | ||
| incorrect-dom-info.ll | ||
| increment.ll | ||
| induction-step.ll | ||
| induction.ll | ||
| induction_plus.ll | ||
| infiniteloop.ll | ||
| int_sideeffect.ll | ||
| interleaved-accesses-1.ll | ||
| interleaved-accesses-2.ll | ||
| interleaved-accesses-3.ll | ||
| interleaved-accesses-alias.ll | ||
| interleaved-accesses-pred-stores.ll | ||
| interleaved-accesses.ll | ||
| interleaved-acess-with-remarks.ll | ||
| intrinsic.ll | ||
| invariant-store-vectorization.ll | ||
| iv_outside_user.ll | ||
| lcssa-crash.ll | ||
| legal_preheader_check.ll | ||
| lifetime.ll | ||
| loop-form.ll | ||
| loop-scalars.ll | ||
| loop-vect-memdep.ll | ||
| memdep.ll | ||
| metadata-unroll.ll | ||
| metadata-width.ll | ||
| metadata.ll | ||
| miniters.ll | ||
| minmax_reduction.ll | ||
| multi-use-reduction-bug.ll | ||
| multiple-address-spaces.ll | ||
| multiple-strides-vectorization.ll | ||
| no_array_bounds.ll | ||
| no_idiv_reduction.ll | ||
| no_int_induction.ll | ||
| no_outside_user.ll | ||
| no_switch.ll | ||
| noalias-md-licm.ll | ||
| noalias-md.ll | ||
| nofloat.ll | ||
| non-const-n.ll | ||
| nontemporal.ll | ||
| nsw-crash.ll | ||
| opt.ll | ||
| optsize.ll | ||
| outer_loop_test1.ll | ||
| outer_loop_test2.ll | ||
| partial-lcssa.ll | ||
| phi-cost.ll | ||
| phi-hang.ll | ||
| pr25281.ll | ||
| pr28541.ll | ||
| pr30654-phiscev-sext-trunc.ll | ||
| pr31098.ll | ||
| pr31190.ll | ||
| pr32859.ll | ||
| pr33706.ll | ||
| pr34681.ll | ||
| pr35743.ll | ||
| pr35773.ll | ||
| pr36311.ll | ||
| pr36983.ll | ||
| pr37248.ll | ||
| pr37515.ll | ||
| pr38800.ll | ||
| pr39099.ll | ||
| preserve-dbg-loc-and-loop-metadata.ll | ||
| ptr-induction.ll | ||
| ptr_loops.ll | ||
| read-only.ll | ||
| reduction-small-size.ll | ||
| reduction.ll | ||
| reverse_induction.ll | ||
| reverse_iter.ll | ||
| runtime-check-address-space.ll | ||
| runtime-check-readonly-address-space.ll | ||
| runtime-check-readonly.ll | ||
| runtime-check.ll | ||
| runtime-limit.ll | ||
| safegep.ll | ||
| same-base-access.ll | ||
| scalar-select.ll | ||
| scalar_after_vectorization.ll | ||
| scev-exitlim-crash.ll | ||
| simple-unroll.ll | ||
| skip-iterations.ll | ||
| small-loop.ll | ||
| start-non-zero.ll | ||
| store-shuffle-bug.ll | ||
| struct_access.ll | ||
| tbaa-nodep.ll | ||
| tripcount.ll | ||
| undef-inst-bug.ll | ||
| unroll-novec-memcheck-metadata.ll | ||
| unroll.ll | ||
| unroll_novec.ll | ||
| unsafe-dep-remark.ll | ||
| unsized-pointee-crash.ll | ||
| value-ptr-bug.ll | ||
| vect-phiscev-sext-trunc.ll | ||
| vect.omp.persistence.ll | ||
| vect.stats.ll | ||
| vector-geps.ll | ||
| vectorize-once.ll | ||
| version-mem-access.ll | ||
| vplan_hcfg_stress_test.ll | ||
| write-only.ll | ||
| zero-sized-pointee-crash.ll | ||