; llvm-project/llvm/test/Transforms/SLPVectorizer/X86/horizontal-minmax.ll
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt < %s -mtriple=x86_64-unknown-linux -slp-vectorizer -S | FileCheck %s --check-prefixes=CHECK,DEFAULT,SSE
; RUN: opt < %s -mtriple=x86_64-unknown-linux -mcpu=corei7-avx -slp-vectorizer -S | FileCheck %s --check-prefixes=CHECK,DEFAULT,AVX,AVX1
; RUN: opt < %s -mtriple=x86_64-unknown-linux -mcpu=core-avx2 -slp-vectorizer -S | FileCheck %s --check-prefixes=CHECK,DEFAULT,AVX,AVX2
; RUN: opt < %s -mtriple=x86_64-unknown-linux -mcpu=skx -slp-vectorizer -S -slp-threshold=-100 | FileCheck %s --check-prefixes=CHECK,THRESH

@arr = local_unnamed_addr global [32 x i32] zeroinitializer, align 16
@arr1 = local_unnamed_addr global [32 x float] zeroinitializer, align 16
@arrp = local_unnamed_addr global [32 x i32*] zeroinitializer, align 16
@var = global i32 zeroinitializer, align 8
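
; Each test below builds a scalar max-reduction chain of the form
; (names illustrative):
;   %cmp = icmp sgt i32 %acc, %elt
;   %max = select i1 %cmp, i32 %acc, i32 %elt
; over consecutive elements of a global array. The SLP vectorizer is
; expected to fold the whole chain into a single wide vector load feeding
; an @llvm.experimental.vector.reduce.smax call (or the fmax variant for
; the float tests).

; 8-wide signed-integer max reduction over @arr; vectorizes to a single
; @llvm.experimental.vector.reduce.smax.v8i32 on all tested targets.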
define i32 @maxi8(i32) {
; CHECK-LABEL: @maxi8(
; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i32>, <8 x i32>* bitcast ([32 x i32]* @arr to <8 x i32>*), align 16
; CHECK-NEXT: [[TMP3:%.*]] = call i32 @llvm.experimental.vector.reduce.smax.v8i32(<8 x i32> [[TMP2]])
; CHECK-NEXT: ret i32 [[TMP3]]
;
%2 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 0), align 16
%3 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 1), align 4
%4 = icmp sgt i32 %2, %3
%5 = select i1 %4, i32 %2, i32 %3
%6 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 2), align 8
%7 = icmp sgt i32 %5, %6
%8 = select i1 %7, i32 %5, i32 %6
%9 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 3), align 4
%10 = icmp sgt i32 %8, %9
%11 = select i1 %10, i32 %8, i32 %9
%12 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 4), align 16
%13 = icmp sgt i32 %11, %12
%14 = select i1 %13, i32 %11, i32 %12
%15 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 5), align 4
%16 = icmp sgt i32 %14, %15
%17 = select i1 %16, i32 %14, i32 %15
%18 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 6), align 8
%19 = icmp sgt i32 %17, %18
%20 = select i1 %19, i32 %17, i32 %18
%21 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 7), align 4
%22 = icmp sgt i32 %20, %21
%23 = select i1 %22, i32 %20, i32 %21
ret i32 %23
}
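
; 16-wide variant of the same reduction; expected to become a single
; @llvm.experimental.vector.reduce.smax.v16i32 over a <16 x i32> load.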
define i32 @maxi16(i32) {
; CHECK-LABEL: @maxi16(
; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, <16 x i32>* bitcast ([32 x i32]* @arr to <16 x i32>*), align 16
; CHECK-NEXT: [[TMP3:%.*]] = call i32 @llvm.experimental.vector.reduce.smax.v16i32(<16 x i32> [[TMP2]])
; CHECK-NEXT: ret i32 [[TMP3]]
;
%2 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 0), align 16
%3 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 1), align 4
%4 = icmp sgt i32 %2, %3
%5 = select i1 %4, i32 %2, i32 %3
%6 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 2), align 8
%7 = icmp sgt i32 %5, %6
%8 = select i1 %7, i32 %5, i32 %6
%9 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 3), align 4
%10 = icmp sgt i32 %8, %9
%11 = select i1 %10, i32 %8, i32 %9
%12 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 4), align 16
%13 = icmp sgt i32 %11, %12
%14 = select i1 %13, i32 %11, i32 %12
%15 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 5), align 4
%16 = icmp sgt i32 %14, %15
%17 = select i1 %16, i32 %14, i32 %15
%18 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 6), align 8
%19 = icmp sgt i32 %17, %18
%20 = select i1 %19, i32 %17, i32 %18
%21 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 7), align 4
%22 = icmp sgt i32 %20, %21
%23 = select i1 %22, i32 %20, i32 %21
%24 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 8), align 16
%25 = icmp sgt i32 %23, %24
%26 = select i1 %25, i32 %23, i32 %24
%27 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 9), align 4
%28 = icmp sgt i32 %26, %27
%29 = select i1 %28, i32 %26, i32 %27
%30 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 10), align 8
%31 = icmp sgt i32 %29, %30
%32 = select i1 %31, i32 %29, i32 %30
%33 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 11), align 4
%34 = icmp sgt i32 %32, %33
%35 = select i1 %34, i32 %32, i32 %33
%36 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 12), align 16
%37 = icmp sgt i32 %35, %36
%38 = select i1 %37, i32 %35, i32 %36
%39 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 13), align 4
%40 = icmp sgt i32 %38, %39
%41 = select i1 %40, i32 %38, i32 %39
%42 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 14), align 8
%43 = icmp sgt i32 %41, %42
%44 = select i1 %43, i32 %41, i32 %42
%45 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 15), align 4
%46 = icmp sgt i32 %44, %45
%47 = select i1 %46, i32 %44, i32 %45
ret i32 %47
}
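
; 32-wide variant; a full <32 x i32> load reduced by
; @llvm.experimental.vector.reduce.smax.v32i32.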
define i32 @maxi32(i32) {
; CHECK-LABEL: @maxi32(
; CHECK-NEXT: [[TMP2:%.*]] = load <32 x i32>, <32 x i32>* bitcast ([32 x i32]* @arr to <32 x i32>*), align 16
; CHECK-NEXT: [[TMP3:%.*]] = call i32 @llvm.experimental.vector.reduce.smax.v32i32(<32 x i32> [[TMP2]])
; CHECK-NEXT: ret i32 [[TMP3]]
;
%2 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 0), align 16
%3 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 1), align 4
%4 = icmp sgt i32 %2, %3
%5 = select i1 %4, i32 %2, i32 %3
%6 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 2), align 8
%7 = icmp sgt i32 %5, %6
%8 = select i1 %7, i32 %5, i32 %6
%9 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 3), align 4
%10 = icmp sgt i32 %8, %9
%11 = select i1 %10, i32 %8, i32 %9
%12 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 4), align 16
%13 = icmp sgt i32 %11, %12
%14 = select i1 %13, i32 %11, i32 %12
%15 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 5), align 4
%16 = icmp sgt i32 %14, %15
%17 = select i1 %16, i32 %14, i32 %15
%18 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 6), align 8
%19 = icmp sgt i32 %17, %18
%20 = select i1 %19, i32 %17, i32 %18
%21 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 7), align 4
%22 = icmp sgt i32 %20, %21
%23 = select i1 %22, i32 %20, i32 %21
%24 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 8), align 16
%25 = icmp sgt i32 %23, %24
%26 = select i1 %25, i32 %23, i32 %24
%27 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 9), align 4
%28 = icmp sgt i32 %26, %27
%29 = select i1 %28, i32 %26, i32 %27
%30 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 10), align 8
%31 = icmp sgt i32 %29, %30
%32 = select i1 %31, i32 %29, i32 %30
%33 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 11), align 4
%34 = icmp sgt i32 %32, %33
%35 = select i1 %34, i32 %32, i32 %33
%36 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 12), align 16
%37 = icmp sgt i32 %35, %36
%38 = select i1 %37, i32 %35, i32 %36
%39 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 13), align 4
%40 = icmp sgt i32 %38, %39
%41 = select i1 %40, i32 %38, i32 %39
%42 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 14), align 8
%43 = icmp sgt i32 %41, %42
%44 = select i1 %43, i32 %41, i32 %42
%45 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 15), align 4
%46 = icmp sgt i32 %44, %45
%47 = select i1 %46, i32 %44, i32 %45
%48 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 16), align 16
%49 = icmp sgt i32 %47, %48
%50 = select i1 %49, i32 %47, i32 %48
%51 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 17), align 4
%52 = icmp sgt i32 %50, %51
%53 = select i1 %52, i32 %50, i32 %51
%54 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 18), align 8
%55 = icmp sgt i32 %53, %54
%56 = select i1 %55, i32 %53, i32 %54
%57 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 19), align 4
%58 = icmp sgt i32 %56, %57
%59 = select i1 %58, i32 %56, i32 %57
%60 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 20), align 16
%61 = icmp sgt i32 %59, %60
%62 = select i1 %61, i32 %59, i32 %60
%63 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 21), align 4
%64 = icmp sgt i32 %62, %63
%65 = select i1 %64, i32 %62, i32 %63
%66 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 22), align 8
%67 = icmp sgt i32 %65, %66
%68 = select i1 %67, i32 %65, i32 %66
%69 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 23), align 4
%70 = icmp sgt i32 %68, %69
%71 = select i1 %70, i32 %68, i32 %69
%72 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 24), align 16
%73 = icmp sgt i32 %71, %72
%74 = select i1 %73, i32 %71, i32 %72
%75 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 25), align 4
%76 = icmp sgt i32 %74, %75
%77 = select i1 %76, i32 %74, i32 %75
%78 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 26), align 8
%79 = icmp sgt i32 %77, %78
%80 = select i1 %79, i32 %77, i32 %78
%81 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 27), align 4
%82 = icmp sgt i32 %80, %81
%83 = select i1 %82, i32 %80, i32 %81
%84 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 28), align 16
%85 = icmp sgt i32 %83, %84
%86 = select i1 %85, i32 %83, i32 %84
%87 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 29), align 4
%88 = icmp sgt i32 %86, %87
%89 = select i1 %88, i32 %86, i32 %87
%90 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 30), align 8
%91 = icmp sgt i32 %89, %90
%92 = select i1 %91, i32 %89, i32 %90
%93 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 31), align 4
%94 = icmp sgt i32 %92, %93
%95 = select i1 %94, i32 %92, i32 %93
ret i32 %95
}
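
; Floating-point max reduction. The fcmp/select chain carries the 'fast'
; flag, which permits reassociating it into a single
; @llvm.experimental.vector.reduce.fmax.v8f32 call.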
define float @maxf8(float) {
; CHECK-LABEL: @maxf8(
; CHECK-NEXT: [[TMP2:%.*]] = load <8 x float>, <8 x float>* bitcast ([32 x float]* @arr1 to <8 x float>*), align 16
; CHECK-NEXT: [[TMP3:%.*]] = call fast float @llvm.experimental.vector.reduce.fmax.v8f32(<8 x float> [[TMP2]])
; CHECK-NEXT: ret float [[TMP3]]
;
%2 = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 0), align 16
%3 = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 1), align 4
%4 = fcmp fast ogt float %2, %3
%5 = select i1 %4, float %2, float %3
%6 = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 2), align 8
%7 = fcmp fast ogt float %5, %6
%8 = select i1 %7, float %5, float %6
%9 = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 3), align 4
%10 = fcmp fast ogt float %8, %9
%11 = select i1 %10, float %8, float %9
%12 = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 4), align 16
%13 = fcmp fast ogt float %11, %12
%14 = select i1 %13, float %11, float %12
%15 = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 5), align 4
%16 = fcmp fast ogt float %14, %15
%17 = select i1 %16, float %14, float %15
%18 = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 6), align 8
%19 = fcmp fast ogt float %17, %18
%20 = select i1 %19, float %17, float %18
%21 = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 7), align 4
%22 = fcmp fast ogt float %20, %21
%23 = select i1 %22, float %20, float %21
ret float %23
}
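
; 16-wide fast-math float max reduction; expected to become
; @llvm.experimental.vector.reduce.fmax.v16f32.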
define float @maxf16(float) {
; CHECK-LABEL: @maxf16(
; CHECK-NEXT: [[TMP2:%.*]] = load <16 x float>, <16 x float>* bitcast ([32 x float]* @arr1 to <16 x float>*), align 16
; CHECK-NEXT: [[TMP3:%.*]] = call fast float @llvm.experimental.vector.reduce.fmax.v16f32(<16 x float> [[TMP2]])
; CHECK-NEXT: ret float [[TMP3]]
;
%2 = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 0), align 16
%3 = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 1), align 4
%4 = fcmp fast ogt float %2, %3
%5 = select i1 %4, float %2, float %3
%6 = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 2), align 8
%7 = fcmp fast ogt float %5, %6
%8 = select i1 %7, float %5, float %6
%9 = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 3), align 4
%10 = fcmp fast ogt float %8, %9
%11 = select i1 %10, float %8, float %9
%12 = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 4), align 16
%13 = fcmp fast ogt float %11, %12
%14 = select i1 %13, float %11, float %12
%15 = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 5), align 4
%16 = fcmp fast ogt float %14, %15
%17 = select i1 %16, float %14, float %15
%18 = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 6), align 8
%19 = fcmp fast ogt float %17, %18
%20 = select i1 %19, float %17, float %18
%21 = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 7), align 4
%22 = fcmp fast ogt float %20, %21
%23 = select i1 %22, float %20, float %21
%24 = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 8), align 16
%25 = fcmp fast ogt float %23, %24
%26 = select i1 %25, float %23, float %24
%27 = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 9), align 4
%28 = fcmp fast ogt float %26, %27
%29 = select i1 %28, float %26, float %27
%30 = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 10), align 8
%31 = fcmp fast ogt float %29, %30
%32 = select i1 %31, float %29, float %30
%33 = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 11), align 4
%34 = fcmp fast ogt float %32, %33
%35 = select i1 %34, float %32, float %33
%36 = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 12), align 16
%37 = fcmp fast ogt float %35, %36
%38 = select i1 %37, float %35, float %36
%39 = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 13), align 4
%40 = fcmp fast ogt float %38, %39
%41 = select i1 %40, float %38, float %39
%42 = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 14), align 8
%43 = fcmp fast ogt float %41, %42
%44 = select i1 %43, float %41, float %42
%45 = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 15), align 4
%46 = fcmp fast ogt float %44, %45
%47 = select i1 %46, float %44, float %45
ret float %47
}
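
; 32-wide fast-math float max reduction; expected to become
; @llvm.experimental.vector.reduce.fmax.v32f32.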
define float @maxf32(float) {
; CHECK-LABEL: @maxf32(
; CHECK-NEXT: [[TMP2:%.*]] = load <32 x float>, <32 x float>* bitcast ([32 x float]* @arr1 to <32 x float>*), align 16
; CHECK-NEXT: [[TMP3:%.*]] = call fast float @llvm.experimental.vector.reduce.fmax.v32f32(<32 x float> [[TMP2]])
; CHECK-NEXT: ret float [[TMP3]]
;
%2 = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 0), align 16
%3 = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 1), align 4
%4 = fcmp fast ogt float %2, %3
%5 = select i1 %4, float %2, float %3
%6 = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 2), align 8
%7 = fcmp fast ogt float %5, %6
%8 = select i1 %7, float %5, float %6
%9 = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 3), align 4
%10 = fcmp fast ogt float %8, %9
%11 = select i1 %10, float %8, float %9
%12 = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 4), align 16
%13 = fcmp fast ogt float %11, %12
%14 = select i1 %13, float %11, float %12
%15 = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 5), align 4
%16 = fcmp fast ogt float %14, %15
%17 = select i1 %16, float %14, float %15
%18 = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 6), align 8
%19 = fcmp fast ogt float %17, %18
%20 = select i1 %19, float %17, float %18
%21 = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 7), align 4
%22 = fcmp fast ogt float %20, %21
%23 = select i1 %22, float %20, float %21
%24 = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 8), align 16
%25 = fcmp fast ogt float %23, %24
%26 = select i1 %25, float %23, float %24
%27 = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 9), align 4
%28 = fcmp fast ogt float %26, %27
%29 = select i1 %28, float %26, float %27
%30 = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 10), align 8
%31 = fcmp fast ogt float %29, %30
%32 = select i1 %31, float %29, float %30
%33 = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 11), align 4
%34 = fcmp fast ogt float %32, %33
%35 = select i1 %34, float %32, float %33
%36 = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 12), align 16
%37 = fcmp fast ogt float %35, %36
%38 = select i1 %37, float %35, float %36
%39 = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 13), align 4
%40 = fcmp fast ogt float %38, %39
%41 = select i1 %40, float %38, float %39
%42 = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 14), align 8
%43 = fcmp fast ogt float %41, %42
%44 = select i1 %43, float %41, float %42
%45 = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 15), align 4
%46 = fcmp fast ogt float %44, %45
%47 = select i1 %46, float %44, float %45
%48 = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 16), align 16
%49 = fcmp fast ogt float %47, %48
%50 = select i1 %49, float %47, float %48
%51 = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 17), align 4
%52 = fcmp fast ogt float %50, %51
%53 = select i1 %52, float %50, float %51
%54 = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 18), align 8
%55 = fcmp fast ogt float %53, %54
%56 = select i1 %55, float %53, float %54
%57 = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 19), align 4
%58 = fcmp fast ogt float %56, %57
%59 = select i1 %58, float %56, float %57
%60 = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 20), align 16
%61 = fcmp fast ogt float %59, %60
%62 = select i1 %61, float %59, float %60
%63 = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 21), align 4
%64 = fcmp fast ogt float %62, %63
%65 = select i1 %64, float %62, float %63
%66 = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 22), align 8
%67 = fcmp fast ogt float %65, %66
%68 = select i1 %67, float %65, float %66
%69 = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 23), align 4
%70 = fcmp fast ogt float %68, %69
%71 = select i1 %70, float %68, float %69
%72 = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 24), align 16
%73 = fcmp fast ogt float %71, %72
%74 = select i1 %73, float %71, float %72
%75 = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 25), align 4
%76 = fcmp fast ogt float %74, %75
%77 = select i1 %76, float %74, float %75
%78 = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 26), align 8
%79 = fcmp fast ogt float %77, %78
%80 = select i1 %79, float %77, float %78
%81 = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 27), align 4
%82 = fcmp fast ogt float %80, %81
%83 = select i1 %82, float %80, float %81
%84 = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 28), align 16
%85 = fcmp fast ogt float %83, %84
%86 = select i1 %85, float %83, float %84
%87 = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 29), align 4
%88 = fcmp fast ogt float %86, %87
%89 = select i1 %88, float %86, float %87
%90 = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 30), align 8
%91 = fcmp fast ogt float %89, %90
%92 = select i1 %91, float %89, float %90
%93 = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 31), align 4
%94 = fcmp fast ogt float %92, %93
%95 = select i1 %94, float %92, float %93
ret float %95
}
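
; Same 8-wide smax chain, but the first icmp has a second use (it also
; feeds the select stored to @var), so the full reduction cannot be
; formed: SSE keeps the whole chain scalar, while AVX and THRESH
; vectorize only the middle four lanes and combine the rest scalar.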
define i32 @maxi8_mutiple_uses(i32) {
; SSE-LABEL: @maxi8_mutiple_uses(
; SSE-NEXT: [[TMP2:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 0), align 16
; SSE-NEXT: [[TMP3:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 1), align 4
; SSE-NEXT: [[TMP4:%.*]] = icmp sgt i32 [[TMP2]], [[TMP3]]
; SSE-NEXT: [[TMP5:%.*]] = select i1 [[TMP4]], i32 [[TMP2]], i32 [[TMP3]]
; SSE-NEXT: [[TMP6:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 2), align 8
; SSE-NEXT: [[TMP7:%.*]] = icmp sgt i32 [[TMP5]], [[TMP6]]
; SSE-NEXT: [[TMP8:%.*]] = select i1 [[TMP7]], i32 [[TMP5]], i32 [[TMP6]]
; SSE-NEXT: [[TMP9:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 3), align 4
; SSE-NEXT: [[TMP10:%.*]] = icmp sgt i32 [[TMP8]], [[TMP9]]
; SSE-NEXT: [[TMP11:%.*]] = select i1 [[TMP10]], i32 [[TMP8]], i32 [[TMP9]]
; SSE-NEXT: [[TMP12:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 4), align 16
; SSE-NEXT: [[TMP13:%.*]] = icmp sgt i32 [[TMP11]], [[TMP12]]
; SSE-NEXT: [[TMP14:%.*]] = select i1 [[TMP13]], i32 [[TMP11]], i32 [[TMP12]]
; SSE-NEXT: [[TMP15:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 5), align 4
; SSE-NEXT: [[TMP16:%.*]] = icmp sgt i32 [[TMP14]], [[TMP15]]
; SSE-NEXT: [[TMP17:%.*]] = select i1 [[TMP16]], i32 [[TMP14]], i32 [[TMP15]]
; SSE-NEXT: [[TMP18:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 6), align 8
; SSE-NEXT: [[TMP19:%.*]] = icmp sgt i32 [[TMP17]], [[TMP18]]
; SSE-NEXT: [[TMP20:%.*]] = select i1 [[TMP19]], i32 [[TMP17]], i32 [[TMP18]]
; SSE-NEXT: [[TMP21:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 7), align 4
; SSE-NEXT: [[TMP22:%.*]] = icmp sgt i32 [[TMP20]], [[TMP21]]
; SSE-NEXT: [[TMP23:%.*]] = select i1 [[TMP22]], i32 [[TMP20]], i32 [[TMP21]]
; SSE-NEXT: [[TMP24:%.*]] = select i1 [[TMP4]], i32 3, i32 4
; SSE-NEXT: store i32 [[TMP24]], i32* @var, align 8
; SSE-NEXT: ret i32 [[TMP23]]
;
; AVX-LABEL: @maxi8_mutiple_uses(
; AVX-NEXT: [[TMP2:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 0), align 16
; AVX-NEXT: [[TMP3:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 1), align 4
; AVX-NEXT: [[TMP4:%.*]] = icmp sgt i32 [[TMP2]], [[TMP3]]
; AVX-NEXT: [[TMP5:%.*]] = select i1 [[TMP4]], i32 [[TMP2]], i32 [[TMP3]]
; AVX-NEXT: [[TMP6:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 2) to <4 x i32>*), align 8
; AVX-NEXT: [[TMP7:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 6), align 8
; AVX-NEXT: [[TMP8:%.*]] = call i32 @llvm.experimental.vector.reduce.smax.v4i32(<4 x i32> [[TMP6]])
; AVX-NEXT: [[TMP9:%.*]] = icmp sgt i32 [[TMP8]], [[TMP7]]
; AVX-NEXT: [[TMP10:%.*]] = select i1 [[TMP9]], i32 [[TMP8]], i32 [[TMP7]]
; AVX-NEXT: [[TMP11:%.*]] = icmp sgt i32 [[TMP10]], [[TMP5]]
; AVX-NEXT: [[OP_EXTRA:%.*]] = select i1 [[TMP11]], i32 [[TMP10]], i32 [[TMP5]]
; AVX-NEXT: [[TMP12:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 7), align 4
; AVX-NEXT: [[TMP13:%.*]] = icmp sgt i32 [[OP_EXTRA]], [[TMP12]]
; AVX-NEXT: [[TMP14:%.*]] = select i1 [[TMP13]], i32 [[OP_EXTRA]], i32 [[TMP12]]
; AVX-NEXT: [[TMP15:%.*]] = select i1 [[TMP4]], i32 3, i32 4
; AVX-NEXT: store i32 [[TMP15]], i32* @var, align 8
; AVX-NEXT: ret i32 [[TMP14]]
;
; THRESH-LABEL: @maxi8_mutiple_uses(
; THRESH-NEXT: [[TMP2:%.*]] = load <2 x i32>, <2 x i32>* bitcast ([32 x i32]* @arr to <2 x i32>*), align 16
; THRESH-NEXT: [[TMP3:%.*]] = extractelement <2 x i32> [[TMP2]], i32 0
; THRESH-NEXT: [[TMP4:%.*]] = extractelement <2 x i32> [[TMP2]], i32 1
; THRESH-NEXT: [[TMP5:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 2) to <4 x i32>*), align 8
; THRESH-NEXT: [[TMP6:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 6), align 8
; THRESH-NEXT: [[TMP7:%.*]] = call i32 @llvm.experimental.vector.reduce.smax.v4i32(<4 x i32> [[TMP5]])
; THRESH-NEXT: [[TMP8:%.*]] = insertelement <2 x i32> undef, i32 [[TMP7]], i32 0
; THRESH-NEXT: [[TMP9:%.*]] = insertelement <2 x i32> [[TMP8]], i32 [[TMP3]], i32 1
; THRESH-NEXT: [[TMP10:%.*]] = insertelement <2 x i32> undef, i32 [[TMP6]], i32 0
; THRESH-NEXT: [[TMP11:%.*]] = insertelement <2 x i32> [[TMP10]], i32 [[TMP4]], i32 1
; THRESH-NEXT: [[TMP12:%.*]] = icmp sgt <2 x i32> [[TMP9]], [[TMP11]]
; THRESH-NEXT: [[TMP13:%.*]] = select <2 x i1> [[TMP12]], <2 x i32> [[TMP9]], <2 x i32> [[TMP11]]
; THRESH-NEXT: [[TMP14:%.*]] = extractelement <2 x i32> [[TMP13]], i32 1
; THRESH-NEXT: [[TMP15:%.*]] = extractelement <2 x i32> [[TMP13]], i32 0
; THRESH-NEXT: [[TMP16:%.*]] = icmp sgt i32 [[TMP15]], [[TMP14]]
; THRESH-NEXT: [[OP_EXTRA:%.*]] = select i1 [[TMP16]], i32 [[TMP15]], i32 [[TMP14]]
; THRESH-NEXT: [[TMP17:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 7), align 4
; THRESH-NEXT: [[TMP18:%.*]] = icmp sgt i32 [[OP_EXTRA]], [[TMP17]]
; THRESH-NEXT: [[TMP19:%.*]] = select i1 [[TMP18]], i32 [[OP_EXTRA]], i32 [[TMP17]]
; THRESH-NEXT: [[TMP20:%.*]] = extractelement <2 x i1> [[TMP12]], i32 1
; THRESH-NEXT: [[TMP21:%.*]] = select i1 [[TMP20]], i32 3, i32 4
; THRESH-NEXT: store i32 [[TMP21]], i32* @var, align 8
; THRESH-NEXT: ret i32 [[TMP19]]
;
%2 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 0), align 16
%3 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 1), align 4
%4 = icmp sgt i32 %2, %3
%5 = select i1 %4, i32 %2, i32 %3
%6 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 2), align 8
%7 = icmp sgt i32 %5, %6
%8 = select i1 %7, i32 %5, i32 %6
%9 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 3), align 4
%10 = icmp sgt i32 %8, %9
%11 = select i1 %10, i32 %8, i32 %9
%12 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 4), align 16
%13 = icmp sgt i32 %11, %12
%14 = select i1 %13, i32 %11, i32 %12
%15 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 5), align 4
%16 = icmp sgt i32 %14, %15
%17 = select i1 %16, i32 %14, i32 %15
%18 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 6), align 8
%19 = icmp sgt i32 %17, %18
%20 = select i1 %19, i32 %17, i32 %18
%21 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 7), align 4
%22 = icmp sgt i32 %20, %21
%23 = select i1 %22, i32 %20, i32 %21
%24 = select i1 %4, i32 3, i32 4
store i32 %24, i32* @var, align 8
ret i32 %23
}
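
; Here the leading icmp lives in the entry block while the rest of the
; chain sits in %pp, so the reduction root cannot absorb it: SSE stays
; scalar, while AVX and THRESH still vectorize the four middle loads into
; an smax.v4i32 reduction and fold the stragglers in scalar code.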
define i32 @maxi8_wrong_parent(i32) {
; SSE-LABEL: @maxi8_wrong_parent(
; SSE-NEXT: [[TMP2:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 0), align 16
; SSE-NEXT: [[TMP3:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 1), align 4
; SSE-NEXT: [[TMP4:%.*]] = icmp sgt i32 [[TMP2]], [[TMP3]]
; SSE-NEXT: br label [[PP:%.*]]
; SSE: pp:
; SSE-NEXT: [[TMP5:%.*]] = select i1 [[TMP4]], i32 [[TMP2]], i32 [[TMP3]]
; SSE-NEXT: [[TMP6:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 2), align 8
; SSE-NEXT: [[TMP7:%.*]] = icmp sgt i32 [[TMP5]], [[TMP6]]
; SSE-NEXT: [[TMP8:%.*]] = select i1 [[TMP7]], i32 [[TMP5]], i32 [[TMP6]]
; SSE-NEXT: [[TMP9:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 3), align 4
; SSE-NEXT: [[TMP10:%.*]] = icmp sgt i32 [[TMP8]], [[TMP9]]
; SSE-NEXT: [[TMP11:%.*]] = select i1 [[TMP10]], i32 [[TMP8]], i32 [[TMP9]]
; SSE-NEXT: [[TMP12:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 4), align 16
; SSE-NEXT: [[TMP13:%.*]] = icmp sgt i32 [[TMP11]], [[TMP12]]
; SSE-NEXT: [[TMP14:%.*]] = select i1 [[TMP13]], i32 [[TMP11]], i32 [[TMP12]]
; SSE-NEXT: [[TMP15:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 5), align 4
; SSE-NEXT: [[TMP16:%.*]] = icmp sgt i32 [[TMP14]], [[TMP15]]
; SSE-NEXT: [[TMP17:%.*]] = select i1 [[TMP16]], i32 [[TMP14]], i32 [[TMP15]]
; SSE-NEXT: [[TMP18:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 6), align 8
; SSE-NEXT: [[TMP19:%.*]] = icmp sgt i32 [[TMP17]], [[TMP18]]
; SSE-NEXT: [[TMP20:%.*]] = select i1 [[TMP19]], i32 [[TMP17]], i32 [[TMP18]]
; SSE-NEXT: [[TMP21:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 7), align 4
; SSE-NEXT: [[TMP22:%.*]] = icmp sgt i32 [[TMP20]], [[TMP21]]
; SSE-NEXT: [[TMP23:%.*]] = select i1 [[TMP22]], i32 [[TMP20]], i32 [[TMP21]]
; SSE-NEXT: ret i32 [[TMP23]]
;
; AVX-LABEL: @maxi8_wrong_parent(
; AVX-NEXT: [[TMP2:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 0), align 16
; AVX-NEXT: [[TMP3:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 1), align 4
; AVX-NEXT: [[TMP4:%.*]] = icmp sgt i32 [[TMP2]], [[TMP3]]
; AVX-NEXT: br label [[PP:%.*]]
; AVX: pp:
; AVX-NEXT: [[TMP5:%.*]] = select i1 [[TMP4]], i32 [[TMP2]], i32 [[TMP3]]
; AVX-NEXT: [[TMP6:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 2) to <4 x i32>*), align 8
; AVX-NEXT: [[TMP7:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 6), align 8
; AVX-NEXT: [[TMP8:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 7), align 4
; AVX-NEXT: [[TMP9:%.*]] = call i32 @llvm.experimental.vector.reduce.smax.v4i32(<4 x i32> [[TMP6]])
; AVX-NEXT: [[TMP10:%.*]] = icmp sgt i32 [[TMP9]], [[TMP7]]
; AVX-NEXT: [[TMP11:%.*]] = select i1 [[TMP10]], i32 [[TMP9]], i32 [[TMP7]]
; AVX-NEXT: [[TMP12:%.*]] = icmp sgt i32 [[TMP11]], [[TMP8]]
; AVX-NEXT: [[TMP13:%.*]] = select i1 [[TMP12]], i32 [[TMP11]], i32 [[TMP8]]
; AVX-NEXT: [[TMP14:%.*]] = icmp sgt i32 [[TMP13]], [[TMP5]]
; AVX-NEXT: [[OP_EXTRA:%.*]] = select i1 [[TMP14]], i32 [[TMP13]], i32 [[TMP5]]
; AVX-NEXT: ret i32 [[OP_EXTRA]]
;
; THRESH-LABEL: @maxi8_wrong_parent(
; THRESH-NEXT: [[TMP2:%.*]] = load <2 x i32>, <2 x i32>* bitcast ([32 x i32]* @arr to <2 x i32>*), align 16
; THRESH-NEXT: [[TMP3:%.*]] = extractelement <2 x i32> [[TMP2]], i32 0
; THRESH-NEXT: [[TMP4:%.*]] = extractelement <2 x i32> [[TMP2]], i32 1
; THRESH-NEXT: [[TMP5:%.*]] = icmp sgt i32 [[TMP3]], [[TMP4]]
; THRESH-NEXT: br label [[PP:%.*]]
; THRESH: pp:
; THRESH-NEXT: [[TMP6:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 2) to <4 x i32>*), align 8
; THRESH-NEXT: [[TMP7:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 6), align 8
; THRESH-NEXT: [[TMP8:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 7), align 4
; THRESH-NEXT: [[TMP9:%.*]] = call i32 @llvm.experimental.vector.reduce.smax.v4i32(<4 x i32> [[TMP6]])
; THRESH-NEXT: [[TMP10:%.*]] = icmp sgt i32 [[TMP9]], [[TMP7]]
; THRESH-NEXT: [[TMP11:%.*]] = select i1 [[TMP10]], i32 [[TMP9]], i32 [[TMP7]]
; THRESH-NEXT: [[TMP12:%.*]] = icmp sgt i32 [[TMP11]], [[TMP8]]
; THRESH-NEXT: [[TMP13:%.*]] = insertelement <2 x i1> undef, i1 [[TMP12]], i32 0
; THRESH-NEXT: [[TMP14:%.*]] = insertelement <2 x i1> [[TMP13]], i1 [[TMP5]], i32 1
; THRESH-NEXT: [[TMP15:%.*]] = insertelement <2 x i32> undef, i32 [[TMP11]], i32 0
; THRESH-NEXT: [[TMP16:%.*]] = insertelement <2 x i32> [[TMP15]], i32 [[TMP3]], i32 1
; THRESH-NEXT: [[TMP17:%.*]] = insertelement <2 x i32> undef, i32 [[TMP8]], i32 0
; THRESH-NEXT: [[TMP18:%.*]] = insertelement <2 x i32> [[TMP17]], i32 [[TMP4]], i32 1
; THRESH-NEXT: [[TMP19:%.*]] = select <2 x i1> [[TMP14]], <2 x i32> [[TMP16]], <2 x i32> [[TMP18]]
; THRESH-NEXT: [[TMP20:%.*]] = extractelement <2 x i32> [[TMP19]], i32 1
; THRESH-NEXT: [[TMP21:%.*]] = extractelement <2 x i32> [[TMP19]], i32 0
; THRESH-NEXT: [[TMP22:%.*]] = icmp sgt i32 [[TMP21]], [[TMP20]]
; THRESH-NEXT: [[OP_EXTRA:%.*]] = select i1 [[TMP22]], i32 [[TMP21]], i32 [[TMP20]]
; THRESH-NEXT: ret i32 [[OP_EXTRA]]
;
%2 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 0), align 16
%3 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 1), align 4
%4 = icmp sgt i32 %2, %3
br label %pp
pp:
%5 = select i1 %4, i32 %2, i32 %3
%6 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 2), align 8
%7 = icmp sgt i32 %5, %6
%8 = select i1 %7, i32 %5, i32 %6
%9 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 3), align 4
%10 = icmp sgt i32 %8, %9
%11 = select i1 %10, i32 %8, i32 %9
%12 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 4), align 16
%13 = icmp sgt i32 %11, %12
%14 = select i1 %13, i32 %11, i32 %12
%15 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 5), align 4
%16 = icmp sgt i32 %14, %15
%17 = select i1 %16, i32 %14, i32 %15
%18 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 6), align 8
%19 = icmp sgt i32 %17, %18
%20 = select i1 %19, i32 %17, i32 %18
%21 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 7), align 4
%22 = icmp sgt i32 %20, %21
%23 = select i1 %22, i32 %20, i32 %21
ret i32 %23
}

; PR38191 - We don't handle array-of-pointer reductions: icmp ugt on
; i32* operands is not recognized as a umax reduction (there is no
; vector.reduce intrinsic for pointer types), so the chain stays scalar
; by default; under -slp-threshold=-100 only the first pair of loads is
; widened to <2 x i32*>.
define i32* @maxp8(i32) {
; DEFAULT-LABEL: @maxp8(
; DEFAULT-NEXT: [[TMP2:%.*]] = load i32*, i32** getelementptr inbounds ([32 x i32*], [32 x i32*]* @arrp, i64 0, i64 0), align 16
; DEFAULT-NEXT: [[TMP3:%.*]] = load i32*, i32** getelementptr inbounds ([32 x i32*], [32 x i32*]* @arrp, i64 0, i64 1), align 4
; DEFAULT-NEXT: [[TMP4:%.*]] = icmp ugt i32* [[TMP2]], [[TMP3]]
; DEFAULT-NEXT: [[TMP5:%.*]] = select i1 [[TMP4]], i32* [[TMP2]], i32* [[TMP3]]
; DEFAULT-NEXT: [[TMP6:%.*]] = load i32*, i32** getelementptr inbounds ([32 x i32*], [32 x i32*]* @arrp, i64 0, i64 2), align 8
; DEFAULT-NEXT: [[TMP7:%.*]] = icmp ugt i32* [[TMP5]], [[TMP6]]
; DEFAULT-NEXT: [[TMP8:%.*]] = select i1 [[TMP7]], i32* [[TMP5]], i32* [[TMP6]]
; DEFAULT-NEXT: [[TMP9:%.*]] = load i32*, i32** getelementptr inbounds ([32 x i32*], [32 x i32*]* @arrp, i64 0, i64 3), align 4
; DEFAULT-NEXT: [[TMP10:%.*]] = icmp ugt i32* [[TMP8]], [[TMP9]]
; DEFAULT-NEXT: [[TMP11:%.*]] = select i1 [[TMP10]], i32* [[TMP8]], i32* [[TMP9]]
; DEFAULT-NEXT: [[TMP12:%.*]] = load i32*, i32** getelementptr inbounds ([32 x i32*], [32 x i32*]* @arrp, i64 0, i64 4), align 16
; DEFAULT-NEXT: [[TMP13:%.*]] = icmp ugt i32* [[TMP11]], [[TMP12]]
; DEFAULT-NEXT: [[TMP14:%.*]] = select i1 [[TMP13]], i32* [[TMP11]], i32* [[TMP12]]
; DEFAULT-NEXT: [[TMP15:%.*]] = load i32*, i32** getelementptr inbounds ([32 x i32*], [32 x i32*]* @arrp, i64 0, i64 5), align 4
; DEFAULT-NEXT: [[TMP16:%.*]] = icmp ugt i32* [[TMP14]], [[TMP15]]
; DEFAULT-NEXT: [[TMP17:%.*]] = select i1 [[TMP16]], i32* [[TMP14]], i32* [[TMP15]]
; DEFAULT-NEXT: [[TMP18:%.*]] = load i32*, i32** getelementptr inbounds ([32 x i32*], [32 x i32*]* @arrp, i64 0, i64 6), align 8
; DEFAULT-NEXT: [[TMP19:%.*]] = icmp ugt i32* [[TMP17]], [[TMP18]]
; DEFAULT-NEXT: [[TMP20:%.*]] = select i1 [[TMP19]], i32* [[TMP17]], i32* [[TMP18]]
; DEFAULT-NEXT: [[TMP21:%.*]] = load i32*, i32** getelementptr inbounds ([32 x i32*], [32 x i32*]* @arrp, i64 0, i64 7), align 4
; DEFAULT-NEXT: [[TMP22:%.*]] = icmp ugt i32* [[TMP20]], [[TMP21]]
; DEFAULT-NEXT: [[TMP23:%.*]] = select i1 [[TMP22]], i32* [[TMP20]], i32* [[TMP21]]
; DEFAULT-NEXT: ret i32* [[TMP23]]
;
; THRESH-LABEL: @maxp8(
; THRESH-NEXT: [[TMP2:%.*]] = load <2 x i32*>, <2 x i32*>* bitcast ([32 x i32*]* @arrp to <2 x i32*>*), align 16
; THRESH-NEXT: [[TMP3:%.*]] = extractelement <2 x i32*> [[TMP2]], i32 0
; THRESH-NEXT: [[TMP4:%.*]] = extractelement <2 x i32*> [[TMP2]], i32 1
; THRESH-NEXT: [[TMP5:%.*]] = icmp ugt i32* [[TMP3]], [[TMP4]]
; THRESH-NEXT: [[TMP6:%.*]] = select i1 [[TMP5]], i32* [[TMP3]], i32* [[TMP4]]
; THRESH-NEXT: [[TMP7:%.*]] = load i32*, i32** getelementptr inbounds ([32 x i32*], [32 x i32*]* @arrp, i64 0, i64 2), align 8
; THRESH-NEXT: [[TMP8:%.*]] = icmp ugt i32* [[TMP6]], [[TMP7]]
; THRESH-NEXT: [[TMP9:%.*]] = select i1 [[TMP8]], i32* [[TMP6]], i32* [[TMP7]]
; THRESH-NEXT: [[TMP10:%.*]] = load i32*, i32** getelementptr inbounds ([32 x i32*], [32 x i32*]* @arrp, i64 0, i64 3), align 4
; THRESH-NEXT: [[TMP11:%.*]] = icmp ugt i32* [[TMP9]], [[TMP10]]
; THRESH-NEXT: [[TMP12:%.*]] = select i1 [[TMP11]], i32* [[TMP9]], i32* [[TMP10]]
; THRESH-NEXT: [[TMP13:%.*]] = load i32*, i32** getelementptr inbounds ([32 x i32*], [32 x i32*]* @arrp, i64 0, i64 4), align 16
; THRESH-NEXT: [[TMP14:%.*]] = icmp ugt i32* [[TMP12]], [[TMP13]]
; THRESH-NEXT: [[TMP15:%.*]] = select i1 [[TMP14]], i32* [[TMP12]], i32* [[TMP13]]
; THRESH-NEXT: [[TMP16:%.*]] = load i32*, i32** getelementptr inbounds ([32 x i32*], [32 x i32*]* @arrp, i64 0, i64 5), align 4
; THRESH-NEXT: [[TMP17:%.*]] = icmp ugt i32* [[TMP15]], [[TMP16]]
; THRESH-NEXT: [[TMP18:%.*]] = select i1 [[TMP17]], i32* [[TMP15]], i32* [[TMP16]]
; THRESH-NEXT: [[TMP19:%.*]] = load i32*, i32** getelementptr inbounds ([32 x i32*], [32 x i32*]* @arrp, i64 0, i64 6), align 8
; THRESH-NEXT: [[TMP20:%.*]] = icmp ugt i32* [[TMP18]], [[TMP19]]
; THRESH-NEXT: [[TMP21:%.*]] = select i1 [[TMP20]], i32* [[TMP18]], i32* [[TMP19]]
; THRESH-NEXT: [[TMP22:%.*]] = load i32*, i32** getelementptr inbounds ([32 x i32*], [32 x i32*]* @arrp, i64 0, i64 7), align 4
; THRESH-NEXT: [[TMP23:%.*]] = icmp ugt i32* [[TMP21]], [[TMP22]]
; THRESH-NEXT: [[TMP24:%.*]] = select i1 [[TMP23]], i32* [[TMP21]], i32* [[TMP22]]
; THRESH-NEXT: ret i32* [[TMP24]]
;
%2 = load i32*, i32** getelementptr inbounds ([32 x i32*], [32 x i32*]* @arrp, i64 0, i64 0), align 16
%3 = load i32*, i32** getelementptr inbounds ([32 x i32*], [32 x i32*]* @arrp, i64 0, i64 1), align 4
%4 = icmp ugt i32* %2, %3
%5 = select i1 %4, i32* %2, i32* %3
%6 = load i32*, i32** getelementptr inbounds ([32 x i32*], [32 x i32*]* @arrp, i64 0, i64 2), align 8
%7 = icmp ugt i32* %5, %6
%8 = select i1 %7, i32* %5, i32* %6
%9 = load i32*, i32** getelementptr inbounds ([32 x i32*], [32 x i32*]* @arrp, i64 0, i64 3), align 4
%10 = icmp ugt i32* %8, %9
%11 = select i1 %10, i32* %8, i32* %9
%12 = load i32*, i32** getelementptr inbounds ([32 x i32*], [32 x i32*]* @arrp, i64 0, i64 4), align 16
%13 = icmp ugt i32* %11, %12
%14 = select i1 %13, i32* %11, i32* %12
%15 = load i32*, i32** getelementptr inbounds ([32 x i32*], [32 x i32*]* @arrp, i64 0, i64 5), align 4
%16 = icmp ugt i32* %14, %15
%17 = select i1 %16, i32* %14, i32* %15
%18 = load i32*, i32** getelementptr inbounds ([32 x i32*], [32 x i32*]* @arrp, i64 0, i64 6), align 8
%19 = icmp ugt i32* %17, %18
%20 = select i1 %19, i32* %17, i32* %18
%21 = load i32*, i32** getelementptr inbounds ([32 x i32*], [32 x i32*]* @arrp, i64 0, i64 7), align 4
%22 = icmp ugt i32* %20, %21
%23 = select i1 %22, i32* %20, i32* %21
ret i32* %23
}