; clang-p2996/llvm/test/Transforms/SLPVectorizer/X86/horizontal-minmax.ll
;
; From commit f4fb854811 by Alexey Bataev, "[SLP] Do not consider deleted
; instructions as external users": if an instruction was previously deleted,
; it should not be treated as an external user. This fixes cost estimation
; and removes dead extractelement instructions.
; Differential Revision: https://reviews.llvm.org/D107106
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt < %s -mtriple=x86_64-unknown-linux -slp-vectorizer -S | FileCheck %s --check-prefixes=CHECK,DEFAULT,SSE
; RUN: opt < %s -mtriple=x86_64-unknown-linux -mcpu=corei7-avx -slp-vectorizer -S | FileCheck %s --check-prefixes=CHECK,DEFAULT,AVX
; RUN: opt < %s -mtriple=x86_64-unknown-linux -mcpu=core-avx2 -slp-vectorizer -S | FileCheck %s --check-prefixes=CHECK,DEFAULT,AVX2
; RUN: opt < %s -mtriple=x86_64-unknown-linux -mcpu=skx -slp-vectorizer -S -slp-threshold=-100 | FileCheck %s --check-prefixes=CHECK,THRESH
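; The SSE/AVX/AVX2 runs exercise the default cost model on successively wider
; targets; the THRESH run passes -slp-threshold=-100 so that almost any
; vectorizable tree appears profitable, showing what the vectorizer can form
; independent of the cost model.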
@arr = local_unnamed_addr global [32 x i32] zeroinitializer, align 16
@arr1 = local_unnamed_addr global [32 x float] zeroinitializer, align 16
@arrp = local_unnamed_addr global [32 x i32*] zeroinitializer, align 16
@var = global i32 zeroinitializer, align 8
declare i32 @llvm.smax.i32(i32, i32)
declare i16 @llvm.smin.i16(i16, i16)
declare i64 @llvm.umax.i64(i64, i64)
declare i8 @llvm.umin.i8(i8, i8)
declare i32 @llvm.smin.i32(i32, i32)
declare i32 @llvm.umin.i32(i32, i32)
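; The min/max intrinsic declarations above are referenced by intrinsic-based
; reduction tests later in the file. The @maxi* functions below spell out a
; scalar signed-max reduction over the leading N elements of @arr, roughly:
;   int m = arr[0];
;   for (int i = 1; i < N; ++i) m = arr[i] > m ? arr[i] : m;
; SLP is expected to collapse each chain into a single wide load followed by
; one @llvm.vector.reduce.smax call, as the CHECK lines verify.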
define i32 @maxi8(i32) {
; CHECK-LABEL: @maxi8(
; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i32>, <8 x i32>* bitcast ([32 x i32]* @arr to <8 x i32>*), align 16
; CHECK-NEXT: [[TMP3:%.*]] = call i32 @llvm.vector.reduce.smax.v8i32(<8 x i32> [[TMP2]])
; CHECK-NEXT: ret i32 [[TMP3]]
;
%2 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 0), align 16
%3 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 1), align 4
%4 = icmp sgt i32 %2, %3
%5 = select i1 %4, i32 %2, i32 %3
%6 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 2), align 8
%7 = icmp sgt i32 %5, %6
%8 = select i1 %7, i32 %5, i32 %6
%9 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 3), align 4
%10 = icmp sgt i32 %8, %9
%11 = select i1 %10, i32 %8, i32 %9
%12 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 4), align 16
%13 = icmp sgt i32 %11, %12
%14 = select i1 %13, i32 %11, i32 %12
%15 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 5), align 4
%16 = icmp sgt i32 %14, %15
%17 = select i1 %16, i32 %14, i32 %15
%18 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 6), align 8
%19 = icmp sgt i32 %17, %18
%20 = select i1 %19, i32 %17, i32 %18
%21 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 7), align 4
%22 = icmp sgt i32 %20, %21
%23 = select i1 %22, i32 %20, i32 %21
ret i32 %23
}
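; Same 8-wide smax reduction, but with a store to @var in the middle of the
; chain. The store is not part of the reduction; as the CHECK lines show, it
; is preserved and does not block forming the reduction.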
define i32 @maxi8_store_in(i32) {
; CHECK-LABEL: @maxi8_store_in(
; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i32>, <8 x i32>* bitcast ([32 x i32]* @arr to <8 x i32>*), align 16
; CHECK-NEXT: store i32 0, i32* @var, align 8
; CHECK-NEXT: [[TMP3:%.*]] = call i32 @llvm.vector.reduce.smax.v8i32(<8 x i32> [[TMP2]])
; CHECK-NEXT: ret i32 [[TMP3]]
;
%2 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 0), align 16
%3 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 1), align 4
%4 = icmp sgt i32 %2, %3
%5 = select i1 %4, i32 %2, i32 %3
%6 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 2), align 8
%7 = icmp sgt i32 %5, %6
%8 = select i1 %7, i32 %5, i32 %6
%9 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 3), align 4
%10 = icmp sgt i32 %8, %9
%11 = select i1 %10, i32 %8, i32 %9
%12 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 4), align 16
%13 = icmp sgt i32 %11, %12
%14 = select i1 %13, i32 %11, i32 %12
store i32 0, i32* @var, align 8
%15 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 5), align 4
%16 = icmp sgt i32 %14, %15
%17 = select i1 %16, i32 %14, i32 %15
%18 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 6), align 8
%19 = icmp sgt i32 %17, %18
%20 = select i1 %19, i32 %17, i32 %18
%21 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 7), align 4
%22 = icmp sgt i32 %20, %21
%23 = select i1 %22, i32 %20, i32 %21
ret i32 %23
}
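; 16-wide variant of the same smax reduction chain.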
define i32 @maxi16(i32) {
; CHECK-LABEL: @maxi16(
; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, <16 x i32>* bitcast ([32 x i32]* @arr to <16 x i32>*), align 16
; CHECK-NEXT: [[TMP3:%.*]] = call i32 @llvm.vector.reduce.smax.v16i32(<16 x i32> [[TMP2]])
; CHECK-NEXT: ret i32 [[TMP3]]
;
%2 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 0), align 16
%3 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 1), align 4
%4 = icmp sgt i32 %2, %3
%5 = select i1 %4, i32 %2, i32 %3
%6 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 2), align 8
%7 = icmp sgt i32 %5, %6
%8 = select i1 %7, i32 %5, i32 %6
%9 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 3), align 4
%10 = icmp sgt i32 %8, %9
%11 = select i1 %10, i32 %8, i32 %9
%12 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 4), align 16
%13 = icmp sgt i32 %11, %12
%14 = select i1 %13, i32 %11, i32 %12
%15 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 5), align 4
%16 = icmp sgt i32 %14, %15
%17 = select i1 %16, i32 %14, i32 %15
%18 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 6), align 8
%19 = icmp sgt i32 %17, %18
%20 = select i1 %19, i32 %17, i32 %18
%21 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 7), align 4
%22 = icmp sgt i32 %20, %21
%23 = select i1 %22, i32 %20, i32 %21
%24 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 8), align 16
%25 = icmp sgt i32 %23, %24
%26 = select i1 %25, i32 %23, i32 %24
%27 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 9), align 4
%28 = icmp sgt i32 %26, %27
%29 = select i1 %28, i32 %26, i32 %27
%30 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 10), align 8
%31 = icmp sgt i32 %29, %30
%32 = select i1 %31, i32 %29, i32 %30
%33 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 11), align 4
%34 = icmp sgt i32 %32, %33
%35 = select i1 %34, i32 %32, i32 %33
%36 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 12), align 16
%37 = icmp sgt i32 %35, %36
%38 = select i1 %37, i32 %35, i32 %36
%39 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 13), align 4
%40 = icmp sgt i32 %38, %39
%41 = select i1 %40, i32 %38, i32 %39
%42 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 14), align 8
%43 = icmp sgt i32 %41, %42
%44 = select i1 %43, i32 %41, i32 %42
%45 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 15), align 4
%46 = icmp sgt i32 %44, %45
%47 = select i1 %46, i32 %44, i32 %45
ret i32 %47
}
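; 32-wide variant, covering the whole @arr array.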
define i32 @maxi32(i32) {
; CHECK-LABEL: @maxi32(
; CHECK-NEXT: [[TMP2:%.*]] = load <32 x i32>, <32 x i32>* bitcast ([32 x i32]* @arr to <32 x i32>*), align 16
; CHECK-NEXT: [[TMP3:%.*]] = call i32 @llvm.vector.reduce.smax.v32i32(<32 x i32> [[TMP2]])
; CHECK-NEXT: ret i32 [[TMP3]]
;
%2 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 0), align 16
%3 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 1), align 4
%4 = icmp sgt i32 %2, %3
%5 = select i1 %4, i32 %2, i32 %3
%6 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 2), align 8
%7 = icmp sgt i32 %5, %6
%8 = select i1 %7, i32 %5, i32 %6
%9 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 3), align 4
%10 = icmp sgt i32 %8, %9
%11 = select i1 %10, i32 %8, i32 %9
%12 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 4), align 16
%13 = icmp sgt i32 %11, %12
%14 = select i1 %13, i32 %11, i32 %12
%15 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 5), align 4
%16 = icmp sgt i32 %14, %15
%17 = select i1 %16, i32 %14, i32 %15
%18 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 6), align 8
%19 = icmp sgt i32 %17, %18
%20 = select i1 %19, i32 %17, i32 %18
%21 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 7), align 4
%22 = icmp sgt i32 %20, %21
%23 = select i1 %22, i32 %20, i32 %21
%24 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 8), align 16
%25 = icmp sgt i32 %23, %24
%26 = select i1 %25, i32 %23, i32 %24
%27 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 9), align 4
%28 = icmp sgt i32 %26, %27
%29 = select i1 %28, i32 %26, i32 %27
%30 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 10), align 8
%31 = icmp sgt i32 %29, %30
%32 = select i1 %31, i32 %29, i32 %30
%33 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 11), align 4
%34 = icmp sgt i32 %32, %33
%35 = select i1 %34, i32 %32, i32 %33
%36 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 12), align 16
%37 = icmp sgt i32 %35, %36
%38 = select i1 %37, i32 %35, i32 %36
%39 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 13), align 4
%40 = icmp sgt i32 %38, %39
%41 = select i1 %40, i32 %38, i32 %39
%42 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 14), align 8
%43 = icmp sgt i32 %41, %42
%44 = select i1 %43, i32 %41, i32 %42
%45 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 15), align 4
%46 = icmp sgt i32 %44, %45
%47 = select i1 %46, i32 %44, i32 %45
%48 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 16), align 16
%49 = icmp sgt i32 %47, %48
%50 = select i1 %49, i32 %47, i32 %48
%51 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 17), align 4
%52 = icmp sgt i32 %50, %51
%53 = select i1 %52, i32 %50, i32 %51
%54 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 18), align 8
%55 = icmp sgt i32 %53, %54
%56 = select i1 %55, i32 %53, i32 %54
%57 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 19), align 4
%58 = icmp sgt i32 %56, %57
%59 = select i1 %58, i32 %56, i32 %57
%60 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 20), align 16
%61 = icmp sgt i32 %59, %60
%62 = select i1 %61, i32 %59, i32 %60
%63 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 21), align 4
%64 = icmp sgt i32 %62, %63
%65 = select i1 %64, i32 %62, i32 %63
%66 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 22), align 8
%67 = icmp sgt i32 %65, %66
%68 = select i1 %67, i32 %65, i32 %66
%69 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 23), align 4
%70 = icmp sgt i32 %68, %69
%71 = select i1 %70, i32 %68, i32 %69
%72 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 24), align 16
%73 = icmp sgt i32 %71, %72
%74 = select i1 %73, i32 %71, i32 %72
%75 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 25), align 4
%76 = icmp sgt i32 %74, %75
%77 = select i1 %76, i32 %74, i32 %75
%78 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 26), align 8
%79 = icmp sgt i32 %77, %78
%80 = select i1 %79, i32 %77, i32 %78
%81 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 27), align 4
%82 = icmp sgt i32 %80, %81
%83 = select i1 %82, i32 %80, i32 %81
%84 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 28), align 16
%85 = icmp sgt i32 %83, %84
%86 = select i1 %85, i32 %83, i32 %84
%87 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 29), align 4
%88 = icmp sgt i32 %86, %87
%89 = select i1 %88, i32 %86, i32 %87
%90 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 30), align 8
%91 = icmp sgt i32 %89, %90
%92 = select i1 %91, i32 %89, i32 %90
%93 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 31), align 4
%94 = icmp sgt i32 %92, %93
%95 = select i1 %94, i32 %92, i32 %93
ret i32 %95
}
; Note: legacy test - InstCombine creates maxnum intrinsics for fcmp+select with fastmath on the select.
define float @maxf8(float) {
; DEFAULT-LABEL: @maxf8(
; DEFAULT-NEXT: [[TMP2:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 0), align 16
; DEFAULT-NEXT: [[TMP3:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 1), align 4
; DEFAULT-NEXT: [[TMP4:%.*]] = fcmp fast ogt float [[TMP2]], [[TMP3]]
; DEFAULT-NEXT: [[TMP5:%.*]] = select i1 [[TMP4]], float [[TMP2]], float [[TMP3]]
; DEFAULT-NEXT: [[TMP6:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 2), align 8
; DEFAULT-NEXT: [[TMP7:%.*]] = fcmp fast ogt float [[TMP5]], [[TMP6]]
; DEFAULT-NEXT: [[TMP8:%.*]] = select i1 [[TMP7]], float [[TMP5]], float [[TMP6]]
; DEFAULT-NEXT: [[TMP9:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 3), align 4
; DEFAULT-NEXT: [[TMP10:%.*]] = fcmp fast ogt float [[TMP8]], [[TMP9]]
; DEFAULT-NEXT: [[TMP11:%.*]] = select i1 [[TMP10]], float [[TMP8]], float [[TMP9]]
; DEFAULT-NEXT: [[TMP12:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 4), align 16
; DEFAULT-NEXT: [[TMP13:%.*]] = fcmp fast ogt float [[TMP11]], [[TMP12]]
; DEFAULT-NEXT: [[TMP14:%.*]] = select i1 [[TMP13]], float [[TMP11]], float [[TMP12]]
; DEFAULT-NEXT: [[TMP15:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 5), align 4
; DEFAULT-NEXT: [[TMP16:%.*]] = fcmp fast ogt float [[TMP14]], [[TMP15]]
; DEFAULT-NEXT: [[TMP17:%.*]] = select i1 [[TMP16]], float [[TMP14]], float [[TMP15]]
; DEFAULT-NEXT: [[TMP18:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 6), align 8
; DEFAULT-NEXT: [[TMP19:%.*]] = fcmp fast ogt float [[TMP17]], [[TMP18]]
; DEFAULT-NEXT: [[TMP20:%.*]] = select i1 [[TMP19]], float [[TMP17]], float [[TMP18]]
; DEFAULT-NEXT: [[TMP21:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 7), align 4
; DEFAULT-NEXT: [[TMP22:%.*]] = fcmp fast ogt float [[TMP20]], [[TMP21]]
; DEFAULT-NEXT: [[TMP23:%.*]] = select i1 [[TMP22]], float [[TMP20]], float [[TMP21]]
; DEFAULT-NEXT: ret float [[TMP23]]
;
; THRESH-LABEL: @maxf8(
; THRESH-NEXT: [[TMP2:%.*]] = load <2 x float>, <2 x float>* bitcast ([32 x float]* @arr1 to <2 x float>*), align 16
; THRESH-NEXT: [[TMP3:%.*]] = extractelement <2 x float> [[TMP2]], i32 0
; THRESH-NEXT: [[TMP4:%.*]] = extractelement <2 x float> [[TMP2]], i32 1
; THRESH-NEXT: [[TMP5:%.*]] = fcmp fast ogt float [[TMP3]], [[TMP4]]
; THRESH-NEXT: [[TMP6:%.*]] = select i1 [[TMP5]], float [[TMP3]], float [[TMP4]]
; THRESH-NEXT: [[TMP7:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 2), align 8
; THRESH-NEXT: [[TMP8:%.*]] = fcmp fast ogt float [[TMP6]], [[TMP7]]
; THRESH-NEXT: [[TMP9:%.*]] = select i1 [[TMP8]], float [[TMP6]], float [[TMP7]]
; THRESH-NEXT: [[TMP10:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 3), align 4
; THRESH-NEXT: [[TMP11:%.*]] = fcmp fast ogt float [[TMP9]], [[TMP10]]
; THRESH-NEXT: [[TMP12:%.*]] = select i1 [[TMP11]], float [[TMP9]], float [[TMP10]]
; THRESH-NEXT: [[TMP13:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 4), align 16
; THRESH-NEXT: [[TMP14:%.*]] = fcmp fast ogt float [[TMP12]], [[TMP13]]
; THRESH-NEXT: [[TMP15:%.*]] = select i1 [[TMP14]], float [[TMP12]], float [[TMP13]]
; THRESH-NEXT: [[TMP16:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 5), align 4
; THRESH-NEXT: [[TMP17:%.*]] = fcmp fast ogt float [[TMP15]], [[TMP16]]
; THRESH-NEXT: [[TMP18:%.*]] = select i1 [[TMP17]], float [[TMP15]], float [[TMP16]]
; THRESH-NEXT: [[TMP19:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 6), align 8
; THRESH-NEXT: [[TMP20:%.*]] = fcmp fast ogt float [[TMP18]], [[TMP19]]
; THRESH-NEXT: [[TMP21:%.*]] = select i1 [[TMP20]], float [[TMP18]], float [[TMP19]]
; THRESH-NEXT: [[TMP22:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 7), align 4
; THRESH-NEXT: [[TMP23:%.*]] = fcmp fast ogt float [[TMP21]], [[TMP22]]
; THRESH-NEXT: [[TMP24:%.*]] = select i1 [[TMP23]], float [[TMP21]], float [[TMP22]]
; THRESH-NEXT: ret float [[TMP24]]
;
%2 = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 0), align 16
%3 = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 1), align 4
%4 = fcmp fast ogt float %2, %3
%5 = select i1 %4, float %2, float %3
%6 = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 2), align 8
%7 = fcmp fast ogt float %5, %6
%8 = select i1 %7, float %5, float %6
%9 = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 3), align 4
%10 = fcmp fast ogt float %8, %9
%11 = select i1 %10, float %8, float %9
%12 = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 4), align 16
%13 = fcmp fast ogt float %11, %12
%14 = select i1 %13, float %11, float %12
%15 = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 5), align 4
%16 = fcmp fast ogt float %14, %15
%17 = select i1 %16, float %14, float %15
%18 = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 6), align 8
%19 = fcmp fast ogt float %17, %18
%20 = select i1 %19, float %17, float %18
%21 = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 7), align 4
%22 = fcmp fast ogt float %20, %21
%23 = select i1 %22, float %20, float %21
ret float %23
}
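; In these float chains the fcmp is 'fast' but the selects carry no fast-math
; flags (the notes around each function explain that InstCombine would
; otherwise fold the pattern into maxnum intrinsics), so the chain is not
; matched as an fmax reduction: DEFAULT leaves it scalar, and even THRESH only
; vectorizes the leading pair of loads into a <2 x float> load plus extracts.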
; Note: legacy test - InstCombine creates maxnum intrinsics for fcmp+select with fastmath on the select.
define float @maxf16(float) {
; DEFAULT-LABEL: @maxf16(
; DEFAULT-NEXT: [[TMP2:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 0), align 16
; DEFAULT-NEXT: [[TMP3:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 1), align 4
; DEFAULT-NEXT: [[TMP4:%.*]] = fcmp fast ogt float [[TMP2]], [[TMP3]]
; DEFAULT-NEXT: [[TMP5:%.*]] = select i1 [[TMP4]], float [[TMP2]], float [[TMP3]]
; DEFAULT-NEXT: [[TMP6:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 2), align 8
; DEFAULT-NEXT: [[TMP7:%.*]] = fcmp fast ogt float [[TMP5]], [[TMP6]]
; DEFAULT-NEXT: [[TMP8:%.*]] = select i1 [[TMP7]], float [[TMP5]], float [[TMP6]]
; DEFAULT-NEXT: [[TMP9:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 3), align 4
; DEFAULT-NEXT: [[TMP10:%.*]] = fcmp fast ogt float [[TMP8]], [[TMP9]]
; DEFAULT-NEXT: [[TMP11:%.*]] = select i1 [[TMP10]], float [[TMP8]], float [[TMP9]]
; DEFAULT-NEXT: [[TMP12:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 4), align 16
; DEFAULT-NEXT: [[TMP13:%.*]] = fcmp fast ogt float [[TMP11]], [[TMP12]]
; DEFAULT-NEXT: [[TMP14:%.*]] = select i1 [[TMP13]], float [[TMP11]], float [[TMP12]]
; DEFAULT-NEXT: [[TMP15:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 5), align 4
; DEFAULT-NEXT: [[TMP16:%.*]] = fcmp fast ogt float [[TMP14]], [[TMP15]]
; DEFAULT-NEXT: [[TMP17:%.*]] = select i1 [[TMP16]], float [[TMP14]], float [[TMP15]]
; DEFAULT-NEXT: [[TMP18:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 6), align 8
; DEFAULT-NEXT: [[TMP19:%.*]] = fcmp fast ogt float [[TMP17]], [[TMP18]]
; DEFAULT-NEXT: [[TMP20:%.*]] = select i1 [[TMP19]], float [[TMP17]], float [[TMP18]]
; DEFAULT-NEXT: [[TMP21:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 7), align 4
; DEFAULT-NEXT: [[TMP22:%.*]] = fcmp fast ogt float [[TMP20]], [[TMP21]]
; DEFAULT-NEXT: [[TMP23:%.*]] = select i1 [[TMP22]], float [[TMP20]], float [[TMP21]]
; DEFAULT-NEXT: [[TMP24:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 8), align 16
; DEFAULT-NEXT: [[TMP25:%.*]] = fcmp fast ogt float [[TMP23]], [[TMP24]]
; DEFAULT-NEXT: [[TMP26:%.*]] = select i1 [[TMP25]], float [[TMP23]], float [[TMP24]]
; DEFAULT-NEXT: [[TMP27:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 9), align 4
; DEFAULT-NEXT: [[TMP28:%.*]] = fcmp fast ogt float [[TMP26]], [[TMP27]]
; DEFAULT-NEXT: [[TMP29:%.*]] = select i1 [[TMP28]], float [[TMP26]], float [[TMP27]]
; DEFAULT-NEXT: [[TMP30:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 10), align 8
; DEFAULT-NEXT: [[TMP31:%.*]] = fcmp fast ogt float [[TMP29]], [[TMP30]]
; DEFAULT-NEXT: [[TMP32:%.*]] = select i1 [[TMP31]], float [[TMP29]], float [[TMP30]]
; DEFAULT-NEXT: [[TMP33:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 11), align 4
; DEFAULT-NEXT: [[TMP34:%.*]] = fcmp fast ogt float [[TMP32]], [[TMP33]]
; DEFAULT-NEXT: [[TMP35:%.*]] = select i1 [[TMP34]], float [[TMP32]], float [[TMP33]]
; DEFAULT-NEXT: [[TMP36:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 12), align 16
; DEFAULT-NEXT: [[TMP37:%.*]] = fcmp fast ogt float [[TMP35]], [[TMP36]]
; DEFAULT-NEXT: [[TMP38:%.*]] = select i1 [[TMP37]], float [[TMP35]], float [[TMP36]]
; DEFAULT-NEXT: [[TMP39:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 13), align 4
; DEFAULT-NEXT: [[TMP40:%.*]] = fcmp fast ogt float [[TMP38]], [[TMP39]]
; DEFAULT-NEXT: [[TMP41:%.*]] = select i1 [[TMP40]], float [[TMP38]], float [[TMP39]]
; DEFAULT-NEXT: [[TMP42:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 14), align 8
; DEFAULT-NEXT: [[TMP43:%.*]] = fcmp fast ogt float [[TMP41]], [[TMP42]]
; DEFAULT-NEXT: [[TMP44:%.*]] = select i1 [[TMP43]], float [[TMP41]], float [[TMP42]]
; DEFAULT-NEXT: [[TMP45:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 15), align 4
; DEFAULT-NEXT: [[TMP46:%.*]] = fcmp fast ogt float [[TMP44]], [[TMP45]]
; DEFAULT-NEXT: [[TMP47:%.*]] = select i1 [[TMP46]], float [[TMP44]], float [[TMP45]]
; DEFAULT-NEXT: ret float [[TMP47]]
;
; THRESH-LABEL: @maxf16(
; THRESH-NEXT: [[TMP2:%.*]] = load <2 x float>, <2 x float>* bitcast ([32 x float]* @arr1 to <2 x float>*), align 16
; THRESH-NEXT: [[TMP3:%.*]] = extractelement <2 x float> [[TMP2]], i32 0
; THRESH-NEXT: [[TMP4:%.*]] = extractelement <2 x float> [[TMP2]], i32 1
; THRESH-NEXT: [[TMP5:%.*]] = fcmp fast ogt float [[TMP3]], [[TMP4]]
; THRESH-NEXT: [[TMP6:%.*]] = select i1 [[TMP5]], float [[TMP3]], float [[TMP4]]
; THRESH-NEXT: [[TMP7:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 2), align 8
; THRESH-NEXT: [[TMP8:%.*]] = fcmp fast ogt float [[TMP6]], [[TMP7]]
; THRESH-NEXT: [[TMP9:%.*]] = select i1 [[TMP8]], float [[TMP6]], float [[TMP7]]
; THRESH-NEXT: [[TMP10:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 3), align 4
; THRESH-NEXT: [[TMP11:%.*]] = fcmp fast ogt float [[TMP9]], [[TMP10]]
; THRESH-NEXT: [[TMP12:%.*]] = select i1 [[TMP11]], float [[TMP9]], float [[TMP10]]
; THRESH-NEXT: [[TMP13:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 4), align 16
; THRESH-NEXT: [[TMP14:%.*]] = fcmp fast ogt float [[TMP12]], [[TMP13]]
; THRESH-NEXT: [[TMP15:%.*]] = select i1 [[TMP14]], float [[TMP12]], float [[TMP13]]
; THRESH-NEXT: [[TMP16:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 5), align 4
; THRESH-NEXT: [[TMP17:%.*]] = fcmp fast ogt float [[TMP15]], [[TMP16]]
; THRESH-NEXT: [[TMP18:%.*]] = select i1 [[TMP17]], float [[TMP15]], float [[TMP16]]
; THRESH-NEXT: [[TMP19:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 6), align 8
; THRESH-NEXT: [[TMP20:%.*]] = fcmp fast ogt float [[TMP18]], [[TMP19]]
; THRESH-NEXT: [[TMP21:%.*]] = select i1 [[TMP20]], float [[TMP18]], float [[TMP19]]
; THRESH-NEXT: [[TMP22:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 7), align 4
; THRESH-NEXT: [[TMP23:%.*]] = fcmp fast ogt float [[TMP21]], [[TMP22]]
; THRESH-NEXT: [[TMP24:%.*]] = select i1 [[TMP23]], float [[TMP21]], float [[TMP22]]
; THRESH-NEXT: [[TMP25:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 8), align 16
; THRESH-NEXT: [[TMP26:%.*]] = fcmp fast ogt float [[TMP24]], [[TMP25]]
; THRESH-NEXT: [[TMP27:%.*]] = select i1 [[TMP26]], float [[TMP24]], float [[TMP25]]
; THRESH-NEXT: [[TMP28:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 9), align 4
; THRESH-NEXT: [[TMP29:%.*]] = fcmp fast ogt float [[TMP27]], [[TMP28]]
; THRESH-NEXT: [[TMP30:%.*]] = select i1 [[TMP29]], float [[TMP27]], float [[TMP28]]
; THRESH-NEXT: [[TMP31:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 10), align 8
; THRESH-NEXT: [[TMP32:%.*]] = fcmp fast ogt float [[TMP30]], [[TMP31]]
; THRESH-NEXT: [[TMP33:%.*]] = select i1 [[TMP32]], float [[TMP30]], float [[TMP31]]
; THRESH-NEXT: [[TMP34:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 11), align 4
; THRESH-NEXT: [[TMP35:%.*]] = fcmp fast ogt float [[TMP33]], [[TMP34]]
; THRESH-NEXT: [[TMP36:%.*]] = select i1 [[TMP35]], float [[TMP33]], float [[TMP34]]
; THRESH-NEXT: [[TMP37:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 12), align 16
; THRESH-NEXT: [[TMP38:%.*]] = fcmp fast ogt float [[TMP36]], [[TMP37]]
; THRESH-NEXT: [[TMP39:%.*]] = select i1 [[TMP38]], float [[TMP36]], float [[TMP37]]
; THRESH-NEXT: [[TMP40:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 13), align 4
; THRESH-NEXT: [[TMP41:%.*]] = fcmp fast ogt float [[TMP39]], [[TMP40]]
; THRESH-NEXT: [[TMP42:%.*]] = select i1 [[TMP41]], float [[TMP39]], float [[TMP40]]
; THRESH-NEXT: [[TMP43:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 14), align 8
; THRESH-NEXT: [[TMP44:%.*]] = fcmp fast ogt float [[TMP42]], [[TMP43]]
; THRESH-NEXT: [[TMP45:%.*]] = select i1 [[TMP44]], float [[TMP42]], float [[TMP43]]
; THRESH-NEXT: [[TMP46:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 15), align 4
; THRESH-NEXT: [[TMP47:%.*]] = fcmp fast ogt float [[TMP45]], [[TMP46]]
; THRESH-NEXT: [[TMP48:%.*]] = select i1 [[TMP47]], float [[TMP45]], float [[TMP46]]
; THRESH-NEXT: ret float [[TMP48]]
;
%2 = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 0), align 16
%3 = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 1), align 4
%4 = fcmp fast ogt float %2, %3
%5 = select i1 %4, float %2, float %3
%6 = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 2), align 8
%7 = fcmp fast ogt float %5, %6
%8 = select i1 %7, float %5, float %6
%9 = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 3), align 4
%10 = fcmp fast ogt float %8, %9
%11 = select i1 %10, float %8, float %9
%12 = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 4), align 16
%13 = fcmp fast ogt float %11, %12
%14 = select i1 %13, float %11, float %12
%15 = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 5), align 4
%16 = fcmp fast ogt float %14, %15
%17 = select i1 %16, float %14, float %15
%18 = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 6), align 8
%19 = fcmp fast ogt float %17, %18
%20 = select i1 %19, float %17, float %18
%21 = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 7), align 4
%22 = fcmp fast ogt float %20, %21
%23 = select i1 %22, float %20, float %21
%24 = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 8), align 16
%25 = fcmp fast ogt float %23, %24
%26 = select i1 %25, float %23, float %24
%27 = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 9), align 4
%28 = fcmp fast ogt float %26, %27
%29 = select i1 %28, float %26, float %27
%30 = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 10), align 8
%31 = fcmp fast ogt float %29, %30
%32 = select i1 %31, float %29, float %30
%33 = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 11), align 4
%34 = fcmp fast ogt float %32, %33
%35 = select i1 %34, float %32, float %33
%36 = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 12), align 16
%37 = fcmp fast ogt float %35, %36
%38 = select i1 %37, float %35, float %36
%39 = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 13), align 4
%40 = fcmp fast ogt float %38, %39
%41 = select i1 %40, float %38, float %39
%42 = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 14), align 8
%43 = fcmp fast ogt float %41, %42
%44 = select i1 %43, float %41, float %42
%45 = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 15), align 4
%46 = fcmp fast ogt float %44, %45
%47 = select i1 %46, float %44, float %45
ret float %47
}
; Note: legacy test - InstCombine creates maxnum intrinsics for fcmp+select with fastmath on the select.
define float @maxf32(float) {
; DEFAULT-LABEL: @maxf32(
; DEFAULT-NEXT: [[TMP2:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 0), align 16
; DEFAULT-NEXT: [[TMP3:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 1), align 4
; DEFAULT-NEXT: [[TMP4:%.*]] = fcmp fast ogt float [[TMP2]], [[TMP3]]
; DEFAULT-NEXT: [[TMP5:%.*]] = select i1 [[TMP4]], float [[TMP2]], float [[TMP3]]
; DEFAULT-NEXT: [[TMP6:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 2), align 8
; DEFAULT-NEXT: [[TMP7:%.*]] = fcmp fast ogt float [[TMP5]], [[TMP6]]
; DEFAULT-NEXT: [[TMP8:%.*]] = select i1 [[TMP7]], float [[TMP5]], float [[TMP6]]
; DEFAULT-NEXT: [[TMP9:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 3), align 4
; DEFAULT-NEXT: [[TMP10:%.*]] = fcmp fast ogt float [[TMP8]], [[TMP9]]
; DEFAULT-NEXT: [[TMP11:%.*]] = select i1 [[TMP10]], float [[TMP8]], float [[TMP9]]
; DEFAULT-NEXT: [[TMP12:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 4), align 16
; DEFAULT-NEXT: [[TMP13:%.*]] = fcmp fast ogt float [[TMP11]], [[TMP12]]
; DEFAULT-NEXT: [[TMP14:%.*]] = select i1 [[TMP13]], float [[TMP11]], float [[TMP12]]
; DEFAULT-NEXT: [[TMP15:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 5), align 4
; DEFAULT-NEXT: [[TMP16:%.*]] = fcmp fast ogt float [[TMP14]], [[TMP15]]
; DEFAULT-NEXT: [[TMP17:%.*]] = select i1 [[TMP16]], float [[TMP14]], float [[TMP15]]
; DEFAULT-NEXT: [[TMP18:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 6), align 8
; DEFAULT-NEXT: [[TMP19:%.*]] = fcmp fast ogt float [[TMP17]], [[TMP18]]
; DEFAULT-NEXT: [[TMP20:%.*]] = select i1 [[TMP19]], float [[TMP17]], float [[TMP18]]
; DEFAULT-NEXT: [[TMP21:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 7), align 4
; DEFAULT-NEXT: [[TMP22:%.*]] = fcmp fast ogt float [[TMP20]], [[TMP21]]
; DEFAULT-NEXT: [[TMP23:%.*]] = select i1 [[TMP22]], float [[TMP20]], float [[TMP21]]
; DEFAULT-NEXT: [[TMP24:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 8), align 16
; DEFAULT-NEXT: [[TMP25:%.*]] = fcmp fast ogt float [[TMP23]], [[TMP24]]
; DEFAULT-NEXT: [[TMP26:%.*]] = select i1 [[TMP25]], float [[TMP23]], float [[TMP24]]
; DEFAULT-NEXT: [[TMP27:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 9), align 4
; DEFAULT-NEXT: [[TMP28:%.*]] = fcmp fast ogt float [[TMP26]], [[TMP27]]
; DEFAULT-NEXT: [[TMP29:%.*]] = select i1 [[TMP28]], float [[TMP26]], float [[TMP27]]
; DEFAULT-NEXT: [[TMP30:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 10), align 8
; DEFAULT-NEXT: [[TMP31:%.*]] = fcmp fast ogt float [[TMP29]], [[TMP30]]
; DEFAULT-NEXT: [[TMP32:%.*]] = select i1 [[TMP31]], float [[TMP29]], float [[TMP30]]
; DEFAULT-NEXT: [[TMP33:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 11), align 4
; DEFAULT-NEXT: [[TMP34:%.*]] = fcmp fast ogt float [[TMP32]], [[TMP33]]
; DEFAULT-NEXT: [[TMP35:%.*]] = select i1 [[TMP34]], float [[TMP32]], float [[TMP33]]
; DEFAULT-NEXT: [[TMP36:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 12), align 16
; DEFAULT-NEXT: [[TMP37:%.*]] = fcmp fast ogt float [[TMP35]], [[TMP36]]
; DEFAULT-NEXT: [[TMP38:%.*]] = select i1 [[TMP37]], float [[TMP35]], float [[TMP36]]
; DEFAULT-NEXT: [[TMP39:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 13), align 4
; DEFAULT-NEXT: [[TMP40:%.*]] = fcmp fast ogt float [[TMP38]], [[TMP39]]
; DEFAULT-NEXT: [[TMP41:%.*]] = select i1 [[TMP40]], float [[TMP38]], float [[TMP39]]
; DEFAULT-NEXT: [[TMP42:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 14), align 8
; DEFAULT-NEXT: [[TMP43:%.*]] = fcmp fast ogt float [[TMP41]], [[TMP42]]
; DEFAULT-NEXT: [[TMP44:%.*]] = select i1 [[TMP43]], float [[TMP41]], float [[TMP42]]
; DEFAULT-NEXT: [[TMP45:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 15), align 4
; DEFAULT-NEXT: [[TMP46:%.*]] = fcmp fast ogt float [[TMP44]], [[TMP45]]
; DEFAULT-NEXT: [[TMP47:%.*]] = select i1 [[TMP46]], float [[TMP44]], float [[TMP45]]
; DEFAULT-NEXT: [[TMP48:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 16), align 16
; DEFAULT-NEXT: [[TMP49:%.*]] = fcmp fast ogt float [[TMP47]], [[TMP48]]
; DEFAULT-NEXT: [[TMP50:%.*]] = select i1 [[TMP49]], float [[TMP47]], float [[TMP48]]
; DEFAULT-NEXT: [[TMP51:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 17), align 4
; DEFAULT-NEXT: [[TMP52:%.*]] = fcmp fast ogt float [[TMP50]], [[TMP51]]
; DEFAULT-NEXT: [[TMP53:%.*]] = select i1 [[TMP52]], float [[TMP50]], float [[TMP51]]
; DEFAULT-NEXT: [[TMP54:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 18), align 8
; DEFAULT-NEXT: [[TMP55:%.*]] = fcmp fast ogt float [[TMP53]], [[TMP54]]
; DEFAULT-NEXT: [[TMP56:%.*]] = select i1 [[TMP55]], float [[TMP53]], float [[TMP54]]
; DEFAULT-NEXT: [[TMP57:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 19), align 4
; DEFAULT-NEXT: [[TMP58:%.*]] = fcmp fast ogt float [[TMP56]], [[TMP57]]
; DEFAULT-NEXT: [[TMP59:%.*]] = select i1 [[TMP58]], float [[TMP56]], float [[TMP57]]
; DEFAULT-NEXT: [[TMP60:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 20), align 16
; DEFAULT-NEXT: [[TMP61:%.*]] = fcmp fast ogt float [[TMP59]], [[TMP60]]
; DEFAULT-NEXT: [[TMP62:%.*]] = select i1 [[TMP61]], float [[TMP59]], float [[TMP60]]
; DEFAULT-NEXT: [[TMP63:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 21), align 4
; DEFAULT-NEXT: [[TMP64:%.*]] = fcmp fast ogt float [[TMP62]], [[TMP63]]
; DEFAULT-NEXT: [[TMP65:%.*]] = select i1 [[TMP64]], float [[TMP62]], float [[TMP63]]
; DEFAULT-NEXT: [[TMP66:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 22), align 8
; DEFAULT-NEXT: [[TMP67:%.*]] = fcmp fast ogt float [[TMP65]], [[TMP66]]
; DEFAULT-NEXT: [[TMP68:%.*]] = select i1 [[TMP67]], float [[TMP65]], float [[TMP66]]
; DEFAULT-NEXT: [[TMP69:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 23), align 4
; DEFAULT-NEXT: [[TMP70:%.*]] = fcmp fast ogt float [[TMP68]], [[TMP69]]
; DEFAULT-NEXT: [[TMP71:%.*]] = select i1 [[TMP70]], float [[TMP68]], float [[TMP69]]
; DEFAULT-NEXT: [[TMP72:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 24), align 16
; DEFAULT-NEXT: [[TMP73:%.*]] = fcmp fast ogt float [[TMP71]], [[TMP72]]
; DEFAULT-NEXT: [[TMP74:%.*]] = select i1 [[TMP73]], float [[TMP71]], float [[TMP72]]
; DEFAULT-NEXT: [[TMP75:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 25), align 4
; DEFAULT-NEXT: [[TMP76:%.*]] = fcmp fast ogt float [[TMP74]], [[TMP75]]
; DEFAULT-NEXT: [[TMP77:%.*]] = select i1 [[TMP76]], float [[TMP74]], float [[TMP75]]
; DEFAULT-NEXT: [[TMP78:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 26), align 8
; DEFAULT-NEXT: [[TMP79:%.*]] = fcmp fast ogt float [[TMP77]], [[TMP78]]
; DEFAULT-NEXT: [[TMP80:%.*]] = select i1 [[TMP79]], float [[TMP77]], float [[TMP78]]
; DEFAULT-NEXT: [[TMP81:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 27), align 4
; DEFAULT-NEXT: [[TMP82:%.*]] = fcmp fast ogt float [[TMP80]], [[TMP81]]
; DEFAULT-NEXT: [[TMP83:%.*]] = select i1 [[TMP82]], float [[TMP80]], float [[TMP81]]
; DEFAULT-NEXT: [[TMP84:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 28), align 16
; DEFAULT-NEXT: [[TMP85:%.*]] = fcmp fast ogt float [[TMP83]], [[TMP84]]
; DEFAULT-NEXT: [[TMP86:%.*]] = select i1 [[TMP85]], float [[TMP83]], float [[TMP84]]
; DEFAULT-NEXT: [[TMP87:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 29), align 4
; DEFAULT-NEXT: [[TMP88:%.*]] = fcmp fast ogt float [[TMP86]], [[TMP87]]
; DEFAULT-NEXT: [[TMP89:%.*]] = select i1 [[TMP88]], float [[TMP86]], float [[TMP87]]
; DEFAULT-NEXT: [[TMP90:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 30), align 8
; DEFAULT-NEXT: [[TMP91:%.*]] = fcmp fast ogt float [[TMP89]], [[TMP90]]
; DEFAULT-NEXT: [[TMP92:%.*]] = select i1 [[TMP91]], float [[TMP89]], float [[TMP90]]
; DEFAULT-NEXT: [[TMP93:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 31), align 4
; DEFAULT-NEXT: [[TMP94:%.*]] = fcmp fast ogt float [[TMP92]], [[TMP93]]
; DEFAULT-NEXT: [[TMP95:%.*]] = select i1 [[TMP94]], float [[TMP92]], float [[TMP93]]
; DEFAULT-NEXT: ret float [[TMP95]]
;
; THRESH-LABEL: @maxf32(
; THRESH-NEXT: [[TMP2:%.*]] = load <2 x float>, <2 x float>* bitcast ([32 x float]* @arr1 to <2 x float>*), align 16
; THRESH-NEXT: [[TMP3:%.*]] = extractelement <2 x float> [[TMP2]], i32 0
; THRESH-NEXT: [[TMP4:%.*]] = extractelement <2 x float> [[TMP2]], i32 1
; THRESH-NEXT: [[TMP5:%.*]] = fcmp fast ogt float [[TMP3]], [[TMP4]]
; THRESH-NEXT: [[TMP6:%.*]] = select i1 [[TMP5]], float [[TMP3]], float [[TMP4]]
; THRESH-NEXT: [[TMP7:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 2), align 8
; THRESH-NEXT: [[TMP8:%.*]] = fcmp fast ogt float [[TMP6]], [[TMP7]]
; THRESH-NEXT: [[TMP9:%.*]] = select i1 [[TMP8]], float [[TMP6]], float [[TMP7]]
; THRESH-NEXT: [[TMP10:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 3), align 4
; THRESH-NEXT: [[TMP11:%.*]] = fcmp fast ogt float [[TMP9]], [[TMP10]]
; THRESH-NEXT: [[TMP12:%.*]] = select i1 [[TMP11]], float [[TMP9]], float [[TMP10]]
; THRESH-NEXT: [[TMP13:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 4), align 16
; THRESH-NEXT: [[TMP14:%.*]] = fcmp fast ogt float [[TMP12]], [[TMP13]]
; THRESH-NEXT: [[TMP15:%.*]] = select i1 [[TMP14]], float [[TMP12]], float [[TMP13]]
; THRESH-NEXT: [[TMP16:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 5), align 4
; THRESH-NEXT: [[TMP17:%.*]] = fcmp fast ogt float [[TMP15]], [[TMP16]]
; THRESH-NEXT: [[TMP18:%.*]] = select i1 [[TMP17]], float [[TMP15]], float [[TMP16]]
; THRESH-NEXT: [[TMP19:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 6), align 8
; THRESH-NEXT: [[TMP20:%.*]] = fcmp fast ogt float [[TMP18]], [[TMP19]]
; THRESH-NEXT: [[TMP21:%.*]] = select i1 [[TMP20]], float [[TMP18]], float [[TMP19]]
; THRESH-NEXT: [[TMP22:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 7), align 4
; THRESH-NEXT: [[TMP23:%.*]] = fcmp fast ogt float [[TMP21]], [[TMP22]]
; THRESH-NEXT: [[TMP24:%.*]] = select i1 [[TMP23]], float [[TMP21]], float [[TMP22]]
; THRESH-NEXT: [[TMP25:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 8), align 16
; THRESH-NEXT: [[TMP26:%.*]] = fcmp fast ogt float [[TMP24]], [[TMP25]]
; THRESH-NEXT: [[TMP27:%.*]] = select i1 [[TMP26]], float [[TMP24]], float [[TMP25]]
; THRESH-NEXT: [[TMP28:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 9), align 4
; THRESH-NEXT: [[TMP29:%.*]] = fcmp fast ogt float [[TMP27]], [[TMP28]]
; THRESH-NEXT: [[TMP30:%.*]] = select i1 [[TMP29]], float [[TMP27]], float [[TMP28]]
; THRESH-NEXT: [[TMP31:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 10), align 8
; THRESH-NEXT: [[TMP32:%.*]] = fcmp fast ogt float [[TMP30]], [[TMP31]]
; THRESH-NEXT: [[TMP33:%.*]] = select i1 [[TMP32]], float [[TMP30]], float [[TMP31]]
; THRESH-NEXT: [[TMP34:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 11), align 4
; THRESH-NEXT: [[TMP35:%.*]] = fcmp fast ogt float [[TMP33]], [[TMP34]]
; THRESH-NEXT: [[TMP36:%.*]] = select i1 [[TMP35]], float [[TMP33]], float [[TMP34]]
; THRESH-NEXT: [[TMP37:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 12), align 16
; THRESH-NEXT: [[TMP38:%.*]] = fcmp fast ogt float [[TMP36]], [[TMP37]]
; THRESH-NEXT: [[TMP39:%.*]] = select i1 [[TMP38]], float [[TMP36]], float [[TMP37]]
; THRESH-NEXT: [[TMP40:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 13), align 4
; THRESH-NEXT: [[TMP41:%.*]] = fcmp fast ogt float [[TMP39]], [[TMP40]]
; THRESH-NEXT: [[TMP42:%.*]] = select i1 [[TMP41]], float [[TMP39]], float [[TMP40]]
; THRESH-NEXT: [[TMP43:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 14), align 8
; THRESH-NEXT: [[TMP44:%.*]] = fcmp fast ogt float [[TMP42]], [[TMP43]]
; THRESH-NEXT: [[TMP45:%.*]] = select i1 [[TMP44]], float [[TMP42]], float [[TMP43]]
; THRESH-NEXT: [[TMP46:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 15), align 4
; THRESH-NEXT: [[TMP47:%.*]] = fcmp fast ogt float [[TMP45]], [[TMP46]]
; THRESH-NEXT: [[TMP48:%.*]] = select i1 [[TMP47]], float [[TMP45]], float [[TMP46]]
; THRESH-NEXT: [[TMP49:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 16), align 16
; THRESH-NEXT: [[TMP50:%.*]] = fcmp fast ogt float [[TMP48]], [[TMP49]]
; THRESH-NEXT: [[TMP51:%.*]] = select i1 [[TMP50]], float [[TMP48]], float [[TMP49]]
; THRESH-NEXT: [[TMP52:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 17), align 4
; THRESH-NEXT: [[TMP53:%.*]] = fcmp fast ogt float [[TMP51]], [[TMP52]]
; THRESH-NEXT: [[TMP54:%.*]] = select i1 [[TMP53]], float [[TMP51]], float [[TMP52]]
; THRESH-NEXT: [[TMP55:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 18), align 8
; THRESH-NEXT: [[TMP56:%.*]] = fcmp fast ogt float [[TMP54]], [[TMP55]]
; THRESH-NEXT: [[TMP57:%.*]] = select i1 [[TMP56]], float [[TMP54]], float [[TMP55]]
; THRESH-NEXT: [[TMP58:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 19), align 4
; THRESH-NEXT: [[TMP59:%.*]] = fcmp fast ogt float [[TMP57]], [[TMP58]]
; THRESH-NEXT: [[TMP60:%.*]] = select i1 [[TMP59]], float [[TMP57]], float [[TMP58]]
; THRESH-NEXT: [[TMP61:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 20), align 16
; THRESH-NEXT: [[TMP62:%.*]] = fcmp fast ogt float [[TMP60]], [[TMP61]]
; THRESH-NEXT: [[TMP63:%.*]] = select i1 [[TMP62]], float [[TMP60]], float [[TMP61]]
; THRESH-NEXT: [[TMP64:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 21), align 4
; THRESH-NEXT: [[TMP65:%.*]] = fcmp fast ogt float [[TMP63]], [[TMP64]]
; THRESH-NEXT: [[TMP66:%.*]] = select i1 [[TMP65]], float [[TMP63]], float [[TMP64]]
; THRESH-NEXT: [[TMP67:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 22), align 8
; THRESH-NEXT: [[TMP68:%.*]] = fcmp fast ogt float [[TMP66]], [[TMP67]]
; THRESH-NEXT: [[TMP69:%.*]] = select i1 [[TMP68]], float [[TMP66]], float [[TMP67]]
; THRESH-NEXT: [[TMP70:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 23), align 4
; THRESH-NEXT: [[TMP71:%.*]] = fcmp fast ogt float [[TMP69]], [[TMP70]]
; THRESH-NEXT: [[TMP72:%.*]] = select i1 [[TMP71]], float [[TMP69]], float [[TMP70]]
; THRESH-NEXT: [[TMP73:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 24), align 16
; THRESH-NEXT: [[TMP74:%.*]] = fcmp fast ogt float [[TMP72]], [[TMP73]]
; THRESH-NEXT: [[TMP75:%.*]] = select i1 [[TMP74]], float [[TMP72]], float [[TMP73]]
; THRESH-NEXT: [[TMP76:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 25), align 4
; THRESH-NEXT: [[TMP77:%.*]] = fcmp fast ogt float [[TMP75]], [[TMP76]]
; THRESH-NEXT: [[TMP78:%.*]] = select i1 [[TMP77]], float [[TMP75]], float [[TMP76]]
; THRESH-NEXT: [[TMP79:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 26), align 8
; THRESH-NEXT: [[TMP80:%.*]] = fcmp fast ogt float [[TMP78]], [[TMP79]]
; THRESH-NEXT: [[TMP81:%.*]] = select i1 [[TMP80]], float [[TMP78]], float [[TMP79]]
; THRESH-NEXT: [[TMP82:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 27), align 4
; THRESH-NEXT: [[TMP83:%.*]] = fcmp fast ogt float [[TMP81]], [[TMP82]]
; THRESH-NEXT: [[TMP84:%.*]] = select i1 [[TMP83]], float [[TMP81]], float [[TMP82]]
; THRESH-NEXT: [[TMP85:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 28), align 16
; THRESH-NEXT: [[TMP86:%.*]] = fcmp fast ogt float [[TMP84]], [[TMP85]]
; THRESH-NEXT: [[TMP87:%.*]] = select i1 [[TMP86]], float [[TMP84]], float [[TMP85]]
; THRESH-NEXT: [[TMP88:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 29), align 4
; THRESH-NEXT: [[TMP89:%.*]] = fcmp fast ogt float [[TMP87]], [[TMP88]]
; THRESH-NEXT: [[TMP90:%.*]] = select i1 [[TMP89]], float [[TMP87]], float [[TMP88]]
; THRESH-NEXT: [[TMP91:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 30), align 8
; THRESH-NEXT: [[TMP92:%.*]] = fcmp fast ogt float [[TMP90]], [[TMP91]]
; THRESH-NEXT: [[TMP93:%.*]] = select i1 [[TMP92]], float [[TMP90]], float [[TMP91]]
; THRESH-NEXT: [[TMP94:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 31), align 4
; THRESH-NEXT: [[TMP95:%.*]] = fcmp fast ogt float [[TMP93]], [[TMP94]]
; THRESH-NEXT: [[TMP96:%.*]] = select i1 [[TMP95]], float [[TMP93]], float [[TMP94]]
; THRESH-NEXT: ret float [[TMP96]]
;
%2 = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 0), align 16
%3 = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 1), align 4
%4 = fcmp fast ogt float %2, %3
%5 = select i1 %4, float %2, float %3
%6 = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 2), align 8
%7 = fcmp fast ogt float %5, %6
%8 = select i1 %7, float %5, float %6
%9 = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 3), align 4
%10 = fcmp fast ogt float %8, %9
%11 = select i1 %10, float %8, float %9
%12 = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 4), align 16
%13 = fcmp fast ogt float %11, %12
%14 = select i1 %13, float %11, float %12
%15 = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 5), align 4
%16 = fcmp fast ogt float %14, %15
%17 = select i1 %16, float %14, float %15
%18 = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 6), align 8
%19 = fcmp fast ogt float %17, %18
%20 = select i1 %19, float %17, float %18
%21 = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 7), align 4
%22 = fcmp fast ogt float %20, %21
%23 = select i1 %22, float %20, float %21
%24 = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 8), align 16
%25 = fcmp fast ogt float %23, %24
%26 = select i1 %25, float %23, float %24
%27 = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 9), align 4
%28 = fcmp fast ogt float %26, %27
%29 = select i1 %28, float %26, float %27
%30 = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 10), align 8
%31 = fcmp fast ogt float %29, %30
%32 = select i1 %31, float %29, float %30
%33 = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 11), align 4
%34 = fcmp fast ogt float %32, %33
%35 = select i1 %34, float %32, float %33
%36 = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 12), align 16
%37 = fcmp fast ogt float %35, %36
%38 = select i1 %37, float %35, float %36
%39 = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 13), align 4
%40 = fcmp fast ogt float %38, %39
%41 = select i1 %40, float %38, float %39
%42 = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 14), align 8
%43 = fcmp fast ogt float %41, %42
%44 = select i1 %43, float %41, float %42
%45 = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 15), align 4
%46 = fcmp fast ogt float %44, %45
%47 = select i1 %46, float %44, float %45
%48 = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 16), align 16
%49 = fcmp fast ogt float %47, %48
%50 = select i1 %49, float %47, float %48
%51 = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 17), align 4
%52 = fcmp fast ogt float %50, %51
%53 = select i1 %52, float %50, float %51
%54 = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 18), align 8
%55 = fcmp fast ogt float %53, %54
%56 = select i1 %55, float %53, float %54
%57 = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 19), align 4
%58 = fcmp fast ogt float %56, %57
%59 = select i1 %58, float %56, float %57
%60 = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 20), align 16
%61 = fcmp fast ogt float %59, %60
%62 = select i1 %61, float %59, float %60
%63 = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 21), align 4
%64 = fcmp fast ogt float %62, %63
%65 = select i1 %64, float %62, float %63
%66 = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 22), align 8
%67 = fcmp fast ogt float %65, %66
%68 = select i1 %67, float %65, float %66
%69 = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 23), align 4
%70 = fcmp fast ogt float %68, %69
%71 = select i1 %70, float %68, float %69
%72 = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 24), align 16
%73 = fcmp fast ogt float %71, %72
%74 = select i1 %73, float %71, float %72
%75 = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 25), align 4
%76 = fcmp fast ogt float %74, %75
%77 = select i1 %76, float %74, float %75
%78 = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 26), align 8
%79 = fcmp fast ogt float %77, %78
%80 = select i1 %79, float %77, float %78
%81 = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 27), align 4
%82 = fcmp fast ogt float %80, %81
%83 = select i1 %82, float %80, float %81
%84 = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 28), align 16
%85 = fcmp fast ogt float %83, %84
%86 = select i1 %85, float %83, float %84
%87 = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 29), align 4
%88 = fcmp fast ogt float %86, %87
%89 = select i1 %88, float %86, float %87
%90 = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 30), align 8
%91 = fcmp fast ogt float %89, %90
%92 = select i1 %91, float %89, float %90
%93 = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 31), align 4
%94 = fcmp fast ogt float %92, %93
%95 = select i1 %94, float %92, float %93
ret float %95
}
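; As @maxi8, except the initial icmp feeds both the reduction's first select
; and a second select that is stored to @var, so it has multiple uses. The
; targets diverge: SSE keeps everything scalar, while AVX/AVX2 vectorize four
; of the single-use elements into a v4i32 smax reduction, handle the remaining
; elements as scalar maxes, and fold the multi-use head value back in via the
; OP_EXTRA compare/select pair.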
define i32 @maxi8_multiple_uses(i32) {
; SSE-LABEL: @maxi8_multiple_uses(
; SSE-NEXT: [[TMP2:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 0), align 16
; SSE-NEXT: [[TMP3:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 1), align 4
; SSE-NEXT: [[TMP4:%.*]] = icmp sgt i32 [[TMP2]], [[TMP3]]
; SSE-NEXT: [[TMP5:%.*]] = select i1 [[TMP4]], i32 [[TMP2]], i32 [[TMP3]]
; SSE-NEXT: [[TMP6:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 2), align 8
; SSE-NEXT: [[TMP7:%.*]] = icmp sgt i32 [[TMP5]], [[TMP6]]
; SSE-NEXT: [[TMP8:%.*]] = select i1 [[TMP7]], i32 [[TMP5]], i32 [[TMP6]]
; SSE-NEXT: [[TMP9:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 3), align 4
; SSE-NEXT: [[TMP10:%.*]] = icmp sgt i32 [[TMP8]], [[TMP9]]
; SSE-NEXT: [[TMP11:%.*]] = select i1 [[TMP10]], i32 [[TMP8]], i32 [[TMP9]]
; SSE-NEXT: [[TMP12:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 4), align 16
; SSE-NEXT: [[TMP13:%.*]] = icmp sgt i32 [[TMP11]], [[TMP12]]
; SSE-NEXT: [[TMP14:%.*]] = select i1 [[TMP13]], i32 [[TMP11]], i32 [[TMP12]]
; SSE-NEXT: [[TMP15:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 5), align 4
; SSE-NEXT: [[TMP16:%.*]] = icmp sgt i32 [[TMP14]], [[TMP15]]
; SSE-NEXT: [[TMP17:%.*]] = select i1 [[TMP16]], i32 [[TMP14]], i32 [[TMP15]]
; SSE-NEXT: [[TMP18:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 6), align 8
; SSE-NEXT: [[TMP19:%.*]] = icmp sgt i32 [[TMP17]], [[TMP18]]
; SSE-NEXT: [[TMP20:%.*]] = select i1 [[TMP19]], i32 [[TMP17]], i32 [[TMP18]]
; SSE-NEXT: [[TMP21:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 7), align 4
; SSE-NEXT: [[TMP22:%.*]] = icmp sgt i32 [[TMP20]], [[TMP21]]
; SSE-NEXT: [[TMP23:%.*]] = select i1 [[TMP22]], i32 [[TMP20]], i32 [[TMP21]]
; SSE-NEXT: [[TMP24:%.*]] = select i1 [[TMP4]], i32 3, i32 4
; SSE-NEXT: store i32 [[TMP24]], i32* @var, align 8
; SSE-NEXT: ret i32 [[TMP23]]
;
; AVX-LABEL: @maxi8_multiple_uses(
; AVX-NEXT: [[TMP2:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 0), align 16
; AVX-NEXT: [[TMP3:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 1), align 4
; AVX-NEXT: [[TMP4:%.*]] = icmp sgt i32 [[TMP2]], [[TMP3]]
; AVX-NEXT: [[TMP5:%.*]] = select i1 [[TMP4]], i32 [[TMP2]], i32 [[TMP3]]
; AVX-NEXT: [[TMP6:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 2) to <4 x i32>*), align 8
; AVX-NEXT: [[TMP7:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 6), align 8
; AVX-NEXT: [[TMP8:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 7), align 4
; AVX-NEXT: [[TMP9:%.*]] = call i32 @llvm.vector.reduce.smax.v4i32(<4 x i32> [[TMP6]])
; AVX-NEXT: [[TMP10:%.*]] = icmp sgt i32 [[TMP9]], [[TMP7]]
; AVX-NEXT: [[TMP11:%.*]] = select i1 [[TMP10]], i32 [[TMP9]], i32 [[TMP7]]
; AVX-NEXT: [[TMP12:%.*]] = icmp sgt i32 [[TMP11]], [[TMP8]]
; AVX-NEXT: [[TMP13:%.*]] = select i1 [[TMP12]], i32 [[TMP11]], i32 [[TMP8]]
; AVX-NEXT: [[OP_EXTRA:%.*]] = icmp sgt i32 [[TMP13]], [[TMP5]]
; AVX-NEXT: [[OP_EXTRA1:%.*]] = select i1 [[OP_EXTRA]], i32 [[TMP13]], i32 [[TMP5]]
; AVX-NEXT: [[TMP14:%.*]] = select i1 [[TMP4]], i32 3, i32 4
; AVX-NEXT: store i32 [[TMP14]], i32* @var, align 8
; AVX-NEXT: ret i32 [[OP_EXTRA1]]
;
; AVX2-LABEL: @maxi8_multiple_uses(
; AVX2-NEXT: [[TMP2:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 0), align 16
; AVX2-NEXT: [[TMP3:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 1), align 4
; AVX2-NEXT: [[TMP4:%.*]] = icmp sgt i32 [[TMP2]], [[TMP3]]
; AVX2-NEXT: [[TMP5:%.*]] = select i1 [[TMP4]], i32 [[TMP2]], i32 [[TMP3]]
; AVX2-NEXT: [[TMP6:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 2) to <4 x i32>*), align 8
; AVX2-NEXT: [[TMP7:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 6), align 8
; AVX2-NEXT: [[TMP8:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 7), align 4
; AVX2-NEXT: [[TMP9:%.*]] = call i32 @llvm.vector.reduce.smax.v4i32(<4 x i32> [[TMP6]])
; AVX2-NEXT: [[TMP10:%.*]] = icmp sgt i32 [[TMP9]], [[TMP7]]
; AVX2-NEXT: [[TMP11:%.*]] = select i1 [[TMP10]], i32 [[TMP9]], i32 [[TMP7]]
; AVX2-NEXT: [[TMP12:%.*]] = icmp sgt i32 [[TMP11]], [[TMP8]]
; AVX2-NEXT: [[TMP13:%.*]] = select i1 [[TMP12]], i32 [[TMP11]], i32 [[TMP8]]
; AVX2-NEXT: [[OP_EXTRA:%.*]] = icmp sgt i32 [[TMP13]], [[TMP5]]
; AVX2-NEXT: [[OP_EXTRA1:%.*]] = select i1 [[OP_EXTRA]], i32 [[TMP13]], i32 [[TMP5]]
; AVX2-NEXT: [[TMP14:%.*]] = select i1 [[TMP4]], i32 3, i32 4
; AVX2-NEXT: store i32 [[TMP14]], i32* @var, align 8
; AVX2-NEXT: ret i32 [[OP_EXTRA1]]
;
; THRESH-LABEL: @maxi8_multiple_uses(
; THRESH-NEXT: [[TMP2:%.*]] = load <2 x i32>, <2 x i32>* bitcast ([32 x i32]* @arr to <2 x i32>*), align 16
; THRESH-NEXT: [[TMP3:%.*]] = extractelement <2 x i32> [[TMP2]], i32 0
; THRESH-NEXT: [[TMP4:%.*]] = extractelement <2 x i32> [[TMP2]], i32 1
; THRESH-NEXT: [[TMP5:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 2) to <4 x i32>*), align 8
; THRESH-NEXT: [[TMP6:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 6), align 8
; THRESH-NEXT: [[TMP7:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 7), align 4
; THRESH-NEXT: [[TMP8:%.*]] = call i32 @llvm.vector.reduce.smax.v4i32(<4 x i32> [[TMP5]])
; THRESH-NEXT: [[TMP9:%.*]] = icmp sgt i32 [[TMP8]], [[TMP6]]
; THRESH-NEXT: [[TMP10:%.*]] = select i1 [[TMP9]], i32 [[TMP8]], i32 [[TMP6]]
; THRESH-NEXT: [[TMP11:%.*]] = insertelement <2 x i32> poison, i32 [[TMP10]], i32 0
; THRESH-NEXT: [[TMP12:%.*]] = insertelement <2 x i32> [[TMP11]], i32 [[TMP3]], i32 1
; THRESH-NEXT: [[TMP13:%.*]] = insertelement <2 x i32> poison, i32 [[TMP7]], i32 0
; THRESH-NEXT: [[TMP14:%.*]] = insertelement <2 x i32> [[TMP13]], i32 [[TMP4]], i32 1
; THRESH-NEXT: [[TMP15:%.*]] = icmp sgt <2 x i32> [[TMP12]], [[TMP14]]
; THRESH-NEXT: [[TMP16:%.*]] = select <2 x i1> [[TMP15]], <2 x i32> [[TMP12]], <2 x i32> [[TMP14]]
; THRESH-NEXT: [[TMP17:%.*]] = extractelement <2 x i32> [[TMP16]], i32 0
; THRESH-NEXT: [[TMP18:%.*]] = extractelement <2 x i32> [[TMP16]], i32 1
; THRESH-NEXT: [[OP_EXTRA:%.*]] = icmp sgt i32 [[TMP17]], [[TMP18]]
; THRESH-NEXT: [[OP_EXTRA1:%.*]] = select i1 [[OP_EXTRA]], i32 [[TMP17]], i32 [[TMP18]]
; THRESH-NEXT: [[TMP19:%.*]] = extractelement <2 x i1> [[TMP15]], i32 1
; THRESH-NEXT: [[TMP20:%.*]] = select i1 [[TMP19]], i32 3, i32 4
; THRESH-NEXT: store i32 [[TMP20]], i32* @var, align 8
; THRESH-NEXT: ret i32 [[OP_EXTRA1]]
;
%2 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 0), align 16
%3 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 1), align 4
%4 = icmp sgt i32 %2, %3
%5 = select i1 %4, i32 %2, i32 %3
%6 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 2), align 8
%7 = icmp sgt i32 %5, %6
%8 = select i1 %7, i32 %5, i32 %6
%9 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 3), align 4
%10 = icmp sgt i32 %8, %9
%11 = select i1 %10, i32 %8, i32 %9
%12 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 4), align 16
%13 = icmp sgt i32 %11, %12
%14 = select i1 %13, i32 %11, i32 %12
%15 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 5), align 4
%16 = icmp sgt i32 %14, %15
%17 = select i1 %16, i32 %14, i32 %15
%18 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 6), align 8
%19 = icmp sgt i32 %17, %18
%20 = select i1 %19, i32 %17, i32 %18
%21 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 7), align 4
%22 = icmp sgt i32 %20, %21
%23 = select i1 %22, i32 %20, i32 %21
%24 = select i1 %4, i32 3, i32 4
store i32 %24, i32* @var, align 8
ret i32 %23
}
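
; The same pattern over six elements, with the extra use on a compare in
; the middle of the chain. That use blocks the reduction entirely:
; DEFAULT stays scalar, and THRESH vectorizes only the load of the first
; pair of elements.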
define i32 @maxi8_multiple_uses2(i32) {
; DEFAULT-LABEL: @maxi8_multiple_uses2(
; DEFAULT-NEXT: [[TMP2:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 0), align 16
; DEFAULT-NEXT: [[TMP3:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 1), align 4
; DEFAULT-NEXT: [[TMP4:%.*]] = icmp sgt i32 [[TMP2]], [[TMP3]]
; DEFAULT-NEXT: [[TMP5:%.*]] = select i1 [[TMP4]], i32 [[TMP2]], i32 [[TMP3]]
; DEFAULT-NEXT: [[TMP6:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 2), align 8
; DEFAULT-NEXT: [[TMP7:%.*]] = icmp sgt i32 [[TMP5]], [[TMP6]]
; DEFAULT-NEXT: [[TMP8:%.*]] = select i1 [[TMP7]], i32 [[TMP5]], i32 [[TMP6]]
; DEFAULT-NEXT: [[TMP9:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 3), align 4
; DEFAULT-NEXT: [[TMP10:%.*]] = icmp sgt i32 [[TMP8]], [[TMP9]]
; DEFAULT-NEXT: [[TMP11:%.*]] = select i1 [[TMP10]], i32 [[TMP8]], i32 [[TMP9]]
; DEFAULT-NEXT: [[TMP12:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 4), align 16
; DEFAULT-NEXT: [[TMP13:%.*]] = icmp sgt i32 [[TMP11]], [[TMP12]]
; DEFAULT-NEXT: [[TMP14:%.*]] = select i1 [[TMP13]], i32 [[TMP11]], i32 [[TMP12]]
; DEFAULT-NEXT: [[TMP15:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 5), align 4
; DEFAULT-NEXT: [[TMP16:%.*]] = icmp sgt i32 [[TMP14]], [[TMP15]]
; DEFAULT-NEXT: [[TMP17:%.*]] = select i1 [[TMP16]], i32 [[TMP14]], i32 [[TMP15]]
; DEFAULT-NEXT: [[TMP18:%.*]] = select i1 [[TMP10]], i32 3, i32 4
; DEFAULT-NEXT: store i32 [[TMP18]], i32* @var, align 8
; DEFAULT-NEXT: ret i32 [[TMP17]]
;
; THRESH-LABEL: @maxi8_multiple_uses2(
; THRESH-NEXT: [[TMP2:%.*]] = load <2 x i32>, <2 x i32>* bitcast ([32 x i32]* @arr to <2 x i32>*), align 16
; THRESH-NEXT: [[TMP3:%.*]] = extractelement <2 x i32> [[TMP2]], i32 0
; THRESH-NEXT: [[TMP4:%.*]] = extractelement <2 x i32> [[TMP2]], i32 1
; THRESH-NEXT: [[TMP5:%.*]] = icmp sgt i32 [[TMP3]], [[TMP4]]
; THRESH-NEXT: [[TMP6:%.*]] = select i1 [[TMP5]], i32 [[TMP3]], i32 [[TMP4]]
; THRESH-NEXT: [[TMP7:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 2), align 8
; THRESH-NEXT: [[TMP8:%.*]] = icmp sgt i32 [[TMP6]], [[TMP7]]
; THRESH-NEXT: [[TMP9:%.*]] = select i1 [[TMP8]], i32 [[TMP6]], i32 [[TMP7]]
; THRESH-NEXT: [[TMP10:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 3), align 4
; THRESH-NEXT: [[TMP11:%.*]] = icmp sgt i32 [[TMP9]], [[TMP10]]
; THRESH-NEXT: [[TMP12:%.*]] = select i1 [[TMP11]], i32 [[TMP9]], i32 [[TMP10]]
; THRESH-NEXT: [[TMP13:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 4), align 16
; THRESH-NEXT: [[TMP14:%.*]] = icmp sgt i32 [[TMP12]], [[TMP13]]
; THRESH-NEXT: [[TMP15:%.*]] = select i1 [[TMP14]], i32 [[TMP12]], i32 [[TMP13]]
; THRESH-NEXT: [[TMP16:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 5), align 4
; THRESH-NEXT: [[TMP17:%.*]] = icmp sgt i32 [[TMP15]], [[TMP16]]
; THRESH-NEXT: [[TMP18:%.*]] = select i1 [[TMP17]], i32 [[TMP15]], i32 [[TMP16]]
; THRESH-NEXT: [[TMP19:%.*]] = select i1 [[TMP11]], i32 3, i32 4
; THRESH-NEXT: store i32 [[TMP19]], i32* @var, align 8
; THRESH-NEXT: ret i32 [[TMP18]]
;
%2 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 0), align 16
%3 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 1), align 4
%4 = icmp sgt i32 %2, %3
%5 = select i1 %4, i32 %2, i32 %3
%6 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 2), align 8
%7 = icmp sgt i32 %5, %6
%8 = select i1 %7, i32 %5, i32 %6
%9 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 3), align 4
%10 = icmp sgt i32 %8, %9
%11 = select i1 %10, i32 %8, i32 %9
%12 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 4), align 16
%13 = icmp sgt i32 %11, %12
%14 = select i1 %13, i32 %11, i32 %12
%15 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 5), align 4
%16 = icmp sgt i32 %14, %15
%17 = select i1 %16, i32 %14, i32 %15
%18 = select i1 %10, i32 3, i32 4
store i32 %18, i32* @var, align 8
ret i32 %17
}
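
; The first compare is defined in the entry block while the rest of the
; reduction lives in %pp, so the compare and the reduction root have
; different parents. AVX/AVX2/THRESH still vectorize elements 2..5 into a
; v4i32 smax reduction; SSE stays scalar.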
define i32 @maxi8_wrong_parent(i32) {
; SSE-LABEL: @maxi8_wrong_parent(
; SSE-NEXT: [[TMP2:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 0), align 16
; SSE-NEXT: [[TMP3:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 1), align 4
; SSE-NEXT: [[TMP4:%.*]] = icmp sgt i32 [[TMP2]], [[TMP3]]
; SSE-NEXT: br label [[PP:%.*]]
; SSE: pp:
; SSE-NEXT: [[TMP5:%.*]] = select i1 [[TMP4]], i32 [[TMP2]], i32 [[TMP3]]
; SSE-NEXT: [[TMP6:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 2), align 8
; SSE-NEXT: [[TMP7:%.*]] = icmp sgt i32 [[TMP5]], [[TMP6]]
; SSE-NEXT: [[TMP8:%.*]] = select i1 [[TMP7]], i32 [[TMP5]], i32 [[TMP6]]
; SSE-NEXT: [[TMP9:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 3), align 4
; SSE-NEXT: [[TMP10:%.*]] = icmp sgt i32 [[TMP8]], [[TMP9]]
; SSE-NEXT: [[TMP11:%.*]] = select i1 [[TMP10]], i32 [[TMP8]], i32 [[TMP9]]
; SSE-NEXT: [[TMP12:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 4), align 16
; SSE-NEXT: [[TMP13:%.*]] = icmp sgt i32 [[TMP11]], [[TMP12]]
; SSE-NEXT: [[TMP14:%.*]] = select i1 [[TMP13]], i32 [[TMP11]], i32 [[TMP12]]
; SSE-NEXT: [[TMP15:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 5), align 4
; SSE-NEXT: [[TMP16:%.*]] = icmp sgt i32 [[TMP14]], [[TMP15]]
; SSE-NEXT: [[TMP17:%.*]] = select i1 [[TMP16]], i32 [[TMP14]], i32 [[TMP15]]
; SSE-NEXT: [[TMP18:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 6), align 8
; SSE-NEXT: [[TMP19:%.*]] = icmp sgt i32 [[TMP17]], [[TMP18]]
; SSE-NEXT: [[TMP20:%.*]] = select i1 [[TMP19]], i32 [[TMP17]], i32 [[TMP18]]
; SSE-NEXT: [[TMP21:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 7), align 4
; SSE-NEXT: [[TMP22:%.*]] = icmp sgt i32 [[TMP20]], [[TMP21]]
; SSE-NEXT: [[TMP23:%.*]] = select i1 [[TMP22]], i32 [[TMP20]], i32 [[TMP21]]
; SSE-NEXT: ret i32 [[TMP23]]
;
; AVX-LABEL: @maxi8_wrong_parent(
; AVX-NEXT: [[TMP2:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 0), align 16
; AVX-NEXT: [[TMP3:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 1), align 4
; AVX-NEXT: [[TMP4:%.*]] = icmp sgt i32 [[TMP2]], [[TMP3]]
; AVX-NEXT: br label [[PP:%.*]]
; AVX: pp:
; AVX-NEXT: [[TMP5:%.*]] = select i1 [[TMP4]], i32 [[TMP2]], i32 [[TMP3]]
; AVX-NEXT: [[TMP6:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 2) to <4 x i32>*), align 8
; AVX-NEXT: [[TMP7:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 6), align 8
; AVX-NEXT: [[TMP8:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 7), align 4
; AVX-NEXT: [[TMP9:%.*]] = call i32 @llvm.vector.reduce.smax.v4i32(<4 x i32> [[TMP6]])
; AVX-NEXT: [[TMP10:%.*]] = icmp sgt i32 [[TMP9]], [[TMP7]]
; AVX-NEXT: [[TMP11:%.*]] = select i1 [[TMP10]], i32 [[TMP9]], i32 [[TMP7]]
; AVX-NEXT: [[TMP12:%.*]] = icmp sgt i32 [[TMP11]], [[TMP8]]
; AVX-NEXT: [[TMP13:%.*]] = select i1 [[TMP12]], i32 [[TMP11]], i32 [[TMP8]]
; AVX-NEXT: [[OP_EXTRA:%.*]] = icmp sgt i32 [[TMP13]], [[TMP5]]
; AVX-NEXT: [[OP_EXTRA1:%.*]] = select i1 [[OP_EXTRA]], i32 [[TMP13]], i32 [[TMP5]]
; AVX-NEXT: ret i32 [[OP_EXTRA1]]
;
; AVX2-LABEL: @maxi8_wrong_parent(
; AVX2-NEXT: [[TMP2:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 0), align 16
; AVX2-NEXT: [[TMP3:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 1), align 4
; AVX2-NEXT: [[TMP4:%.*]] = icmp sgt i32 [[TMP2]], [[TMP3]]
; AVX2-NEXT: br label [[PP:%.*]]
; AVX2: pp:
; AVX2-NEXT: [[TMP5:%.*]] = select i1 [[TMP4]], i32 [[TMP2]], i32 [[TMP3]]
; AVX2-NEXT: [[TMP6:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 2) to <4 x i32>*), align 8
; AVX2-NEXT: [[TMP7:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 6), align 8
; AVX2-NEXT: [[TMP8:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 7), align 4
; AVX2-NEXT: [[TMP9:%.*]] = call i32 @llvm.vector.reduce.smax.v4i32(<4 x i32> [[TMP6]])
; AVX2-NEXT: [[TMP10:%.*]] = icmp sgt i32 [[TMP9]], [[TMP7]]
; AVX2-NEXT: [[TMP11:%.*]] = select i1 [[TMP10]], i32 [[TMP9]], i32 [[TMP7]]
; AVX2-NEXT: [[TMP12:%.*]] = icmp sgt i32 [[TMP11]], [[TMP8]]
; AVX2-NEXT: [[TMP13:%.*]] = select i1 [[TMP12]], i32 [[TMP11]], i32 [[TMP8]]
; AVX2-NEXT: [[OP_EXTRA:%.*]] = icmp sgt i32 [[TMP13]], [[TMP5]]
; AVX2-NEXT: [[OP_EXTRA1:%.*]] = select i1 [[OP_EXTRA]], i32 [[TMP13]], i32 [[TMP5]]
; AVX2-NEXT: ret i32 [[OP_EXTRA1]]
;
; THRESH-LABEL: @maxi8_wrong_parent(
; THRESH-NEXT: [[TMP2:%.*]] = load <2 x i32>, <2 x i32>* bitcast ([32 x i32]* @arr to <2 x i32>*), align 16
; THRESH-NEXT: [[TMP3:%.*]] = extractelement <2 x i32> [[TMP2]], i32 0
; THRESH-NEXT: [[TMP4:%.*]] = extractelement <2 x i32> [[TMP2]], i32 1
; THRESH-NEXT: [[TMP5:%.*]] = icmp sgt i32 [[TMP3]], [[TMP4]]
; THRESH-NEXT: br label [[PP:%.*]]
; THRESH: pp:
; THRESH-NEXT: [[TMP6:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 2) to <4 x i32>*), align 8
; THRESH-NEXT: [[TMP7:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 6), align 8
; THRESH-NEXT: [[TMP8:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 7), align 4
; THRESH-NEXT: [[TMP9:%.*]] = call i32 @llvm.vector.reduce.smax.v4i32(<4 x i32> [[TMP6]])
; THRESH-NEXT: [[TMP10:%.*]] = icmp sgt i32 [[TMP9]], [[TMP7]]
; THRESH-NEXT: [[TMP11:%.*]] = select i1 [[TMP10]], i32 [[TMP9]], i32 [[TMP7]]
; THRESH-NEXT: [[TMP12:%.*]] = icmp sgt i32 [[TMP11]], [[TMP8]]
; THRESH-NEXT: [[TMP13:%.*]] = insertelement <2 x i1> poison, i1 [[TMP12]], i32 0
; THRESH-NEXT: [[TMP14:%.*]] = insertelement <2 x i1> [[TMP13]], i1 [[TMP5]], i32 1
; THRESH-NEXT: [[TMP15:%.*]] = insertelement <2 x i32> poison, i32 [[TMP11]], i32 0
; THRESH-NEXT: [[TMP16:%.*]] = insertelement <2 x i32> [[TMP15]], i32 [[TMP3]], i32 1
; THRESH-NEXT: [[TMP17:%.*]] = insertelement <2 x i32> poison, i32 [[TMP8]], i32 0
; THRESH-NEXT: [[TMP18:%.*]] = insertelement <2 x i32> [[TMP17]], i32 [[TMP4]], i32 1
; THRESH-NEXT: [[TMP19:%.*]] = select <2 x i1> [[TMP14]], <2 x i32> [[TMP16]], <2 x i32> [[TMP18]]
; THRESH-NEXT: [[TMP20:%.*]] = extractelement <2 x i32> [[TMP19]], i32 0
; THRESH-NEXT: [[TMP21:%.*]] = extractelement <2 x i32> [[TMP19]], i32 1
; THRESH-NEXT: [[OP_EXTRA:%.*]] = icmp sgt i32 [[TMP20]], [[TMP21]]
; THRESH-NEXT: [[OP_EXTRA1:%.*]] = select i1 [[OP_EXTRA]], i32 [[TMP20]], i32 [[TMP21]]
; THRESH-NEXT: ret i32 [[OP_EXTRA1]]
;
%2 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 0), align 16
%3 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 1), align 4
%4 = icmp sgt i32 %2, %3
br label %pp
pp:
%5 = select i1 %4, i32 %2, i32 %3
%6 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 2), align 8
%7 = icmp sgt i32 %5, %6
%8 = select i1 %7, i32 %5, i32 %6
%9 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 3), align 4
%10 = icmp sgt i32 %8, %9
%11 = select i1 %10, i32 %8, i32 %9
%12 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 4), align 16
%13 = icmp sgt i32 %11, %12
%14 = select i1 %13, i32 %11, i32 %12
%15 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 5), align 4
%16 = icmp sgt i32 %14, %15
%17 = select i1 %16, i32 %14, i32 %15
%18 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 6), align 8
%19 = icmp sgt i32 %17, %18
%20 = select i1 %19, i32 %17, i32 %18
%21 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 7), align 4
%22 = icmp sgt i32 %20, %21
%23 = select i1 %22, i32 %20, i32 %21
ret i32 %23
}

; PR38191 - We don't handle array-of-pointer reductions; the icmp ugt and
; select chain over i32* values stays scalar (THRESH vectorizes only the
; load of the first pair).
define i32* @maxp8(i32) {
; DEFAULT-LABEL: @maxp8(
; DEFAULT-NEXT: [[TMP2:%.*]] = load i32*, i32** getelementptr inbounds ([32 x i32*], [32 x i32*]* @arrp, i64 0, i64 0), align 16
; DEFAULT-NEXT: [[TMP3:%.*]] = load i32*, i32** getelementptr inbounds ([32 x i32*], [32 x i32*]* @arrp, i64 0, i64 1), align 4
; DEFAULT-NEXT: [[TMP4:%.*]] = icmp ugt i32* [[TMP2]], [[TMP3]]
; DEFAULT-NEXT: [[TMP5:%.*]] = select i1 [[TMP4]], i32* [[TMP2]], i32* [[TMP3]]
; DEFAULT-NEXT: [[TMP6:%.*]] = load i32*, i32** getelementptr inbounds ([32 x i32*], [32 x i32*]* @arrp, i64 0, i64 2), align 8
; DEFAULT-NEXT: [[TMP7:%.*]] = icmp ugt i32* [[TMP5]], [[TMP6]]
; DEFAULT-NEXT: [[TMP8:%.*]] = select i1 [[TMP7]], i32* [[TMP5]], i32* [[TMP6]]
; DEFAULT-NEXT: [[TMP9:%.*]] = load i32*, i32** getelementptr inbounds ([32 x i32*], [32 x i32*]* @arrp, i64 0, i64 3), align 4
; DEFAULT-NEXT: [[TMP10:%.*]] = icmp ugt i32* [[TMP8]], [[TMP9]]
; DEFAULT-NEXT: [[TMP11:%.*]] = select i1 [[TMP10]], i32* [[TMP8]], i32* [[TMP9]]
; DEFAULT-NEXT: [[TMP12:%.*]] = load i32*, i32** getelementptr inbounds ([32 x i32*], [32 x i32*]* @arrp, i64 0, i64 4), align 16
; DEFAULT-NEXT: [[TMP13:%.*]] = icmp ugt i32* [[TMP11]], [[TMP12]]
; DEFAULT-NEXT: [[TMP14:%.*]] = select i1 [[TMP13]], i32* [[TMP11]], i32* [[TMP12]]
; DEFAULT-NEXT: [[TMP15:%.*]] = load i32*, i32** getelementptr inbounds ([32 x i32*], [32 x i32*]* @arrp, i64 0, i64 5), align 4
; DEFAULT-NEXT: [[TMP16:%.*]] = icmp ugt i32* [[TMP14]], [[TMP15]]
; DEFAULT-NEXT: [[TMP17:%.*]] = select i1 [[TMP16]], i32* [[TMP14]], i32* [[TMP15]]
; DEFAULT-NEXT: [[TMP18:%.*]] = load i32*, i32** getelementptr inbounds ([32 x i32*], [32 x i32*]* @arrp, i64 0, i64 6), align 8
; DEFAULT-NEXT: [[TMP19:%.*]] = icmp ugt i32* [[TMP17]], [[TMP18]]
; DEFAULT-NEXT: [[TMP20:%.*]] = select i1 [[TMP19]], i32* [[TMP17]], i32* [[TMP18]]
; DEFAULT-NEXT: [[TMP21:%.*]] = load i32*, i32** getelementptr inbounds ([32 x i32*], [32 x i32*]* @arrp, i64 0, i64 7), align 4
; DEFAULT-NEXT: [[TMP22:%.*]] = icmp ugt i32* [[TMP20]], [[TMP21]]
; DEFAULT-NEXT: [[TMP23:%.*]] = select i1 [[TMP22]], i32* [[TMP20]], i32* [[TMP21]]
; DEFAULT-NEXT: ret i32* [[TMP23]]
;
; THRESH-LABEL: @maxp8(
; THRESH-NEXT: [[TMP2:%.*]] = load <2 x i32*>, <2 x i32*>* bitcast ([32 x i32*]* @arrp to <2 x i32*>*), align 16
; THRESH-NEXT: [[TMP3:%.*]] = extractelement <2 x i32*> [[TMP2]], i32 0
; THRESH-NEXT: [[TMP4:%.*]] = extractelement <2 x i32*> [[TMP2]], i32 1
; THRESH-NEXT: [[TMP5:%.*]] = icmp ugt i32* [[TMP3]], [[TMP4]]
; THRESH-NEXT: [[TMP6:%.*]] = select i1 [[TMP5]], i32* [[TMP3]], i32* [[TMP4]]
; THRESH-NEXT: [[TMP7:%.*]] = load i32*, i32** getelementptr inbounds ([32 x i32*], [32 x i32*]* @arrp, i64 0, i64 2), align 8
; THRESH-NEXT: [[TMP8:%.*]] = icmp ugt i32* [[TMP6]], [[TMP7]]
; THRESH-NEXT: [[TMP9:%.*]] = select i1 [[TMP8]], i32* [[TMP6]], i32* [[TMP7]]
; THRESH-NEXT: [[TMP10:%.*]] = load i32*, i32** getelementptr inbounds ([32 x i32*], [32 x i32*]* @arrp, i64 0, i64 3), align 4
; THRESH-NEXT: [[TMP11:%.*]] = icmp ugt i32* [[TMP9]], [[TMP10]]
; THRESH-NEXT: [[TMP12:%.*]] = select i1 [[TMP11]], i32* [[TMP9]], i32* [[TMP10]]
; THRESH-NEXT: [[TMP13:%.*]] = load i32*, i32** getelementptr inbounds ([32 x i32*], [32 x i32*]* @arrp, i64 0, i64 4), align 16
; THRESH-NEXT: [[TMP14:%.*]] = icmp ugt i32* [[TMP12]], [[TMP13]]
; THRESH-NEXT: [[TMP15:%.*]] = select i1 [[TMP14]], i32* [[TMP12]], i32* [[TMP13]]
; THRESH-NEXT: [[TMP16:%.*]] = load i32*, i32** getelementptr inbounds ([32 x i32*], [32 x i32*]* @arrp, i64 0, i64 5), align 4
; THRESH-NEXT: [[TMP17:%.*]] = icmp ugt i32* [[TMP15]], [[TMP16]]
; THRESH-NEXT: [[TMP18:%.*]] = select i1 [[TMP17]], i32* [[TMP15]], i32* [[TMP16]]
; THRESH-NEXT: [[TMP19:%.*]] = load i32*, i32** getelementptr inbounds ([32 x i32*], [32 x i32*]* @arrp, i64 0, i64 6), align 8
; THRESH-NEXT: [[TMP20:%.*]] = icmp ugt i32* [[TMP18]], [[TMP19]]
; THRESH-NEXT: [[TMP21:%.*]] = select i1 [[TMP20]], i32* [[TMP18]], i32* [[TMP19]]
; THRESH-NEXT: [[TMP22:%.*]] = load i32*, i32** getelementptr inbounds ([32 x i32*], [32 x i32*]* @arrp, i64 0, i64 7), align 4
; THRESH-NEXT: [[TMP23:%.*]] = icmp ugt i32* [[TMP21]], [[TMP22]]
; THRESH-NEXT: [[TMP24:%.*]] = select i1 [[TMP23]], i32* [[TMP21]], i32* [[TMP22]]
; THRESH-NEXT: ret i32* [[TMP24]]
;
%2 = load i32*, i32** getelementptr inbounds ([32 x i32*], [32 x i32*]* @arrp, i64 0, i64 0), align 16
%3 = load i32*, i32** getelementptr inbounds ([32 x i32*], [32 x i32*]* @arrp, i64 0, i64 1), align 4
%4 = icmp ugt i32* %2, %3
%5 = select i1 %4, i32* %2, i32* %3
%6 = load i32*, i32** getelementptr inbounds ([32 x i32*], [32 x i32*]* @arrp, i64 0, i64 2), align 8
%7 = icmp ugt i32* %5, %6
%8 = select i1 %7, i32* %5, i32* %6
%9 = load i32*, i32** getelementptr inbounds ([32 x i32*], [32 x i32*]* @arrp, i64 0, i64 3), align 4
%10 = icmp ugt i32* %8, %9
%11 = select i1 %10, i32* %8, i32* %9
%12 = load i32*, i32** getelementptr inbounds ([32 x i32*], [32 x i32*]* @arrp, i64 0, i64 4), align 16
%13 = icmp ugt i32* %11, %12
%14 = select i1 %13, i32* %11, i32* %12
%15 = load i32*, i32** getelementptr inbounds ([32 x i32*], [32 x i32*]* @arrp, i64 0, i64 5), align 4
%16 = icmp ugt i32* %14, %15
%17 = select i1 %16, i32* %14, i32* %15
%18 = load i32*, i32** getelementptr inbounds ([32 x i32*], [32 x i32*]* @arrp, i64 0, i64 6), align 8
%19 = icmp ugt i32* %17, %18
%20 = select i1 %19, i32* %17, i32* %18
%21 = load i32*, i32** getelementptr inbounds ([32 x i32*], [32 x i32*]* @arrp, i64 0, i64 7), align 4
%22 = icmp ugt i32* %20, %21
%23 = select i1 %22, i32* %20, i32* %21
ret i32* %23
}
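
; A pairwise reduction tree built from llvm.smax.i32 calls over eight
; consecutive loads is recognized and turned into a single
; llvm.vector.reduce.smax.v8i32 on all targets.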
define i32 @smax_intrinsic_rdx_v8i32(i32* %p0) {
; CHECK-LABEL: @smax_intrinsic_rdx_v8i32(
; CHECK-NEXT: [[P1:%.*]] = getelementptr inbounds i32, i32* [[P0:%.*]], i64 1
; CHECK-NEXT: [[P2:%.*]] = getelementptr inbounds i32, i32* [[P0]], i64 2
; CHECK-NEXT: [[P3:%.*]] = getelementptr inbounds i32, i32* [[P0]], i64 3
; CHECK-NEXT: [[P4:%.*]] = getelementptr inbounds i32, i32* [[P0]], i64 4
; CHECK-NEXT: [[P5:%.*]] = getelementptr inbounds i32, i32* [[P0]], i64 5
; CHECK-NEXT: [[P6:%.*]] = getelementptr inbounds i32, i32* [[P0]], i64 6
; CHECK-NEXT: [[P7:%.*]] = getelementptr inbounds i32, i32* [[P0]], i64 7
; CHECK-NEXT: [[TMP1:%.*]] = bitcast i32* [[P0]] to <8 x i32>*
; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i32>, <8 x i32>* [[TMP1]], align 4
; CHECK-NEXT: [[TMP3:%.*]] = call i32 @llvm.vector.reduce.smax.v8i32(<8 x i32> [[TMP2]])
; CHECK-NEXT: ret i32 [[TMP3]]
;
%p1 = getelementptr inbounds i32, i32* %p0, i64 1
%p2 = getelementptr inbounds i32, i32* %p0, i64 2
%p3 = getelementptr inbounds i32, i32* %p0, i64 3
%p4 = getelementptr inbounds i32, i32* %p0, i64 4
%p5 = getelementptr inbounds i32, i32* %p0, i64 5
%p6 = getelementptr inbounds i32, i32* %p0, i64 6
%p7 = getelementptr inbounds i32, i32* %p0, i64 7
%t0 = load i32, i32* %p0, align 4
%t1 = load i32, i32* %p1, align 4
%t2 = load i32, i32* %p2, align 4
%t3 = load i32, i32* %p3, align 4
%t4 = load i32, i32* %p4, align 4
%t5 = load i32, i32* %p5, align 4
%t6 = load i32, i32* %p6, align 4
%t7 = load i32, i32* %p7, align 4
%m10 = tail call i32 @llvm.smax.i32(i32 %t1, i32 %t0)
%m32 = tail call i32 @llvm.smax.i32(i32 %t3, i32 %t2)
%m54 = tail call i32 @llvm.smax.i32(i32 %t5, i32 %t4)
%m76 = tail call i32 @llvm.smax.i32(i32 %t7, i32 %t6)
%m3210 = tail call i32 @llvm.smax.i32(i32 %m32, i32 %m10)
%m7654 = tail call i32 @llvm.smax.i32(i32 %m76, i32 %m54)
%m = tail call i32 @llvm.smax.i32(i32 %m7654, i32 %m3210)
ret i32 %m
}
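
; The equivalent llvm.smin.i16 tree over eight i16 loads becomes
; llvm.vector.reduce.smin.v8i16 on all targets.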
define i16 @smin_intrinsic_rdx_v8i16(i16* %p0) {
; CHECK-LABEL: @smin_intrinsic_rdx_v8i16(
; CHECK-NEXT: [[P1:%.*]] = getelementptr inbounds i16, i16* [[P0:%.*]], i64 1
; CHECK-NEXT: [[P2:%.*]] = getelementptr inbounds i16, i16* [[P0]], i64 2
; CHECK-NEXT: [[P3:%.*]] = getelementptr inbounds i16, i16* [[P0]], i64 3
; CHECK-NEXT: [[P4:%.*]] = getelementptr inbounds i16, i16* [[P0]], i64 4
; CHECK-NEXT: [[P5:%.*]] = getelementptr inbounds i16, i16* [[P0]], i64 5
; CHECK-NEXT: [[P6:%.*]] = getelementptr inbounds i16, i16* [[P0]], i64 6
; CHECK-NEXT: [[P7:%.*]] = getelementptr inbounds i16, i16* [[P0]], i64 7
; CHECK-NEXT: [[TMP1:%.*]] = bitcast i16* [[P0]] to <8 x i16>*
; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i16>, <8 x i16>* [[TMP1]], align 4
; CHECK-NEXT: [[TMP3:%.*]] = call i16 @llvm.vector.reduce.smin.v8i16(<8 x i16> [[TMP2]])
; CHECK-NEXT: ret i16 [[TMP3]]
;
%p1 = getelementptr inbounds i16, i16* %p0, i64 1
%p2 = getelementptr inbounds i16, i16* %p0, i64 2
%p3 = getelementptr inbounds i16, i16* %p0, i64 3
%p4 = getelementptr inbounds i16, i16* %p0, i64 4
%p5 = getelementptr inbounds i16, i16* %p0, i64 5
%p6 = getelementptr inbounds i16, i16* %p0, i64 6
%p7 = getelementptr inbounds i16, i16* %p0, i64 7
%t0 = load i16, i16* %p0, align 4
%t1 = load i16, i16* %p1, align 4
%t2 = load i16, i16* %p2, align 4
%t3 = load i16, i16* %p3, align 4
%t4 = load i16, i16* %p4, align 4
%t5 = load i16, i16* %p5, align 4
%t6 = load i16, i16* %p6, align 4
%t7 = load i16, i16* %p7, align 4
%m10 = tail call i16 @llvm.smin.i16(i16 %t1, i16 %t0)
%m32 = tail call i16 @llvm.smin.i16(i16 %t3, i16 %t2)
%m54 = tail call i16 @llvm.smin.i16(i16 %t5, i16 %t4)
%m76 = tail call i16 @llvm.smin.i16(i16 %t7, i16 %t6)
%m3210 = tail call i16 @llvm.smin.i16(i16 %m32, i16 %m10)
%m7654 = tail call i16 @llvm.smin.i16(i16 %m76, i16 %m54)
%m = tail call i16 @llvm.smin.i16(i16 %m7654, i16 %m3210)
ret i16 %m
}
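
; A four-element llvm.umax.i64 tree is not profitable under the default
; cost model, so DEFAULT keeps the scalar intrinsic calls; only the
; forced threshold (THRESH) turns it into llvm.vector.reduce.umax.v4i64.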
define i64 @umax_intrinsic_rdx_v4i64(i64* %p0) {
; DEFAULT-LABEL: @umax_intrinsic_rdx_v4i64(
; DEFAULT-NEXT: [[P1:%.*]] = getelementptr inbounds i64, i64* [[P0:%.*]], i64 1
; DEFAULT-NEXT: [[P2:%.*]] = getelementptr inbounds i64, i64* [[P0]], i64 2
; DEFAULT-NEXT: [[P3:%.*]] = getelementptr inbounds i64, i64* [[P0]], i64 3
; DEFAULT-NEXT: [[T0:%.*]] = load i64, i64* [[P0]], align 4
; DEFAULT-NEXT: [[T1:%.*]] = load i64, i64* [[P1]], align 4
; DEFAULT-NEXT: [[T2:%.*]] = load i64, i64* [[P2]], align 4
; DEFAULT-NEXT: [[T3:%.*]] = load i64, i64* [[P3]], align 4
; DEFAULT-NEXT: [[M10:%.*]] = tail call i64 @llvm.umax.i64(i64 [[T1]], i64 [[T0]])
; DEFAULT-NEXT: [[M32:%.*]] = tail call i64 @llvm.umax.i64(i64 [[T3]], i64 [[T2]])
; DEFAULT-NEXT: [[M:%.*]] = tail call i64 @llvm.umax.i64(i64 [[M32]], i64 [[M10]])
; DEFAULT-NEXT: ret i64 [[M]]
;
; THRESH-LABEL: @umax_intrinsic_rdx_v4i64(
; THRESH-NEXT: [[P1:%.*]] = getelementptr inbounds i64, i64* [[P0:%.*]], i64 1
; THRESH-NEXT: [[P2:%.*]] = getelementptr inbounds i64, i64* [[P0]], i64 2
; THRESH-NEXT: [[P3:%.*]] = getelementptr inbounds i64, i64* [[P0]], i64 3
; THRESH-NEXT: [[TMP1:%.*]] = bitcast i64* [[P0]] to <4 x i64>*
; THRESH-NEXT: [[TMP2:%.*]] = load <4 x i64>, <4 x i64>* [[TMP1]], align 4
; THRESH-NEXT: [[TMP3:%.*]] = call i64 @llvm.vector.reduce.umax.v4i64(<4 x i64> [[TMP2]])
; THRESH-NEXT: ret i64 [[TMP3]]
;
%p1 = getelementptr inbounds i64, i64* %p0, i64 1
%p2 = getelementptr inbounds i64, i64* %p0, i64 2
%p3 = getelementptr inbounds i64, i64* %p0, i64 3
%t0 = load i64, i64* %p0, align 4
%t1 = load i64, i64* %p1, align 4
%t2 = load i64, i64* %p2, align 4
%t3 = load i64, i64* %p3, align 4
%m10 = tail call i64 @llvm.umax.i64(i64 %t1, i64 %t0)
%m32 = tail call i64 @llvm.umax.i64(i64 %t3, i64 %t2)
%m = tail call i64 @llvm.umax.i64(i64 %m32, i64 %m10)
ret i64 %m
}
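
; A sixteen-element llvm.umin.i8 tree is reduced to
; llvm.vector.reduce.umin.v16i8 on all targets.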
define i8 @umin_intrinsic_rdx_v16i8(i8* %p0) {
; CHECK-LABEL: @umin_intrinsic_rdx_v16i8(
; CHECK-NEXT: [[P1:%.*]] = getelementptr inbounds i8, i8* [[P0:%.*]], i64 1
; CHECK-NEXT: [[P2:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 2
; CHECK-NEXT: [[P3:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 3
; CHECK-NEXT: [[P4:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 4
; CHECK-NEXT: [[P5:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 5
; CHECK-NEXT: [[P6:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 6
; CHECK-NEXT: [[P7:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 7
; CHECK-NEXT: [[P8:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 8
; CHECK-NEXT: [[P9:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 9
; CHECK-NEXT: [[PA:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 10
; CHECK-NEXT: [[PB:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 11
; CHECK-NEXT: [[PC:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 12
; CHECK-NEXT: [[PD:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 13
; CHECK-NEXT: [[PE:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 14
; CHECK-NEXT: [[PF:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 15
; CHECK-NEXT: [[TMP1:%.*]] = bitcast i8* [[P0]] to <16 x i8>*
; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[TMP1]], align 4
; CHECK-NEXT: [[TMP3:%.*]] = call i8 @llvm.vector.reduce.umin.v16i8(<16 x i8> [[TMP2]])
; CHECK-NEXT: ret i8 [[TMP3]]
;
%p1 = getelementptr inbounds i8, i8* %p0, i64 1
%p2 = getelementptr inbounds i8, i8* %p0, i64 2
%p3 = getelementptr inbounds i8, i8* %p0, i64 3
%p4 = getelementptr inbounds i8, i8* %p0, i64 4
%p5 = getelementptr inbounds i8, i8* %p0, i64 5
%p6 = getelementptr inbounds i8, i8* %p0, i64 6
%p7 = getelementptr inbounds i8, i8* %p0, i64 7
%p8 = getelementptr inbounds i8, i8* %p0, i64 8
%p9 = getelementptr inbounds i8, i8* %p0, i64 9
%pa = getelementptr inbounds i8, i8* %p0, i64 10
%pb = getelementptr inbounds i8, i8* %p0, i64 11
%pc = getelementptr inbounds i8, i8* %p0, i64 12
%pd = getelementptr inbounds i8, i8* %p0, i64 13
%pe = getelementptr inbounds i8, i8* %p0, i64 14
%pf = getelementptr inbounds i8, i8* %p0, i64 15
%t0 = load i8, i8* %p0, align 4
%t1 = load i8, i8* %p1, align 4
%t2 = load i8, i8* %p2, align 4
%t3 = load i8, i8* %p3, align 4
%t4 = load i8, i8* %p4, align 4
%t5 = load i8, i8* %p5, align 4
%t6 = load i8, i8* %p6, align 4
%t7 = load i8, i8* %p7, align 4
%t8 = load i8, i8* %p8, align 4
%t9 = load i8, i8* %p9, align 4
%ta = load i8, i8* %pa, align 4
%tb = load i8, i8* %pb, align 4
%tc = load i8, i8* %pc, align 4
%td = load i8, i8* %pd, align 4
%te = load i8, i8* %pe, align 4
%tf = load i8, i8* %pf, align 4
%m10 = tail call i8 @llvm.umin.i8(i8 %t1, i8 %t0)
%m32 = tail call i8 @llvm.umin.i8(i8 %t3, i8 %t2)
%m54 = tail call i8 @llvm.umin.i8(i8 %t5, i8 %t4)
%m76 = tail call i8 @llvm.umin.i8(i8 %t7, i8 %t6)
%m98 = tail call i8 @llvm.umin.i8(i8 %t9, i8 %t8)
%mba = tail call i8 @llvm.umin.i8(i8 %tb, i8 %ta)
%mdc = tail call i8 @llvm.umin.i8(i8 %td, i8 %tc)
%mfe = tail call i8 @llvm.umin.i8(i8 %tf, i8 %te)
%m3210 = tail call i8 @llvm.umin.i8(i8 %m32, i8 %m10)
%m7654 = tail call i8 @llvm.umin.i8(i8 %m76, i8 %m54)
%mdc98 = tail call i8 @llvm.umin.i8(i8 %mdc, i8 %m98)
%mfeba = tail call i8 @llvm.umin.i8(i8 %mfe, i8 %mba)
%ml = tail call i8 @llvm.umin.i8(i8 %m3210, i8 %m7654)
%mh = tail call i8 @llvm.umin.i8(i8 %mfeba, i8 %mdc98)
%m = tail call i8 @llvm.umin.i8(i8 %mh, i8 %ml)
ret i8 %m
}

; This should not crash.
define void @PR49730() {
; CHECK-LABEL: @PR49730(
; CHECK-NEXT: [[TMP1:%.*]] = call <4 x i32> @llvm.smin.v4i32(<4 x i32> poison, <4 x i32> <i32 2, i32 2, i32 1, i32 1>)
; CHECK-NEXT: [[TMP2:%.*]] = sub nsw <4 x i32> poison, [[TMP1]]
; CHECK-NEXT: [[T12:%.*]] = sub nsw i32 undef, undef
; CHECK-NEXT: [[TMP3:%.*]] = call i32 @llvm.vector.reduce.umin.v4i32(<4 x i32> [[TMP2]])
; CHECK-NEXT: [[TMP4:%.*]] = call i32 @llvm.umin.i32(i32 [[TMP3]], i32 [[T12]])
; CHECK-NEXT: [[TMP5:%.*]] = call i32 @llvm.umin.i32(i32 [[TMP4]], i32 undef)
; CHECK-NEXT: [[T14:%.*]] = call i32 @llvm.umin.i32(i32 [[TMP5]], i32 93)
; CHECK-NEXT: ret void
;
%t = call i32 @llvm.smin.i32(i32 undef, i32 2)
%t1 = sub nsw i32 undef, %t
%t2 = call i32 @llvm.umin.i32(i32 undef, i32 %t1)
%t3 = call i32 @llvm.smin.i32(i32 undef, i32 2)
%t4 = sub nsw i32 undef, %t3
%t5 = call i32 @llvm.umin.i32(i32 %t2, i32 %t4)
%t6 = call i32 @llvm.smin.i32(i32 undef, i32 1)
%t7 = sub nuw nsw i32 undef, %t6
%t8 = call i32 @llvm.umin.i32(i32 %t5, i32 %t7)
%t9 = call i32 @llvm.smin.i32(i32 undef, i32 1)
%t10 = sub nsw i32 undef, %t9
%t11 = call i32 @llvm.umin.i32(i32 %t8, i32 %t10)
%t12 = sub nsw i32 undef, undef
%t13 = call i32 @llvm.umin.i32(i32 %t11, i32 %t12)
%t14 = call i32 @llvm.umin.i32(i32 %t13, i32 93)
ret void
}