Vectorization of fminimumnum and fmaximumnum is not supported yet. Let's add the test cases for them now, and we will update the tests once vectorization is supported.
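For reference, once the SLP vectorizer can handle these intrinsics, the scalar calls in each function below should collapse into vector calls roughly along these lines (a rough sketch only, not autogenerated FileCheck output; the value names %va/%vb/%vm and the <4 x float> grouping are placeholders for illustration):

  %va = load <4 x float>, ptr @input1_f32, align 16
  %vb = load <4 x float>, ptr @input2_f32, align 16
  %vm = call <4 x float> @llvm.minimumnum.v4f32(<4 x float> %va, <4 x float> %vb)
  store <4 x float> %vm, ptr @output_f32, align 16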
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
; RUN: opt --passes=slp-vectorizer --mtriple=riscv64 -mattr="+zvfh,+v,+zfh" -S < %s | FileCheck %s

@input1_f32 = global [9 x float] zeroinitializer, align 16
@input2_f32 = global [9 x float] zeroinitializer, align 16
@output_f32 = global [9 x float] zeroinitializer, align 16
@input1_f64 = global [9 x double] zeroinitializer, align 16
@input2_f64 = global [9 x double] zeroinitializer, align 16
@output_f64 = global [9 x double] zeroinitializer, align 16
@input1_f16 = global [9 x half] zeroinitializer, align 16
@input2_f16 = global [9 x half] zeroinitializer, align 16
@output_f16 = global [9 x half] zeroinitializer, align 16

define void @fmin32() {
; CHECK-LABEL: define void @fmin32(
; CHECK-SAME: ) #[[ATTR0:[0-9]+]] {
; CHECK-NEXT: [[ENTRY:.*:]]
; CHECK-NEXT: [[TMP0:%.*]] = load float, ptr @input1_f32, align 16
; CHECK-NEXT: [[TMP1:%.*]] = load float, ptr @input2_f32, align 16
; CHECK-NEXT: [[TMP2:%.*]] = tail call float @llvm.minimumnum.f32(float [[TMP0]], float [[TMP1]])
; CHECK-NEXT: store float [[TMP2]], ptr @output_f32, align 16
; CHECK-NEXT: [[TMP3:%.*]] = load float, ptr getelementptr inbounds nuw (i8, ptr @input1_f32, i64 4), align 4
; CHECK-NEXT: [[TMP4:%.*]] = load float, ptr getelementptr inbounds nuw (i8, ptr @input2_f32, i64 4), align 4
; CHECK-NEXT: [[TMP5:%.*]] = tail call float @llvm.minimumnum.f32(float [[TMP3]], float [[TMP4]])
; CHECK-NEXT: store float [[TMP5]], ptr getelementptr inbounds nuw (i8, ptr @output_f32, i64 4), align 4
; CHECK-NEXT: [[TMP6:%.*]] = load float, ptr getelementptr inbounds nuw (i8, ptr @input1_f32, i64 8), align 8
; CHECK-NEXT: [[TMP7:%.*]] = load float, ptr getelementptr inbounds nuw (i8, ptr @input2_f32, i64 8), align 8
; CHECK-NEXT: [[TMP8:%.*]] = tail call float @llvm.minimumnum.f32(float [[TMP6]], float [[TMP7]])
; CHECK-NEXT: store float [[TMP8]], ptr getelementptr inbounds nuw (i8, ptr @output_f32, i64 8), align 8
; CHECK-NEXT: [[TMP9:%.*]] = load float, ptr getelementptr inbounds nuw (i8, ptr @input1_f32, i64 12), align 4
; CHECK-NEXT: [[TMP10:%.*]] = load float, ptr getelementptr inbounds nuw (i8, ptr @input2_f32, i64 12), align 4
; CHECK-NEXT: [[TMP11:%.*]] = tail call float @llvm.minimumnum.f32(float [[TMP9]], float [[TMP10]])
; CHECK-NEXT: store float [[TMP11]], ptr getelementptr inbounds nuw (i8, ptr @output_f32, i64 12), align 4
; CHECK-NEXT: [[TMP12:%.*]] = load float, ptr getelementptr inbounds nuw (i8, ptr @input1_f32, i64 16), align 16
; CHECK-NEXT: [[TMP13:%.*]] = load float, ptr getelementptr inbounds nuw (i8, ptr @input2_f32, i64 16), align 16
; CHECK-NEXT: [[TMP14:%.*]] = tail call float @llvm.minimumnum.f32(float [[TMP12]], float [[TMP13]])
; CHECK-NEXT: store float [[TMP14]], ptr getelementptr inbounds nuw (i8, ptr @output_f32, i64 16), align 16
; CHECK-NEXT: [[TMP15:%.*]] = load float, ptr getelementptr inbounds nuw (i8, ptr @input1_f32, i64 20), align 4
; CHECK-NEXT: [[TMP16:%.*]] = load float, ptr getelementptr inbounds nuw (i8, ptr @input2_f32, i64 20), align 4
; CHECK-NEXT: [[TMP17:%.*]] = tail call float @llvm.minimumnum.f32(float [[TMP15]], float [[TMP16]])
; CHECK-NEXT: store float [[TMP17]], ptr getelementptr inbounds nuw (i8, ptr @output_f32, i64 20), align 4
; CHECK-NEXT: [[TMP18:%.*]] = load float, ptr getelementptr inbounds nuw (i8, ptr @input1_f32, i64 24), align 8
; CHECK-NEXT: [[TMP19:%.*]] = load float, ptr getelementptr inbounds nuw (i8, ptr @input2_f32, i64 24), align 8
; CHECK-NEXT: [[TMP20:%.*]] = tail call float @llvm.minimumnum.f32(float [[TMP18]], float [[TMP19]])
; CHECK-NEXT: store float [[TMP20]], ptr getelementptr inbounds nuw (i8, ptr @output_f32, i64 24), align 8
; CHECK-NEXT: [[TMP21:%.*]] = load float, ptr getelementptr inbounds nuw (i8, ptr @input1_f32, i64 28), align 4
; CHECK-NEXT: [[TMP22:%.*]] = load float, ptr getelementptr inbounds nuw (i8, ptr @input2_f32, i64 28), align 4
; CHECK-NEXT: [[TMP23:%.*]] = tail call float @llvm.minimumnum.f32(float [[TMP21]], float [[TMP22]])
; CHECK-NEXT: store float [[TMP23]], ptr getelementptr inbounds nuw (i8, ptr @output_f32, i64 28), align 4
; CHECK-NEXT: [[TMP24:%.*]] = load float, ptr getelementptr inbounds nuw (i8, ptr @input1_f32, i64 32), align 16
; CHECK-NEXT: [[TMP25:%.*]] = load float, ptr getelementptr inbounds nuw (i8, ptr @input2_f32, i64 32), align 16
; CHECK-NEXT: [[TMP26:%.*]] = tail call float @llvm.minimumnum.f32(float [[TMP24]], float [[TMP25]])
; CHECK-NEXT: store float [[TMP26]], ptr getelementptr inbounds nuw (i8, ptr @output_f32, i64 32), align 16
; CHECK-NEXT: ret void
;
entry:
%input0_0 = load float, ptr @input1_f32, align 16
%input0_1 = load float, ptr @input2_f32, align 16
%output0 = tail call float @llvm.minimumnum.f32(float %input0_0, float %input0_1)
store float %output0, ptr @output_f32, align 16
%input1_1 = load float, ptr getelementptr inbounds nuw (i8, ptr @input1_f32, i64 4), align 4
%input1_2 = load float, ptr getelementptr inbounds nuw (i8, ptr @input2_f32, i64 4), align 4
%output1 = tail call float @llvm.minimumnum.f32(float %input1_1, float %input1_2)
store float %output1, ptr getelementptr inbounds nuw (i8, ptr @output_f32, i64 4), align 4
%input2_1 = load float, ptr getelementptr inbounds nuw (i8, ptr @input1_f32, i64 8), align 8
%input2_2 = load float, ptr getelementptr inbounds nuw (i8, ptr @input2_f32, i64 8), align 8
%output2 = tail call float @llvm.minimumnum.f32(float %input2_1, float %input2_2)
store float %output2, ptr getelementptr inbounds nuw (i8, ptr @output_f32, i64 8), align 8
%input3_1 = load float, ptr getelementptr inbounds nuw (i8, ptr @input1_f32, i64 12), align 4
%input3_2 = load float, ptr getelementptr inbounds nuw (i8, ptr @input2_f32, i64 12), align 4
%output3 = tail call float @llvm.minimumnum.f32(float %input3_1, float %input3_2)
store float %output3, ptr getelementptr inbounds nuw (i8, ptr @output_f32, i64 12), align 4
%input4_1 = load float, ptr getelementptr inbounds nuw (i8, ptr @input1_f32, i64 16), align 16
%input4_2 = load float, ptr getelementptr inbounds nuw (i8, ptr @input2_f32, i64 16), align 16
%output4 = tail call float @llvm.minimumnum.f32(float %input4_1, float %input4_2)
store float %output4, ptr getelementptr inbounds nuw (i8, ptr @output_f32, i64 16), align 16
%input5_1 = load float, ptr getelementptr inbounds nuw (i8, ptr @input1_f32, i64 20), align 4
%input5_2 = load float, ptr getelementptr inbounds nuw (i8, ptr @input2_f32, i64 20), align 4
%output5 = tail call float @llvm.minimumnum.f32(float %input5_1, float %input5_2)
store float %output5, ptr getelementptr inbounds nuw (i8, ptr @output_f32, i64 20), align 4
%input6_1 = load float, ptr getelementptr inbounds nuw (i8, ptr @input1_f32, i64 24), align 8
%input6_2 = load float, ptr getelementptr inbounds nuw (i8, ptr @input2_f32, i64 24), align 8
%output6 = tail call float @llvm.minimumnum.f32(float %input6_1, float %input6_2)
store float %output6, ptr getelementptr inbounds nuw (i8, ptr @output_f32, i64 24), align 8
%input7_1 = load float, ptr getelementptr inbounds nuw (i8, ptr @input1_f32, i64 28), align 4
%input7_2 = load float, ptr getelementptr inbounds nuw (i8, ptr @input2_f32, i64 28), align 4
%output7 = tail call float @llvm.minimumnum.f32(float %input7_1, float %input7_2)
store float %output7, ptr getelementptr inbounds nuw (i8, ptr @output_f32, i64 28), align 4
%input8_1 = load float, ptr getelementptr inbounds nuw (i8, ptr @input1_f32, i64 32), align 16
%input8_2 = load float, ptr getelementptr inbounds nuw (i8, ptr @input2_f32, i64 32), align 16
%output8 = tail call float @llvm.minimumnum.f32(float %input8_1, float %input8_2)
store float %output8, ptr getelementptr inbounds nuw (i8, ptr @output_f32, i64 32), align 16
ret void
}

declare float @llvm.minimumnum.f32(float, float)

define void @fmax32() {
; CHECK-LABEL: define void @fmax32(
; CHECK-SAME: ) #[[ATTR0]] {
; CHECK-NEXT: [[ENTRY:.*:]]
; CHECK-NEXT: [[TMP0:%.*]] = load float, ptr @input1_f32, align 16
; CHECK-NEXT: [[TMP1:%.*]] = load float, ptr @input2_f32, align 16
; CHECK-NEXT: [[TMP2:%.*]] = tail call float @llvm.maximumnum.f32(float [[TMP0]], float [[TMP1]])
; CHECK-NEXT: store float [[TMP2]], ptr @output_f32, align 16
; CHECK-NEXT: [[TMP3:%.*]] = load float, ptr getelementptr inbounds nuw (i8, ptr @input1_f32, i64 4), align 4
; CHECK-NEXT: [[TMP4:%.*]] = load float, ptr getelementptr inbounds nuw (i8, ptr @input2_f32, i64 4), align 4
; CHECK-NEXT: [[TMP5:%.*]] = tail call float @llvm.maximumnum.f32(float [[TMP3]], float [[TMP4]])
; CHECK-NEXT: store float [[TMP5]], ptr getelementptr inbounds nuw (i8, ptr @output_f32, i64 4), align 4
; CHECK-NEXT: [[TMP6:%.*]] = load float, ptr getelementptr inbounds nuw (i8, ptr @input1_f32, i64 8), align 8
; CHECK-NEXT: [[TMP7:%.*]] = load float, ptr getelementptr inbounds nuw (i8, ptr @input2_f32, i64 8), align 8
; CHECK-NEXT: [[TMP8:%.*]] = tail call float @llvm.maximumnum.f32(float [[TMP6]], float [[TMP7]])
; CHECK-NEXT: store float [[TMP8]], ptr getelementptr inbounds nuw (i8, ptr @output_f32, i64 8), align 8
; CHECK-NEXT: [[TMP9:%.*]] = load float, ptr getelementptr inbounds nuw (i8, ptr @input1_f32, i64 12), align 4
; CHECK-NEXT: [[TMP10:%.*]] = load float, ptr getelementptr inbounds nuw (i8, ptr @input2_f32, i64 12), align 4
; CHECK-NEXT: [[TMP11:%.*]] = tail call float @llvm.maximumnum.f32(float [[TMP9]], float [[TMP10]])
; CHECK-NEXT: store float [[TMP11]], ptr getelementptr inbounds nuw (i8, ptr @output_f32, i64 12), align 4
; CHECK-NEXT: [[TMP12:%.*]] = load float, ptr getelementptr inbounds nuw (i8, ptr @input1_f32, i64 16), align 16
; CHECK-NEXT: [[TMP13:%.*]] = load float, ptr getelementptr inbounds nuw (i8, ptr @input2_f32, i64 16), align 16
; CHECK-NEXT: [[TMP14:%.*]] = tail call float @llvm.maximumnum.f32(float [[TMP12]], float [[TMP13]])
; CHECK-NEXT: store float [[TMP14]], ptr getelementptr inbounds nuw (i8, ptr @output_f32, i64 16), align 16
; CHECK-NEXT: [[TMP15:%.*]] = load float, ptr getelementptr inbounds nuw (i8, ptr @input1_f32, i64 20), align 4
; CHECK-NEXT: [[TMP16:%.*]] = load float, ptr getelementptr inbounds nuw (i8, ptr @input2_f32, i64 20), align 4
; CHECK-NEXT: [[TMP17:%.*]] = tail call float @llvm.maximumnum.f32(float [[TMP15]], float [[TMP16]])
; CHECK-NEXT: store float [[TMP17]], ptr getelementptr inbounds nuw (i8, ptr @output_f32, i64 20), align 4
; CHECK-NEXT: [[TMP18:%.*]] = load float, ptr getelementptr inbounds nuw (i8, ptr @input1_f32, i64 24), align 8
; CHECK-NEXT: [[TMP19:%.*]] = load float, ptr getelementptr inbounds nuw (i8, ptr @input2_f32, i64 24), align 8
; CHECK-NEXT: [[TMP20:%.*]] = tail call float @llvm.maximumnum.f32(float [[TMP18]], float [[TMP19]])
; CHECK-NEXT: store float [[TMP20]], ptr getelementptr inbounds nuw (i8, ptr @output_f32, i64 24), align 8
; CHECK-NEXT: [[TMP21:%.*]] = load float, ptr getelementptr inbounds nuw (i8, ptr @input1_f32, i64 28), align 4
; CHECK-NEXT: [[TMP22:%.*]] = load float, ptr getelementptr inbounds nuw (i8, ptr @input2_f32, i64 28), align 4
; CHECK-NEXT: [[TMP23:%.*]] = tail call float @llvm.maximumnum.f32(float [[TMP21]], float [[TMP22]])
; CHECK-NEXT: store float [[TMP23]], ptr getelementptr inbounds nuw (i8, ptr @output_f32, i64 28), align 4
; CHECK-NEXT: [[TMP24:%.*]] = load float, ptr getelementptr inbounds nuw (i8, ptr @input1_f32, i64 32), align 16
; CHECK-NEXT: [[TMP25:%.*]] = load float, ptr getelementptr inbounds nuw (i8, ptr @input2_f32, i64 32), align 16
; CHECK-NEXT: [[TMP26:%.*]] = tail call float @llvm.maximumnum.f32(float [[TMP24]], float [[TMP25]])
; CHECK-NEXT: store float [[TMP26]], ptr getelementptr inbounds nuw (i8, ptr @output_f32, i64 32), align 16
; CHECK-NEXT: ret void
;
entry:
%input0_0 = load float, ptr @input1_f32, align 16
%input0_1 = load float, ptr @input2_f32, align 16
%output0 = tail call float @llvm.maximumnum.f32(float %input0_0, float %input0_1)
store float %output0, ptr @output_f32, align 16
%input1_1 = load float, ptr getelementptr inbounds nuw (i8, ptr @input1_f32, i64 4), align 4
%input1_2 = load float, ptr getelementptr inbounds nuw (i8, ptr @input2_f32, i64 4), align 4
%output1 = tail call float @llvm.maximumnum.f32(float %input1_1, float %input1_2)
store float %output1, ptr getelementptr inbounds nuw (i8, ptr @output_f32, i64 4), align 4
%input2_1 = load float, ptr getelementptr inbounds nuw (i8, ptr @input1_f32, i64 8), align 8
%input2_2 = load float, ptr getelementptr inbounds nuw (i8, ptr @input2_f32, i64 8), align 8
%output2 = tail call float @llvm.maximumnum.f32(float %input2_1, float %input2_2)
store float %output2, ptr getelementptr inbounds nuw (i8, ptr @output_f32, i64 8), align 8
%input3_1 = load float, ptr getelementptr inbounds nuw (i8, ptr @input1_f32, i64 12), align 4
%input3_2 = load float, ptr getelementptr inbounds nuw (i8, ptr @input2_f32, i64 12), align 4
%output3 = tail call float @llvm.maximumnum.f32(float %input3_1, float %input3_2)
store float %output3, ptr getelementptr inbounds nuw (i8, ptr @output_f32, i64 12), align 4
%input4_1 = load float, ptr getelementptr inbounds nuw (i8, ptr @input1_f32, i64 16), align 16
%input4_2 = load float, ptr getelementptr inbounds nuw (i8, ptr @input2_f32, i64 16), align 16
%output4 = tail call float @llvm.maximumnum.f32(float %input4_1, float %input4_2)
store float %output4, ptr getelementptr inbounds nuw (i8, ptr @output_f32, i64 16), align 16
%input5_1 = load float, ptr getelementptr inbounds nuw (i8, ptr @input1_f32, i64 20), align 4
%input5_2 = load float, ptr getelementptr inbounds nuw (i8, ptr @input2_f32, i64 20), align 4
%output5 = tail call float @llvm.maximumnum.f32(float %input5_1, float %input5_2)
store float %output5, ptr getelementptr inbounds nuw (i8, ptr @output_f32, i64 20), align 4
%input6_1 = load float, ptr getelementptr inbounds nuw (i8, ptr @input1_f32, i64 24), align 8
%input6_2 = load float, ptr getelementptr inbounds nuw (i8, ptr @input2_f32, i64 24), align 8
%output6 = tail call float @llvm.maximumnum.f32(float %input6_1, float %input6_2)
store float %output6, ptr getelementptr inbounds nuw (i8, ptr @output_f32, i64 24), align 8
%input7_1 = load float, ptr getelementptr inbounds nuw (i8, ptr @input1_f32, i64 28), align 4
%input7_2 = load float, ptr getelementptr inbounds nuw (i8, ptr @input2_f32, i64 28), align 4
%output7 = tail call float @llvm.maximumnum.f32(float %input7_1, float %input7_2)
store float %output7, ptr getelementptr inbounds nuw (i8, ptr @output_f32, i64 28), align 4
%input8_1 = load float, ptr getelementptr inbounds nuw (i8, ptr @input1_f32, i64 32), align 16
%input8_2 = load float, ptr getelementptr inbounds nuw (i8, ptr @input2_f32, i64 32), align 16
%output8 = tail call float @llvm.maximumnum.f32(float %input8_1, float %input8_2)
store float %output8, ptr getelementptr inbounds nuw (i8, ptr @output_f32, i64 32), align 16
ret void
}

declare float @llvm.maximumnum.f32(float, float)

define void @fmin64() {
; CHECK-LABEL: define void @fmin64(
; CHECK-SAME: ) #[[ATTR0]] {
; CHECK-NEXT: [[ENTRY:.*:]]
; CHECK-NEXT: [[TMP0:%.*]] = load double, ptr @input1_f64, align 16
; CHECK-NEXT: [[TMP1:%.*]] = load double, ptr @input2_f64, align 16
; CHECK-NEXT: [[TMP2:%.*]] = tail call double @llvm.minimumnum.f64(double [[TMP0]], double [[TMP1]])
; CHECK-NEXT: store double [[TMP2]], ptr @output_f64, align 16
; CHECK-NEXT: [[TMP3:%.*]] = load double, ptr getelementptr inbounds nuw (i8, ptr @input1_f64, i64 8), align 8
; CHECK-NEXT: [[TMP4:%.*]] = load double, ptr getelementptr inbounds nuw (i8, ptr @input2_f64, i64 8), align 8
; CHECK-NEXT: [[TMP5:%.*]] = tail call double @llvm.minimumnum.f64(double [[TMP3]], double [[TMP4]])
; CHECK-NEXT: store double [[TMP5]], ptr getelementptr inbounds nuw (i8, ptr @output_f64, i64 8), align 8
; CHECK-NEXT: [[TMP6:%.*]] = load double, ptr getelementptr inbounds nuw (i8, ptr @input1_f64, i64 16), align 16
; CHECK-NEXT: [[TMP7:%.*]] = load double, ptr getelementptr inbounds nuw (i8, ptr @input2_f64, i64 16), align 16
; CHECK-NEXT: [[TMP8:%.*]] = tail call double @llvm.minimumnum.f64(double [[TMP6]], double [[TMP7]])
; CHECK-NEXT: store double [[TMP8]], ptr getelementptr inbounds nuw (i8, ptr @output_f64, i64 16), align 16
; CHECK-NEXT: [[TMP9:%.*]] = load double, ptr getelementptr inbounds nuw (i8, ptr @input1_f64, i64 24), align 8
; CHECK-NEXT: [[TMP10:%.*]] = load double, ptr getelementptr inbounds nuw (i8, ptr @input2_f64, i64 24), align 8
; CHECK-NEXT: [[TMP11:%.*]] = tail call double @llvm.minimumnum.f64(double [[TMP9]], double [[TMP10]])
; CHECK-NEXT: store double [[TMP11]], ptr getelementptr inbounds nuw (i8, ptr @output_f64, i64 24), align 8
; CHECK-NEXT: [[TMP12:%.*]] = load double, ptr getelementptr inbounds nuw (i8, ptr @input1_f64, i64 32), align 16
; CHECK-NEXT: [[TMP13:%.*]] = load double, ptr getelementptr inbounds nuw (i8, ptr @input2_f64, i64 32), align 16
; CHECK-NEXT: [[TMP14:%.*]] = tail call double @llvm.minimumnum.f64(double [[TMP12]], double [[TMP13]])
; CHECK-NEXT: store double [[TMP14]], ptr getelementptr inbounds nuw (i8, ptr @output_f64, i64 32), align 16
; CHECK-NEXT: [[TMP15:%.*]] = load double, ptr getelementptr inbounds nuw (i8, ptr @input1_f64, i64 40), align 8
; CHECK-NEXT: [[TMP16:%.*]] = load double, ptr getelementptr inbounds nuw (i8, ptr @input2_f64, i64 40), align 8
; CHECK-NEXT: [[TMP17:%.*]] = tail call double @llvm.minimumnum.f64(double [[TMP15]], double [[TMP16]])
; CHECK-NEXT: store double [[TMP17]], ptr getelementptr inbounds nuw (i8, ptr @output_f64, i64 40), align 8
; CHECK-NEXT: [[TMP18:%.*]] = load double, ptr getelementptr inbounds nuw (i8, ptr @input1_f64, i64 48), align 16
; CHECK-NEXT: [[TMP19:%.*]] = load double, ptr getelementptr inbounds nuw (i8, ptr @input2_f64, i64 48), align 16
; CHECK-NEXT: [[TMP20:%.*]] = tail call double @llvm.minimumnum.f64(double [[TMP18]], double [[TMP19]])
; CHECK-NEXT: store double [[TMP20]], ptr getelementptr inbounds nuw (i8, ptr @output_f64, i64 48), align 16
; CHECK-NEXT: [[TMP21:%.*]] = load double, ptr getelementptr inbounds nuw (i8, ptr @input1_f64, i64 56), align 8
; CHECK-NEXT: [[TMP22:%.*]] = load double, ptr getelementptr inbounds nuw (i8, ptr @input2_f64, i64 56), align 8
; CHECK-NEXT: [[TMP23:%.*]] = tail call double @llvm.minimumnum.f64(double [[TMP21]], double [[TMP22]])
; CHECK-NEXT: store double [[TMP23]], ptr getelementptr inbounds nuw (i8, ptr @output_f64, i64 56), align 8
; CHECK-NEXT: [[TMP24:%.*]] = load double, ptr getelementptr inbounds nuw (i8, ptr @input1_f64, i64 64), align 16
; CHECK-NEXT: [[TMP25:%.*]] = load double, ptr getelementptr inbounds nuw (i8, ptr @input2_f64, i64 64), align 16
; CHECK-NEXT: [[TMP26:%.*]] = tail call double @llvm.minimumnum.f64(double [[TMP24]], double [[TMP25]])
; CHECK-NEXT: store double [[TMP26]], ptr getelementptr inbounds nuw (i8, ptr @output_f64, i64 64), align 16
; CHECK-NEXT: ret void
;
entry:
%input0_0 = load double, ptr @input1_f64, align 16
%input0_1 = load double, ptr @input2_f64, align 16
%output0 = tail call double @llvm.minimumnum.f64(double %input0_0, double %input0_1)
store double %output0, ptr @output_f64, align 16
%input1_1 = load double, ptr getelementptr inbounds nuw (i8, ptr @input1_f64, i64 8), align 8
%input1_2 = load double, ptr getelementptr inbounds nuw (i8, ptr @input2_f64, i64 8), align 8
%output1 = tail call double @llvm.minimumnum.f64(double %input1_1, double %input1_2)
store double %output1, ptr getelementptr inbounds nuw (i8, ptr @output_f64, i64 8), align 8
%input2_1 = load double, ptr getelementptr inbounds nuw (i8, ptr @input1_f64, i64 16), align 16
%input2_2 = load double, ptr getelementptr inbounds nuw (i8, ptr @input2_f64, i64 16), align 16
%output2 = tail call double @llvm.minimumnum.f64(double %input2_1, double %input2_2)
store double %output2, ptr getelementptr inbounds nuw (i8, ptr @output_f64, i64 16), align 16
%input3_1 = load double, ptr getelementptr inbounds nuw (i8, ptr @input1_f64, i64 24), align 8
%input3_2 = load double, ptr getelementptr inbounds nuw (i8, ptr @input2_f64, i64 24), align 8
%output3 = tail call double @llvm.minimumnum.f64(double %input3_1, double %input3_2)
store double %output3, ptr getelementptr inbounds nuw (i8, ptr @output_f64, i64 24), align 8
%input4_1 = load double, ptr getelementptr inbounds nuw (i8, ptr @input1_f64, i64 32), align 16
%input4_2 = load double, ptr getelementptr inbounds nuw (i8, ptr @input2_f64, i64 32), align 16
%output4 = tail call double @llvm.minimumnum.f64(double %input4_1, double %input4_2)
store double %output4, ptr getelementptr inbounds nuw (i8, ptr @output_f64, i64 32), align 16
%input5_1 = load double, ptr getelementptr inbounds nuw (i8, ptr @input1_f64, i64 40), align 8
%input5_2 = load double, ptr getelementptr inbounds nuw (i8, ptr @input2_f64, i64 40), align 8
%output5 = tail call double @llvm.minimumnum.f64(double %input5_1, double %input5_2)
store double %output5, ptr getelementptr inbounds nuw (i8, ptr @output_f64, i64 40), align 8
%input6_1 = load double, ptr getelementptr inbounds nuw (i8, ptr @input1_f64, i64 48), align 16
%input6_2 = load double, ptr getelementptr inbounds nuw (i8, ptr @input2_f64, i64 48), align 16
%output6 = tail call double @llvm.minimumnum.f64(double %input6_1, double %input6_2)
store double %output6, ptr getelementptr inbounds nuw (i8, ptr @output_f64, i64 48), align 16
%input7_1 = load double, ptr getelementptr inbounds nuw (i8, ptr @input1_f64, i64 56), align 8
%input7_2 = load double, ptr getelementptr inbounds nuw (i8, ptr @input2_f64, i64 56), align 8
%output7 = tail call double @llvm.minimumnum.f64(double %input7_1, double %input7_2)
store double %output7, ptr getelementptr inbounds nuw (i8, ptr @output_f64, i64 56), align 8
%input8_1 = load double, ptr getelementptr inbounds nuw (i8, ptr @input1_f64, i64 64), align 16
%input8_2 = load double, ptr getelementptr inbounds nuw (i8, ptr @input2_f64, i64 64), align 16
%output8 = tail call double @llvm.minimumnum.f64(double %input8_1, double %input8_2)
store double %output8, ptr getelementptr inbounds nuw (i8, ptr @output_f64, i64 64), align 16
ret void
}

declare double @llvm.minimumnum.f64(double, double)

define void @fmax64() {
; CHECK-LABEL: define void @fmax64(
; CHECK-SAME: ) #[[ATTR0]] {
; CHECK-NEXT: [[ENTRY:.*:]]
; CHECK-NEXT: [[TMP0:%.*]] = load double, ptr @input1_f64, align 16
; CHECK-NEXT: [[TMP1:%.*]] = load double, ptr @input2_f64, align 16
; CHECK-NEXT: [[TMP2:%.*]] = tail call double @llvm.maximumnum.f64(double [[TMP0]], double [[TMP1]])
; CHECK-NEXT: store double [[TMP2]], ptr @output_f64, align 16
; CHECK-NEXT: [[TMP3:%.*]] = load double, ptr getelementptr inbounds nuw (i8, ptr @input1_f64, i64 8), align 8
; CHECK-NEXT: [[TMP4:%.*]] = load double, ptr getelementptr inbounds nuw (i8, ptr @input2_f64, i64 8), align 8
; CHECK-NEXT: [[TMP5:%.*]] = tail call double @llvm.maximumnum.f64(double [[TMP3]], double [[TMP4]])
; CHECK-NEXT: store double [[TMP5]], ptr getelementptr inbounds nuw (i8, ptr @output_f64, i64 8), align 8
; CHECK-NEXT: [[TMP6:%.*]] = load double, ptr getelementptr inbounds nuw (i8, ptr @input1_f64, i64 16), align 16
; CHECK-NEXT: [[TMP7:%.*]] = load double, ptr getelementptr inbounds nuw (i8, ptr @input2_f64, i64 16), align 16
; CHECK-NEXT: [[TMP8:%.*]] = tail call double @llvm.maximumnum.f64(double [[TMP6]], double [[TMP7]])
; CHECK-NEXT: store double [[TMP8]], ptr getelementptr inbounds nuw (i8, ptr @output_f64, i64 16), align 16
; CHECK-NEXT: [[TMP9:%.*]] = load double, ptr getelementptr inbounds nuw (i8, ptr @input1_f64, i64 24), align 8
; CHECK-NEXT: [[TMP10:%.*]] = load double, ptr getelementptr inbounds nuw (i8, ptr @input2_f64, i64 24), align 8
; CHECK-NEXT: [[TMP11:%.*]] = tail call double @llvm.maximumnum.f64(double [[TMP9]], double [[TMP10]])
; CHECK-NEXT: store double [[TMP11]], ptr getelementptr inbounds nuw (i8, ptr @output_f64, i64 24), align 8
; CHECK-NEXT: [[TMP12:%.*]] = load double, ptr getelementptr inbounds nuw (i8, ptr @input1_f64, i64 32), align 16
; CHECK-NEXT: [[TMP13:%.*]] = load double, ptr getelementptr inbounds nuw (i8, ptr @input2_f64, i64 32), align 16
; CHECK-NEXT: [[TMP14:%.*]] = tail call double @llvm.maximumnum.f64(double [[TMP12]], double [[TMP13]])
; CHECK-NEXT: store double [[TMP14]], ptr getelementptr inbounds nuw (i8, ptr @output_f64, i64 32), align 16
; CHECK-NEXT: [[TMP15:%.*]] = load double, ptr getelementptr inbounds nuw (i8, ptr @input1_f64, i64 40), align 8
; CHECK-NEXT: [[TMP16:%.*]] = load double, ptr getelementptr inbounds nuw (i8, ptr @input2_f64, i64 40), align 8
; CHECK-NEXT: [[TMP17:%.*]] = tail call double @llvm.maximumnum.f64(double [[TMP15]], double [[TMP16]])
; CHECK-NEXT: store double [[TMP17]], ptr getelementptr inbounds nuw (i8, ptr @output_f64, i64 40), align 8
; CHECK-NEXT: [[TMP18:%.*]] = load double, ptr getelementptr inbounds nuw (i8, ptr @input1_f64, i64 48), align 16
; CHECK-NEXT: [[TMP19:%.*]] = load double, ptr getelementptr inbounds nuw (i8, ptr @input2_f64, i64 48), align 16
; CHECK-NEXT: [[TMP20:%.*]] = tail call double @llvm.maximumnum.f64(double [[TMP18]], double [[TMP19]])
; CHECK-NEXT: store double [[TMP20]], ptr getelementptr inbounds nuw (i8, ptr @output_f64, i64 48), align 16
; CHECK-NEXT: [[TMP21:%.*]] = load double, ptr getelementptr inbounds nuw (i8, ptr @input1_f64, i64 56), align 8
; CHECK-NEXT: [[TMP22:%.*]] = load double, ptr getelementptr inbounds nuw (i8, ptr @input2_f64, i64 56), align 8
; CHECK-NEXT: [[TMP23:%.*]] = tail call double @llvm.maximumnum.f64(double [[TMP21]], double [[TMP22]])
; CHECK-NEXT: store double [[TMP23]], ptr getelementptr inbounds nuw (i8, ptr @output_f64, i64 56), align 8
; CHECK-NEXT: [[TMP24:%.*]] = load double, ptr getelementptr inbounds nuw (i8, ptr @input1_f64, i64 64), align 16
; CHECK-NEXT: [[TMP25:%.*]] = load double, ptr getelementptr inbounds nuw (i8, ptr @input2_f64, i64 64), align 16
; CHECK-NEXT: [[TMP26:%.*]] = tail call double @llvm.maximumnum.f64(double [[TMP24]], double [[TMP25]])
; CHECK-NEXT: store double [[TMP26]], ptr getelementptr inbounds nuw (i8, ptr @output_f64, i64 64), align 16
; CHECK-NEXT: ret void
;
entry:
%input0_0 = load double, ptr @input1_f64, align 16
%input0_1 = load double, ptr @input2_f64, align 16
%output0 = tail call double @llvm.maximumnum.f64(double %input0_0, double %input0_1)
store double %output0, ptr @output_f64, align 16
%input1_1 = load double, ptr getelementptr inbounds nuw (i8, ptr @input1_f64, i64 8), align 8
%input1_2 = load double, ptr getelementptr inbounds nuw (i8, ptr @input2_f64, i64 8), align 8
%output1 = tail call double @llvm.maximumnum.f64(double %input1_1, double %input1_2)
store double %output1, ptr getelementptr inbounds nuw (i8, ptr @output_f64, i64 8), align 8
%input2_1 = load double, ptr getelementptr inbounds nuw (i8, ptr @input1_f64, i64 16), align 16
%input2_2 = load double, ptr getelementptr inbounds nuw (i8, ptr @input2_f64, i64 16), align 16
%output2 = tail call double @llvm.maximumnum.f64(double %input2_1, double %input2_2)
store double %output2, ptr getelementptr inbounds nuw (i8, ptr @output_f64, i64 16), align 16
%input3_1 = load double, ptr getelementptr inbounds nuw (i8, ptr @input1_f64, i64 24), align 8
%input3_2 = load double, ptr getelementptr inbounds nuw (i8, ptr @input2_f64, i64 24), align 8
%output3 = tail call double @llvm.maximumnum.f64(double %input3_1, double %input3_2)
store double %output3, ptr getelementptr inbounds nuw (i8, ptr @output_f64, i64 24), align 8
%input4_1 = load double, ptr getelementptr inbounds nuw (i8, ptr @input1_f64, i64 32), align 16
%input4_2 = load double, ptr getelementptr inbounds nuw (i8, ptr @input2_f64, i64 32), align 16
%output4 = tail call double @llvm.maximumnum.f64(double %input4_1, double %input4_2)
store double %output4, ptr getelementptr inbounds nuw (i8, ptr @output_f64, i64 32), align 16
%input5_1 = load double, ptr getelementptr inbounds nuw (i8, ptr @input1_f64, i64 40), align 8
%input5_2 = load double, ptr getelementptr inbounds nuw (i8, ptr @input2_f64, i64 40), align 8
%output5 = tail call double @llvm.maximumnum.f64(double %input5_1, double %input5_2)
store double %output5, ptr getelementptr inbounds nuw (i8, ptr @output_f64, i64 40), align 8
%input6_1 = load double, ptr getelementptr inbounds nuw (i8, ptr @input1_f64, i64 48), align 16
%input6_2 = load double, ptr getelementptr inbounds nuw (i8, ptr @input2_f64, i64 48), align 16
%output6 = tail call double @llvm.maximumnum.f64(double %input6_1, double %input6_2)
store double %output6, ptr getelementptr inbounds nuw (i8, ptr @output_f64, i64 48), align 16
%input7_1 = load double, ptr getelementptr inbounds nuw (i8, ptr @input1_f64, i64 56), align 8
%input7_2 = load double, ptr getelementptr inbounds nuw (i8, ptr @input2_f64, i64 56), align 8
%output7 = tail call double @llvm.maximumnum.f64(double %input7_1, double %input7_2)
store double %output7, ptr getelementptr inbounds nuw (i8, ptr @output_f64, i64 56), align 8
%input8_1 = load double, ptr getelementptr inbounds nuw (i8, ptr @input1_f64, i64 64), align 16
%input8_2 = load double, ptr getelementptr inbounds nuw (i8, ptr @input2_f64, i64 64), align 16
%output8 = tail call double @llvm.maximumnum.f64(double %input8_1, double %input8_2)
store double %output8, ptr getelementptr inbounds nuw (i8, ptr @output_f64, i64 64), align 16
ret void
}

declare double @llvm.maximumnum.f64(double, double)

define void @fmin16() {
; CHECK-LABEL: define void @fmin16(
; CHECK-SAME: ) #[[ATTR0]] {
; CHECK-NEXT: [[ENTRY:.*:]]
; CHECK-NEXT: [[TMP0:%.*]] = load half, ptr @input1_f16, align 16
; CHECK-NEXT: [[TMP1:%.*]] = load half, ptr @input2_f16, align 16
; CHECK-NEXT: [[TMP2:%.*]] = tail call half @llvm.minimumnum.f16(half [[TMP0]], half [[TMP1]])
; CHECK-NEXT: store half [[TMP2]], ptr @output_f16, align 16
; CHECK-NEXT: [[TMP3:%.*]] = load half, ptr getelementptr inbounds nuw (i8, ptr @input1_f16, i64 2), align 2
; CHECK-NEXT: [[TMP4:%.*]] = load half, ptr getelementptr inbounds nuw (i8, ptr @input2_f16, i64 2), align 2
; CHECK-NEXT: [[TMP5:%.*]] = tail call half @llvm.minimumnum.f16(half [[TMP3]], half [[TMP4]])
; CHECK-NEXT: store half [[TMP5]], ptr getelementptr inbounds nuw (i8, ptr @output_f16, i64 2), align 2
; CHECK-NEXT: [[TMP6:%.*]] = load half, ptr getelementptr inbounds nuw (i8, ptr @input1_f16, i64 4), align 4
; CHECK-NEXT: [[TMP7:%.*]] = load half, ptr getelementptr inbounds nuw (i8, ptr @input2_f16, i64 4), align 4
; CHECK-NEXT: [[TMP8:%.*]] = tail call half @llvm.minimumnum.f16(half [[TMP6]], half [[TMP7]])
; CHECK-NEXT: store half [[TMP8]], ptr getelementptr inbounds nuw (i8, ptr @output_f16, i64 4), align 4
; CHECK-NEXT: [[TMP9:%.*]] = load half, ptr getelementptr inbounds nuw (i8, ptr @input1_f16, i64 6), align 2
; CHECK-NEXT: [[TMP10:%.*]] = load half, ptr getelementptr inbounds nuw (i8, ptr @input2_f16, i64 6), align 2
; CHECK-NEXT: [[TMP11:%.*]] = tail call half @llvm.minimumnum.f16(half [[TMP9]], half [[TMP10]])
; CHECK-NEXT: store half [[TMP11]], ptr getelementptr inbounds nuw (i8, ptr @output_f16, i64 6), align 2
; CHECK-NEXT: [[TMP12:%.*]] = load half, ptr getelementptr inbounds nuw (i8, ptr @input1_f16, i64 8), align 8
; CHECK-NEXT: [[TMP13:%.*]] = load half, ptr getelementptr inbounds nuw (i8, ptr @input2_f16, i64 8), align 8
; CHECK-NEXT: [[TMP14:%.*]] = tail call half @llvm.minimumnum.f16(half [[TMP12]], half [[TMP13]])
; CHECK-NEXT: store half [[TMP14]], ptr getelementptr inbounds nuw (i8, ptr @output_f16, i64 8), align 8
; CHECK-NEXT: [[TMP15:%.*]] = load half, ptr getelementptr inbounds nuw (i8, ptr @input1_f16, i64 10), align 2
; CHECK-NEXT: [[TMP16:%.*]] = load half, ptr getelementptr inbounds nuw (i8, ptr @input2_f16, i64 10), align 2
; CHECK-NEXT: [[TMP17:%.*]] = tail call half @llvm.minimumnum.f16(half [[TMP15]], half [[TMP16]])
; CHECK-NEXT: store half [[TMP17]], ptr getelementptr inbounds nuw (i8, ptr @output_f16, i64 10), align 2
; CHECK-NEXT: [[TMP18:%.*]] = load half, ptr getelementptr inbounds nuw (i8, ptr @input1_f16, i64 12), align 4
; CHECK-NEXT: [[TMP19:%.*]] = load half, ptr getelementptr inbounds nuw (i8, ptr @input2_f16, i64 12), align 4
; CHECK-NEXT: [[TMP20:%.*]] = tail call half @llvm.minimumnum.f16(half [[TMP18]], half [[TMP19]])
; CHECK-NEXT: store half [[TMP20]], ptr getelementptr inbounds nuw (i8, ptr @output_f16, i64 12), align 4
; CHECK-NEXT: [[TMP21:%.*]] = load half, ptr getelementptr inbounds nuw (i8, ptr @input1_f16, i64 14), align 2
; CHECK-NEXT: [[TMP22:%.*]] = load half, ptr getelementptr inbounds nuw (i8, ptr @input2_f16, i64 14), align 2
; CHECK-NEXT: [[TMP23:%.*]] = tail call half @llvm.minimumnum.f16(half [[TMP21]], half [[TMP22]])
; CHECK-NEXT: store half [[TMP23]], ptr getelementptr inbounds nuw (i8, ptr @output_f16, i64 14), align 2
; CHECK-NEXT: [[TMP24:%.*]] = load half, ptr getelementptr inbounds nuw (i8, ptr @input1_f16, i64 16), align 16
; CHECK-NEXT: [[TMP25:%.*]] = load half, ptr getelementptr inbounds nuw (i8, ptr @input2_f16, i64 16), align 16
; CHECK-NEXT: [[TMP26:%.*]] = tail call half @llvm.minimumnum.f16(half [[TMP24]], half [[TMP25]])
; CHECK-NEXT: store half [[TMP26]], ptr getelementptr inbounds nuw (i8, ptr @output_f16, i64 16), align 16
; CHECK-NEXT: ret void
;
entry:
%input0_0 = load half, ptr @input1_f16, align 16
%input0_1 = load half, ptr @input2_f16, align 16
%output0 = tail call half @llvm.minimumnum.f16(half %input0_0, half %input0_1)
store half %output0, ptr @output_f16, align 16
%input1_1 = load half, ptr getelementptr inbounds nuw (i8, ptr @input1_f16, i64 2), align 2
%input1_2 = load half, ptr getelementptr inbounds nuw (i8, ptr @input2_f16, i64 2), align 2
%output1 = tail call half @llvm.minimumnum.f16(half %input1_1, half %input1_2)
store half %output1, ptr getelementptr inbounds nuw (i8, ptr @output_f16, i64 2), align 2
%input2_1 = load half, ptr getelementptr inbounds nuw (i8, ptr @input1_f16, i64 4), align 4
%input2_2 = load half, ptr getelementptr inbounds nuw (i8, ptr @input2_f16, i64 4), align 4
%output2 = tail call half @llvm.minimumnum.f16(half %input2_1, half %input2_2)
store half %output2, ptr getelementptr inbounds nuw (i8, ptr @output_f16, i64 4), align 4
%input3_1 = load half, ptr getelementptr inbounds nuw (i8, ptr @input1_f16, i64 6), align 2
%input3_2 = load half, ptr getelementptr inbounds nuw (i8, ptr @input2_f16, i64 6), align 2
%output3 = tail call half @llvm.minimumnum.f16(half %input3_1, half %input3_2)
store half %output3, ptr getelementptr inbounds nuw (i8, ptr @output_f16, i64 6), align 2
%input4_1 = load half, ptr getelementptr inbounds nuw (i8, ptr @input1_f16, i64 8), align 8
%input4_2 = load half, ptr getelementptr inbounds nuw (i8, ptr @input2_f16, i64 8), align 8
%output4 = tail call half @llvm.minimumnum.f16(half %input4_1, half %input4_2)
store half %output4, ptr getelementptr inbounds nuw (i8, ptr @output_f16, i64 8), align 8
%input5_1 = load half, ptr getelementptr inbounds nuw (i8, ptr @input1_f16, i64 10), align 2
%input5_2 = load half, ptr getelementptr inbounds nuw (i8, ptr @input2_f16, i64 10), align 2
%output5 = tail call half @llvm.minimumnum.f16(half %input5_1, half %input5_2)
store half %output5, ptr getelementptr inbounds nuw (i8, ptr @output_f16, i64 10), align 2
%input6_1 = load half, ptr getelementptr inbounds nuw (i8, ptr @input1_f16, i64 12), align 4
%input6_2 = load half, ptr getelementptr inbounds nuw (i8, ptr @input2_f16, i64 12), align 4
%output6 = tail call half @llvm.minimumnum.f16(half %input6_1, half %input6_2)
store half %output6, ptr getelementptr inbounds nuw (i8, ptr @output_f16, i64 12), align 4
%input7_1 = load half, ptr getelementptr inbounds nuw (i8, ptr @input1_f16, i64 14), align 2
%input7_2 = load half, ptr getelementptr inbounds nuw (i8, ptr @input2_f16, i64 14), align 2
%output7 = tail call half @llvm.minimumnum.f16(half %input7_1, half %input7_2)
store half %output7, ptr getelementptr inbounds nuw (i8, ptr @output_f16, i64 14), align 2
%input8_1 = load half, ptr getelementptr inbounds nuw (i8, ptr @input1_f16, i64 16), align 16
%input8_2 = load half, ptr getelementptr inbounds nuw (i8, ptr @input2_f16, i64 16), align 16
%output8 = tail call half @llvm.minimumnum.f16(half %input8_1, half %input8_2)
store half %output8, ptr getelementptr inbounds nuw (i8, ptr @output_f16, i64 16), align 16
ret void
}

declare half @llvm.minimumnum.f16(half, half)

define void @fmax16() {
; CHECK-LABEL: define void @fmax16(
; CHECK-SAME: ) #[[ATTR0]] {
; CHECK-NEXT: [[ENTRY:.*:]]
; CHECK-NEXT: [[TMP0:%.*]] = load half, ptr @input1_f16, align 16
; CHECK-NEXT: [[TMP1:%.*]] = load half, ptr @input2_f16, align 16
; CHECK-NEXT: [[TMP2:%.*]] = tail call half @llvm.maximumnum.f16(half [[TMP0]], half [[TMP1]])
; CHECK-NEXT: store half [[TMP2]], ptr @output_f16, align 16
; CHECK-NEXT: [[TMP3:%.*]] = load half, ptr getelementptr inbounds nuw (i8, ptr @input1_f16, i64 2), align 2
; CHECK-NEXT: [[TMP4:%.*]] = load half, ptr getelementptr inbounds nuw (i8, ptr @input2_f16, i64 2), align 2
; CHECK-NEXT: [[TMP5:%.*]] = tail call half @llvm.maximumnum.f16(half [[TMP3]], half [[TMP4]])
; CHECK-NEXT: store half [[TMP5]], ptr getelementptr inbounds nuw (i8, ptr @output_f16, i64 2), align 2
; CHECK-NEXT: [[TMP6:%.*]] = load half, ptr getelementptr inbounds nuw (i8, ptr @input1_f16, i64 4), align 4
; CHECK-NEXT: [[TMP7:%.*]] = load half, ptr getelementptr inbounds nuw (i8, ptr @input2_f16, i64 4), align 4
; CHECK-NEXT: [[TMP8:%.*]] = tail call half @llvm.maximumnum.f16(half [[TMP6]], half [[TMP7]])
; CHECK-NEXT: store half [[TMP8]], ptr getelementptr inbounds nuw (i8, ptr @output_f16, i64 4), align 4
; CHECK-NEXT: [[TMP9:%.*]] = load half, ptr getelementptr inbounds nuw (i8, ptr @input1_f16, i64 6), align 2
; CHECK-NEXT: [[TMP10:%.*]] = load half, ptr getelementptr inbounds nuw (i8, ptr @input2_f16, i64 6), align 2
; CHECK-NEXT: [[TMP11:%.*]] = tail call half @llvm.maximumnum.f16(half [[TMP9]], half [[TMP10]])
; CHECK-NEXT: store half [[TMP11]], ptr getelementptr inbounds nuw (i8, ptr @output_f16, i64 6), align 2
; CHECK-NEXT: [[TMP12:%.*]] = load half, ptr getelementptr inbounds nuw (i8, ptr @input1_f16, i64 8), align 8
; CHECK-NEXT: [[TMP13:%.*]] = load half, ptr getelementptr inbounds nuw (i8, ptr @input2_f16, i64 8), align 8
; CHECK-NEXT: [[TMP14:%.*]] = tail call half @llvm.maximumnum.f16(half [[TMP12]], half [[TMP13]])
; CHECK-NEXT: store half [[TMP14]], ptr getelementptr inbounds nuw (i8, ptr @output_f16, i64 8), align 8
; CHECK-NEXT: [[TMP15:%.*]] = load half, ptr getelementptr inbounds nuw (i8, ptr @input1_f16, i64 10), align 2
; CHECK-NEXT: [[TMP16:%.*]] = load half, ptr getelementptr inbounds nuw (i8, ptr @input2_f16, i64 10), align 2
; CHECK-NEXT: [[TMP17:%.*]] = tail call half @llvm.maximumnum.f16(half [[TMP15]], half [[TMP16]])
; CHECK-NEXT: store half [[TMP17]], ptr getelementptr inbounds nuw (i8, ptr @output_f16, i64 10), align 2
; CHECK-NEXT: [[TMP18:%.*]] = load half, ptr getelementptr inbounds nuw (i8, ptr @input1_f16, i64 12), align 4
; CHECK-NEXT: [[TMP19:%.*]] = load half, ptr getelementptr inbounds nuw (i8, ptr @input2_f16, i64 12), align 4
; CHECK-NEXT: [[TMP20:%.*]] = tail call half @llvm.maximumnum.f16(half [[TMP18]], half [[TMP19]])
; CHECK-NEXT: store half [[TMP20]], ptr getelementptr inbounds nuw (i8, ptr @output_f16, i64 12), align 4
; CHECK-NEXT: [[TMP21:%.*]] = load half, ptr getelementptr inbounds nuw (i8, ptr @input1_f16, i64 14), align 2
; CHECK-NEXT: [[TMP22:%.*]] = load half, ptr getelementptr inbounds nuw (i8, ptr @input2_f16, i64 14), align 2
; CHECK-NEXT: [[TMP23:%.*]] = tail call half @llvm.maximumnum.f16(half [[TMP21]], half [[TMP22]])
; CHECK-NEXT: store half [[TMP23]], ptr getelementptr inbounds nuw (i8, ptr @output_f16, i64 14), align 2
; CHECK-NEXT: [[TMP24:%.*]] = load half, ptr getelementptr inbounds nuw (i8, ptr @input1_f16, i64 16), align 16
; CHECK-NEXT: [[TMP25:%.*]] = load half, ptr getelementptr inbounds nuw (i8, ptr @input2_f16, i64 16), align 16
; CHECK-NEXT: [[TMP26:%.*]] = tail call half @llvm.maximumnum.f16(half [[TMP24]], half [[TMP25]])
; CHECK-NEXT: store half [[TMP26]], ptr getelementptr inbounds nuw (i8, ptr @output_f16, i64 16), align 16
; CHECK-NEXT: ret void
;
entry:
%input0_0 = load half, ptr @input1_f16, align 16
%input0_1 = load half, ptr @input2_f16, align 16
%output0 = tail call half @llvm.maximumnum.f16(half %input0_0, half %input0_1)
store half %output0, ptr @output_f16, align 16
%input1_1 = load half, ptr getelementptr inbounds nuw (i8, ptr @input1_f16, i64 2), align 2
%input1_2 = load half, ptr getelementptr inbounds nuw (i8, ptr @input2_f16, i64 2), align 2
%output1 = tail call half @llvm.maximumnum.f16(half %input1_1, half %input1_2)
store half %output1, ptr getelementptr inbounds nuw (i8, ptr @output_f16, i64 2), align 2
%input2_1 = load half, ptr getelementptr inbounds nuw (i8, ptr @input1_f16, i64 4), align 4
%input2_2 = load half, ptr getelementptr inbounds nuw (i8, ptr @input2_f16, i64 4), align 4
%output2 = tail call half @llvm.maximumnum.f16(half %input2_1, half %input2_2)
store half %output2, ptr getelementptr inbounds nuw (i8, ptr @output_f16, i64 4), align 4
%input3_1 = load half, ptr getelementptr inbounds nuw (i8, ptr @input1_f16, i64 6), align 2
%input3_2 = load half, ptr getelementptr inbounds nuw (i8, ptr @input2_f16, i64 6), align 2
%output3 = tail call half @llvm.maximumnum.f16(half %input3_1, half %input3_2)
store half %output3, ptr getelementptr inbounds nuw (i8, ptr @output_f16, i64 6), align 2
%input4_1 = load half, ptr getelementptr inbounds nuw (i8, ptr @input1_f16, i64 8), align 8
%input4_2 = load half, ptr getelementptr inbounds nuw (i8, ptr @input2_f16, i64 8), align 8
%output4 = tail call half @llvm.maximumnum.f16(half %input4_1, half %input4_2)
store half %output4, ptr getelementptr inbounds nuw (i8, ptr @output_f16, i64 8), align 8
%input5_1 = load half, ptr getelementptr inbounds nuw (i8, ptr @input1_f16, i64 10), align 2
%input5_2 = load half, ptr getelementptr inbounds nuw (i8, ptr @input2_f16, i64 10), align 2
%output5 = tail call half @llvm.maximumnum.f16(half %input5_1, half %input5_2)
store half %output5, ptr getelementptr inbounds nuw (i8, ptr @output_f16, i64 10), align 2
%input6_1 = load half, ptr getelementptr inbounds nuw (i8, ptr @input1_f16, i64 12), align 4
%input6_2 = load half, ptr getelementptr inbounds nuw (i8, ptr @input2_f16, i64 12), align 4
%output6 = tail call half @llvm.maximumnum.f16(half %input6_1, half %input6_2)
store half %output6, ptr getelementptr inbounds nuw (i8, ptr @output_f16, i64 12), align 4
%input7_1 = load half, ptr getelementptr inbounds nuw (i8, ptr @input1_f16, i64 14), align 2
%input7_2 = load half, ptr getelementptr inbounds nuw (i8, ptr @input2_f16, i64 14), align 2
%output7 = tail call half @llvm.maximumnum.f16(half %input7_1, half %input7_2)
store half %output7, ptr getelementptr inbounds nuw (i8, ptr @output_f16, i64 14), align 2
%input8_1 = load half, ptr getelementptr inbounds nuw (i8, ptr @input1_f16, i64 16), align 16
%input8_2 = load half, ptr getelementptr inbounds nuw (i8, ptr @input2_f16, i64 16), align 16
%output8 = tail call half @llvm.maximumnum.f16(half %input8_1, half %input8_2)
store half %output8, ptr getelementptr inbounds nuw (i8, ptr @output_f16, i64 16), align 16
ret void
}

declare half @llvm.maximumnum.f16(half, half)