LLVM/Test: Add vectorizing testcases for fmaximumnum and fminimumnum (#133843)

Vectorizing of fmaximumnum and fminimumnum is not supported yet. Let's
add the testcases for them now; we will update the testcases once
support is added.
This commit is contained in:
YunQiang Su
2025-04-02 08:46:02 +08:00
committed by GitHub
parent ad1ca5f4a2
commit e25187bc3e
6 changed files with 2307 additions and 0 deletions

View File

@@ -0,0 +1,255 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
; FIXME: fmaximumnum/fminimumnum have no vectorizing support yet.
; RUN: opt --passes=loop-vectorize --mtriple=aarch64 -mattr="+neon" -S < %s | FileCheck %s
; Scalar loop: output[i] = llvm.minimumnum.f32(input1[i], input2[i]) for
; i in [0, 4096). The CHECK lines record the still-scalar body emitted by
; loop-vectorize (no vector minimumnum support yet — see FIXME above).
define void @fmin32(ptr noundef readonly captures(none) %input1, ptr noundef readonly captures(none) %input2, ptr noundef writeonly captures(none) %output) {
; CHECK-LABEL: define void @fmin32(
; CHECK-SAME: ptr noundef readonly captures(none) [[INPUT1:%.*]], ptr noundef readonly captures(none) [[INPUT2:%.*]], ptr noundef writeonly captures(none) [[OUTPUT:%.*]]) #[[ATTR0:[0-9]+]] {
; CHECK-NEXT: [[ENTRY:.*]]:
; CHECK-NEXT: br label %[[FOR_BODY:.*]]
; CHECK: [[FOR_BODY]]:
; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[INDVARS_IV_NEXT:%.*]], %[[FOR_BODY]] ]
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds nuw [4096 x float], ptr [[INPUT1]], i64 0, i64 [[INDVARS_IV]]
; CHECK-NEXT: [[IN1:%.*]] = load float, ptr [[ARRAYIDX]], align 4
; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds nuw [4096 x float], ptr [[INPUT2]], i64 0, i64 [[INDVARS_IV]]
; CHECK-NEXT: [[IN2:%.*]] = load float, ptr [[ARRAYIDX2]], align 4
; CHECK-NEXT: [[OUT:%.*]] = tail call float @llvm.minimumnum.f32(float [[IN1]], float [[IN2]])
; CHECK-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds nuw [4096 x float], ptr [[OUTPUT]], i64 0, i64 [[INDVARS_IV]]
; CHECK-NEXT: store float [[OUT]], ptr [[ARRAYIDX4]], align 4
; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 4096
; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label %[[EXIT:.*]], label %[[FOR_BODY]]
; CHECK: [[EXIT]]:
; CHECK-NEXT: ret void
;
entry:
br label %for.body
for.body:
%iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
%arrayidx = getelementptr inbounds nuw [4096 x float], ptr %input1, i64 0, i64 %iv
%in1 = load float, ptr %arrayidx, align 4
%arrayidx2 = getelementptr inbounds nuw [4096 x float], ptr %input2, i64 0, i64 %iv
%in2 = load float, ptr %arrayidx2, align 4
%out = tail call float @llvm.minimumnum.f32(float %in1, float %in2)
%arrayidx4 = getelementptr inbounds nuw [4096 x float], ptr %output, i64 0, i64 %iv
store float %out, ptr %arrayidx4, align 4
%iv.next = add nuw nsw i64 %iv, 1
%exitcond.not = icmp eq i64 %iv.next, 4096
br i1 %exitcond.not, label %exit, label %for.body
exit:
ret void
}
; Scalar intrinsic exercised by @fmin32 above.
declare float @llvm.minimumnum.f32(float, float)
; Scalar loop: output[i] = llvm.maximumnum.f32(input1[i], input2[i]) for
; i in [0, 4096). CHECK lines record the unvectorized body.
define void @fmax32(ptr noundef readonly captures(none) %input1, ptr noundef readonly captures(none) %input2, ptr noundef writeonly captures(none) %output) {
; CHECK-LABEL: define void @fmax32(
; CHECK-SAME: ptr noundef readonly captures(none) [[INPUT1:%.*]], ptr noundef readonly captures(none) [[INPUT2:%.*]], ptr noundef writeonly captures(none) [[OUTPUT:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: [[ENTRY:.*]]:
; CHECK-NEXT: br label %[[FOR_BODY:.*]]
; CHECK: [[FOR_BODY]]:
; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[INDVARS_IV_NEXT:%.*]], %[[FOR_BODY]] ]
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds nuw [4096 x float], ptr [[INPUT1]], i64 0, i64 [[INDVARS_IV]]
; CHECK-NEXT: [[IN1:%.*]] = load float, ptr [[ARRAYIDX]], align 4
; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds nuw [4096 x float], ptr [[INPUT2]], i64 0, i64 [[INDVARS_IV]]
; CHECK-NEXT: [[IN2:%.*]] = load float, ptr [[ARRAYIDX2]], align 4
; CHECK-NEXT: [[OUT:%.*]] = tail call float @llvm.maximumnum.f32(float [[IN1]], float [[IN2]])
; CHECK-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds nuw [4096 x float], ptr [[OUTPUT]], i64 0, i64 [[INDVARS_IV]]
; CHECK-NEXT: store float [[OUT]], ptr [[ARRAYIDX4]], align 4
; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 4096
; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label %[[EXIT:.*]], label %[[FOR_BODY]]
; CHECK: [[EXIT]]:
; CHECK-NEXT: ret void
;
entry:
br label %for.body
for.body:
%iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
%arrayidx = getelementptr inbounds nuw [4096 x float], ptr %input1, i64 0, i64 %iv
%in1 = load float, ptr %arrayidx, align 4
%arrayidx2 = getelementptr inbounds nuw [4096 x float], ptr %input2, i64 0, i64 %iv
%in2 = load float, ptr %arrayidx2, align 4
%out = tail call float @llvm.maximumnum.f32(float %in1, float %in2)
%arrayidx4 = getelementptr inbounds nuw [4096 x float], ptr %output, i64 0, i64 %iv
store float %out, ptr %arrayidx4, align 4
%iv.next = add nuw nsw i64 %iv, 1
%exitcond.not = icmp eq i64 %iv.next, 4096
br i1 %exitcond.not, label %exit, label %for.body
exit:
ret void
}
; Scalar intrinsic exercised by @fmax32 above.
declare float @llvm.maximumnum.f32(float, float)
; Same pattern as @fmin32 but at f64: output[i] =
; llvm.minimumnum.f64(input1[i], input2[i]) over 4096 doubles.
define void @fmin64(ptr noundef readonly captures(none) %input1, ptr noundef readonly captures(none) %input2, ptr noundef writeonly captures(none) %output) {
; CHECK-LABEL: define void @fmin64(
; CHECK-SAME: ptr noundef readonly captures(none) [[INPUT1:%.*]], ptr noundef readonly captures(none) [[INPUT2:%.*]], ptr noundef writeonly captures(none) [[OUTPUT:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: [[ENTRY:.*]]:
; CHECK-NEXT: br label %[[FOR_BODY:.*]]
; CHECK: [[FOR_BODY]]:
; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[INDVARS_IV_NEXT:%.*]], %[[FOR_BODY]] ]
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds nuw [4096 x double], ptr [[INPUT1]], i64 0, i64 [[INDVARS_IV]]
; CHECK-NEXT: [[IN1:%.*]] = load double, ptr [[ARRAYIDX]], align 8
; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds nuw [4096 x double], ptr [[INPUT2]], i64 0, i64 [[INDVARS_IV]]
; CHECK-NEXT: [[IN2:%.*]] = load double, ptr [[ARRAYIDX2]], align 8
; CHECK-NEXT: [[OUT:%.*]] = tail call double @llvm.minimumnum.f64(double [[IN1]], double [[IN2]])
; CHECK-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds nuw [4096 x double], ptr [[OUTPUT]], i64 0, i64 [[INDVARS_IV]]
; CHECK-NEXT: store double [[OUT]], ptr [[ARRAYIDX4]], align 8
; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 4096
; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label %[[EXIT:.*]], label %[[FOR_BODY]]
; CHECK: [[EXIT]]:
; CHECK-NEXT: ret void
;
entry:
br label %for.body
for.body:
%iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
%arrayidx = getelementptr inbounds nuw [4096 x double], ptr %input1, i64 0, i64 %iv
%in1 = load double, ptr %arrayidx, align 8
%arrayidx2 = getelementptr inbounds nuw [4096 x double], ptr %input2, i64 0, i64 %iv
%in2 = load double, ptr %arrayidx2, align 8
%out = tail call double @llvm.minimumnum.f64(double %in1, double %in2)
%arrayidx4 = getelementptr inbounds nuw [4096 x double], ptr %output, i64 0, i64 %iv
store double %out, ptr %arrayidx4, align 8
%iv.next = add nuw nsw i64 %iv, 1
%exitcond.not = icmp eq i64 %iv.next, 4096
br i1 %exitcond.not, label %exit, label %for.body
exit:
ret void
}
; Scalar intrinsic exercised by @fmin64 above.
declare double @llvm.minimumnum.f64(double, double)
; Same pattern as @fmax32 but at f64: output[i] =
; llvm.maximumnum.f64(input1[i], input2[i]) over 4096 doubles.
define void @fmax64(ptr noundef readonly captures(none) %input1, ptr noundef readonly captures(none) %input2, ptr noundef writeonly captures(none) %output) {
; CHECK-LABEL: define void @fmax64(
; CHECK-SAME: ptr noundef readonly captures(none) [[INPUT1:%.*]], ptr noundef readonly captures(none) [[INPUT2:%.*]], ptr noundef writeonly captures(none) [[OUTPUT:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: [[ENTRY:.*]]:
; CHECK-NEXT: br label %[[FOR_BODY:.*]]
; CHECK: [[FOR_BODY]]:
; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[INDVARS_IV_NEXT:%.*]], %[[FOR_BODY]] ]
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds nuw [4096 x double], ptr [[INPUT1]], i64 0, i64 [[INDVARS_IV]]
; CHECK-NEXT: [[IN1:%.*]] = load double, ptr [[ARRAYIDX]], align 8
; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds nuw [4096 x double], ptr [[INPUT2]], i64 0, i64 [[INDVARS_IV]]
; CHECK-NEXT: [[IN2:%.*]] = load double, ptr [[ARRAYIDX2]], align 8
; CHECK-NEXT: [[OUT:%.*]] = tail call double @llvm.maximumnum.f64(double [[IN1]], double [[IN2]])
; CHECK-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds nuw [4096 x double], ptr [[OUTPUT]], i64 0, i64 [[INDVARS_IV]]
; CHECK-NEXT: store double [[OUT]], ptr [[ARRAYIDX4]], align 8
; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 4096
; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label %[[EXIT:.*]], label %[[FOR_BODY]]
; CHECK: [[EXIT]]:
; CHECK-NEXT: ret void
;
entry:
br label %for.body
for.body:
%iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
%arrayidx = getelementptr inbounds nuw [4096 x double], ptr %input1, i64 0, i64 %iv
%in1 = load double, ptr %arrayidx, align 8
%arrayidx2 = getelementptr inbounds nuw [4096 x double], ptr %input2, i64 0, i64 %iv
%in2 = load double, ptr %arrayidx2, align 8
%out = tail call double @llvm.maximumnum.f64(double %in1, double %in2)
%arrayidx4 = getelementptr inbounds nuw [4096 x double], ptr %output, i64 0, i64 %iv
store double %out, ptr %arrayidx4, align 8
%iv.next = add nuw nsw i64 %iv, 1
%exitcond.not = icmp eq i64 %iv.next, 4096
br i1 %exitcond.not, label %exit, label %for.body
exit:
ret void
}
; Scalar intrinsic exercised by @fmax64 above.
declare double @llvm.maximumnum.f64(double, double)
; Same pattern at f16 (half): output[i] =
; llvm.minimumnum.f16(input1[i], input2[i]) over 4096 halves.
define void @fmin16(ptr noundef readonly captures(none) %input1, ptr noundef readonly captures(none) %input2, ptr noundef writeonly captures(none) %output) {
; CHECK-LABEL: define void @fmin16(
; CHECK-SAME: ptr noundef readonly captures(none) [[INPUT1:%.*]], ptr noundef readonly captures(none) [[INPUT2:%.*]], ptr noundef writeonly captures(none) [[OUTPUT:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: [[ENTRY:.*]]:
; CHECK-NEXT: br label %[[FOR_BODY:.*]]
; CHECK: [[FOR_BODY]]:
; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[INDVARS_IV_NEXT:%.*]], %[[FOR_BODY]] ]
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds nuw [4096 x half], ptr [[INPUT1]], i64 0, i64 [[INDVARS_IV]]
; CHECK-NEXT: [[IN1:%.*]] = load half, ptr [[ARRAYIDX]], align 2
; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds nuw [4096 x half], ptr [[INPUT2]], i64 0, i64 [[INDVARS_IV]]
; CHECK-NEXT: [[IN2:%.*]] = load half, ptr [[ARRAYIDX2]], align 2
; CHECK-NEXT: [[OUT:%.*]] = tail call half @llvm.minimumnum.f16(half [[IN1]], half [[IN2]])
; CHECK-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds nuw [4096 x half], ptr [[OUTPUT]], i64 0, i64 [[INDVARS_IV]]
; CHECK-NEXT: store half [[OUT]], ptr [[ARRAYIDX4]], align 2
; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 4096
; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label %[[EXIT:.*]], label %[[FOR_BODY]]
; CHECK: [[EXIT]]:
; CHECK-NEXT: ret void
;
entry:
br label %for.body
for.body:
%iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
%arrayidx = getelementptr inbounds nuw [4096 x half], ptr %input1, i64 0, i64 %iv
%in1 = load half, ptr %arrayidx, align 2
%arrayidx2 = getelementptr inbounds nuw [4096 x half], ptr %input2, i64 0, i64 %iv
%in2 = load half, ptr %arrayidx2, align 2
%out = tail call half @llvm.minimumnum.f16(half %in1, half %in2)
%arrayidx4 = getelementptr inbounds nuw [4096 x half], ptr %output, i64 0, i64 %iv
store half %out, ptr %arrayidx4, align 2
%iv.next = add nuw nsw i64 %iv, 1
%exitcond.not = icmp eq i64 %iv.next, 4096
br i1 %exitcond.not, label %exit, label %for.body
exit:
ret void
}
; Scalar intrinsic exercised by @fmin16 above.
declare half @llvm.minimumnum.f16(half, half)
; Same pattern at f16 (half): output[i] =
; llvm.maximumnum.f16(input1[i], input2[i]) over 4096 halves.
define void @fmax16(ptr noundef readonly captures(none) %input1, ptr noundef readonly captures(none) %input2, ptr noundef writeonly captures(none) %output) {
; CHECK-LABEL: define void @fmax16(
; CHECK-SAME: ptr noundef readonly captures(none) [[INPUT1:%.*]], ptr noundef readonly captures(none) [[INPUT2:%.*]], ptr noundef writeonly captures(none) [[OUTPUT:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: [[ENTRY:.*]]:
; CHECK-NEXT: br label %[[FOR_BODY:.*]]
; CHECK: [[FOR_BODY]]:
; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[INDVARS_IV_NEXT:%.*]], %[[FOR_BODY]] ]
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds nuw [4096 x half], ptr [[INPUT1]], i64 0, i64 [[INDVARS_IV]]
; CHECK-NEXT: [[IN1:%.*]] = load half, ptr [[ARRAYIDX]], align 2
; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds nuw [4096 x half], ptr [[INPUT2]], i64 0, i64 [[INDVARS_IV]]
; CHECK-NEXT: [[IN2:%.*]] = load half, ptr [[ARRAYIDX2]], align 2
; CHECK-NEXT: [[OUT:%.*]] = tail call half @llvm.maximumnum.f16(half [[IN1]], half [[IN2]])
; CHECK-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds nuw [4096 x half], ptr [[OUTPUT]], i64 0, i64 [[INDVARS_IV]]
; CHECK-NEXT: store half [[OUT]], ptr [[ARRAYIDX4]], align 2
; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 4096
; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label %[[EXIT:.*]], label %[[FOR_BODY]]
; CHECK: [[EXIT]]:
; CHECK-NEXT: ret void
;
entry:
br label %for.body
for.body:
%iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
%arrayidx = getelementptr inbounds nuw [4096 x half], ptr %input1, i64 0, i64 %iv
%in1 = load half, ptr %arrayidx, align 2
%arrayidx2 = getelementptr inbounds nuw [4096 x half], ptr %input2, i64 0, i64 %iv
%in2 = load half, ptr %arrayidx2, align 2
%out = tail call half @llvm.maximumnum.f16(half %in1, half %in2)
%arrayidx4 = getelementptr inbounds nuw [4096 x half], ptr %output, i64 0, i64 %iv
store half %out, ptr %arrayidx4, align 2
%iv.next = add nuw nsw i64 %iv, 1
%exitcond.not = icmp eq i64 %iv.next, 4096
br i1 %exitcond.not, label %exit, label %for.body
exit:
ret void
}
; Scalar intrinsic exercised by @fmax16 above.
declare half @llvm.maximumnum.f16(half, half)

View File

@@ -0,0 +1,255 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
; FIXME: fmaximumnum/fminimumnum have no vectorizing support yet.
; RUN: opt --passes=loop-vectorize --mtriple=riscv64 -mattr="+zvfh,+v,+zfh" -S < %s | FileCheck %s
; Scalar loop: output[i] = llvm.minimumnum.f32(input1[i], input2[i]) for
; i in [0, 4096). The CHECK lines record the still-scalar body emitted by
; loop-vectorize (no vector minimumnum support yet — see FIXME above).
define void @fmin32(ptr noundef readonly captures(none) %input1, ptr noundef readonly captures(none) %input2, ptr noundef writeonly captures(none) %output) {
; CHECK-LABEL: define void @fmin32(
; CHECK-SAME: ptr noundef readonly captures(none) [[INPUT1:%.*]], ptr noundef readonly captures(none) [[INPUT2:%.*]], ptr noundef writeonly captures(none) [[OUTPUT:%.*]]) #[[ATTR0:[0-9]+]] {
; CHECK-NEXT: [[ENTRY:.*]]:
; CHECK-NEXT: br label %[[FOR_BODY:.*]]
; CHECK: [[FOR_BODY]]:
; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[INDVARS_IV_NEXT:%.*]], %[[FOR_BODY]] ]
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds nuw [4096 x float], ptr [[INPUT1]], i64 0, i64 [[INDVARS_IV]]
; CHECK-NEXT: [[IN1:%.*]] = load float, ptr [[ARRAYIDX]], align 4
; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds nuw [4096 x float], ptr [[INPUT2]], i64 0, i64 [[INDVARS_IV]]
; CHECK-NEXT: [[IN2:%.*]] = load float, ptr [[ARRAYIDX2]], align 4
; CHECK-NEXT: [[OUT:%.*]] = tail call float @llvm.minimumnum.f32(float [[IN1]], float [[IN2]])
; CHECK-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds nuw [4096 x float], ptr [[OUTPUT]], i64 0, i64 [[INDVARS_IV]]
; CHECK-NEXT: store float [[OUT]], ptr [[ARRAYIDX4]], align 4
; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 4096
; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label %[[EXIT:.*]], label %[[FOR_BODY]]
; CHECK: [[EXIT]]:
; CHECK-NEXT: ret void
;
entry:
br label %for.body
for.body:
%iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
%arrayidx = getelementptr inbounds nuw [4096 x float], ptr %input1, i64 0, i64 %iv
%in1 = load float, ptr %arrayidx, align 4
%arrayidx2 = getelementptr inbounds nuw [4096 x float], ptr %input2, i64 0, i64 %iv
%in2 = load float, ptr %arrayidx2, align 4
%out = tail call float @llvm.minimumnum.f32(float %in1, float %in2)
%arrayidx4 = getelementptr inbounds nuw [4096 x float], ptr %output, i64 0, i64 %iv
store float %out, ptr %arrayidx4, align 4
%iv.next = add nuw nsw i64 %iv, 1
%exitcond.not = icmp eq i64 %iv.next, 4096
br i1 %exitcond.not, label %exit, label %for.body
exit:
ret void
}
; Scalar intrinsic exercised by @fmin32 above.
declare float @llvm.minimumnum.f32(float, float)
; Scalar loop: output[i] = llvm.maximumnum.f32(input1[i], input2[i]) for
; i in [0, 4096). CHECK lines record the unvectorized body.
define void @fmax32(ptr noundef readonly captures(none) %input1, ptr noundef readonly captures(none) %input2, ptr noundef writeonly captures(none) %output) {
; CHECK-LABEL: define void @fmax32(
; CHECK-SAME: ptr noundef readonly captures(none) [[INPUT1:%.*]], ptr noundef readonly captures(none) [[INPUT2:%.*]], ptr noundef writeonly captures(none) [[OUTPUT:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: [[ENTRY:.*]]:
; CHECK-NEXT: br label %[[FOR_BODY:.*]]
; CHECK: [[FOR_BODY]]:
; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[INDVARS_IV_NEXT:%.*]], %[[FOR_BODY]] ]
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds nuw [4096 x float], ptr [[INPUT1]], i64 0, i64 [[INDVARS_IV]]
; CHECK-NEXT: [[IN1:%.*]] = load float, ptr [[ARRAYIDX]], align 4
; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds nuw [4096 x float], ptr [[INPUT2]], i64 0, i64 [[INDVARS_IV]]
; CHECK-NEXT: [[IN2:%.*]] = load float, ptr [[ARRAYIDX2]], align 4
; CHECK-NEXT: [[OUT:%.*]] = tail call float @llvm.maximumnum.f32(float [[IN1]], float [[IN2]])
; CHECK-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds nuw [4096 x float], ptr [[OUTPUT]], i64 0, i64 [[INDVARS_IV]]
; CHECK-NEXT: store float [[OUT]], ptr [[ARRAYIDX4]], align 4
; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 4096
; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label %[[EXIT:.*]], label %[[FOR_BODY]]
; CHECK: [[EXIT]]:
; CHECK-NEXT: ret void
;
entry:
br label %for.body
for.body:
%iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
%arrayidx = getelementptr inbounds nuw [4096 x float], ptr %input1, i64 0, i64 %iv
%in1 = load float, ptr %arrayidx, align 4
%arrayidx2 = getelementptr inbounds nuw [4096 x float], ptr %input2, i64 0, i64 %iv
%in2 = load float, ptr %arrayidx2, align 4
%out = tail call float @llvm.maximumnum.f32(float %in1, float %in2)
%arrayidx4 = getelementptr inbounds nuw [4096 x float], ptr %output, i64 0, i64 %iv
store float %out, ptr %arrayidx4, align 4
%iv.next = add nuw nsw i64 %iv, 1
%exitcond.not = icmp eq i64 %iv.next, 4096
br i1 %exitcond.not, label %exit, label %for.body
exit:
ret void
}
; Scalar intrinsic exercised by @fmax32 above.
declare float @llvm.maximumnum.f32(float, float)
; Same pattern as @fmin32 but at f64: output[i] =
; llvm.minimumnum.f64(input1[i], input2[i]) over 4096 doubles.
define void @fmin64(ptr noundef readonly captures(none) %input1, ptr noundef readonly captures(none) %input2, ptr noundef writeonly captures(none) %output) {
; CHECK-LABEL: define void @fmin64(
; CHECK-SAME: ptr noundef readonly captures(none) [[INPUT1:%.*]], ptr noundef readonly captures(none) [[INPUT2:%.*]], ptr noundef writeonly captures(none) [[OUTPUT:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: [[ENTRY:.*]]:
; CHECK-NEXT: br label %[[FOR_BODY:.*]]
; CHECK: [[FOR_BODY]]:
; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[INDVARS_IV_NEXT:%.*]], %[[FOR_BODY]] ]
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds nuw [4096 x double], ptr [[INPUT1]], i64 0, i64 [[INDVARS_IV]]
; CHECK-NEXT: [[IN1:%.*]] = load double, ptr [[ARRAYIDX]], align 8
; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds nuw [4096 x double], ptr [[INPUT2]], i64 0, i64 [[INDVARS_IV]]
; CHECK-NEXT: [[IN2:%.*]] = load double, ptr [[ARRAYIDX2]], align 8
; CHECK-NEXT: [[OUT:%.*]] = tail call double @llvm.minimumnum.f64(double [[IN1]], double [[IN2]])
; CHECK-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds nuw [4096 x double], ptr [[OUTPUT]], i64 0, i64 [[INDVARS_IV]]
; CHECK-NEXT: store double [[OUT]], ptr [[ARRAYIDX4]], align 8
; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 4096
; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label %[[EXIT:.*]], label %[[FOR_BODY]]
; CHECK: [[EXIT]]:
; CHECK-NEXT: ret void
;
entry:
br label %for.body
for.body:
%iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
%arrayidx = getelementptr inbounds nuw [4096 x double], ptr %input1, i64 0, i64 %iv
%in1 = load double, ptr %arrayidx, align 8
%arrayidx2 = getelementptr inbounds nuw [4096 x double], ptr %input2, i64 0, i64 %iv
%in2 = load double, ptr %arrayidx2, align 8
%out = tail call double @llvm.minimumnum.f64(double %in1, double %in2)
%arrayidx4 = getelementptr inbounds nuw [4096 x double], ptr %output, i64 0, i64 %iv
store double %out, ptr %arrayidx4, align 8
%iv.next = add nuw nsw i64 %iv, 1
%exitcond.not = icmp eq i64 %iv.next, 4096
br i1 %exitcond.not, label %exit, label %for.body
exit:
ret void
}
; Scalar intrinsic exercised by @fmin64 above.
declare double @llvm.minimumnum.f64(double, double)
; Same pattern as @fmax32 but at f64: output[i] =
; llvm.maximumnum.f64(input1[i], input2[i]) over 4096 doubles.
define void @fmax64(ptr noundef readonly captures(none) %input1, ptr noundef readonly captures(none) %input2, ptr noundef writeonly captures(none) %output) {
; CHECK-LABEL: define void @fmax64(
; CHECK-SAME: ptr noundef readonly captures(none) [[INPUT1:%.*]], ptr noundef readonly captures(none) [[INPUT2:%.*]], ptr noundef writeonly captures(none) [[OUTPUT:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: [[ENTRY:.*]]:
; CHECK-NEXT: br label %[[FOR_BODY:.*]]
; CHECK: [[FOR_BODY]]:
; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[INDVARS_IV_NEXT:%.*]], %[[FOR_BODY]] ]
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds nuw [4096 x double], ptr [[INPUT1]], i64 0, i64 [[INDVARS_IV]]
; CHECK-NEXT: [[IN1:%.*]] = load double, ptr [[ARRAYIDX]], align 8
; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds nuw [4096 x double], ptr [[INPUT2]], i64 0, i64 [[INDVARS_IV]]
; CHECK-NEXT: [[IN2:%.*]] = load double, ptr [[ARRAYIDX2]], align 8
; CHECK-NEXT: [[OUT:%.*]] = tail call double @llvm.maximumnum.f64(double [[IN1]], double [[IN2]])
; CHECK-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds nuw [4096 x double], ptr [[OUTPUT]], i64 0, i64 [[INDVARS_IV]]
; CHECK-NEXT: store double [[OUT]], ptr [[ARRAYIDX4]], align 8
; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 4096
; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label %[[EXIT:.*]], label %[[FOR_BODY]]
; CHECK: [[EXIT]]:
; CHECK-NEXT: ret void
;
entry:
br label %for.body
for.body:
%iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
%arrayidx = getelementptr inbounds nuw [4096 x double], ptr %input1, i64 0, i64 %iv
%in1 = load double, ptr %arrayidx, align 8
%arrayidx2 = getelementptr inbounds nuw [4096 x double], ptr %input2, i64 0, i64 %iv
%in2 = load double, ptr %arrayidx2, align 8
%out = tail call double @llvm.maximumnum.f64(double %in1, double %in2)
%arrayidx4 = getelementptr inbounds nuw [4096 x double], ptr %output, i64 0, i64 %iv
store double %out, ptr %arrayidx4, align 8
%iv.next = add nuw nsw i64 %iv, 1
%exitcond.not = icmp eq i64 %iv.next, 4096
br i1 %exitcond.not, label %exit, label %for.body
exit:
ret void
}
; Scalar intrinsic exercised by @fmax64 above.
declare double @llvm.maximumnum.f64(double, double)
; Same pattern at f16 (half): output[i] =
; llvm.minimumnum.f16(input1[i], input2[i]) over 4096 halves.
define void @fmin16(ptr noundef readonly captures(none) %input1, ptr noundef readonly captures(none) %input2, ptr noundef writeonly captures(none) %output) {
; CHECK-LABEL: define void @fmin16(
; CHECK-SAME: ptr noundef readonly captures(none) [[INPUT1:%.*]], ptr noundef readonly captures(none) [[INPUT2:%.*]], ptr noundef writeonly captures(none) [[OUTPUT:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: [[ENTRY:.*]]:
; CHECK-NEXT: br label %[[FOR_BODY:.*]]
; CHECK: [[FOR_BODY]]:
; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[INDVARS_IV_NEXT:%.*]], %[[FOR_BODY]] ]
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds nuw [4096 x half], ptr [[INPUT1]], i64 0, i64 [[INDVARS_IV]]
; CHECK-NEXT: [[IN1:%.*]] = load half, ptr [[ARRAYIDX]], align 2
; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds nuw [4096 x half], ptr [[INPUT2]], i64 0, i64 [[INDVARS_IV]]
; CHECK-NEXT: [[IN2:%.*]] = load half, ptr [[ARRAYIDX2]], align 2
; CHECK-NEXT: [[OUT:%.*]] = tail call half @llvm.minimumnum.f16(half [[IN1]], half [[IN2]])
; CHECK-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds nuw [4096 x half], ptr [[OUTPUT]], i64 0, i64 [[INDVARS_IV]]
; CHECK-NEXT: store half [[OUT]], ptr [[ARRAYIDX4]], align 2
; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 4096
; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label %[[EXIT:.*]], label %[[FOR_BODY]]
; CHECK: [[EXIT]]:
; CHECK-NEXT: ret void
;
entry:
br label %for.body
for.body:
%iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
%arrayidx = getelementptr inbounds nuw [4096 x half], ptr %input1, i64 0, i64 %iv
%in1 = load half, ptr %arrayidx, align 2
%arrayidx2 = getelementptr inbounds nuw [4096 x half], ptr %input2, i64 0, i64 %iv
%in2 = load half, ptr %arrayidx2, align 2
%out = tail call half @llvm.minimumnum.f16(half %in1, half %in2)
%arrayidx4 = getelementptr inbounds nuw [4096 x half], ptr %output, i64 0, i64 %iv
store half %out, ptr %arrayidx4, align 2
%iv.next = add nuw nsw i64 %iv, 1
%exitcond.not = icmp eq i64 %iv.next, 4096
br i1 %exitcond.not, label %exit, label %for.body
exit:
ret void
}
; Scalar intrinsic exercised by @fmin16 above.
declare half @llvm.minimumnum.f16(half, half)
; Same pattern at f16 (half): output[i] =
; llvm.maximumnum.f16(input1[i], input2[i]) over 4096 halves.
define void @fmax16(ptr noundef readonly captures(none) %input1, ptr noundef readonly captures(none) %input2, ptr noundef writeonly captures(none) %output) {
; CHECK-LABEL: define void @fmax16(
; CHECK-SAME: ptr noundef readonly captures(none) [[INPUT1:%.*]], ptr noundef readonly captures(none) [[INPUT2:%.*]], ptr noundef writeonly captures(none) [[OUTPUT:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: [[ENTRY:.*]]:
; CHECK-NEXT: br label %[[FOR_BODY:.*]]
; CHECK: [[FOR_BODY]]:
; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[INDVARS_IV_NEXT:%.*]], %[[FOR_BODY]] ]
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds nuw [4096 x half], ptr [[INPUT1]], i64 0, i64 [[INDVARS_IV]]
; CHECK-NEXT: [[IN1:%.*]] = load half, ptr [[ARRAYIDX]], align 2
; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds nuw [4096 x half], ptr [[INPUT2]], i64 0, i64 [[INDVARS_IV]]
; CHECK-NEXT: [[IN2:%.*]] = load half, ptr [[ARRAYIDX2]], align 2
; CHECK-NEXT: [[OUT:%.*]] = tail call half @llvm.maximumnum.f16(half [[IN1]], half [[IN2]])
; CHECK-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds nuw [4096 x half], ptr [[OUTPUT]], i64 0, i64 [[INDVARS_IV]]
; CHECK-NEXT: store half [[OUT]], ptr [[ARRAYIDX4]], align 2
; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 4096
; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label %[[EXIT:.*]], label %[[FOR_BODY]]
; CHECK: [[EXIT]]:
; CHECK-NEXT: ret void
;
entry:
br label %for.body
for.body:
%iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
%arrayidx = getelementptr inbounds nuw [4096 x half], ptr %input1, i64 0, i64 %iv
%in1 = load half, ptr %arrayidx, align 2
%arrayidx2 = getelementptr inbounds nuw [4096 x half], ptr %input2, i64 0, i64 %iv
%in2 = load half, ptr %arrayidx2, align 2
%out = tail call half @llvm.maximumnum.f16(half %in1, half %in2)
%arrayidx4 = getelementptr inbounds nuw [4096 x half], ptr %output, i64 0, i64 %iv
store half %out, ptr %arrayidx4, align 2
%iv.next = add nuw nsw i64 %iv, 1
%exitcond.not = icmp eq i64 %iv.next, 4096
br i1 %exitcond.not, label %exit, label %for.body
exit:
ret void
}
; Scalar intrinsic exercised by @fmax16 above.
declare half @llvm.maximumnum.f16(half, half)

View File

@@ -0,0 +1,255 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
; FIXME: fmaximumnum/fminimumnum have no vectorizing support yet.
; RUN: opt --passes=loop-vectorize --mtriple=x86_64 -S < %s | FileCheck %s
; Scalar loop: output[i] = llvm.minimumnum.f32(input1[i], input2[i]) for
; i in [0, 4096). Per the FIXME above, the loop vectorizer does not yet
; handle minimumnum, so the checks match the unchanged scalar loop.
define void @fmin32(ptr noundef readonly captures(none) %input1, ptr noundef readonly captures(none) %input2, ptr noundef writeonly captures(none) %output) {
; CHECK-LABEL: define void @fmin32(
; CHECK-SAME: ptr noundef readonly captures(none) [[INPUT1:%.*]], ptr noundef readonly captures(none) [[INPUT2:%.*]], ptr noundef writeonly captures(none) [[OUTPUT:%.*]]) {
; CHECK-NEXT: [[ENTRY:.*]]:
; CHECK-NEXT: br label %[[FOR_BODY:.*]]
; CHECK: [[FOR_BODY]]:
; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[INDVARS_IV_NEXT:%.*]], %[[FOR_BODY]] ]
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds nuw [4096 x float], ptr [[INPUT1]], i64 0, i64 [[INDVARS_IV]]
; CHECK-NEXT: [[IN1:%.*]] = load float, ptr [[ARRAYIDX]], align 4
; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds nuw [4096 x float], ptr [[INPUT2]], i64 0, i64 [[INDVARS_IV]]
; CHECK-NEXT: [[IN2:%.*]] = load float, ptr [[ARRAYIDX2]], align 4
; CHECK-NEXT: [[OUT:%.*]] = tail call float @llvm.minimumnum.f32(float [[IN1]], float [[IN2]])
; CHECK-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds nuw [4096 x float], ptr [[OUTPUT]], i64 0, i64 [[INDVARS_IV]]
; CHECK-NEXT: store float [[OUT]], ptr [[ARRAYIDX4]], align 4
; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 4096
; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label %[[EXIT:.*]], label %[[FOR_BODY]]
; CHECK: [[EXIT]]:
; CHECK-NEXT: ret void
;
entry:
br label %for.body
; Canonical counted loop over a fixed trip count of 4096.
for.body:
%iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
%arrayidx = getelementptr inbounds nuw [4096 x float], ptr %input1, i64 0, i64 %iv
%in1 = load float, ptr %arrayidx, align 4
%arrayidx2 = getelementptr inbounds nuw [4096 x float], ptr %input2, i64 0, i64 %iv
%in2 = load float, ptr %arrayidx2, align 4
%out = tail call float @llvm.minimumnum.f32(float %in1, float %in2)
%arrayidx4 = getelementptr inbounds nuw [4096 x float], ptr %output, i64 0, i64 %iv
store float %out, ptr %arrayidx4, align 4
%iv.next = add nuw nsw i64 %iv, 1
%exitcond.not = icmp eq i64 %iv.next, 4096
br i1 %exitcond.not, label %exit, label %for.body
exit:
ret void
}
declare float @llvm.minimumnum.f32(float, float)
; Same shape as @fmin32 above, but exercising llvm.maximumnum.f32.
; The checks match the unchanged scalar loop (no vectorization yet).
define void @fmax32(ptr noundef readonly captures(none) %input1, ptr noundef readonly captures(none) %input2, ptr noundef writeonly captures(none) %output) {
; CHECK-LABEL: define void @fmax32(
; CHECK-SAME: ptr noundef readonly captures(none) [[INPUT1:%.*]], ptr noundef readonly captures(none) [[INPUT2:%.*]], ptr noundef writeonly captures(none) [[OUTPUT:%.*]]) {
; CHECK-NEXT: [[ENTRY:.*]]:
; CHECK-NEXT: br label %[[FOR_BODY:.*]]
; CHECK: [[FOR_BODY]]:
; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[INDVARS_IV_NEXT:%.*]], %[[FOR_BODY]] ]
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds nuw [4096 x float], ptr [[INPUT1]], i64 0, i64 [[INDVARS_IV]]
; CHECK-NEXT: [[IN1:%.*]] = load float, ptr [[ARRAYIDX]], align 4
; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds nuw [4096 x float], ptr [[INPUT2]], i64 0, i64 [[INDVARS_IV]]
; CHECK-NEXT: [[IN2:%.*]] = load float, ptr [[ARRAYIDX2]], align 4
; CHECK-NEXT: [[OUT:%.*]] = tail call float @llvm.maximumnum.f32(float [[IN1]], float [[IN2]])
; CHECK-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds nuw [4096 x float], ptr [[OUTPUT]], i64 0, i64 [[INDVARS_IV]]
; CHECK-NEXT: store float [[OUT]], ptr [[ARRAYIDX4]], align 4
; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 4096
; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label %[[EXIT:.*]], label %[[FOR_BODY]]
; CHECK: [[EXIT]]:
; CHECK-NEXT: ret void
;
entry:
br label %for.body
for.body:
%iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
%arrayidx = getelementptr inbounds nuw [4096 x float], ptr %input1, i64 0, i64 %iv
%in1 = load float, ptr %arrayidx, align 4
%arrayidx2 = getelementptr inbounds nuw [4096 x float], ptr %input2, i64 0, i64 %iv
%in2 = load float, ptr %arrayidx2, align 4
%out = tail call float @llvm.maximumnum.f32(float %in1, float %in2)
%arrayidx4 = getelementptr inbounds nuw [4096 x float], ptr %output, i64 0, i64 %iv
store float %out, ptr %arrayidx4, align 4
%iv.next = add nuw nsw i64 %iv, 1
%exitcond.not = icmp eq i64 %iv.next, 4096
br i1 %exitcond.not, label %exit, label %for.body
exit:
ret void
}
declare float @llvm.maximumnum.f32(float, float)
; f64 variant of @fmin32: output[i] = llvm.minimumnum.f64(input1[i],
; input2[i]) for i in [0, 4096). Checks match the scalar loop.
define void @fmin64(ptr noundef readonly captures(none) %input1, ptr noundef readonly captures(none) %input2, ptr noundef writeonly captures(none) %output) {
; CHECK-LABEL: define void @fmin64(
; CHECK-SAME: ptr noundef readonly captures(none) [[INPUT1:%.*]], ptr noundef readonly captures(none) [[INPUT2:%.*]], ptr noundef writeonly captures(none) [[OUTPUT:%.*]]) {
; CHECK-NEXT: [[ENTRY:.*]]:
; CHECK-NEXT: br label %[[FOR_BODY:.*]]
; CHECK: [[FOR_BODY]]:
; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[INDVARS_IV_NEXT:%.*]], %[[FOR_BODY]] ]
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds nuw [4096 x double], ptr [[INPUT1]], i64 0, i64 [[INDVARS_IV]]
; CHECK-NEXT: [[IN1:%.*]] = load double, ptr [[ARRAYIDX]], align 8
; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds nuw [4096 x double], ptr [[INPUT2]], i64 0, i64 [[INDVARS_IV]]
; CHECK-NEXT: [[IN2:%.*]] = load double, ptr [[ARRAYIDX2]], align 8
; CHECK-NEXT: [[OUT:%.*]] = tail call double @llvm.minimumnum.f64(double [[IN1]], double [[IN2]])
; CHECK-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds nuw [4096 x double], ptr [[OUTPUT]], i64 0, i64 [[INDVARS_IV]]
; CHECK-NEXT: store double [[OUT]], ptr [[ARRAYIDX4]], align 8
; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 4096
; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label %[[EXIT:.*]], label %[[FOR_BODY]]
; CHECK: [[EXIT]]:
; CHECK-NEXT: ret void
;
entry:
br label %for.body
for.body:
%iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
%arrayidx = getelementptr inbounds nuw [4096 x double], ptr %input1, i64 0, i64 %iv
%in1 = load double, ptr %arrayidx, align 8
%arrayidx2 = getelementptr inbounds nuw [4096 x double], ptr %input2, i64 0, i64 %iv
%in2 = load double, ptr %arrayidx2, align 8
%out = tail call double @llvm.minimumnum.f64(double %in1, double %in2)
%arrayidx4 = getelementptr inbounds nuw [4096 x double], ptr %output, i64 0, i64 %iv
store double %out, ptr %arrayidx4, align 8
%iv.next = add nuw nsw i64 %iv, 1
%exitcond.not = icmp eq i64 %iv.next, 4096
br i1 %exitcond.not, label %exit, label %for.body
exit:
ret void
}
declare double @llvm.minimumnum.f64(double, double)
; f64 variant of @fmax32, exercising llvm.maximumnum.f64.
; Checks match the unchanged scalar loop.
define void @fmax64(ptr noundef readonly captures(none) %input1, ptr noundef readonly captures(none) %input2, ptr noundef writeonly captures(none) %output) {
; CHECK-LABEL: define void @fmax64(
; CHECK-SAME: ptr noundef readonly captures(none) [[INPUT1:%.*]], ptr noundef readonly captures(none) [[INPUT2:%.*]], ptr noundef writeonly captures(none) [[OUTPUT:%.*]]) {
; CHECK-NEXT: [[ENTRY:.*]]:
; CHECK-NEXT: br label %[[FOR_BODY:.*]]
; CHECK: [[FOR_BODY]]:
; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[INDVARS_IV_NEXT:%.*]], %[[FOR_BODY]] ]
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds nuw [4096 x double], ptr [[INPUT1]], i64 0, i64 [[INDVARS_IV]]
; CHECK-NEXT: [[IN1:%.*]] = load double, ptr [[ARRAYIDX]], align 8
; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds nuw [4096 x double], ptr [[INPUT2]], i64 0, i64 [[INDVARS_IV]]
; CHECK-NEXT: [[IN2:%.*]] = load double, ptr [[ARRAYIDX2]], align 8
; CHECK-NEXT: [[OUT:%.*]] = tail call double @llvm.maximumnum.f64(double [[IN1]], double [[IN2]])
; CHECK-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds nuw [4096 x double], ptr [[OUTPUT]], i64 0, i64 [[INDVARS_IV]]
; CHECK-NEXT: store double [[OUT]], ptr [[ARRAYIDX4]], align 8
; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 4096
; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label %[[EXIT:.*]], label %[[FOR_BODY]]
; CHECK: [[EXIT]]:
; CHECK-NEXT: ret void
;
entry:
br label %for.body
for.body:
%iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
%arrayidx = getelementptr inbounds nuw [4096 x double], ptr %input1, i64 0, i64 %iv
%in1 = load double, ptr %arrayidx, align 8
%arrayidx2 = getelementptr inbounds nuw [4096 x double], ptr %input2, i64 0, i64 %iv
%in2 = load double, ptr %arrayidx2, align 8
%out = tail call double @llvm.maximumnum.f64(double %in1, double %in2)
%arrayidx4 = getelementptr inbounds nuw [4096 x double], ptr %output, i64 0, i64 %iv
store double %out, ptr %arrayidx4, align 8
%iv.next = add nuw nsw i64 %iv, 1
%exitcond.not = icmp eq i64 %iv.next, 4096
br i1 %exitcond.not, label %exit, label %for.body
exit:
ret void
}
declare double @llvm.maximumnum.f64(double, double)
; f16 (half) variant of @fmin32, exercising llvm.minimumnum.f16.
; Checks match the unchanged scalar loop.
define void @fmin16(ptr noundef readonly captures(none) %input1, ptr noundef readonly captures(none) %input2, ptr noundef writeonly captures(none) %output) {
; CHECK-LABEL: define void @fmin16(
; CHECK-SAME: ptr noundef readonly captures(none) [[INPUT1:%.*]], ptr noundef readonly captures(none) [[INPUT2:%.*]], ptr noundef writeonly captures(none) [[OUTPUT:%.*]]) {
; CHECK-NEXT: [[ENTRY:.*]]:
; CHECK-NEXT: br label %[[FOR_BODY:.*]]
; CHECK: [[FOR_BODY]]:
; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[INDVARS_IV_NEXT:%.*]], %[[FOR_BODY]] ]
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds nuw [4096 x half], ptr [[INPUT1]], i64 0, i64 [[INDVARS_IV]]
; CHECK-NEXT: [[IN1:%.*]] = load half, ptr [[ARRAYIDX]], align 2
; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds nuw [4096 x half], ptr [[INPUT2]], i64 0, i64 [[INDVARS_IV]]
; CHECK-NEXT: [[IN2:%.*]] = load half, ptr [[ARRAYIDX2]], align 2
; CHECK-NEXT: [[OUT:%.*]] = tail call half @llvm.minimumnum.f16(half [[IN1]], half [[IN2]])
; CHECK-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds nuw [4096 x half], ptr [[OUTPUT]], i64 0, i64 [[INDVARS_IV]]
; CHECK-NEXT: store half [[OUT]], ptr [[ARRAYIDX4]], align 2
; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 4096
; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label %[[EXIT:.*]], label %[[FOR_BODY]]
; CHECK: [[EXIT]]:
; CHECK-NEXT: ret void
;
entry:
br label %for.body
for.body:
%iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
%arrayidx = getelementptr inbounds nuw [4096 x half], ptr %input1, i64 0, i64 %iv
%in1 = load half, ptr %arrayidx, align 2
%arrayidx2 = getelementptr inbounds nuw [4096 x half], ptr %input2, i64 0, i64 %iv
%in2 = load half, ptr %arrayidx2, align 2
%out = tail call half @llvm.minimumnum.f16(half %in1, half %in2)
%arrayidx4 = getelementptr inbounds nuw [4096 x half], ptr %output, i64 0, i64 %iv
store half %out, ptr %arrayidx4, align 2
%iv.next = add nuw nsw i64 %iv, 1
%exitcond.not = icmp eq i64 %iv.next, 4096
br i1 %exitcond.not, label %exit, label %for.body
exit:
ret void
}
declare half @llvm.minimumnum.f16(half, half)
; f16 (half) variant of @fmax32, exercising llvm.maximumnum.f16.
; Checks match the unchanged scalar loop.
define void @fmax16(ptr noundef readonly captures(none) %input1, ptr noundef readonly captures(none) %input2, ptr noundef writeonly captures(none) %output) {
; CHECK-LABEL: define void @fmax16(
; CHECK-SAME: ptr noundef readonly captures(none) [[INPUT1:%.*]], ptr noundef readonly captures(none) [[INPUT2:%.*]], ptr noundef writeonly captures(none) [[OUTPUT:%.*]]) {
; CHECK-NEXT: [[ENTRY:.*]]:
; CHECK-NEXT: br label %[[FOR_BODY:.*]]
; CHECK: [[FOR_BODY]]:
; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[INDVARS_IV_NEXT:%.*]], %[[FOR_BODY]] ]
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds nuw [4096 x half], ptr [[INPUT1]], i64 0, i64 [[INDVARS_IV]]
; CHECK-NEXT: [[IN1:%.*]] = load half, ptr [[ARRAYIDX]], align 2
; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds nuw [4096 x half], ptr [[INPUT2]], i64 0, i64 [[INDVARS_IV]]
; CHECK-NEXT: [[IN2:%.*]] = load half, ptr [[ARRAYIDX2]], align 2
; CHECK-NEXT: [[OUT:%.*]] = tail call half @llvm.maximumnum.f16(half [[IN1]], half [[IN2]])
; CHECK-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds nuw [4096 x half], ptr [[OUTPUT]], i64 0, i64 [[INDVARS_IV]]
; CHECK-NEXT: store half [[OUT]], ptr [[ARRAYIDX4]], align 2
; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 4096
; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label %[[EXIT:.*]], label %[[FOR_BODY]]
; CHECK: [[EXIT]]:
; CHECK-NEXT: ret void
;
entry:
br label %for.body
for.body:
%iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
%arrayidx = getelementptr inbounds nuw [4096 x half], ptr %input1, i64 0, i64 %iv
%in1 = load half, ptr %arrayidx, align 2
%arrayidx2 = getelementptr inbounds nuw [4096 x half], ptr %input2, i64 0, i64 %iv
%in2 = load half, ptr %arrayidx2, align 2
%out = tail call half @llvm.maximumnum.f16(half %in1, half %in2)
%arrayidx4 = getelementptr inbounds nuw [4096 x half], ptr %output, i64 0, i64 %iv
store half %out, ptr %arrayidx4, align 2
%iv.next = add nuw nsw i64 %iv, 1
%exitcond.not = icmp eq i64 %iv.next, 4096
br i1 %exitcond.not, label %exit, label %for.body
exit:
ret void
}
declare half @llvm.maximumnum.f16(half, half)

View File

@@ -0,0 +1,516 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
; RUN: opt --passes=slp-vectorizer --mtriple=aarch64 -mattr="+neon" -S < %s | FileCheck %s
; 9-element, 16-byte-aligned global arrays (f32/f64/f16) used as the
; inputs and outputs of the SLP-vectorizer tests below.
@input1_f32 = global [9 x float] zeroinitializer, align 16
@input2_f32 = global [9 x float] zeroinitializer, align 16
@output_f32 = global [9 x float] zeroinitializer, align 16
@input1_f64 = global [9 x double] zeroinitializer, align 16
@input2_f64 = global [9 x double] zeroinitializer, align 16
@output_f64 = global [9 x double] zeroinitializer, align 16
@input1_f16 = global [9 x half] zeroinitializer, align 16
@input2_f16 = global [9 x half] zeroinitializer, align 16
@output_f16 = global [9 x half] zeroinitializer, align 16
; Straight-line code: nine consecutive llvm.minimumnum.f32 ops on the
; global arrays. The SLP vectorizer currently leaves all nine scalar,
; so the checks match the input code one-for-one.
define void @fmin32() {
; CHECK-LABEL: define void @fmin32(
; CHECK-SAME: ) #[[ATTR0:[0-9]+]] {
; CHECK-NEXT: [[ENTRY:.*:]]
; CHECK-NEXT: [[TMP0:%.*]] = load float, ptr @input1_f32, align 16
; CHECK-NEXT: [[TMP1:%.*]] = load float, ptr @input2_f32, align 16
; CHECK-NEXT: [[TMP2:%.*]] = tail call float @llvm.minimumnum.f32(float [[TMP0]], float [[TMP1]])
; CHECK-NEXT: store float [[TMP2]], ptr @output_f32, align 16
; CHECK-NEXT: [[TMP3:%.*]] = load float, ptr getelementptr inbounds nuw (i8, ptr @input1_f32, i64 4), align 4
; CHECK-NEXT: [[TMP4:%.*]] = load float, ptr getelementptr inbounds nuw (i8, ptr @input2_f32, i64 4), align 4
; CHECK-NEXT: [[TMP5:%.*]] = tail call float @llvm.minimumnum.f32(float [[TMP3]], float [[TMP4]])
; CHECK-NEXT: store float [[TMP5]], ptr getelementptr inbounds nuw (i8, ptr @output_f32, i64 4), align 4
; CHECK-NEXT: [[TMP6:%.*]] = load float, ptr getelementptr inbounds nuw (i8, ptr @input1_f32, i64 8), align 8
; CHECK-NEXT: [[TMP7:%.*]] = load float, ptr getelementptr inbounds nuw (i8, ptr @input2_f32, i64 8), align 8
; CHECK-NEXT: [[TMP8:%.*]] = tail call float @llvm.minimumnum.f32(float [[TMP6]], float [[TMP7]])
; CHECK-NEXT: store float [[TMP8]], ptr getelementptr inbounds nuw (i8, ptr @output_f32, i64 8), align 8
; CHECK-NEXT: [[TMP9:%.*]] = load float, ptr getelementptr inbounds nuw (i8, ptr @input1_f32, i64 12), align 4
; CHECK-NEXT: [[TMP10:%.*]] = load float, ptr getelementptr inbounds nuw (i8, ptr @input2_f32, i64 12), align 4
; CHECK-NEXT: [[TMP11:%.*]] = tail call float @llvm.minimumnum.f32(float [[TMP9]], float [[TMP10]])
; CHECK-NEXT: store float [[TMP11]], ptr getelementptr inbounds nuw (i8, ptr @output_f32, i64 12), align 4
; CHECK-NEXT: [[TMP12:%.*]] = load float, ptr getelementptr inbounds nuw (i8, ptr @input1_f32, i64 16), align 16
; CHECK-NEXT: [[TMP13:%.*]] = load float, ptr getelementptr inbounds nuw (i8, ptr @input2_f32, i64 16), align 16
; CHECK-NEXT: [[TMP14:%.*]] = tail call float @llvm.minimumnum.f32(float [[TMP12]], float [[TMP13]])
; CHECK-NEXT: store float [[TMP14]], ptr getelementptr inbounds nuw (i8, ptr @output_f32, i64 16), align 16
; CHECK-NEXT: [[TMP15:%.*]] = load float, ptr getelementptr inbounds nuw (i8, ptr @input1_f32, i64 20), align 4
; CHECK-NEXT: [[TMP16:%.*]] = load float, ptr getelementptr inbounds nuw (i8, ptr @input2_f32, i64 20), align 4
; CHECK-NEXT: [[TMP17:%.*]] = tail call float @llvm.minimumnum.f32(float [[TMP15]], float [[TMP16]])
; CHECK-NEXT: store float [[TMP17]], ptr getelementptr inbounds nuw (i8, ptr @output_f32, i64 20), align 4
; CHECK-NEXT: [[TMP18:%.*]] = load float, ptr getelementptr inbounds nuw (i8, ptr @input1_f32, i64 24), align 8
; CHECK-NEXT: [[TMP19:%.*]] = load float, ptr getelementptr inbounds nuw (i8, ptr @input2_f32, i64 24), align 8
; CHECK-NEXT: [[TMP20:%.*]] = tail call float @llvm.minimumnum.f32(float [[TMP18]], float [[TMP19]])
; CHECK-NEXT: store float [[TMP20]], ptr getelementptr inbounds nuw (i8, ptr @output_f32, i64 24), align 8
; CHECK-NEXT: [[TMP21:%.*]] = load float, ptr getelementptr inbounds nuw (i8, ptr @input1_f32, i64 28), align 4
; CHECK-NEXT: [[TMP22:%.*]] = load float, ptr getelementptr inbounds nuw (i8, ptr @input2_f32, i64 28), align 4
; CHECK-NEXT: [[TMP23:%.*]] = tail call float @llvm.minimumnum.f32(float [[TMP21]], float [[TMP22]])
; CHECK-NEXT: store float [[TMP23]], ptr getelementptr inbounds nuw (i8, ptr @output_f32, i64 28), align 4
; CHECK-NEXT: [[TMP24:%.*]] = load float, ptr getelementptr inbounds nuw (i8, ptr @input1_f32, i64 32), align 16
; CHECK-NEXT: [[TMP25:%.*]] = load float, ptr getelementptr inbounds nuw (i8, ptr @input2_f32, i64 32), align 16
; CHECK-NEXT: [[TMP26:%.*]] = tail call float @llvm.minimumnum.f32(float [[TMP24]], float [[TMP25]])
; CHECK-NEXT: store float [[TMP26]], ptr getelementptr inbounds nuw (i8, ptr @output_f32, i64 32), align 16
; CHECK-NEXT: ret void
;
entry:
%input0_0 = load float, ptr @input1_f32, align 16
%input0_1 = load float, ptr @input2_f32, align 16
%output0 = tail call float @llvm.minimumnum.f32(float %input0_0, float %input0_1)
store float %output0, ptr @output_f32, align 16
%input1_1 = load float, ptr getelementptr inbounds nuw (i8, ptr @input1_f32, i64 4), align 4
%input1_2 = load float, ptr getelementptr inbounds nuw (i8, ptr @input2_f32, i64 4), align 4
%output1 = tail call float @llvm.minimumnum.f32(float %input1_1, float %input1_2)
store float %output1, ptr getelementptr inbounds nuw (i8, ptr @output_f32, i64 4), align 4
%input2_1 = load float, ptr getelementptr inbounds nuw (i8, ptr @input1_f32, i64 8), align 8
%input2_2 = load float, ptr getelementptr inbounds nuw (i8, ptr @input2_f32, i64 8), align 8
%output2 = tail call float @llvm.minimumnum.f32(float %input2_1, float %input2_2)
store float %output2, ptr getelementptr inbounds nuw (i8, ptr @output_f32, i64 8), align 8
%input3_1 = load float, ptr getelementptr inbounds nuw (i8, ptr @input1_f32, i64 12), align 4
%input3_2 = load float, ptr getelementptr inbounds nuw (i8, ptr @input2_f32, i64 12), align 4
%output3 = tail call float @llvm.minimumnum.f32(float %input3_1, float %input3_2)
store float %output3, ptr getelementptr inbounds nuw (i8, ptr @output_f32, i64 12), align 4
%input4_1 = load float, ptr getelementptr inbounds nuw (i8, ptr @input1_f32, i64 16), align 16
%input4_2 = load float, ptr getelementptr inbounds nuw (i8, ptr @input2_f32, i64 16), align 16
%output4 = tail call float @llvm.minimumnum.f32(float %input4_1, float %input4_2)
store float %output4, ptr getelementptr inbounds nuw (i8, ptr @output_f32, i64 16), align 16
%input5_1 = load float, ptr getelementptr inbounds nuw (i8, ptr @input1_f32, i64 20), align 4
%input5_2 = load float, ptr getelementptr inbounds nuw (i8, ptr @input2_f32, i64 20), align 4
%output5 = tail call float @llvm.minimumnum.f32(float %input5_1, float %input5_2)
store float %output5, ptr getelementptr inbounds nuw (i8, ptr @output_f32, i64 20), align 4
%input6_1 = load float, ptr getelementptr inbounds nuw (i8, ptr @input1_f32, i64 24), align 8
%input6_2 = load float, ptr getelementptr inbounds nuw (i8, ptr @input2_f32, i64 24), align 8
%output6 = tail call float @llvm.minimumnum.f32(float %input6_1, float %input6_2)
store float %output6, ptr getelementptr inbounds nuw (i8, ptr @output_f32, i64 24), align 8
%input7_1 = load float, ptr getelementptr inbounds nuw (i8, ptr @input1_f32, i64 28), align 4
%input7_2 = load float, ptr getelementptr inbounds nuw (i8, ptr @input2_f32, i64 28), align 4
%output7 = tail call float @llvm.minimumnum.f32(float %input7_1, float %input7_2)
store float %output7, ptr getelementptr inbounds nuw (i8, ptr @output_f32, i64 28), align 4
%input8_1 = load float, ptr getelementptr inbounds nuw (i8, ptr @input1_f32, i64 32), align 16
%input8_2 = load float, ptr getelementptr inbounds nuw (i8, ptr @input2_f32, i64 32), align 16
%output8 = tail call float @llvm.minimumnum.f32(float %input8_1, float %input8_2)
store float %output8, ptr getelementptr inbounds nuw (i8, ptr @output_f32, i64 32), align 16
ret void
}
declare float @llvm.minimumnum.f32(float, float)
; Same shape as @fmin32 above, but with llvm.maximumnum.f32: nine
; consecutive scalar ops that the SLP vectorizer currently leaves alone.
define void @fmax32() {
; CHECK-LABEL: define void @fmax32(
; CHECK-SAME: ) #[[ATTR0]] {
; CHECK-NEXT: [[ENTRY:.*:]]
; CHECK-NEXT: [[TMP0:%.*]] = load float, ptr @input1_f32, align 16
; CHECK-NEXT: [[TMP1:%.*]] = load float, ptr @input2_f32, align 16
; CHECK-NEXT: [[TMP2:%.*]] = tail call float @llvm.maximumnum.f32(float [[TMP0]], float [[TMP1]])
; CHECK-NEXT: store float [[TMP2]], ptr @output_f32, align 16
; CHECK-NEXT: [[TMP3:%.*]] = load float, ptr getelementptr inbounds nuw (i8, ptr @input1_f32, i64 4), align 4
; CHECK-NEXT: [[TMP4:%.*]] = load float, ptr getelementptr inbounds nuw (i8, ptr @input2_f32, i64 4), align 4
; CHECK-NEXT: [[TMP5:%.*]] = tail call float @llvm.maximumnum.f32(float [[TMP3]], float [[TMP4]])
; CHECK-NEXT: store float [[TMP5]], ptr getelementptr inbounds nuw (i8, ptr @output_f32, i64 4), align 4
; CHECK-NEXT: [[TMP6:%.*]] = load float, ptr getelementptr inbounds nuw (i8, ptr @input1_f32, i64 8), align 8
; CHECK-NEXT: [[TMP7:%.*]] = load float, ptr getelementptr inbounds nuw (i8, ptr @input2_f32, i64 8), align 8
; CHECK-NEXT: [[TMP8:%.*]] = tail call float @llvm.maximumnum.f32(float [[TMP6]], float [[TMP7]])
; CHECK-NEXT: store float [[TMP8]], ptr getelementptr inbounds nuw (i8, ptr @output_f32, i64 8), align 8
; CHECK-NEXT: [[TMP9:%.*]] = load float, ptr getelementptr inbounds nuw (i8, ptr @input1_f32, i64 12), align 4
; CHECK-NEXT: [[TMP10:%.*]] = load float, ptr getelementptr inbounds nuw (i8, ptr @input2_f32, i64 12), align 4
; CHECK-NEXT: [[TMP11:%.*]] = tail call float @llvm.maximumnum.f32(float [[TMP9]], float [[TMP10]])
; CHECK-NEXT: store float [[TMP11]], ptr getelementptr inbounds nuw (i8, ptr @output_f32, i64 12), align 4
; CHECK-NEXT: [[TMP12:%.*]] = load float, ptr getelementptr inbounds nuw (i8, ptr @input1_f32, i64 16), align 16
; CHECK-NEXT: [[TMP13:%.*]] = load float, ptr getelementptr inbounds nuw (i8, ptr @input2_f32, i64 16), align 16
; CHECK-NEXT: [[TMP14:%.*]] = tail call float @llvm.maximumnum.f32(float [[TMP12]], float [[TMP13]])
; CHECK-NEXT: store float [[TMP14]], ptr getelementptr inbounds nuw (i8, ptr @output_f32, i64 16), align 16
; CHECK-NEXT: [[TMP15:%.*]] = load float, ptr getelementptr inbounds nuw (i8, ptr @input1_f32, i64 20), align 4
; CHECK-NEXT: [[TMP16:%.*]] = load float, ptr getelementptr inbounds nuw (i8, ptr @input2_f32, i64 20), align 4
; CHECK-NEXT: [[TMP17:%.*]] = tail call float @llvm.maximumnum.f32(float [[TMP15]], float [[TMP16]])
; CHECK-NEXT: store float [[TMP17]], ptr getelementptr inbounds nuw (i8, ptr @output_f32, i64 20), align 4
; CHECK-NEXT: [[TMP18:%.*]] = load float, ptr getelementptr inbounds nuw (i8, ptr @input1_f32, i64 24), align 8
; CHECK-NEXT: [[TMP19:%.*]] = load float, ptr getelementptr inbounds nuw (i8, ptr @input2_f32, i64 24), align 8
; CHECK-NEXT: [[TMP20:%.*]] = tail call float @llvm.maximumnum.f32(float [[TMP18]], float [[TMP19]])
; CHECK-NEXT: store float [[TMP20]], ptr getelementptr inbounds nuw (i8, ptr @output_f32, i64 24), align 8
; CHECK-NEXT: [[TMP21:%.*]] = load float, ptr getelementptr inbounds nuw (i8, ptr @input1_f32, i64 28), align 4
; CHECK-NEXT: [[TMP22:%.*]] = load float, ptr getelementptr inbounds nuw (i8, ptr @input2_f32, i64 28), align 4
; CHECK-NEXT: [[TMP23:%.*]] = tail call float @llvm.maximumnum.f32(float [[TMP21]], float [[TMP22]])
; CHECK-NEXT: store float [[TMP23]], ptr getelementptr inbounds nuw (i8, ptr @output_f32, i64 28), align 4
; CHECK-NEXT: [[TMP24:%.*]] = load float, ptr getelementptr inbounds nuw (i8, ptr @input1_f32, i64 32), align 16
; CHECK-NEXT: [[TMP25:%.*]] = load float, ptr getelementptr inbounds nuw (i8, ptr @input2_f32, i64 32), align 16
; CHECK-NEXT: [[TMP26:%.*]] = tail call float @llvm.maximumnum.f32(float [[TMP24]], float [[TMP25]])
; CHECK-NEXT: store float [[TMP26]], ptr getelementptr inbounds nuw (i8, ptr @output_f32, i64 32), align 16
; CHECK-NEXT: ret void
;
entry:
%input0_0 = load float, ptr @input1_f32, align 16
%input0_1 = load float, ptr @input2_f32, align 16
%output0 = tail call float @llvm.maximumnum.f32(float %input0_0, float %input0_1)
store float %output0, ptr @output_f32, align 16
%input1_1 = load float, ptr getelementptr inbounds nuw (i8, ptr @input1_f32, i64 4), align 4
%input1_2 = load float, ptr getelementptr inbounds nuw (i8, ptr @input2_f32, i64 4), align 4
%output1 = tail call float @llvm.maximumnum.f32(float %input1_1, float %input1_2)
store float %output1, ptr getelementptr inbounds nuw (i8, ptr @output_f32, i64 4), align 4
%input2_1 = load float, ptr getelementptr inbounds nuw (i8, ptr @input1_f32, i64 8), align 8
%input2_2 = load float, ptr getelementptr inbounds nuw (i8, ptr @input2_f32, i64 8), align 8
%output2 = tail call float @llvm.maximumnum.f32(float %input2_1, float %input2_2)
store float %output2, ptr getelementptr inbounds nuw (i8, ptr @output_f32, i64 8), align 8
%input3_1 = load float, ptr getelementptr inbounds nuw (i8, ptr @input1_f32, i64 12), align 4
%input3_2 = load float, ptr getelementptr inbounds nuw (i8, ptr @input2_f32, i64 12), align 4
%output3 = tail call float @llvm.maximumnum.f32(float %input3_1, float %input3_2)
store float %output3, ptr getelementptr inbounds nuw (i8, ptr @output_f32, i64 12), align 4
%input4_1 = load float, ptr getelementptr inbounds nuw (i8, ptr @input1_f32, i64 16), align 16
%input4_2 = load float, ptr getelementptr inbounds nuw (i8, ptr @input2_f32, i64 16), align 16
%output4 = tail call float @llvm.maximumnum.f32(float %input4_1, float %input4_2)
store float %output4, ptr getelementptr inbounds nuw (i8, ptr @output_f32, i64 16), align 16
%input5_1 = load float, ptr getelementptr inbounds nuw (i8, ptr @input1_f32, i64 20), align 4
%input5_2 = load float, ptr getelementptr inbounds nuw (i8, ptr @input2_f32, i64 20), align 4
%output5 = tail call float @llvm.maximumnum.f32(float %input5_1, float %input5_2)
store float %output5, ptr getelementptr inbounds nuw (i8, ptr @output_f32, i64 20), align 4
%input6_1 = load float, ptr getelementptr inbounds nuw (i8, ptr @input1_f32, i64 24), align 8
%input6_2 = load float, ptr getelementptr inbounds nuw (i8, ptr @input2_f32, i64 24), align 8
%output6 = tail call float @llvm.maximumnum.f32(float %input6_1, float %input6_2)
store float %output6, ptr getelementptr inbounds nuw (i8, ptr @output_f32, i64 24), align 8
%input7_1 = load float, ptr getelementptr inbounds nuw (i8, ptr @input1_f32, i64 28), align 4
%input7_2 = load float, ptr getelementptr inbounds nuw (i8, ptr @input2_f32, i64 28), align 4
%output7 = tail call float @llvm.maximumnum.f32(float %input7_1, float %input7_2)
store float %output7, ptr getelementptr inbounds nuw (i8, ptr @output_f32, i64 28), align 4
%input8_1 = load float, ptr getelementptr inbounds nuw (i8, ptr @input1_f32, i64 32), align 16
%input8_2 = load float, ptr getelementptr inbounds nuw (i8, ptr @input2_f32, i64 32), align 16
%output8 = tail call float @llvm.maximumnum.f32(float %input8_1, float %input8_2)
store float %output8, ptr getelementptr inbounds nuw (i8, ptr @output_f32, i64 32), align 16
ret void
}
declare float @llvm.maximumnum.f32(float, float)
define void @fmin64() {
; CHECK-LABEL: define void @fmin64(
; CHECK-SAME: ) #[[ATTR0]] {
; CHECK-NEXT: [[ENTRY:.*:]]
; CHECK-NEXT: [[TMP0:%.*]] = load double, ptr @input1_f64, align 16
; CHECK-NEXT: [[TMP1:%.*]] = load double, ptr @input2_f64, align 16
; CHECK-NEXT: [[TMP2:%.*]] = tail call double @llvm.minimumnum.f64(double [[TMP0]], double [[TMP1]])
; CHECK-NEXT: store double [[TMP2]], ptr @output_f64, align 16
; CHECK-NEXT: [[TMP3:%.*]] = load double, ptr getelementptr inbounds nuw (i8, ptr @input1_f64, i64 8), align 8
; CHECK-NEXT: [[TMP4:%.*]] = load double, ptr getelementptr inbounds nuw (i8, ptr @input2_f64, i64 8), align 8
; CHECK-NEXT: [[TMP5:%.*]] = tail call double @llvm.minimumnum.f64(double [[TMP3]], double [[TMP4]])
; CHECK-NEXT: store double [[TMP5]], ptr getelementptr inbounds nuw (i8, ptr @output_f64, i64 8), align 8
; CHECK-NEXT: [[TMP6:%.*]] = load double, ptr getelementptr inbounds nuw (i8, ptr @input1_f64, i64 16), align 16
; CHECK-NEXT: [[TMP7:%.*]] = load double, ptr getelementptr inbounds nuw (i8, ptr @input2_f64, i64 16), align 16
; CHECK-NEXT: [[TMP8:%.*]] = tail call double @llvm.minimumnum.f64(double [[TMP6]], double [[TMP7]])
; CHECK-NEXT: store double [[TMP8]], ptr getelementptr inbounds nuw (i8, ptr @output_f64, i64 16), align 16
; CHECK-NEXT: [[TMP9:%.*]] = load double, ptr getelementptr inbounds nuw (i8, ptr @input1_f64, i64 24), align 8
; CHECK-NEXT: [[TMP10:%.*]] = load double, ptr getelementptr inbounds nuw (i8, ptr @input2_f64, i64 24), align 8
; CHECK-NEXT: [[TMP11:%.*]] = tail call double @llvm.minimumnum.f64(double [[TMP9]], double [[TMP10]])
; CHECK-NEXT: store double [[TMP11]], ptr getelementptr inbounds nuw (i8, ptr @output_f64, i64 24), align 8
; CHECK-NEXT: [[TMP12:%.*]] = load double, ptr getelementptr inbounds nuw (i8, ptr @input1_f64, i64 32), align 16
; CHECK-NEXT: [[TMP13:%.*]] = load double, ptr getelementptr inbounds nuw (i8, ptr @input2_f64, i64 32), align 16
; CHECK-NEXT: [[TMP14:%.*]] = tail call double @llvm.minimumnum.f64(double [[TMP12]], double [[TMP13]])
; CHECK-NEXT: store double [[TMP14]], ptr getelementptr inbounds nuw (i8, ptr @output_f64, i64 32), align 16
; CHECK-NEXT: [[TMP15:%.*]] = load double, ptr getelementptr inbounds nuw (i8, ptr @input1_f64, i64 40), align 8
; CHECK-NEXT: [[TMP16:%.*]] = load double, ptr getelementptr inbounds nuw (i8, ptr @input2_f64, i64 40), align 8
; CHECK-NEXT: [[TMP17:%.*]] = tail call double @llvm.minimumnum.f64(double [[TMP15]], double [[TMP16]])
; CHECK-NEXT: store double [[TMP17]], ptr getelementptr inbounds nuw (i8, ptr @output_f64, i64 40), align 8
; CHECK-NEXT: [[TMP18:%.*]] = load double, ptr getelementptr inbounds nuw (i8, ptr @input1_f64, i64 48), align 16
; CHECK-NEXT: [[TMP19:%.*]] = load double, ptr getelementptr inbounds nuw (i8, ptr @input2_f64, i64 48), align 16
; CHECK-NEXT: [[TMP20:%.*]] = tail call double @llvm.minimumnum.f64(double [[TMP18]], double [[TMP19]])
; CHECK-NEXT: store double [[TMP20]], ptr getelementptr inbounds nuw (i8, ptr @output_f64, i64 48), align 16
; CHECK-NEXT: [[TMP21:%.*]] = load double, ptr getelementptr inbounds nuw (i8, ptr @input1_f64, i64 56), align 8
; CHECK-NEXT: [[TMP22:%.*]] = load double, ptr getelementptr inbounds nuw (i8, ptr @input2_f64, i64 56), align 8
; CHECK-NEXT: [[TMP23:%.*]] = tail call double @llvm.minimumnum.f64(double [[TMP21]], double [[TMP22]])
; CHECK-NEXT: store double [[TMP23]], ptr getelementptr inbounds nuw (i8, ptr @output_f64, i64 56), align 8
; CHECK-NEXT: [[TMP24:%.*]] = load double, ptr getelementptr inbounds nuw (i8, ptr @input1_f64, i64 64), align 16
; CHECK-NEXT: [[TMP25:%.*]] = load double, ptr getelementptr inbounds nuw (i8, ptr @input2_f64, i64 64), align 16
; CHECK-NEXT: [[TMP26:%.*]] = tail call double @llvm.minimumnum.f64(double [[TMP24]], double [[TMP25]])
; CHECK-NEXT: store double [[TMP26]], ptr getelementptr inbounds nuw (i8, ptr @output_f64, i64 64), align 16
; CHECK-NEXT: ret void
;
entry:
%input0_0 = load double, ptr @input1_f64, align 16
%input0_1 = load double, ptr @input2_f64, align 16
%output0 = tail call double @llvm.minimumnum.f64(double %input0_0, double %input0_1)
store double %output0, ptr @output_f64, align 16
%input1_1 = load double, ptr getelementptr inbounds nuw (i8, ptr @input1_f64, i64 8), align 8
%input1_2 = load double, ptr getelementptr inbounds nuw (i8, ptr @input2_f64, i64 8), align 8
%output1 = tail call double @llvm.minimumnum.f64(double %input1_1, double %input1_2)
store double %output1, ptr getelementptr inbounds nuw (i8, ptr @output_f64, i64 8), align 8
%input2_1 = load double, ptr getelementptr inbounds nuw (i8, ptr @input1_f64, i64 16), align 16
%input2_2 = load double, ptr getelementptr inbounds nuw (i8, ptr @input2_f64, i64 16), align 16
%output2 = tail call double @llvm.minimumnum.f64(double %input2_1, double %input2_2)
store double %output2, ptr getelementptr inbounds nuw (i8, ptr @output_f64, i64 16), align 16
%input3_1 = load double, ptr getelementptr inbounds nuw (i8, ptr @input1_f64, i64 24), align 8
%input3_2 = load double, ptr getelementptr inbounds nuw (i8, ptr @input2_f64, i64 24), align 8
%output3 = tail call double @llvm.minimumnum.f64(double %input3_1, double %input3_2)
store double %output3, ptr getelementptr inbounds nuw (i8, ptr @output_f64, i64 24), align 8
%input4_1 = load double, ptr getelementptr inbounds nuw (i8, ptr @input1_f64, i64 32), align 16
%input4_2 = load double, ptr getelementptr inbounds nuw (i8, ptr @input2_f64, i64 32), align 16
%output4 = tail call double @llvm.minimumnum.f64(double %input4_1, double %input4_2)
store double %output4, ptr getelementptr inbounds nuw (i8, ptr @output_f64, i64 32), align 16
%input5_1 = load double, ptr getelementptr inbounds nuw (i8, ptr @input1_f64, i64 40), align 8
%input5_2 = load double, ptr getelementptr inbounds nuw (i8, ptr @input2_f64, i64 40), align 8
%output5 = tail call double @llvm.minimumnum.f64(double %input5_1, double %input5_2)
store double %output5, ptr getelementptr inbounds nuw (i8, ptr @output_f64, i64 40), align 8
%input6_1 = load double, ptr getelementptr inbounds nuw (i8, ptr @input1_f64, i64 48), align 16
%input6_2 = load double, ptr getelementptr inbounds nuw (i8, ptr @input2_f64, i64 48), align 16
%output6 = tail call double @llvm.minimumnum.f64(double %input6_1, double %input6_2)
store double %output6, ptr getelementptr inbounds nuw (i8, ptr @output_f64, i64 48), align 16
%input7_1 = load double, ptr getelementptr inbounds nuw (i8, ptr @input1_f64, i64 56), align 8
%input7_2 = load double, ptr getelementptr inbounds nuw (i8, ptr @input2_f64, i64 56), align 8
%output7 = tail call double @llvm.minimumnum.f64(double %input7_1, double %input7_2)
store double %output7, ptr getelementptr inbounds nuw (i8, ptr @output_f64, i64 56), align 8
%input8_1 = load double, ptr getelementptr inbounds nuw (i8, ptr @input1_f64, i64 64), align 16
%input8_2 = load double, ptr getelementptr inbounds nuw (i8, ptr @input2_f64, i64 64), align 16
%output8 = tail call double @llvm.minimumnum.f64(double %input8_1, double %input8_2)
store double %output8, ptr getelementptr inbounds nuw (i8, ptr @output_f64, i64 64), align 16
ret void
}
; Scalar declaration of the minimumnum intrinsic exercised by the f64 test above.
declare double @llvm.minimumnum.f64(double, double)
; fmax64: applies llvm.maximumnum.f64 element-wise to the first nine doubles
; of @input1_f64/@input2_f64 (byte offsets 0..64, alternating align 16/8) and
; stores each result into @output_f64 at the matching offset. The autogenerated
; CHECK lines record the current, still fully scalar output; regenerate them
; with update_test_checks.py once maximumnum gains vectorization support
; (see the FIXME at the top of the file).
define void @fmax64() {
; CHECK-LABEL: define void @fmax64(
; CHECK-SAME: ) #[[ATTR0]] {
; CHECK-NEXT: [[ENTRY:.*:]]
; CHECK-NEXT: [[TMP0:%.*]] = load double, ptr @input1_f64, align 16
; CHECK-NEXT: [[TMP1:%.*]] = load double, ptr @input2_f64, align 16
; CHECK-NEXT: [[TMP2:%.*]] = tail call double @llvm.maximumnum.f64(double [[TMP0]], double [[TMP1]])
; CHECK-NEXT: store double [[TMP2]], ptr @output_f64, align 16
; CHECK-NEXT: [[TMP3:%.*]] = load double, ptr getelementptr inbounds nuw (i8, ptr @input1_f64, i64 8), align 8
; CHECK-NEXT: [[TMP4:%.*]] = load double, ptr getelementptr inbounds nuw (i8, ptr @input2_f64, i64 8), align 8
; CHECK-NEXT: [[TMP5:%.*]] = tail call double @llvm.maximumnum.f64(double [[TMP3]], double [[TMP4]])
; CHECK-NEXT: store double [[TMP5]], ptr getelementptr inbounds nuw (i8, ptr @output_f64, i64 8), align 8
; CHECK-NEXT: [[TMP6:%.*]] = load double, ptr getelementptr inbounds nuw (i8, ptr @input1_f64, i64 16), align 16
; CHECK-NEXT: [[TMP7:%.*]] = load double, ptr getelementptr inbounds nuw (i8, ptr @input2_f64, i64 16), align 16
; CHECK-NEXT: [[TMP8:%.*]] = tail call double @llvm.maximumnum.f64(double [[TMP6]], double [[TMP7]])
; CHECK-NEXT: store double [[TMP8]], ptr getelementptr inbounds nuw (i8, ptr @output_f64, i64 16), align 16
; CHECK-NEXT: [[TMP9:%.*]] = load double, ptr getelementptr inbounds nuw (i8, ptr @input1_f64, i64 24), align 8
; CHECK-NEXT: [[TMP10:%.*]] = load double, ptr getelementptr inbounds nuw (i8, ptr @input2_f64, i64 24), align 8
; CHECK-NEXT: [[TMP11:%.*]] = tail call double @llvm.maximumnum.f64(double [[TMP9]], double [[TMP10]])
; CHECK-NEXT: store double [[TMP11]], ptr getelementptr inbounds nuw (i8, ptr @output_f64, i64 24), align 8
; CHECK-NEXT: [[TMP12:%.*]] = load double, ptr getelementptr inbounds nuw (i8, ptr @input1_f64, i64 32), align 16
; CHECK-NEXT: [[TMP13:%.*]] = load double, ptr getelementptr inbounds nuw (i8, ptr @input2_f64, i64 32), align 16
; CHECK-NEXT: [[TMP14:%.*]] = tail call double @llvm.maximumnum.f64(double [[TMP12]], double [[TMP13]])
; CHECK-NEXT: store double [[TMP14]], ptr getelementptr inbounds nuw (i8, ptr @output_f64, i64 32), align 16
; CHECK-NEXT: [[TMP15:%.*]] = load double, ptr getelementptr inbounds nuw (i8, ptr @input1_f64, i64 40), align 8
; CHECK-NEXT: [[TMP16:%.*]] = load double, ptr getelementptr inbounds nuw (i8, ptr @input2_f64, i64 40), align 8
; CHECK-NEXT: [[TMP17:%.*]] = tail call double @llvm.maximumnum.f64(double [[TMP15]], double [[TMP16]])
; CHECK-NEXT: store double [[TMP17]], ptr getelementptr inbounds nuw (i8, ptr @output_f64, i64 40), align 8
; CHECK-NEXT: [[TMP18:%.*]] = load double, ptr getelementptr inbounds nuw (i8, ptr @input1_f64, i64 48), align 16
; CHECK-NEXT: [[TMP19:%.*]] = load double, ptr getelementptr inbounds nuw (i8, ptr @input2_f64, i64 48), align 16
; CHECK-NEXT: [[TMP20:%.*]] = tail call double @llvm.maximumnum.f64(double [[TMP18]], double [[TMP19]])
; CHECK-NEXT: store double [[TMP20]], ptr getelementptr inbounds nuw (i8, ptr @output_f64, i64 48), align 16
; CHECK-NEXT: [[TMP21:%.*]] = load double, ptr getelementptr inbounds nuw (i8, ptr @input1_f64, i64 56), align 8
; CHECK-NEXT: [[TMP22:%.*]] = load double, ptr getelementptr inbounds nuw (i8, ptr @input2_f64, i64 56), align 8
; CHECK-NEXT: [[TMP23:%.*]] = tail call double @llvm.maximumnum.f64(double [[TMP21]], double [[TMP22]])
; CHECK-NEXT: store double [[TMP23]], ptr getelementptr inbounds nuw (i8, ptr @output_f64, i64 56), align 8
; CHECK-NEXT: [[TMP24:%.*]] = load double, ptr getelementptr inbounds nuw (i8, ptr @input1_f64, i64 64), align 16
; CHECK-NEXT: [[TMP25:%.*]] = load double, ptr getelementptr inbounds nuw (i8, ptr @input2_f64, i64 64), align 16
; CHECK-NEXT: [[TMP26:%.*]] = tail call double @llvm.maximumnum.f64(double [[TMP24]], double [[TMP25]])
; CHECK-NEXT: store double [[TMP26]], ptr getelementptr inbounds nuw (i8, ptr @output_f64, i64 64), align 16
; CHECK-NEXT: ret void
;
entry:
; Input IR: nine independent load/load/call/store groups, no loop.
  %input0_0 = load double, ptr @input1_f64, align 16
  %input0_1 = load double, ptr @input2_f64, align 16
  %output0 = tail call double @llvm.maximumnum.f64(double %input0_0, double %input0_1)
  store double %output0, ptr @output_f64, align 16
  %input1_1 = load double, ptr getelementptr inbounds nuw (i8, ptr @input1_f64, i64 8), align 8
  %input1_2 = load double, ptr getelementptr inbounds nuw (i8, ptr @input2_f64, i64 8), align 8
  %output1 = tail call double @llvm.maximumnum.f64(double %input1_1, double %input1_2)
  store double %output1, ptr getelementptr inbounds nuw (i8, ptr @output_f64, i64 8), align 8
  %input2_1 = load double, ptr getelementptr inbounds nuw (i8, ptr @input1_f64, i64 16), align 16
  %input2_2 = load double, ptr getelementptr inbounds nuw (i8, ptr @input2_f64, i64 16), align 16
  %output2 = tail call double @llvm.maximumnum.f64(double %input2_1, double %input2_2)
  store double %output2, ptr getelementptr inbounds nuw (i8, ptr @output_f64, i64 16), align 16
  %input3_1 = load double, ptr getelementptr inbounds nuw (i8, ptr @input1_f64, i64 24), align 8
  %input3_2 = load double, ptr getelementptr inbounds nuw (i8, ptr @input2_f64, i64 24), align 8
  %output3 = tail call double @llvm.maximumnum.f64(double %input3_1, double %input3_2)
  store double %output3, ptr getelementptr inbounds nuw (i8, ptr @output_f64, i64 24), align 8
  %input4_1 = load double, ptr getelementptr inbounds nuw (i8, ptr @input1_f64, i64 32), align 16
  %input4_2 = load double, ptr getelementptr inbounds nuw (i8, ptr @input2_f64, i64 32), align 16
  %output4 = tail call double @llvm.maximumnum.f64(double %input4_1, double %input4_2)
  store double %output4, ptr getelementptr inbounds nuw (i8, ptr @output_f64, i64 32), align 16
  %input5_1 = load double, ptr getelementptr inbounds nuw (i8, ptr @input1_f64, i64 40), align 8
  %input5_2 = load double, ptr getelementptr inbounds nuw (i8, ptr @input2_f64, i64 40), align 8
  %output5 = tail call double @llvm.maximumnum.f64(double %input5_1, double %input5_2)
  store double %output5, ptr getelementptr inbounds nuw (i8, ptr @output_f64, i64 40), align 8
  %input6_1 = load double, ptr getelementptr inbounds nuw (i8, ptr @input1_f64, i64 48), align 16
  %input6_2 = load double, ptr getelementptr inbounds nuw (i8, ptr @input2_f64, i64 48), align 16
  %output6 = tail call double @llvm.maximumnum.f64(double %input6_1, double %input6_2)
  store double %output6, ptr getelementptr inbounds nuw (i8, ptr @output_f64, i64 48), align 16
  %input7_1 = load double, ptr getelementptr inbounds nuw (i8, ptr @input1_f64, i64 56), align 8
  %input7_2 = load double, ptr getelementptr inbounds nuw (i8, ptr @input2_f64, i64 56), align 8
  %output7 = tail call double @llvm.maximumnum.f64(double %input7_1, double %input7_2)
  store double %output7, ptr getelementptr inbounds nuw (i8, ptr @output_f64, i64 56), align 8
  %input8_1 = load double, ptr getelementptr inbounds nuw (i8, ptr @input1_f64, i64 64), align 16
  %input8_2 = load double, ptr getelementptr inbounds nuw (i8, ptr @input2_f64, i64 64), align 16
  %output8 = tail call double @llvm.maximumnum.f64(double %input8_1, double %input8_2)
  store double %output8, ptr getelementptr inbounds nuw (i8, ptr @output_f64, i64 64), align 16
  ret void
}
; Scalar declaration of the maximumnum intrinsic used by @fmax64.
declare double @llvm.maximumnum.f64(double, double)
; fmin16: applies llvm.minimumnum.f16 element-wise to the first nine halves of
; @input1_f16/@input2_f16 (byte offsets 0..16, step 2) and stores each result
; into @output_f16 at the matching offset. The autogenerated CHECK lines
; record the current, still fully scalar output; regenerate them with
; update_test_checks.py once minimumnum gains vectorization support
; (see the FIXME at the top of the file).
define void @fmin16() {
; CHECK-LABEL: define void @fmin16(
; CHECK-SAME: ) #[[ATTR0]] {
; CHECK-NEXT: [[ENTRY:.*:]]
; CHECK-NEXT: [[TMP0:%.*]] = load half, ptr @input1_f16, align 16
; CHECK-NEXT: [[TMP1:%.*]] = load half, ptr @input2_f16, align 16
; CHECK-NEXT: [[TMP2:%.*]] = tail call half @llvm.minimumnum.f16(half [[TMP0]], half [[TMP1]])
; CHECK-NEXT: store half [[TMP2]], ptr @output_f16, align 16
; CHECK-NEXT: [[TMP3:%.*]] = load half, ptr getelementptr inbounds nuw (i8, ptr @input1_f16, i64 2), align 2
; CHECK-NEXT: [[TMP4:%.*]] = load half, ptr getelementptr inbounds nuw (i8, ptr @input2_f16, i64 2), align 2
; CHECK-NEXT: [[TMP5:%.*]] = tail call half @llvm.minimumnum.f16(half [[TMP3]], half [[TMP4]])
; CHECK-NEXT: store half [[TMP5]], ptr getelementptr inbounds nuw (i8, ptr @output_f16, i64 2), align 2
; CHECK-NEXT: [[TMP6:%.*]] = load half, ptr getelementptr inbounds nuw (i8, ptr @input1_f16, i64 4), align 4
; CHECK-NEXT: [[TMP7:%.*]] = load half, ptr getelementptr inbounds nuw (i8, ptr @input2_f16, i64 4), align 4
; CHECK-NEXT: [[TMP8:%.*]] = tail call half @llvm.minimumnum.f16(half [[TMP6]], half [[TMP7]])
; CHECK-NEXT: store half [[TMP8]], ptr getelementptr inbounds nuw (i8, ptr @output_f16, i64 4), align 4
; CHECK-NEXT: [[TMP9:%.*]] = load half, ptr getelementptr inbounds nuw (i8, ptr @input1_f16, i64 6), align 2
; CHECK-NEXT: [[TMP10:%.*]] = load half, ptr getelementptr inbounds nuw (i8, ptr @input2_f16, i64 6), align 2
; CHECK-NEXT: [[TMP11:%.*]] = tail call half @llvm.minimumnum.f16(half [[TMP9]], half [[TMP10]])
; CHECK-NEXT: store half [[TMP11]], ptr getelementptr inbounds nuw (i8, ptr @output_f16, i64 6), align 2
; CHECK-NEXT: [[TMP12:%.*]] = load half, ptr getelementptr inbounds nuw (i8, ptr @input1_f16, i64 8), align 8
; CHECK-NEXT: [[TMP13:%.*]] = load half, ptr getelementptr inbounds nuw (i8, ptr @input2_f16, i64 8), align 8
; CHECK-NEXT: [[TMP14:%.*]] = tail call half @llvm.minimumnum.f16(half [[TMP12]], half [[TMP13]])
; CHECK-NEXT: store half [[TMP14]], ptr getelementptr inbounds nuw (i8, ptr @output_f16, i64 8), align 8
; CHECK-NEXT: [[TMP15:%.*]] = load half, ptr getelementptr inbounds nuw (i8, ptr @input1_f16, i64 10), align 2
; CHECK-NEXT: [[TMP16:%.*]] = load half, ptr getelementptr inbounds nuw (i8, ptr @input2_f16, i64 10), align 2
; CHECK-NEXT: [[TMP17:%.*]] = tail call half @llvm.minimumnum.f16(half [[TMP15]], half [[TMP16]])
; CHECK-NEXT: store half [[TMP17]], ptr getelementptr inbounds nuw (i8, ptr @output_f16, i64 10), align 2
; CHECK-NEXT: [[TMP18:%.*]] = load half, ptr getelementptr inbounds nuw (i8, ptr @input1_f16, i64 12), align 4
; CHECK-NEXT: [[TMP19:%.*]] = load half, ptr getelementptr inbounds nuw (i8, ptr @input2_f16, i64 12), align 4
; CHECK-NEXT: [[TMP20:%.*]] = tail call half @llvm.minimumnum.f16(half [[TMP18]], half [[TMP19]])
; CHECK-NEXT: store half [[TMP20]], ptr getelementptr inbounds nuw (i8, ptr @output_f16, i64 12), align 4
; CHECK-NEXT: [[TMP21:%.*]] = load half, ptr getelementptr inbounds nuw (i8, ptr @input1_f16, i64 14), align 2
; CHECK-NEXT: [[TMP22:%.*]] = load half, ptr getelementptr inbounds nuw (i8, ptr @input2_f16, i64 14), align 2
; CHECK-NEXT: [[TMP23:%.*]] = tail call half @llvm.minimumnum.f16(half [[TMP21]], half [[TMP22]])
; CHECK-NEXT: store half [[TMP23]], ptr getelementptr inbounds nuw (i8, ptr @output_f16, i64 14), align 2
; CHECK-NEXT: [[TMP24:%.*]] = load half, ptr getelementptr inbounds nuw (i8, ptr @input1_f16, i64 16), align 16
; CHECK-NEXT: [[TMP25:%.*]] = load half, ptr getelementptr inbounds nuw (i8, ptr @input2_f16, i64 16), align 16
; CHECK-NEXT: [[TMP26:%.*]] = tail call half @llvm.minimumnum.f16(half [[TMP24]], half [[TMP25]])
; CHECK-NEXT: store half [[TMP26]], ptr getelementptr inbounds nuw (i8, ptr @output_f16, i64 16), align 16
; CHECK-NEXT: ret void
;
entry:
; Input IR: nine independent load/load/call/store groups, no loop.
  %input0_0 = load half, ptr @input1_f16, align 16
  %input0_1 = load half, ptr @input2_f16, align 16
  %output0 = tail call half @llvm.minimumnum.f16(half %input0_0, half %input0_1)
  store half %output0, ptr @output_f16, align 16
  %input1_1 = load half, ptr getelementptr inbounds nuw (i8, ptr @input1_f16, i64 2), align 2
  %input1_2 = load half, ptr getelementptr inbounds nuw (i8, ptr @input2_f16, i64 2), align 2
  %output1 = tail call half @llvm.minimumnum.f16(half %input1_1, half %input1_2)
  store half %output1, ptr getelementptr inbounds nuw (i8, ptr @output_f16, i64 2), align 2
  %input2_1 = load half, ptr getelementptr inbounds nuw (i8, ptr @input1_f16, i64 4), align 4
  %input2_2 = load half, ptr getelementptr inbounds nuw (i8, ptr @input2_f16, i64 4), align 4
  %output2 = tail call half @llvm.minimumnum.f16(half %input2_1, half %input2_2)
  store half %output2, ptr getelementptr inbounds nuw (i8, ptr @output_f16, i64 4), align 4
  %input3_1 = load half, ptr getelementptr inbounds nuw (i8, ptr @input1_f16, i64 6), align 2
  %input3_2 = load half, ptr getelementptr inbounds nuw (i8, ptr @input2_f16, i64 6), align 2
  %output3 = tail call half @llvm.minimumnum.f16(half %input3_1, half %input3_2)
  store half %output3, ptr getelementptr inbounds nuw (i8, ptr @output_f16, i64 6), align 2
  %input4_1 = load half, ptr getelementptr inbounds nuw (i8, ptr @input1_f16, i64 8), align 8
  %input4_2 = load half, ptr getelementptr inbounds nuw (i8, ptr @input2_f16, i64 8), align 8
  %output4 = tail call half @llvm.minimumnum.f16(half %input4_1, half %input4_2)
  store half %output4, ptr getelementptr inbounds nuw (i8, ptr @output_f16, i64 8), align 8
  %input5_1 = load half, ptr getelementptr inbounds nuw (i8, ptr @input1_f16, i64 10), align 2
  %input5_2 = load half, ptr getelementptr inbounds nuw (i8, ptr @input2_f16, i64 10), align 2
  %output5 = tail call half @llvm.minimumnum.f16(half %input5_1, half %input5_2)
  store half %output5, ptr getelementptr inbounds nuw (i8, ptr @output_f16, i64 10), align 2
  %input6_1 = load half, ptr getelementptr inbounds nuw (i8, ptr @input1_f16, i64 12), align 4
  %input6_2 = load half, ptr getelementptr inbounds nuw (i8, ptr @input2_f16, i64 12), align 4
  %output6 = tail call half @llvm.minimumnum.f16(half %input6_1, half %input6_2)
  store half %output6, ptr getelementptr inbounds nuw (i8, ptr @output_f16, i64 12), align 4
  %input7_1 = load half, ptr getelementptr inbounds nuw (i8, ptr @input1_f16, i64 14), align 2
  %input7_2 = load half, ptr getelementptr inbounds nuw (i8, ptr @input2_f16, i64 14), align 2
  %output7 = tail call half @llvm.minimumnum.f16(half %input7_1, half %input7_2)
  store half %output7, ptr getelementptr inbounds nuw (i8, ptr @output_f16, i64 14), align 2
  %input8_1 = load half, ptr getelementptr inbounds nuw (i8, ptr @input1_f16, i64 16), align 16
  %input8_2 = load half, ptr getelementptr inbounds nuw (i8, ptr @input2_f16, i64 16), align 16
  %output8 = tail call half @llvm.minimumnum.f16(half %input8_1, half %input8_2)
  store half %output8, ptr getelementptr inbounds nuw (i8, ptr @output_f16, i64 16), align 16
  ret void
}
; Scalar declaration of the minimumnum intrinsic used by @fmin16.
declare half @llvm.minimumnum.f16(half, half)
; fmax16: applies llvm.maximumnum.f16 element-wise to the first nine halves of
; @input1_f16/@input2_f16 (byte offsets 0..16, step 2) and stores each result
; into @output_f16 at the matching offset. The autogenerated CHECK lines
; record the current, still fully scalar output; regenerate them with
; update_test_checks.py once maximumnum gains vectorization support
; (see the FIXME at the top of the file).
define void @fmax16() {
; CHECK-LABEL: define void @fmax16(
; CHECK-SAME: ) #[[ATTR0]] {
; CHECK-NEXT: [[ENTRY:.*:]]
; CHECK-NEXT: [[TMP0:%.*]] = load half, ptr @input1_f16, align 16
; CHECK-NEXT: [[TMP1:%.*]] = load half, ptr @input2_f16, align 16
; CHECK-NEXT: [[TMP2:%.*]] = tail call half @llvm.maximumnum.f16(half [[TMP0]], half [[TMP1]])
; CHECK-NEXT: store half [[TMP2]], ptr @output_f16, align 16
; CHECK-NEXT: [[TMP3:%.*]] = load half, ptr getelementptr inbounds nuw (i8, ptr @input1_f16, i64 2), align 2
; CHECK-NEXT: [[TMP4:%.*]] = load half, ptr getelementptr inbounds nuw (i8, ptr @input2_f16, i64 2), align 2
; CHECK-NEXT: [[TMP5:%.*]] = tail call half @llvm.maximumnum.f16(half [[TMP3]], half [[TMP4]])
; CHECK-NEXT: store half [[TMP5]], ptr getelementptr inbounds nuw (i8, ptr @output_f16, i64 2), align 2
; CHECK-NEXT: [[TMP6:%.*]] = load half, ptr getelementptr inbounds nuw (i8, ptr @input1_f16, i64 4), align 4
; CHECK-NEXT: [[TMP7:%.*]] = load half, ptr getelementptr inbounds nuw (i8, ptr @input2_f16, i64 4), align 4
; CHECK-NEXT: [[TMP8:%.*]] = tail call half @llvm.maximumnum.f16(half [[TMP6]], half [[TMP7]])
; CHECK-NEXT: store half [[TMP8]], ptr getelementptr inbounds nuw (i8, ptr @output_f16, i64 4), align 4
; CHECK-NEXT: [[TMP9:%.*]] = load half, ptr getelementptr inbounds nuw (i8, ptr @input1_f16, i64 6), align 2
; CHECK-NEXT: [[TMP10:%.*]] = load half, ptr getelementptr inbounds nuw (i8, ptr @input2_f16, i64 6), align 2
; CHECK-NEXT: [[TMP11:%.*]] = tail call half @llvm.maximumnum.f16(half [[TMP9]], half [[TMP10]])
; CHECK-NEXT: store half [[TMP11]], ptr getelementptr inbounds nuw (i8, ptr @output_f16, i64 6), align 2
; CHECK-NEXT: [[TMP12:%.*]] = load half, ptr getelementptr inbounds nuw (i8, ptr @input1_f16, i64 8), align 8
; CHECK-NEXT: [[TMP13:%.*]] = load half, ptr getelementptr inbounds nuw (i8, ptr @input2_f16, i64 8), align 8
; CHECK-NEXT: [[TMP14:%.*]] = tail call half @llvm.maximumnum.f16(half [[TMP12]], half [[TMP13]])
; CHECK-NEXT: store half [[TMP14]], ptr getelementptr inbounds nuw (i8, ptr @output_f16, i64 8), align 8
; CHECK-NEXT: [[TMP15:%.*]] = load half, ptr getelementptr inbounds nuw (i8, ptr @input1_f16, i64 10), align 2
; CHECK-NEXT: [[TMP16:%.*]] = load half, ptr getelementptr inbounds nuw (i8, ptr @input2_f16, i64 10), align 2
; CHECK-NEXT: [[TMP17:%.*]] = tail call half @llvm.maximumnum.f16(half [[TMP15]], half [[TMP16]])
; CHECK-NEXT: store half [[TMP17]], ptr getelementptr inbounds nuw (i8, ptr @output_f16, i64 10), align 2
; CHECK-NEXT: [[TMP18:%.*]] = load half, ptr getelementptr inbounds nuw (i8, ptr @input1_f16, i64 12), align 4
; CHECK-NEXT: [[TMP19:%.*]] = load half, ptr getelementptr inbounds nuw (i8, ptr @input2_f16, i64 12), align 4
; CHECK-NEXT: [[TMP20:%.*]] = tail call half @llvm.maximumnum.f16(half [[TMP18]], half [[TMP19]])
; CHECK-NEXT: store half [[TMP20]], ptr getelementptr inbounds nuw (i8, ptr @output_f16, i64 12), align 4
; CHECK-NEXT: [[TMP21:%.*]] = load half, ptr getelementptr inbounds nuw (i8, ptr @input1_f16, i64 14), align 2
; CHECK-NEXT: [[TMP22:%.*]] = load half, ptr getelementptr inbounds nuw (i8, ptr @input2_f16, i64 14), align 2
; CHECK-NEXT: [[TMP23:%.*]] = tail call half @llvm.maximumnum.f16(half [[TMP21]], half [[TMP22]])
; CHECK-NEXT: store half [[TMP23]], ptr getelementptr inbounds nuw (i8, ptr @output_f16, i64 14), align 2
; CHECK-NEXT: [[TMP24:%.*]] = load half, ptr getelementptr inbounds nuw (i8, ptr @input1_f16, i64 16), align 16
; CHECK-NEXT: [[TMP25:%.*]] = load half, ptr getelementptr inbounds nuw (i8, ptr @input2_f16, i64 16), align 16
; CHECK-NEXT: [[TMP26:%.*]] = tail call half @llvm.maximumnum.f16(half [[TMP24]], half [[TMP25]])
; CHECK-NEXT: store half [[TMP26]], ptr getelementptr inbounds nuw (i8, ptr @output_f16, i64 16), align 16
; CHECK-NEXT: ret void
;
entry:
; Input IR: nine independent load/load/call/store groups, no loop.
  %input0_0 = load half, ptr @input1_f16, align 16
  %input0_1 = load half, ptr @input2_f16, align 16
  %output0 = tail call half @llvm.maximumnum.f16(half %input0_0, half %input0_1)
  store half %output0, ptr @output_f16, align 16
  %input1_1 = load half, ptr getelementptr inbounds nuw (i8, ptr @input1_f16, i64 2), align 2
  %input1_2 = load half, ptr getelementptr inbounds nuw (i8, ptr @input2_f16, i64 2), align 2
  %output1 = tail call half @llvm.maximumnum.f16(half %input1_1, half %input1_2)
  store half %output1, ptr getelementptr inbounds nuw (i8, ptr @output_f16, i64 2), align 2
  %input2_1 = load half, ptr getelementptr inbounds nuw (i8, ptr @input1_f16, i64 4), align 4
  %input2_2 = load half, ptr getelementptr inbounds nuw (i8, ptr @input2_f16, i64 4), align 4
  %output2 = tail call half @llvm.maximumnum.f16(half %input2_1, half %input2_2)
  store half %output2, ptr getelementptr inbounds nuw (i8, ptr @output_f16, i64 4), align 4
  %input3_1 = load half, ptr getelementptr inbounds nuw (i8, ptr @input1_f16, i64 6), align 2
  %input3_2 = load half, ptr getelementptr inbounds nuw (i8, ptr @input2_f16, i64 6), align 2
  %output3 = tail call half @llvm.maximumnum.f16(half %input3_1, half %input3_2)
  store half %output3, ptr getelementptr inbounds nuw (i8, ptr @output_f16, i64 6), align 2
  %input4_1 = load half, ptr getelementptr inbounds nuw (i8, ptr @input1_f16, i64 8), align 8
  %input4_2 = load half, ptr getelementptr inbounds nuw (i8, ptr @input2_f16, i64 8), align 8
  %output4 = tail call half @llvm.maximumnum.f16(half %input4_1, half %input4_2)
  store half %output4, ptr getelementptr inbounds nuw (i8, ptr @output_f16, i64 8), align 8
  %input5_1 = load half, ptr getelementptr inbounds nuw (i8, ptr @input1_f16, i64 10), align 2
  %input5_2 = load half, ptr getelementptr inbounds nuw (i8, ptr @input2_f16, i64 10), align 2
  %output5 = tail call half @llvm.maximumnum.f16(half %input5_1, half %input5_2)
  store half %output5, ptr getelementptr inbounds nuw (i8, ptr @output_f16, i64 10), align 2
  %input6_1 = load half, ptr getelementptr inbounds nuw (i8, ptr @input1_f16, i64 12), align 4
  %input6_2 = load half, ptr getelementptr inbounds nuw (i8, ptr @input2_f16, i64 12), align 4
  %output6 = tail call half @llvm.maximumnum.f16(half %input6_1, half %input6_2)
  store half %output6, ptr getelementptr inbounds nuw (i8, ptr @output_f16, i64 12), align 4
  %input7_1 = load half, ptr getelementptr inbounds nuw (i8, ptr @input1_f16, i64 14), align 2
  %input7_2 = load half, ptr getelementptr inbounds nuw (i8, ptr @input2_f16, i64 14), align 2
  %output7 = tail call half @llvm.maximumnum.f16(half %input7_1, half %input7_2)
  store half %output7, ptr getelementptr inbounds nuw (i8, ptr @output_f16, i64 14), align 2
  %input8_1 = load half, ptr getelementptr inbounds nuw (i8, ptr @input1_f16, i64 16), align 16
  %input8_2 = load half, ptr getelementptr inbounds nuw (i8, ptr @input2_f16, i64 16), align 16
  %output8 = tail call half @llvm.maximumnum.f16(half %input8_1, half %input8_2)
  store half %output8, ptr getelementptr inbounds nuw (i8, ptr @output_f16, i64 16), align 16
  ret void
}
; Scalar declaration of the maximumnum intrinsic used by @fmax16.
declare half @llvm.maximumnum.f16(half, half)

; View File (commit-view scrape artifact: a second new test file begins below;
; its RUN line targets the SLP vectorizer on riscv64)

; @@ -0,0 +1,516 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
; RUN: opt --passes=slp-vectorizer --mtriple=riscv64 -mattr="+zvfh,+v,+zfh" -S < %s | FileCheck %s
; Nine-element global arrays: two input operands and one output buffer for
; each tested element type (f32, f64, f16), all 16-byte aligned.
; NOTE(review): 9 elements presumably leaves a non-power-of-two tail for the
; vectorizer to handle -- confirm against the companion test files.
@input1_f32 = global [9 x float] zeroinitializer, align 16
@input2_f32 = global [9 x float] zeroinitializer, align 16
@output_f32 = global [9 x float] zeroinitializer, align 16
@input1_f64 = global [9 x double] zeroinitializer, align 16
@input2_f64 = global [9 x double] zeroinitializer, align 16
@output_f64 = global [9 x double] zeroinitializer, align 16
@input1_f16 = global [9 x half] zeroinitializer, align 16
@input2_f16 = global [9 x half] zeroinitializer, align 16
@output_f16 = global [9 x half] zeroinitializer, align 16
; Scalar reference pattern for f32 minimumnum: 9 independent
; load/load/llvm.minimumnum.f32/store groups over the 16-byte-aligned
; global arrays (byte offsets 0..32 in 4-byte steps). The autogenerated
; FileCheck assertions below show the calls remain scalar, i.e. the SLP
; vectorizer does not yet combine llvm.minimumnum; regenerate with
; utils/update_test_checks.py once vectorization support lands.
define void @fmin32() {
; CHECK-LABEL: define void @fmin32(
; CHECK-SAME: ) #[[ATTR0:[0-9]+]] {
; CHECK-NEXT: [[ENTRY:.*:]]
; CHECK-NEXT: [[TMP0:%.*]] = load float, ptr @input1_f32, align 16
; CHECK-NEXT: [[TMP1:%.*]] = load float, ptr @input2_f32, align 16
; CHECK-NEXT: [[TMP2:%.*]] = tail call float @llvm.minimumnum.f32(float [[TMP0]], float [[TMP1]])
; CHECK-NEXT: store float [[TMP2]], ptr @output_f32, align 16
; CHECK-NEXT: [[TMP3:%.*]] = load float, ptr getelementptr inbounds nuw (i8, ptr @input1_f32, i64 4), align 4
; CHECK-NEXT: [[TMP4:%.*]] = load float, ptr getelementptr inbounds nuw (i8, ptr @input2_f32, i64 4), align 4
; CHECK-NEXT: [[TMP5:%.*]] = tail call float @llvm.minimumnum.f32(float [[TMP3]], float [[TMP4]])
; CHECK-NEXT: store float [[TMP5]], ptr getelementptr inbounds nuw (i8, ptr @output_f32, i64 4), align 4
; CHECK-NEXT: [[TMP6:%.*]] = load float, ptr getelementptr inbounds nuw (i8, ptr @input1_f32, i64 8), align 8
; CHECK-NEXT: [[TMP7:%.*]] = load float, ptr getelementptr inbounds nuw (i8, ptr @input2_f32, i64 8), align 8
; CHECK-NEXT: [[TMP8:%.*]] = tail call float @llvm.minimumnum.f32(float [[TMP6]], float [[TMP7]])
; CHECK-NEXT: store float [[TMP8]], ptr getelementptr inbounds nuw (i8, ptr @output_f32, i64 8), align 8
; CHECK-NEXT: [[TMP9:%.*]] = load float, ptr getelementptr inbounds nuw (i8, ptr @input1_f32, i64 12), align 4
; CHECK-NEXT: [[TMP10:%.*]] = load float, ptr getelementptr inbounds nuw (i8, ptr @input2_f32, i64 12), align 4
; CHECK-NEXT: [[TMP11:%.*]] = tail call float @llvm.minimumnum.f32(float [[TMP9]], float [[TMP10]])
; CHECK-NEXT: store float [[TMP11]], ptr getelementptr inbounds nuw (i8, ptr @output_f32, i64 12), align 4
; CHECK-NEXT: [[TMP12:%.*]] = load float, ptr getelementptr inbounds nuw (i8, ptr @input1_f32, i64 16), align 16
; CHECK-NEXT: [[TMP13:%.*]] = load float, ptr getelementptr inbounds nuw (i8, ptr @input2_f32, i64 16), align 16
; CHECK-NEXT: [[TMP14:%.*]] = tail call float @llvm.minimumnum.f32(float [[TMP12]], float [[TMP13]])
; CHECK-NEXT: store float [[TMP14]], ptr getelementptr inbounds nuw (i8, ptr @output_f32, i64 16), align 16
; CHECK-NEXT: [[TMP15:%.*]] = load float, ptr getelementptr inbounds nuw (i8, ptr @input1_f32, i64 20), align 4
; CHECK-NEXT: [[TMP16:%.*]] = load float, ptr getelementptr inbounds nuw (i8, ptr @input2_f32, i64 20), align 4
; CHECK-NEXT: [[TMP17:%.*]] = tail call float @llvm.minimumnum.f32(float [[TMP15]], float [[TMP16]])
; CHECK-NEXT: store float [[TMP17]], ptr getelementptr inbounds nuw (i8, ptr @output_f32, i64 20), align 4
; CHECK-NEXT: [[TMP18:%.*]] = load float, ptr getelementptr inbounds nuw (i8, ptr @input1_f32, i64 24), align 8
; CHECK-NEXT: [[TMP19:%.*]] = load float, ptr getelementptr inbounds nuw (i8, ptr @input2_f32, i64 24), align 8
; CHECK-NEXT: [[TMP20:%.*]] = tail call float @llvm.minimumnum.f32(float [[TMP18]], float [[TMP19]])
; CHECK-NEXT: store float [[TMP20]], ptr getelementptr inbounds nuw (i8, ptr @output_f32, i64 24), align 8
; CHECK-NEXT: [[TMP21:%.*]] = load float, ptr getelementptr inbounds nuw (i8, ptr @input1_f32, i64 28), align 4
; CHECK-NEXT: [[TMP22:%.*]] = load float, ptr getelementptr inbounds nuw (i8, ptr @input2_f32, i64 28), align 4
; CHECK-NEXT: [[TMP23:%.*]] = tail call float @llvm.minimumnum.f32(float [[TMP21]], float [[TMP22]])
; CHECK-NEXT: store float [[TMP23]], ptr getelementptr inbounds nuw (i8, ptr @output_f32, i64 28), align 4
; CHECK-NEXT: [[TMP24:%.*]] = load float, ptr getelementptr inbounds nuw (i8, ptr @input1_f32, i64 32), align 16
; CHECK-NEXT: [[TMP25:%.*]] = load float, ptr getelementptr inbounds nuw (i8, ptr @input2_f32, i64 32), align 16
; CHECK-NEXT: [[TMP26:%.*]] = tail call float @llvm.minimumnum.f32(float [[TMP24]], float [[TMP25]])
; CHECK-NEXT: store float [[TMP26]], ptr getelementptr inbounds nuw (i8, ptr @output_f32, i64 32), align 16
; CHECK-NEXT: ret void
;
entry:
  ; output_f32[i] = minimumnum(input1_f32[i], input2_f32[i]) for i = 0..8;
  ; alignment of each access reflects its byte offset within the 16-aligned array.
  %input0_0 = load float, ptr @input1_f32, align 16
  %input0_1 = load float, ptr @input2_f32, align 16
  %output0 = tail call float @llvm.minimumnum.f32(float %input0_0, float %input0_1)
  store float %output0, ptr @output_f32, align 16
  %input1_1 = load float, ptr getelementptr inbounds nuw (i8, ptr @input1_f32, i64 4), align 4
  %input1_2 = load float, ptr getelementptr inbounds nuw (i8, ptr @input2_f32, i64 4), align 4
  %output1 = tail call float @llvm.minimumnum.f32(float %input1_1, float %input1_2)
  store float %output1, ptr getelementptr inbounds nuw (i8, ptr @output_f32, i64 4), align 4
  %input2_1 = load float, ptr getelementptr inbounds nuw (i8, ptr @input1_f32, i64 8), align 8
  %input2_2 = load float, ptr getelementptr inbounds nuw (i8, ptr @input2_f32, i64 8), align 8
  %output2 = tail call float @llvm.minimumnum.f32(float %input2_1, float %input2_2)
  store float %output2, ptr getelementptr inbounds nuw (i8, ptr @output_f32, i64 8), align 8
  %input3_1 = load float, ptr getelementptr inbounds nuw (i8, ptr @input1_f32, i64 12), align 4
  %input3_2 = load float, ptr getelementptr inbounds nuw (i8, ptr @input2_f32, i64 12), align 4
  %output3 = tail call float @llvm.minimumnum.f32(float %input3_1, float %input3_2)
  store float %output3, ptr getelementptr inbounds nuw (i8, ptr @output_f32, i64 12), align 4
  %input4_1 = load float, ptr getelementptr inbounds nuw (i8, ptr @input1_f32, i64 16), align 16
  %input4_2 = load float, ptr getelementptr inbounds nuw (i8, ptr @input2_f32, i64 16), align 16
  %output4 = tail call float @llvm.minimumnum.f32(float %input4_1, float %input4_2)
  store float %output4, ptr getelementptr inbounds nuw (i8, ptr @output_f32, i64 16), align 16
  %input5_1 = load float, ptr getelementptr inbounds nuw (i8, ptr @input1_f32, i64 20), align 4
  %input5_2 = load float, ptr getelementptr inbounds nuw (i8, ptr @input2_f32, i64 20), align 4
  %output5 = tail call float @llvm.minimumnum.f32(float %input5_1, float %input5_2)
  store float %output5, ptr getelementptr inbounds nuw (i8, ptr @output_f32, i64 20), align 4
  %input6_1 = load float, ptr getelementptr inbounds nuw (i8, ptr @input1_f32, i64 24), align 8
  %input6_2 = load float, ptr getelementptr inbounds nuw (i8, ptr @input2_f32, i64 24), align 8
  %output6 = tail call float @llvm.minimumnum.f32(float %input6_1, float %input6_2)
  store float %output6, ptr getelementptr inbounds nuw (i8, ptr @output_f32, i64 24), align 8
  %input7_1 = load float, ptr getelementptr inbounds nuw (i8, ptr @input1_f32, i64 28), align 4
  %input7_2 = load float, ptr getelementptr inbounds nuw (i8, ptr @input2_f32, i64 28), align 4
  %output7 = tail call float @llvm.minimumnum.f32(float %input7_1, float %input7_2)
  store float %output7, ptr getelementptr inbounds nuw (i8, ptr @output_f32, i64 28), align 4
  %input8_1 = load float, ptr getelementptr inbounds nuw (i8, ptr @input1_f32, i64 32), align 16
  %input8_2 = load float, ptr getelementptr inbounds nuw (i8, ptr @input2_f32, i64 32), align 16
  %output8 = tail call float @llvm.minimumnum.f32(float %input8_1, float %input8_2)
  store float %output8, ptr getelementptr inbounds nuw (i8, ptr @output_f32, i64 32), align 16
  ret void
}
declare float @llvm.minimumnum.f32(float, float)
; Scalar reference pattern for f32 maximumnum: identical structure to
; @fmin32 but calling llvm.maximumnum.f32 (9 groups, byte offsets 0..32
; in 4-byte steps). The autogenerated FileCheck assertions show the
; calls remain scalar — no SLP vectorization of llvm.maximumnum yet.
define void @fmax32() {
; CHECK-LABEL: define void @fmax32(
; CHECK-SAME: ) #[[ATTR0]] {
; CHECK-NEXT: [[ENTRY:.*:]]
; CHECK-NEXT: [[TMP0:%.*]] = load float, ptr @input1_f32, align 16
; CHECK-NEXT: [[TMP1:%.*]] = load float, ptr @input2_f32, align 16
; CHECK-NEXT: [[TMP2:%.*]] = tail call float @llvm.maximumnum.f32(float [[TMP0]], float [[TMP1]])
; CHECK-NEXT: store float [[TMP2]], ptr @output_f32, align 16
; CHECK-NEXT: [[TMP3:%.*]] = load float, ptr getelementptr inbounds nuw (i8, ptr @input1_f32, i64 4), align 4
; CHECK-NEXT: [[TMP4:%.*]] = load float, ptr getelementptr inbounds nuw (i8, ptr @input2_f32, i64 4), align 4
; CHECK-NEXT: [[TMP5:%.*]] = tail call float @llvm.maximumnum.f32(float [[TMP3]], float [[TMP4]])
; CHECK-NEXT: store float [[TMP5]], ptr getelementptr inbounds nuw (i8, ptr @output_f32, i64 4), align 4
; CHECK-NEXT: [[TMP6:%.*]] = load float, ptr getelementptr inbounds nuw (i8, ptr @input1_f32, i64 8), align 8
; CHECK-NEXT: [[TMP7:%.*]] = load float, ptr getelementptr inbounds nuw (i8, ptr @input2_f32, i64 8), align 8
; CHECK-NEXT: [[TMP8:%.*]] = tail call float @llvm.maximumnum.f32(float [[TMP6]], float [[TMP7]])
; CHECK-NEXT: store float [[TMP8]], ptr getelementptr inbounds nuw (i8, ptr @output_f32, i64 8), align 8
; CHECK-NEXT: [[TMP9:%.*]] = load float, ptr getelementptr inbounds nuw (i8, ptr @input1_f32, i64 12), align 4
; CHECK-NEXT: [[TMP10:%.*]] = load float, ptr getelementptr inbounds nuw (i8, ptr @input2_f32, i64 12), align 4
; CHECK-NEXT: [[TMP11:%.*]] = tail call float @llvm.maximumnum.f32(float [[TMP9]], float [[TMP10]])
; CHECK-NEXT: store float [[TMP11]], ptr getelementptr inbounds nuw (i8, ptr @output_f32, i64 12), align 4
; CHECK-NEXT: [[TMP12:%.*]] = load float, ptr getelementptr inbounds nuw (i8, ptr @input1_f32, i64 16), align 16
; CHECK-NEXT: [[TMP13:%.*]] = load float, ptr getelementptr inbounds nuw (i8, ptr @input2_f32, i64 16), align 16
; CHECK-NEXT: [[TMP14:%.*]] = tail call float @llvm.maximumnum.f32(float [[TMP12]], float [[TMP13]])
; CHECK-NEXT: store float [[TMP14]], ptr getelementptr inbounds nuw (i8, ptr @output_f32, i64 16), align 16
; CHECK-NEXT: [[TMP15:%.*]] = load float, ptr getelementptr inbounds nuw (i8, ptr @input1_f32, i64 20), align 4
; CHECK-NEXT: [[TMP16:%.*]] = load float, ptr getelementptr inbounds nuw (i8, ptr @input2_f32, i64 20), align 4
; CHECK-NEXT: [[TMP17:%.*]] = tail call float @llvm.maximumnum.f32(float [[TMP15]], float [[TMP16]])
; CHECK-NEXT: store float [[TMP17]], ptr getelementptr inbounds nuw (i8, ptr @output_f32, i64 20), align 4
; CHECK-NEXT: [[TMP18:%.*]] = load float, ptr getelementptr inbounds nuw (i8, ptr @input1_f32, i64 24), align 8
; CHECK-NEXT: [[TMP19:%.*]] = load float, ptr getelementptr inbounds nuw (i8, ptr @input2_f32, i64 24), align 8
; CHECK-NEXT: [[TMP20:%.*]] = tail call float @llvm.maximumnum.f32(float [[TMP18]], float [[TMP19]])
; CHECK-NEXT: store float [[TMP20]], ptr getelementptr inbounds nuw (i8, ptr @output_f32, i64 24), align 8
; CHECK-NEXT: [[TMP21:%.*]] = load float, ptr getelementptr inbounds nuw (i8, ptr @input1_f32, i64 28), align 4
; CHECK-NEXT: [[TMP22:%.*]] = load float, ptr getelementptr inbounds nuw (i8, ptr @input2_f32, i64 28), align 4
; CHECK-NEXT: [[TMP23:%.*]] = tail call float @llvm.maximumnum.f32(float [[TMP21]], float [[TMP22]])
; CHECK-NEXT: store float [[TMP23]], ptr getelementptr inbounds nuw (i8, ptr @output_f32, i64 28), align 4
; CHECK-NEXT: [[TMP24:%.*]] = load float, ptr getelementptr inbounds nuw (i8, ptr @input1_f32, i64 32), align 16
; CHECK-NEXT: [[TMP25:%.*]] = load float, ptr getelementptr inbounds nuw (i8, ptr @input2_f32, i64 32), align 16
; CHECK-NEXT: [[TMP26:%.*]] = tail call float @llvm.maximumnum.f32(float [[TMP24]], float [[TMP25]])
; CHECK-NEXT: store float [[TMP26]], ptr getelementptr inbounds nuw (i8, ptr @output_f32, i64 32), align 16
; CHECK-NEXT: ret void
;
entry:
  ; output_f32[i] = maximumnum(input1_f32[i], input2_f32[i]) for i = 0..8;
  ; alignment of each access reflects its byte offset within the 16-aligned array.
  %input0_0 = load float, ptr @input1_f32, align 16
  %input0_1 = load float, ptr @input2_f32, align 16
  %output0 = tail call float @llvm.maximumnum.f32(float %input0_0, float %input0_1)
  store float %output0, ptr @output_f32, align 16
  %input1_1 = load float, ptr getelementptr inbounds nuw (i8, ptr @input1_f32, i64 4), align 4
  %input1_2 = load float, ptr getelementptr inbounds nuw (i8, ptr @input2_f32, i64 4), align 4
  %output1 = tail call float @llvm.maximumnum.f32(float %input1_1, float %input1_2)
  store float %output1, ptr getelementptr inbounds nuw (i8, ptr @output_f32, i64 4), align 4
  %input2_1 = load float, ptr getelementptr inbounds nuw (i8, ptr @input1_f32, i64 8), align 8
  %input2_2 = load float, ptr getelementptr inbounds nuw (i8, ptr @input2_f32, i64 8), align 8
  %output2 = tail call float @llvm.maximumnum.f32(float %input2_1, float %input2_2)
  store float %output2, ptr getelementptr inbounds nuw (i8, ptr @output_f32, i64 8), align 8
  %input3_1 = load float, ptr getelementptr inbounds nuw (i8, ptr @input1_f32, i64 12), align 4
  %input3_2 = load float, ptr getelementptr inbounds nuw (i8, ptr @input2_f32, i64 12), align 4
  %output3 = tail call float @llvm.maximumnum.f32(float %input3_1, float %input3_2)
  store float %output3, ptr getelementptr inbounds nuw (i8, ptr @output_f32, i64 12), align 4
  %input4_1 = load float, ptr getelementptr inbounds nuw (i8, ptr @input1_f32, i64 16), align 16
  %input4_2 = load float, ptr getelementptr inbounds nuw (i8, ptr @input2_f32, i64 16), align 16
  %output4 = tail call float @llvm.maximumnum.f32(float %input4_1, float %input4_2)
  store float %output4, ptr getelementptr inbounds nuw (i8, ptr @output_f32, i64 16), align 16
  %input5_1 = load float, ptr getelementptr inbounds nuw (i8, ptr @input1_f32, i64 20), align 4
  %input5_2 = load float, ptr getelementptr inbounds nuw (i8, ptr @input2_f32, i64 20), align 4
  %output5 = tail call float @llvm.maximumnum.f32(float %input5_1, float %input5_2)
  store float %output5, ptr getelementptr inbounds nuw (i8, ptr @output_f32, i64 20), align 4
  %input6_1 = load float, ptr getelementptr inbounds nuw (i8, ptr @input1_f32, i64 24), align 8
  %input6_2 = load float, ptr getelementptr inbounds nuw (i8, ptr @input2_f32, i64 24), align 8
  %output6 = tail call float @llvm.maximumnum.f32(float %input6_1, float %input6_2)
  store float %output6, ptr getelementptr inbounds nuw (i8, ptr @output_f32, i64 24), align 8
  %input7_1 = load float, ptr getelementptr inbounds nuw (i8, ptr @input1_f32, i64 28), align 4
  %input7_2 = load float, ptr getelementptr inbounds nuw (i8, ptr @input2_f32, i64 28), align 4
  %output7 = tail call float @llvm.maximumnum.f32(float %input7_1, float %input7_2)
  store float %output7, ptr getelementptr inbounds nuw (i8, ptr @output_f32, i64 28), align 4
  %input8_1 = load float, ptr getelementptr inbounds nuw (i8, ptr @input1_f32, i64 32), align 16
  %input8_2 = load float, ptr getelementptr inbounds nuw (i8, ptr @input2_f32, i64 32), align 16
  %output8 = tail call float @llvm.maximumnum.f32(float %input8_1, float %input8_2)
  store float %output8, ptr getelementptr inbounds nuw (i8, ptr @output_f32, i64 32), align 16
  ret void
}
declare float @llvm.maximumnum.f32(float, float)
; Scalar reference pattern for f64 minimumnum: 9 independent
; load/load/llvm.minimumnum.f64/store groups over the 16-byte-aligned
; global arrays (byte offsets 0..64 in 8-byte steps). The autogenerated
; FileCheck assertions show the calls remain scalar — no SLP
; vectorization of llvm.minimumnum yet.
define void @fmin64() {
; CHECK-LABEL: define void @fmin64(
; CHECK-SAME: ) #[[ATTR0]] {
; CHECK-NEXT: [[ENTRY:.*:]]
; CHECK-NEXT: [[TMP0:%.*]] = load double, ptr @input1_f64, align 16
; CHECK-NEXT: [[TMP1:%.*]] = load double, ptr @input2_f64, align 16
; CHECK-NEXT: [[TMP2:%.*]] = tail call double @llvm.minimumnum.f64(double [[TMP0]], double [[TMP1]])
; CHECK-NEXT: store double [[TMP2]], ptr @output_f64, align 16
; CHECK-NEXT: [[TMP3:%.*]] = load double, ptr getelementptr inbounds nuw (i8, ptr @input1_f64, i64 8), align 8
; CHECK-NEXT: [[TMP4:%.*]] = load double, ptr getelementptr inbounds nuw (i8, ptr @input2_f64, i64 8), align 8
; CHECK-NEXT: [[TMP5:%.*]] = tail call double @llvm.minimumnum.f64(double [[TMP3]], double [[TMP4]])
; CHECK-NEXT: store double [[TMP5]], ptr getelementptr inbounds nuw (i8, ptr @output_f64, i64 8), align 8
; CHECK-NEXT: [[TMP6:%.*]] = load double, ptr getelementptr inbounds nuw (i8, ptr @input1_f64, i64 16), align 16
; CHECK-NEXT: [[TMP7:%.*]] = load double, ptr getelementptr inbounds nuw (i8, ptr @input2_f64, i64 16), align 16
; CHECK-NEXT: [[TMP8:%.*]] = tail call double @llvm.minimumnum.f64(double [[TMP6]], double [[TMP7]])
; CHECK-NEXT: store double [[TMP8]], ptr getelementptr inbounds nuw (i8, ptr @output_f64, i64 16), align 16
; CHECK-NEXT: [[TMP9:%.*]] = load double, ptr getelementptr inbounds nuw (i8, ptr @input1_f64, i64 24), align 8
; CHECK-NEXT: [[TMP10:%.*]] = load double, ptr getelementptr inbounds nuw (i8, ptr @input2_f64, i64 24), align 8
; CHECK-NEXT: [[TMP11:%.*]] = tail call double @llvm.minimumnum.f64(double [[TMP9]], double [[TMP10]])
; CHECK-NEXT: store double [[TMP11]], ptr getelementptr inbounds nuw (i8, ptr @output_f64, i64 24), align 8
; CHECK-NEXT: [[TMP12:%.*]] = load double, ptr getelementptr inbounds nuw (i8, ptr @input1_f64, i64 32), align 16
; CHECK-NEXT: [[TMP13:%.*]] = load double, ptr getelementptr inbounds nuw (i8, ptr @input2_f64, i64 32), align 16
; CHECK-NEXT: [[TMP14:%.*]] = tail call double @llvm.minimumnum.f64(double [[TMP12]], double [[TMP13]])
; CHECK-NEXT: store double [[TMP14]], ptr getelementptr inbounds nuw (i8, ptr @output_f64, i64 32), align 16
; CHECK-NEXT: [[TMP15:%.*]] = load double, ptr getelementptr inbounds nuw (i8, ptr @input1_f64, i64 40), align 8
; CHECK-NEXT: [[TMP16:%.*]] = load double, ptr getelementptr inbounds nuw (i8, ptr @input2_f64, i64 40), align 8
; CHECK-NEXT: [[TMP17:%.*]] = tail call double @llvm.minimumnum.f64(double [[TMP15]], double [[TMP16]])
; CHECK-NEXT: store double [[TMP17]], ptr getelementptr inbounds nuw (i8, ptr @output_f64, i64 40), align 8
; CHECK-NEXT: [[TMP18:%.*]] = load double, ptr getelementptr inbounds nuw (i8, ptr @input1_f64, i64 48), align 16
; CHECK-NEXT: [[TMP19:%.*]] = load double, ptr getelementptr inbounds nuw (i8, ptr @input2_f64, i64 48), align 16
; CHECK-NEXT: [[TMP20:%.*]] = tail call double @llvm.minimumnum.f64(double [[TMP18]], double [[TMP19]])
; CHECK-NEXT: store double [[TMP20]], ptr getelementptr inbounds nuw (i8, ptr @output_f64, i64 48), align 16
; CHECK-NEXT: [[TMP21:%.*]] = load double, ptr getelementptr inbounds nuw (i8, ptr @input1_f64, i64 56), align 8
; CHECK-NEXT: [[TMP22:%.*]] = load double, ptr getelementptr inbounds nuw (i8, ptr @input2_f64, i64 56), align 8
; CHECK-NEXT: [[TMP23:%.*]] = tail call double @llvm.minimumnum.f64(double [[TMP21]], double [[TMP22]])
; CHECK-NEXT: store double [[TMP23]], ptr getelementptr inbounds nuw (i8, ptr @output_f64, i64 56), align 8
; CHECK-NEXT: [[TMP24:%.*]] = load double, ptr getelementptr inbounds nuw (i8, ptr @input1_f64, i64 64), align 16
; CHECK-NEXT: [[TMP25:%.*]] = load double, ptr getelementptr inbounds nuw (i8, ptr @input2_f64, i64 64), align 16
; CHECK-NEXT: [[TMP26:%.*]] = tail call double @llvm.minimumnum.f64(double [[TMP24]], double [[TMP25]])
; CHECK-NEXT: store double [[TMP26]], ptr getelementptr inbounds nuw (i8, ptr @output_f64, i64 64), align 16
; CHECK-NEXT: ret void
;
entry:
  ; output_f64[i] = minimumnum(input1_f64[i], input2_f64[i]) for i = 0..8;
  ; alignment of each access reflects its byte offset within the 16-aligned array.
  %input0_0 = load double, ptr @input1_f64, align 16
  %input0_1 = load double, ptr @input2_f64, align 16
  %output0 = tail call double @llvm.minimumnum.f64(double %input0_0, double %input0_1)
  store double %output0, ptr @output_f64, align 16
  %input1_1 = load double, ptr getelementptr inbounds nuw (i8, ptr @input1_f64, i64 8), align 8
  %input1_2 = load double, ptr getelementptr inbounds nuw (i8, ptr @input2_f64, i64 8), align 8
  %output1 = tail call double @llvm.minimumnum.f64(double %input1_1, double %input1_2)
  store double %output1, ptr getelementptr inbounds nuw (i8, ptr @output_f64, i64 8), align 8
  %input2_1 = load double, ptr getelementptr inbounds nuw (i8, ptr @input1_f64, i64 16), align 16
  %input2_2 = load double, ptr getelementptr inbounds nuw (i8, ptr @input2_f64, i64 16), align 16
  %output2 = tail call double @llvm.minimumnum.f64(double %input2_1, double %input2_2)
  store double %output2, ptr getelementptr inbounds nuw (i8, ptr @output_f64, i64 16), align 16
  %input3_1 = load double, ptr getelementptr inbounds nuw (i8, ptr @input1_f64, i64 24), align 8
  %input3_2 = load double, ptr getelementptr inbounds nuw (i8, ptr @input2_f64, i64 24), align 8
  %output3 = tail call double @llvm.minimumnum.f64(double %input3_1, double %input3_2)
  store double %output3, ptr getelementptr inbounds nuw (i8, ptr @output_f64, i64 24), align 8
  %input4_1 = load double, ptr getelementptr inbounds nuw (i8, ptr @input1_f64, i64 32), align 16
  %input4_2 = load double, ptr getelementptr inbounds nuw (i8, ptr @input2_f64, i64 32), align 16
  %output4 = tail call double @llvm.minimumnum.f64(double %input4_1, double %input4_2)
  store double %output4, ptr getelementptr inbounds nuw (i8, ptr @output_f64, i64 32), align 16
  %input5_1 = load double, ptr getelementptr inbounds nuw (i8, ptr @input1_f64, i64 40), align 8
  %input5_2 = load double, ptr getelementptr inbounds nuw (i8, ptr @input2_f64, i64 40), align 8
  %output5 = tail call double @llvm.minimumnum.f64(double %input5_1, double %input5_2)
  store double %output5, ptr getelementptr inbounds nuw (i8, ptr @output_f64, i64 40), align 8
  %input6_1 = load double, ptr getelementptr inbounds nuw (i8, ptr @input1_f64, i64 48), align 16
  %input6_2 = load double, ptr getelementptr inbounds nuw (i8, ptr @input2_f64, i64 48), align 16
  %output6 = tail call double @llvm.minimumnum.f64(double %input6_1, double %input6_2)
  store double %output6, ptr getelementptr inbounds nuw (i8, ptr @output_f64, i64 48), align 16
  %input7_1 = load double, ptr getelementptr inbounds nuw (i8, ptr @input1_f64, i64 56), align 8
  %input7_2 = load double, ptr getelementptr inbounds nuw (i8, ptr @input2_f64, i64 56), align 8
  %output7 = tail call double @llvm.minimumnum.f64(double %input7_1, double %input7_2)
  store double %output7, ptr getelementptr inbounds nuw (i8, ptr @output_f64, i64 56), align 8
  %input8_1 = load double, ptr getelementptr inbounds nuw (i8, ptr @input1_f64, i64 64), align 16
  %input8_2 = load double, ptr getelementptr inbounds nuw (i8, ptr @input2_f64, i64 64), align 16
  %output8 = tail call double @llvm.minimumnum.f64(double %input8_1, double %input8_2)
  store double %output8, ptr getelementptr inbounds nuw (i8, ptr @output_f64, i64 64), align 16
  ret void
}
declare double @llvm.minimumnum.f64(double, double)
; Scalar reference pattern for f64 maximumnum: identical structure to
; @fmin64 but calling llvm.maximumnum.f64 (9 groups, byte offsets 0..64
; in 8-byte steps). The autogenerated FileCheck assertions show the
; calls remain scalar — no SLP vectorization of llvm.maximumnum yet.
define void @fmax64() {
; CHECK-LABEL: define void @fmax64(
; CHECK-SAME: ) #[[ATTR0]] {
; CHECK-NEXT: [[ENTRY:.*:]]
; CHECK-NEXT: [[TMP0:%.*]] = load double, ptr @input1_f64, align 16
; CHECK-NEXT: [[TMP1:%.*]] = load double, ptr @input2_f64, align 16
; CHECK-NEXT: [[TMP2:%.*]] = tail call double @llvm.maximumnum.f64(double [[TMP0]], double [[TMP1]])
; CHECK-NEXT: store double [[TMP2]], ptr @output_f64, align 16
; CHECK-NEXT: [[TMP3:%.*]] = load double, ptr getelementptr inbounds nuw (i8, ptr @input1_f64, i64 8), align 8
; CHECK-NEXT: [[TMP4:%.*]] = load double, ptr getelementptr inbounds nuw (i8, ptr @input2_f64, i64 8), align 8
; CHECK-NEXT: [[TMP5:%.*]] = tail call double @llvm.maximumnum.f64(double [[TMP3]], double [[TMP4]])
; CHECK-NEXT: store double [[TMP5]], ptr getelementptr inbounds nuw (i8, ptr @output_f64, i64 8), align 8
; CHECK-NEXT: [[TMP6:%.*]] = load double, ptr getelementptr inbounds nuw (i8, ptr @input1_f64, i64 16), align 16
; CHECK-NEXT: [[TMP7:%.*]] = load double, ptr getelementptr inbounds nuw (i8, ptr @input2_f64, i64 16), align 16
; CHECK-NEXT: [[TMP8:%.*]] = tail call double @llvm.maximumnum.f64(double [[TMP6]], double [[TMP7]])
; CHECK-NEXT: store double [[TMP8]], ptr getelementptr inbounds nuw (i8, ptr @output_f64, i64 16), align 16
; CHECK-NEXT: [[TMP9:%.*]] = load double, ptr getelementptr inbounds nuw (i8, ptr @input1_f64, i64 24), align 8
; CHECK-NEXT: [[TMP10:%.*]] = load double, ptr getelementptr inbounds nuw (i8, ptr @input2_f64, i64 24), align 8
; CHECK-NEXT: [[TMP11:%.*]] = tail call double @llvm.maximumnum.f64(double [[TMP9]], double [[TMP10]])
; CHECK-NEXT: store double [[TMP11]], ptr getelementptr inbounds nuw (i8, ptr @output_f64, i64 24), align 8
; CHECK-NEXT: [[TMP12:%.*]] = load double, ptr getelementptr inbounds nuw (i8, ptr @input1_f64, i64 32), align 16
; CHECK-NEXT: [[TMP13:%.*]] = load double, ptr getelementptr inbounds nuw (i8, ptr @input2_f64, i64 32), align 16
; CHECK-NEXT: [[TMP14:%.*]] = tail call double @llvm.maximumnum.f64(double [[TMP12]], double [[TMP13]])
; CHECK-NEXT: store double [[TMP14]], ptr getelementptr inbounds nuw (i8, ptr @output_f64, i64 32), align 16
; CHECK-NEXT: [[TMP15:%.*]] = load double, ptr getelementptr inbounds nuw (i8, ptr @input1_f64, i64 40), align 8
; CHECK-NEXT: [[TMP16:%.*]] = load double, ptr getelementptr inbounds nuw (i8, ptr @input2_f64, i64 40), align 8
; CHECK-NEXT: [[TMP17:%.*]] = tail call double @llvm.maximumnum.f64(double [[TMP15]], double [[TMP16]])
; CHECK-NEXT: store double [[TMP17]], ptr getelementptr inbounds nuw (i8, ptr @output_f64, i64 40), align 8
; CHECK-NEXT: [[TMP18:%.*]] = load double, ptr getelementptr inbounds nuw (i8, ptr @input1_f64, i64 48), align 16
; CHECK-NEXT: [[TMP19:%.*]] = load double, ptr getelementptr inbounds nuw (i8, ptr @input2_f64, i64 48), align 16
; CHECK-NEXT: [[TMP20:%.*]] = tail call double @llvm.maximumnum.f64(double [[TMP18]], double [[TMP19]])
; CHECK-NEXT: store double [[TMP20]], ptr getelementptr inbounds nuw (i8, ptr @output_f64, i64 48), align 16
; CHECK-NEXT: [[TMP21:%.*]] = load double, ptr getelementptr inbounds nuw (i8, ptr @input1_f64, i64 56), align 8
; CHECK-NEXT: [[TMP22:%.*]] = load double, ptr getelementptr inbounds nuw (i8, ptr @input2_f64, i64 56), align 8
; CHECK-NEXT: [[TMP23:%.*]] = tail call double @llvm.maximumnum.f64(double [[TMP21]], double [[TMP22]])
; CHECK-NEXT: store double [[TMP23]], ptr getelementptr inbounds nuw (i8, ptr @output_f64, i64 56), align 8
; CHECK-NEXT: [[TMP24:%.*]] = load double, ptr getelementptr inbounds nuw (i8, ptr @input1_f64, i64 64), align 16
; CHECK-NEXT: [[TMP25:%.*]] = load double, ptr getelementptr inbounds nuw (i8, ptr @input2_f64, i64 64), align 16
; CHECK-NEXT: [[TMP26:%.*]] = tail call double @llvm.maximumnum.f64(double [[TMP24]], double [[TMP25]])
; CHECK-NEXT: store double [[TMP26]], ptr getelementptr inbounds nuw (i8, ptr @output_f64, i64 64), align 16
; CHECK-NEXT: ret void
;
entry:
  ; output_f64[i] = maximumnum(input1_f64[i], input2_f64[i]) for i = 0..8;
  ; alignment of each access reflects its byte offset within the 16-aligned array.
  %input0_0 = load double, ptr @input1_f64, align 16
  %input0_1 = load double, ptr @input2_f64, align 16
  %output0 = tail call double @llvm.maximumnum.f64(double %input0_0, double %input0_1)
  store double %output0, ptr @output_f64, align 16
  %input1_1 = load double, ptr getelementptr inbounds nuw (i8, ptr @input1_f64, i64 8), align 8
  %input1_2 = load double, ptr getelementptr inbounds nuw (i8, ptr @input2_f64, i64 8), align 8
  %output1 = tail call double @llvm.maximumnum.f64(double %input1_1, double %input1_2)
  store double %output1, ptr getelementptr inbounds nuw (i8, ptr @output_f64, i64 8), align 8
  %input2_1 = load double, ptr getelementptr inbounds nuw (i8, ptr @input1_f64, i64 16), align 16
  %input2_2 = load double, ptr getelementptr inbounds nuw (i8, ptr @input2_f64, i64 16), align 16
  %output2 = tail call double @llvm.maximumnum.f64(double %input2_1, double %input2_2)
  store double %output2, ptr getelementptr inbounds nuw (i8, ptr @output_f64, i64 16), align 16
  %input3_1 = load double, ptr getelementptr inbounds nuw (i8, ptr @input1_f64, i64 24), align 8
  %input3_2 = load double, ptr getelementptr inbounds nuw (i8, ptr @input2_f64, i64 24), align 8
  %output3 = tail call double @llvm.maximumnum.f64(double %input3_1, double %input3_2)
  store double %output3, ptr getelementptr inbounds nuw (i8, ptr @output_f64, i64 24), align 8
  %input4_1 = load double, ptr getelementptr inbounds nuw (i8, ptr @input1_f64, i64 32), align 16
  %input4_2 = load double, ptr getelementptr inbounds nuw (i8, ptr @input2_f64, i64 32), align 16
  %output4 = tail call double @llvm.maximumnum.f64(double %input4_1, double %input4_2)
  store double %output4, ptr getelementptr inbounds nuw (i8, ptr @output_f64, i64 32), align 16
  %input5_1 = load double, ptr getelementptr inbounds nuw (i8, ptr @input1_f64, i64 40), align 8
  %input5_2 = load double, ptr getelementptr inbounds nuw (i8, ptr @input2_f64, i64 40), align 8
  %output5 = tail call double @llvm.maximumnum.f64(double %input5_1, double %input5_2)
  store double %output5, ptr getelementptr inbounds nuw (i8, ptr @output_f64, i64 40), align 8
  %input6_1 = load double, ptr getelementptr inbounds nuw (i8, ptr @input1_f64, i64 48), align 16
  %input6_2 = load double, ptr getelementptr inbounds nuw (i8, ptr @input2_f64, i64 48), align 16
  %output6 = tail call double @llvm.maximumnum.f64(double %input6_1, double %input6_2)
  store double %output6, ptr getelementptr inbounds nuw (i8, ptr @output_f64, i64 48), align 16
  %input7_1 = load double, ptr getelementptr inbounds nuw (i8, ptr @input1_f64, i64 56), align 8
  %input7_2 = load double, ptr getelementptr inbounds nuw (i8, ptr @input2_f64, i64 56), align 8
  %output7 = tail call double @llvm.maximumnum.f64(double %input7_1, double %input7_2)
  store double %output7, ptr getelementptr inbounds nuw (i8, ptr @output_f64, i64 56), align 8
  %input8_1 = load double, ptr getelementptr inbounds nuw (i8, ptr @input1_f64, i64 64), align 16
  %input8_2 = load double, ptr getelementptr inbounds nuw (i8, ptr @input2_f64, i64 64), align 16
  %output8 = tail call double @llvm.maximumnum.f64(double %input8_1, double %input8_2)
  store double %output8, ptr getelementptr inbounds nuw (i8, ptr @output_f64, i64 64), align 16
  ret void
}
declare double @llvm.maximumnum.f64(double, double)
define void @fmin16() {
; CHECK-LABEL: define void @fmin16(
; CHECK-SAME: ) #[[ATTR0]] {
; CHECK-NEXT: [[ENTRY:.*:]]
; CHECK-NEXT: [[TMP0:%.*]] = load half, ptr @input1_f16, align 16
; CHECK-NEXT: [[TMP1:%.*]] = load half, ptr @input2_f16, align 16
; CHECK-NEXT: [[TMP2:%.*]] = tail call half @llvm.minimumnum.f16(half [[TMP0]], half [[TMP1]])
; CHECK-NEXT: store half [[TMP2]], ptr @output_f16, align 16
; CHECK-NEXT: [[TMP3:%.*]] = load half, ptr getelementptr inbounds nuw (i8, ptr @input1_f16, i64 2), align 2
; CHECK-NEXT: [[TMP4:%.*]] = load half, ptr getelementptr inbounds nuw (i8, ptr @input2_f16, i64 2), align 2
; CHECK-NEXT: [[TMP5:%.*]] = tail call half @llvm.minimumnum.f16(half [[TMP3]], half [[TMP4]])
; CHECK-NEXT: store half [[TMP5]], ptr getelementptr inbounds nuw (i8, ptr @output_f16, i64 2), align 2
; CHECK-NEXT: [[TMP6:%.*]] = load half, ptr getelementptr inbounds nuw (i8, ptr @input1_f16, i64 4), align 4
; CHECK-NEXT: [[TMP7:%.*]] = load half, ptr getelementptr inbounds nuw (i8, ptr @input2_f16, i64 4), align 4
; CHECK-NEXT: [[TMP8:%.*]] = tail call half @llvm.minimumnum.f16(half [[TMP6]], half [[TMP7]])
; CHECK-NEXT: store half [[TMP8]], ptr getelementptr inbounds nuw (i8, ptr @output_f16, i64 4), align 4
; CHECK-NEXT: [[TMP9:%.*]] = load half, ptr getelementptr inbounds nuw (i8, ptr @input1_f16, i64 6), align 2
; CHECK-NEXT: [[TMP10:%.*]] = load half, ptr getelementptr inbounds nuw (i8, ptr @input2_f16, i64 6), align 2
; CHECK-NEXT: [[TMP11:%.*]] = tail call half @llvm.minimumnum.f16(half [[TMP9]], half [[TMP10]])
; CHECK-NEXT: store half [[TMP11]], ptr getelementptr inbounds nuw (i8, ptr @output_f16, i64 6), align 2
; CHECK-NEXT: [[TMP12:%.*]] = load half, ptr getelementptr inbounds nuw (i8, ptr @input1_f16, i64 8), align 8
; CHECK-NEXT: [[TMP13:%.*]] = load half, ptr getelementptr inbounds nuw (i8, ptr @input2_f16, i64 8), align 8
; CHECK-NEXT: [[TMP14:%.*]] = tail call half @llvm.minimumnum.f16(half [[TMP12]], half [[TMP13]])
; CHECK-NEXT: store half [[TMP14]], ptr getelementptr inbounds nuw (i8, ptr @output_f16, i64 8), align 8
; CHECK-NEXT: [[TMP15:%.*]] = load half, ptr getelementptr inbounds nuw (i8, ptr @input1_f16, i64 10), align 2
; CHECK-NEXT: [[TMP16:%.*]] = load half, ptr getelementptr inbounds nuw (i8, ptr @input2_f16, i64 10), align 2
; CHECK-NEXT: [[TMP17:%.*]] = tail call half @llvm.minimumnum.f16(half [[TMP15]], half [[TMP16]])
; CHECK-NEXT: store half [[TMP17]], ptr getelementptr inbounds nuw (i8, ptr @output_f16, i64 10), align 2
; CHECK-NEXT: [[TMP18:%.*]] = load half, ptr getelementptr inbounds nuw (i8, ptr @input1_f16, i64 12), align 4
; CHECK-NEXT: [[TMP19:%.*]] = load half, ptr getelementptr inbounds nuw (i8, ptr @input2_f16, i64 12), align 4
; CHECK-NEXT: [[TMP20:%.*]] = tail call half @llvm.minimumnum.f16(half [[TMP18]], half [[TMP19]])
; CHECK-NEXT: store half [[TMP20]], ptr getelementptr inbounds nuw (i8, ptr @output_f16, i64 12), align 4
; CHECK-NEXT: [[TMP21:%.*]] = load half, ptr getelementptr inbounds nuw (i8, ptr @input1_f16, i64 14), align 2
; CHECK-NEXT: [[TMP22:%.*]] = load half, ptr getelementptr inbounds nuw (i8, ptr @input2_f16, i64 14), align 2
; CHECK-NEXT: [[TMP23:%.*]] = tail call half @llvm.minimumnum.f16(half [[TMP21]], half [[TMP22]])
; CHECK-NEXT: store half [[TMP23]], ptr getelementptr inbounds nuw (i8, ptr @output_f16, i64 14), align 2
; CHECK-NEXT: [[TMP24:%.*]] = load half, ptr getelementptr inbounds nuw (i8, ptr @input1_f16, i64 16), align 16
; CHECK-NEXT: [[TMP25:%.*]] = load half, ptr getelementptr inbounds nuw (i8, ptr @input2_f16, i64 16), align 16
; CHECK-NEXT: [[TMP26:%.*]] = tail call half @llvm.minimumnum.f16(half [[TMP24]], half [[TMP25]])
; CHECK-NEXT: store half [[TMP26]], ptr getelementptr inbounds nuw (i8, ptr @output_f16, i64 16), align 16
; CHECK-NEXT: ret void
;
entry:
%input0_0 = load half, ptr @input1_f16, align 16
%input0_1 = load half, ptr @input2_f16, align 16
%output0 = tail call half @llvm.minimumnum.f16(half %input0_0, half %input0_1)
store half %output0, ptr @output_f16, align 16
%input1_1 = load half, ptr getelementptr inbounds nuw (i8, ptr @input1_f16, i64 2), align 2
%input1_2 = load half, ptr getelementptr inbounds nuw (i8, ptr @input2_f16, i64 2), align 2
%output1 = tail call half @llvm.minimumnum.f16(half %input1_1, half %input1_2)
store half %output1, ptr getelementptr inbounds nuw (i8, ptr @output_f16, i64 2), align 2
%input2_1 = load half, ptr getelementptr inbounds nuw (i8, ptr @input1_f16, i64 4), align 4
%input2_2 = load half, ptr getelementptr inbounds nuw (i8, ptr @input2_f16, i64 4), align 4
%output2 = tail call half @llvm.minimumnum.f16(half %input2_1, half %input2_2)
store half %output2, ptr getelementptr inbounds nuw (i8, ptr @output_f16, i64 4), align 4
%input3_1 = load half, ptr getelementptr inbounds nuw (i8, ptr @input1_f16, i64 6), align 2
%input3_2 = load half, ptr getelementptr inbounds nuw (i8, ptr @input2_f16, i64 6), align 2
%output3 = tail call half @llvm.minimumnum.f16(half %input3_1, half %input3_2)
store half %output3, ptr getelementptr inbounds nuw (i8, ptr @output_f16, i64 6), align 2
%input4_1 = load half, ptr getelementptr inbounds nuw (i8, ptr @input1_f16, i64 8), align 8
%input4_2 = load half, ptr getelementptr inbounds nuw (i8, ptr @input2_f16, i64 8), align 8
%output4 = tail call half @llvm.minimumnum.f16(half %input4_1, half %input4_2)
store half %output4, ptr getelementptr inbounds nuw (i8, ptr @output_f16, i64 8), align 8
%input5_1 = load half, ptr getelementptr inbounds nuw (i8, ptr @input1_f16, i64 10), align 2
%input5_2 = load half, ptr getelementptr inbounds nuw (i8, ptr @input2_f16, i64 10), align 2
%output5 = tail call half @llvm.minimumnum.f16(half %input5_1, half %input5_2)
store half %output5, ptr getelementptr inbounds nuw (i8, ptr @output_f16, i64 10), align 2
%input6_1 = load half, ptr getelementptr inbounds nuw (i8, ptr @input1_f16, i64 12), align 4
%input6_2 = load half, ptr getelementptr inbounds nuw (i8, ptr @input2_f16, i64 12), align 4
%output6 = tail call half @llvm.minimumnum.f16(half %input6_1, half %input6_2)
store half %output6, ptr getelementptr inbounds nuw (i8, ptr @output_f16, i64 12), align 4
%input7_1 = load half, ptr getelementptr inbounds nuw (i8, ptr @input1_f16, i64 14), align 2
%input7_2 = load half, ptr getelementptr inbounds nuw (i8, ptr @input2_f16, i64 14), align 2
%output7 = tail call half @llvm.minimumnum.f16(half %input7_1, half %input7_2)
store half %output7, ptr getelementptr inbounds nuw (i8, ptr @output_f16, i64 14), align 2
%input8_1 = load half, ptr getelementptr inbounds nuw (i8, ptr @input1_f16, i64 16), align 16
%input8_2 = load half, ptr getelementptr inbounds nuw (i8, ptr @input2_f16, i64 16), align 16
%output8 = tail call half @llvm.minimumnum.f16(half %input8_1, half %input8_2)
store half %output8, ptr getelementptr inbounds nuw (i8, ptr @output_f16, i64 16), align 16
ret void
}
; Scalar f16 minimumnum intrinsic exercised by the tests in this file.
declare half @llvm.minimumnum.f16(half, half)
; fmax16: fully unrolled element-wise maximumnum over the nine half elements
; of @input1_f16 / @input2_f16, storing results to @output_f16.
; The autogenerated CHECK lines show all nine @llvm.maximumnum.f16 calls are
; still scalar, i.e. the vectorizer does not yet combine them into vector ops.
define void @fmax16() {
; CHECK-LABEL: define void @fmax16(
; CHECK-SAME: ) #[[ATTR0]] {
; CHECK-NEXT: [[ENTRY:.*:]]
; CHECK-NEXT: [[TMP0:%.*]] = load half, ptr @input1_f16, align 16
; CHECK-NEXT: [[TMP1:%.*]] = load half, ptr @input2_f16, align 16
; CHECK-NEXT: [[TMP2:%.*]] = tail call half @llvm.maximumnum.f16(half [[TMP0]], half [[TMP1]])
; CHECK-NEXT: store half [[TMP2]], ptr @output_f16, align 16
; CHECK-NEXT: [[TMP3:%.*]] = load half, ptr getelementptr inbounds nuw (i8, ptr @input1_f16, i64 2), align 2
; CHECK-NEXT: [[TMP4:%.*]] = load half, ptr getelementptr inbounds nuw (i8, ptr @input2_f16, i64 2), align 2
; CHECK-NEXT: [[TMP5:%.*]] = tail call half @llvm.maximumnum.f16(half [[TMP3]], half [[TMP4]])
; CHECK-NEXT: store half [[TMP5]], ptr getelementptr inbounds nuw (i8, ptr @output_f16, i64 2), align 2
; CHECK-NEXT: [[TMP6:%.*]] = load half, ptr getelementptr inbounds nuw (i8, ptr @input1_f16, i64 4), align 4
; CHECK-NEXT: [[TMP7:%.*]] = load half, ptr getelementptr inbounds nuw (i8, ptr @input2_f16, i64 4), align 4
; CHECK-NEXT: [[TMP8:%.*]] = tail call half @llvm.maximumnum.f16(half [[TMP6]], half [[TMP7]])
; CHECK-NEXT: store half [[TMP8]], ptr getelementptr inbounds nuw (i8, ptr @output_f16, i64 4), align 4
; CHECK-NEXT: [[TMP9:%.*]] = load half, ptr getelementptr inbounds nuw (i8, ptr @input1_f16, i64 6), align 2
; CHECK-NEXT: [[TMP10:%.*]] = load half, ptr getelementptr inbounds nuw (i8, ptr @input2_f16, i64 6), align 2
; CHECK-NEXT: [[TMP11:%.*]] = tail call half @llvm.maximumnum.f16(half [[TMP9]], half [[TMP10]])
; CHECK-NEXT: store half [[TMP11]], ptr getelementptr inbounds nuw (i8, ptr @output_f16, i64 6), align 2
; CHECK-NEXT: [[TMP12:%.*]] = load half, ptr getelementptr inbounds nuw (i8, ptr @input1_f16, i64 8), align 8
; CHECK-NEXT: [[TMP13:%.*]] = load half, ptr getelementptr inbounds nuw (i8, ptr @input2_f16, i64 8), align 8
; CHECK-NEXT: [[TMP14:%.*]] = tail call half @llvm.maximumnum.f16(half [[TMP12]], half [[TMP13]])
; CHECK-NEXT: store half [[TMP14]], ptr getelementptr inbounds nuw (i8, ptr @output_f16, i64 8), align 8
; CHECK-NEXT: [[TMP15:%.*]] = load half, ptr getelementptr inbounds nuw (i8, ptr @input1_f16, i64 10), align 2
; CHECK-NEXT: [[TMP16:%.*]] = load half, ptr getelementptr inbounds nuw (i8, ptr @input2_f16, i64 10), align 2
; CHECK-NEXT: [[TMP17:%.*]] = tail call half @llvm.maximumnum.f16(half [[TMP15]], half [[TMP16]])
; CHECK-NEXT: store half [[TMP17]], ptr getelementptr inbounds nuw (i8, ptr @output_f16, i64 10), align 2
; CHECK-NEXT: [[TMP18:%.*]] = load half, ptr getelementptr inbounds nuw (i8, ptr @input1_f16, i64 12), align 4
; CHECK-NEXT: [[TMP19:%.*]] = load half, ptr getelementptr inbounds nuw (i8, ptr @input2_f16, i64 12), align 4
; CHECK-NEXT: [[TMP20:%.*]] = tail call half @llvm.maximumnum.f16(half [[TMP18]], half [[TMP19]])
; CHECK-NEXT: store half [[TMP20]], ptr getelementptr inbounds nuw (i8, ptr @output_f16, i64 12), align 4
; CHECK-NEXT: [[TMP21:%.*]] = load half, ptr getelementptr inbounds nuw (i8, ptr @input1_f16, i64 14), align 2
; CHECK-NEXT: [[TMP22:%.*]] = load half, ptr getelementptr inbounds nuw (i8, ptr @input2_f16, i64 14), align 2
; CHECK-NEXT: [[TMP23:%.*]] = tail call half @llvm.maximumnum.f16(half [[TMP21]], half [[TMP22]])
; CHECK-NEXT: store half [[TMP23]], ptr getelementptr inbounds nuw (i8, ptr @output_f16, i64 14), align 2
; CHECK-NEXT: [[TMP24:%.*]] = load half, ptr getelementptr inbounds nuw (i8, ptr @input1_f16, i64 16), align 16
; CHECK-NEXT: [[TMP25:%.*]] = load half, ptr getelementptr inbounds nuw (i8, ptr @input2_f16, i64 16), align 16
; CHECK-NEXT: [[TMP26:%.*]] = tail call half @llvm.maximumnum.f16(half [[TMP24]], half [[TMP25]])
; CHECK-NEXT: store half [[TMP26]], ptr getelementptr inbounds nuw (i8, ptr @output_f16, i64 16), align 16
; CHECK-NEXT: ret void
;
entry:
; Unrolled loop body: output[i] = maximumnum(input1[i], input2[i]) for
; i = 0..8; byte offsets advance by 2 (sizeof(half)), and the alignment on
; each access reflects the offset modulo the arrays' 16-byte base alignment.
%input0_0 = load half, ptr @input1_f16, align 16
%input0_1 = load half, ptr @input2_f16, align 16
%output0 = tail call half @llvm.maximumnum.f16(half %input0_0, half %input0_1)
store half %output0, ptr @output_f16, align 16
%input1_1 = load half, ptr getelementptr inbounds nuw (i8, ptr @input1_f16, i64 2), align 2
%input1_2 = load half, ptr getelementptr inbounds nuw (i8, ptr @input2_f16, i64 2), align 2
%output1 = tail call half @llvm.maximumnum.f16(half %input1_1, half %input1_2)
store half %output1, ptr getelementptr inbounds nuw (i8, ptr @output_f16, i64 2), align 2
%input2_1 = load half, ptr getelementptr inbounds nuw (i8, ptr @input1_f16, i64 4), align 4
%input2_2 = load half, ptr getelementptr inbounds nuw (i8, ptr @input2_f16, i64 4), align 4
%output2 = tail call half @llvm.maximumnum.f16(half %input2_1, half %input2_2)
store half %output2, ptr getelementptr inbounds nuw (i8, ptr @output_f16, i64 4), align 4
%input3_1 = load half, ptr getelementptr inbounds nuw (i8, ptr @input1_f16, i64 6), align 2
%input3_2 = load half, ptr getelementptr inbounds nuw (i8, ptr @input2_f16, i64 6), align 2
%output3 = tail call half @llvm.maximumnum.f16(half %input3_1, half %input3_2)
store half %output3, ptr getelementptr inbounds nuw (i8, ptr @output_f16, i64 6), align 2
%input4_1 = load half, ptr getelementptr inbounds nuw (i8, ptr @input1_f16, i64 8), align 8
%input4_2 = load half, ptr getelementptr inbounds nuw (i8, ptr @input2_f16, i64 8), align 8
%output4 = tail call half @llvm.maximumnum.f16(half %input4_1, half %input4_2)
store half %output4, ptr getelementptr inbounds nuw (i8, ptr @output_f16, i64 8), align 8
%input5_1 = load half, ptr getelementptr inbounds nuw (i8, ptr @input1_f16, i64 10), align 2
%input5_2 = load half, ptr getelementptr inbounds nuw (i8, ptr @input2_f16, i64 10), align 2
%output5 = tail call half @llvm.maximumnum.f16(half %input5_1, half %input5_2)
store half %output5, ptr getelementptr inbounds nuw (i8, ptr @output_f16, i64 10), align 2
%input6_1 = load half, ptr getelementptr inbounds nuw (i8, ptr @input1_f16, i64 12), align 4
%input6_2 = load half, ptr getelementptr inbounds nuw (i8, ptr @input2_f16, i64 12), align 4
%output6 = tail call half @llvm.maximumnum.f16(half %input6_1, half %input6_2)
store half %output6, ptr getelementptr inbounds nuw (i8, ptr @output_f16, i64 12), align 4
%input7_1 = load half, ptr getelementptr inbounds nuw (i8, ptr @input1_f16, i64 14), align 2
%input7_2 = load half, ptr getelementptr inbounds nuw (i8, ptr @input2_f16, i64 14), align 2
%output7 = tail call half @llvm.maximumnum.f16(half %input7_1, half %input7_2)
store half %output7, ptr getelementptr inbounds nuw (i8, ptr @output_f16, i64 14), align 2
%input8_1 = load half, ptr getelementptr inbounds nuw (i8, ptr @input1_f16, i64 16), align 16
%input8_2 = load half, ptr getelementptr inbounds nuw (i8, ptr @input2_f16, i64 16), align 16
%output8 = tail call half @llvm.maximumnum.f16(half %input8_1, half %input8_2)
store half %output8, ptr getelementptr inbounds nuw (i8, ptr @output_f16, i64 16), align 16
ret void
}
; Scalar f16 maximumnum intrinsic exercised by the tests in this file.
declare half @llvm.maximumnum.f16(half, half)

View File

@@ -0,0 +1,510 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
; RUN: opt --passes=slp-vectorizer --mtriple=x86_64 -S < %s | FileCheck %s
; Nine-element arrays (an odd, non-power-of-two count) used as the inputs and
; outputs of the fmin*/fmax* tests in this file, one triple per element type.
@input1_f32 = global [9 x float] zeroinitializer, align 16
@input2_f32 = global [9 x float] zeroinitializer, align 16
@output_f32 = global [9 x float] zeroinitializer, align 16
@input1_f64 = global [9 x double] zeroinitializer, align 16
@input2_f64 = global [9 x double] zeroinitializer, align 16
@output_f64 = global [9 x double] zeroinitializer, align 16
@input1_f16 = global [9 x half] zeroinitializer, align 16
@input2_f16 = global [9 x half] zeroinitializer, align 16
@output_f16 = global [9 x half] zeroinitializer, align 16
; fmin32: fully unrolled element-wise minimumnum over the nine float elements
; of @input1_f32 / @input2_f32, storing results to @output_f32.
; The autogenerated CHECK lines show all nine @llvm.minimumnum.f32 calls are
; still scalar, i.e. the SLP vectorizer does not yet combine them.
define void @fmin32() {
; CHECK-LABEL: define void @fmin32() {
; CHECK-NEXT: [[ENTRY:.*:]]
; CHECK-NEXT: [[TMP0:%.*]] = load float, ptr @input1_f32, align 16
; CHECK-NEXT: [[TMP1:%.*]] = load float, ptr @input2_f32, align 16
; CHECK-NEXT: [[TMP2:%.*]] = tail call float @llvm.minimumnum.f32(float [[TMP0]], float [[TMP1]])
; CHECK-NEXT: store float [[TMP2]], ptr @output_f32, align 16
; CHECK-NEXT: [[TMP3:%.*]] = load float, ptr getelementptr inbounds nuw (i8, ptr @input1_f32, i64 4), align 4
; CHECK-NEXT: [[TMP4:%.*]] = load float, ptr getelementptr inbounds nuw (i8, ptr @input2_f32, i64 4), align 4
; CHECK-NEXT: [[TMP5:%.*]] = tail call float @llvm.minimumnum.f32(float [[TMP3]], float [[TMP4]])
; CHECK-NEXT: store float [[TMP5]], ptr getelementptr inbounds nuw (i8, ptr @output_f32, i64 4), align 4
; CHECK-NEXT: [[TMP6:%.*]] = load float, ptr getelementptr inbounds nuw (i8, ptr @input1_f32, i64 8), align 8
; CHECK-NEXT: [[TMP7:%.*]] = load float, ptr getelementptr inbounds nuw (i8, ptr @input2_f32, i64 8), align 8
; CHECK-NEXT: [[TMP8:%.*]] = tail call float @llvm.minimumnum.f32(float [[TMP6]], float [[TMP7]])
; CHECK-NEXT: store float [[TMP8]], ptr getelementptr inbounds nuw (i8, ptr @output_f32, i64 8), align 8
; CHECK-NEXT: [[TMP9:%.*]] = load float, ptr getelementptr inbounds nuw (i8, ptr @input1_f32, i64 12), align 4
; CHECK-NEXT: [[TMP10:%.*]] = load float, ptr getelementptr inbounds nuw (i8, ptr @input2_f32, i64 12), align 4
; CHECK-NEXT: [[TMP11:%.*]] = tail call float @llvm.minimumnum.f32(float [[TMP9]], float [[TMP10]])
; CHECK-NEXT: store float [[TMP11]], ptr getelementptr inbounds nuw (i8, ptr @output_f32, i64 12), align 4
; CHECK-NEXT: [[TMP12:%.*]] = load float, ptr getelementptr inbounds nuw (i8, ptr @input1_f32, i64 16), align 16
; CHECK-NEXT: [[TMP13:%.*]] = load float, ptr getelementptr inbounds nuw (i8, ptr @input2_f32, i64 16), align 16
; CHECK-NEXT: [[TMP14:%.*]] = tail call float @llvm.minimumnum.f32(float [[TMP12]], float [[TMP13]])
; CHECK-NEXT: store float [[TMP14]], ptr getelementptr inbounds nuw (i8, ptr @output_f32, i64 16), align 16
; CHECK-NEXT: [[TMP15:%.*]] = load float, ptr getelementptr inbounds nuw (i8, ptr @input1_f32, i64 20), align 4
; CHECK-NEXT: [[TMP16:%.*]] = load float, ptr getelementptr inbounds nuw (i8, ptr @input2_f32, i64 20), align 4
; CHECK-NEXT: [[TMP17:%.*]] = tail call float @llvm.minimumnum.f32(float [[TMP15]], float [[TMP16]])
; CHECK-NEXT: store float [[TMP17]], ptr getelementptr inbounds nuw (i8, ptr @output_f32, i64 20), align 4
; CHECK-NEXT: [[TMP18:%.*]] = load float, ptr getelementptr inbounds nuw (i8, ptr @input1_f32, i64 24), align 8
; CHECK-NEXT: [[TMP19:%.*]] = load float, ptr getelementptr inbounds nuw (i8, ptr @input2_f32, i64 24), align 8
; CHECK-NEXT: [[TMP20:%.*]] = tail call float @llvm.minimumnum.f32(float [[TMP18]], float [[TMP19]])
; CHECK-NEXT: store float [[TMP20]], ptr getelementptr inbounds nuw (i8, ptr @output_f32, i64 24), align 8
; CHECK-NEXT: [[TMP21:%.*]] = load float, ptr getelementptr inbounds nuw (i8, ptr @input1_f32, i64 28), align 4
; CHECK-NEXT: [[TMP22:%.*]] = load float, ptr getelementptr inbounds nuw (i8, ptr @input2_f32, i64 28), align 4
; CHECK-NEXT: [[TMP23:%.*]] = tail call float @llvm.minimumnum.f32(float [[TMP21]], float [[TMP22]])
; CHECK-NEXT: store float [[TMP23]], ptr getelementptr inbounds nuw (i8, ptr @output_f32, i64 28), align 4
; CHECK-NEXT: [[TMP24:%.*]] = load float, ptr getelementptr inbounds nuw (i8, ptr @input1_f32, i64 32), align 16
; CHECK-NEXT: [[TMP25:%.*]] = load float, ptr getelementptr inbounds nuw (i8, ptr @input2_f32, i64 32), align 16
; CHECK-NEXT: [[TMP26:%.*]] = tail call float @llvm.minimumnum.f32(float [[TMP24]], float [[TMP25]])
; CHECK-NEXT: store float [[TMP26]], ptr getelementptr inbounds nuw (i8, ptr @output_f32, i64 32), align 16
; CHECK-NEXT: ret void
;
entry:
; Unrolled loop body: output[i] = minimumnum(input1[i], input2[i]) for
; i = 0..8; byte offsets advance by 4 (sizeof(float)), and the alignment on
; each access reflects the offset modulo the arrays' 16-byte base alignment.
%input0_0 = load float, ptr @input1_f32, align 16
%input0_1 = load float, ptr @input2_f32, align 16
%output0 = tail call float @llvm.minimumnum.f32(float %input0_0, float %input0_1)
store float %output0, ptr @output_f32, align 16
%input1_1 = load float, ptr getelementptr inbounds nuw (i8, ptr @input1_f32, i64 4), align 4
%input1_2 = load float, ptr getelementptr inbounds nuw (i8, ptr @input2_f32, i64 4), align 4
%output1 = tail call float @llvm.minimumnum.f32(float %input1_1, float %input1_2)
store float %output1, ptr getelementptr inbounds nuw (i8, ptr @output_f32, i64 4), align 4
%input2_1 = load float, ptr getelementptr inbounds nuw (i8, ptr @input1_f32, i64 8), align 8
%input2_2 = load float, ptr getelementptr inbounds nuw (i8, ptr @input2_f32, i64 8), align 8
%output2 = tail call float @llvm.minimumnum.f32(float %input2_1, float %input2_2)
store float %output2, ptr getelementptr inbounds nuw (i8, ptr @output_f32, i64 8), align 8
%input3_1 = load float, ptr getelementptr inbounds nuw (i8, ptr @input1_f32, i64 12), align 4
%input3_2 = load float, ptr getelementptr inbounds nuw (i8, ptr @input2_f32, i64 12), align 4
%output3 = tail call float @llvm.minimumnum.f32(float %input3_1, float %input3_2)
store float %output3, ptr getelementptr inbounds nuw (i8, ptr @output_f32, i64 12), align 4
%input4_1 = load float, ptr getelementptr inbounds nuw (i8, ptr @input1_f32, i64 16), align 16
%input4_2 = load float, ptr getelementptr inbounds nuw (i8, ptr @input2_f32, i64 16), align 16
%output4 = tail call float @llvm.minimumnum.f32(float %input4_1, float %input4_2)
store float %output4, ptr getelementptr inbounds nuw (i8, ptr @output_f32, i64 16), align 16
%input5_1 = load float, ptr getelementptr inbounds nuw (i8, ptr @input1_f32, i64 20), align 4
%input5_2 = load float, ptr getelementptr inbounds nuw (i8, ptr @input2_f32, i64 20), align 4
%output5 = tail call float @llvm.minimumnum.f32(float %input5_1, float %input5_2)
store float %output5, ptr getelementptr inbounds nuw (i8, ptr @output_f32, i64 20), align 4
%input6_1 = load float, ptr getelementptr inbounds nuw (i8, ptr @input1_f32, i64 24), align 8
%input6_2 = load float, ptr getelementptr inbounds nuw (i8, ptr @input2_f32, i64 24), align 8
%output6 = tail call float @llvm.minimumnum.f32(float %input6_1, float %input6_2)
store float %output6, ptr getelementptr inbounds nuw (i8, ptr @output_f32, i64 24), align 8
%input7_1 = load float, ptr getelementptr inbounds nuw (i8, ptr @input1_f32, i64 28), align 4
%input7_2 = load float, ptr getelementptr inbounds nuw (i8, ptr @input2_f32, i64 28), align 4
%output7 = tail call float @llvm.minimumnum.f32(float %input7_1, float %input7_2)
store float %output7, ptr getelementptr inbounds nuw (i8, ptr @output_f32, i64 28), align 4
%input8_1 = load float, ptr getelementptr inbounds nuw (i8, ptr @input1_f32, i64 32), align 16
%input8_2 = load float, ptr getelementptr inbounds nuw (i8, ptr @input2_f32, i64 32), align 16
%output8 = tail call float @llvm.minimumnum.f32(float %input8_1, float %input8_2)
store float %output8, ptr getelementptr inbounds nuw (i8, ptr @output_f32, i64 32), align 16
ret void
}
; Scalar f32 minimumnum intrinsic exercised by the tests in this file.
declare float @llvm.minimumnum.f32(float, float)
; fmax32: fully unrolled element-wise maximumnum over the nine float elements
; of @input1_f32 / @input2_f32, storing results to @output_f32.
; The autogenerated CHECK lines show all nine @llvm.maximumnum.f32 calls are
; still scalar, i.e. the SLP vectorizer does not yet combine them.
define void @fmax32() {
; CHECK-LABEL: define void @fmax32() {
; CHECK-NEXT: [[ENTRY:.*:]]
; CHECK-NEXT: [[TMP0:%.*]] = load float, ptr @input1_f32, align 16
; CHECK-NEXT: [[TMP1:%.*]] = load float, ptr @input2_f32, align 16
; CHECK-NEXT: [[TMP2:%.*]] = tail call float @llvm.maximumnum.f32(float [[TMP0]], float [[TMP1]])
; CHECK-NEXT: store float [[TMP2]], ptr @output_f32, align 16
; CHECK-NEXT: [[TMP3:%.*]] = load float, ptr getelementptr inbounds nuw (i8, ptr @input1_f32, i64 4), align 4
; CHECK-NEXT: [[TMP4:%.*]] = load float, ptr getelementptr inbounds nuw (i8, ptr @input2_f32, i64 4), align 4
; CHECK-NEXT: [[TMP5:%.*]] = tail call float @llvm.maximumnum.f32(float [[TMP3]], float [[TMP4]])
; CHECK-NEXT: store float [[TMP5]], ptr getelementptr inbounds nuw (i8, ptr @output_f32, i64 4), align 4
; CHECK-NEXT: [[TMP6:%.*]] = load float, ptr getelementptr inbounds nuw (i8, ptr @input1_f32, i64 8), align 8
; CHECK-NEXT: [[TMP7:%.*]] = load float, ptr getelementptr inbounds nuw (i8, ptr @input2_f32, i64 8), align 8
; CHECK-NEXT: [[TMP8:%.*]] = tail call float @llvm.maximumnum.f32(float [[TMP6]], float [[TMP7]])
; CHECK-NEXT: store float [[TMP8]], ptr getelementptr inbounds nuw (i8, ptr @output_f32, i64 8), align 8
; CHECK-NEXT: [[TMP9:%.*]] = load float, ptr getelementptr inbounds nuw (i8, ptr @input1_f32, i64 12), align 4
; CHECK-NEXT: [[TMP10:%.*]] = load float, ptr getelementptr inbounds nuw (i8, ptr @input2_f32, i64 12), align 4
; CHECK-NEXT: [[TMP11:%.*]] = tail call float @llvm.maximumnum.f32(float [[TMP9]], float [[TMP10]])
; CHECK-NEXT: store float [[TMP11]], ptr getelementptr inbounds nuw (i8, ptr @output_f32, i64 12), align 4
; CHECK-NEXT: [[TMP12:%.*]] = load float, ptr getelementptr inbounds nuw (i8, ptr @input1_f32, i64 16), align 16
; CHECK-NEXT: [[TMP13:%.*]] = load float, ptr getelementptr inbounds nuw (i8, ptr @input2_f32, i64 16), align 16
; CHECK-NEXT: [[TMP14:%.*]] = tail call float @llvm.maximumnum.f32(float [[TMP12]], float [[TMP13]])
; CHECK-NEXT: store float [[TMP14]], ptr getelementptr inbounds nuw (i8, ptr @output_f32, i64 16), align 16
; CHECK-NEXT: [[TMP15:%.*]] = load float, ptr getelementptr inbounds nuw (i8, ptr @input1_f32, i64 20), align 4
; CHECK-NEXT: [[TMP16:%.*]] = load float, ptr getelementptr inbounds nuw (i8, ptr @input2_f32, i64 20), align 4
; CHECK-NEXT: [[TMP17:%.*]] = tail call float @llvm.maximumnum.f32(float [[TMP15]], float [[TMP16]])
; CHECK-NEXT: store float [[TMP17]], ptr getelementptr inbounds nuw (i8, ptr @output_f32, i64 20), align 4
; CHECK-NEXT: [[TMP18:%.*]] = load float, ptr getelementptr inbounds nuw (i8, ptr @input1_f32, i64 24), align 8
; CHECK-NEXT: [[TMP19:%.*]] = load float, ptr getelementptr inbounds nuw (i8, ptr @input2_f32, i64 24), align 8
; CHECK-NEXT: [[TMP20:%.*]] = tail call float @llvm.maximumnum.f32(float [[TMP18]], float [[TMP19]])
; CHECK-NEXT: store float [[TMP20]], ptr getelementptr inbounds nuw (i8, ptr @output_f32, i64 24), align 8
; CHECK-NEXT: [[TMP21:%.*]] = load float, ptr getelementptr inbounds nuw (i8, ptr @input1_f32, i64 28), align 4
; CHECK-NEXT: [[TMP22:%.*]] = load float, ptr getelementptr inbounds nuw (i8, ptr @input2_f32, i64 28), align 4
; CHECK-NEXT: [[TMP23:%.*]] = tail call float @llvm.maximumnum.f32(float [[TMP21]], float [[TMP22]])
; CHECK-NEXT: store float [[TMP23]], ptr getelementptr inbounds nuw (i8, ptr @output_f32, i64 28), align 4
; CHECK-NEXT: [[TMP24:%.*]] = load float, ptr getelementptr inbounds nuw (i8, ptr @input1_f32, i64 32), align 16
; CHECK-NEXT: [[TMP25:%.*]] = load float, ptr getelementptr inbounds nuw (i8, ptr @input2_f32, i64 32), align 16
; CHECK-NEXT: [[TMP26:%.*]] = tail call float @llvm.maximumnum.f32(float [[TMP24]], float [[TMP25]])
; CHECK-NEXT: store float [[TMP26]], ptr getelementptr inbounds nuw (i8, ptr @output_f32, i64 32), align 16
; CHECK-NEXT: ret void
;
entry:
; Unrolled loop body: output[i] = maximumnum(input1[i], input2[i]) for
; i = 0..8; byte offsets advance by 4 (sizeof(float)), and the alignment on
; each access reflects the offset modulo the arrays' 16-byte base alignment.
%input0_0 = load float, ptr @input1_f32, align 16
%input0_1 = load float, ptr @input2_f32, align 16
%output0 = tail call float @llvm.maximumnum.f32(float %input0_0, float %input0_1)
store float %output0, ptr @output_f32, align 16
%input1_1 = load float, ptr getelementptr inbounds nuw (i8, ptr @input1_f32, i64 4), align 4
%input1_2 = load float, ptr getelementptr inbounds nuw (i8, ptr @input2_f32, i64 4), align 4
%output1 = tail call float @llvm.maximumnum.f32(float %input1_1, float %input1_2)
store float %output1, ptr getelementptr inbounds nuw (i8, ptr @output_f32, i64 4), align 4
%input2_1 = load float, ptr getelementptr inbounds nuw (i8, ptr @input1_f32, i64 8), align 8
%input2_2 = load float, ptr getelementptr inbounds nuw (i8, ptr @input2_f32, i64 8), align 8
%output2 = tail call float @llvm.maximumnum.f32(float %input2_1, float %input2_2)
store float %output2, ptr getelementptr inbounds nuw (i8, ptr @output_f32, i64 8), align 8
%input3_1 = load float, ptr getelementptr inbounds nuw (i8, ptr @input1_f32, i64 12), align 4
%input3_2 = load float, ptr getelementptr inbounds nuw (i8, ptr @input2_f32, i64 12), align 4
%output3 = tail call float @llvm.maximumnum.f32(float %input3_1, float %input3_2)
store float %output3, ptr getelementptr inbounds nuw (i8, ptr @output_f32, i64 12), align 4
%input4_1 = load float, ptr getelementptr inbounds nuw (i8, ptr @input1_f32, i64 16), align 16
%input4_2 = load float, ptr getelementptr inbounds nuw (i8, ptr @input2_f32, i64 16), align 16
%output4 = tail call float @llvm.maximumnum.f32(float %input4_1, float %input4_2)
store float %output4, ptr getelementptr inbounds nuw (i8, ptr @output_f32, i64 16), align 16
%input5_1 = load float, ptr getelementptr inbounds nuw (i8, ptr @input1_f32, i64 20), align 4
%input5_2 = load float, ptr getelementptr inbounds nuw (i8, ptr @input2_f32, i64 20), align 4
%output5 = tail call float @llvm.maximumnum.f32(float %input5_1, float %input5_2)
store float %output5, ptr getelementptr inbounds nuw (i8, ptr @output_f32, i64 20), align 4
%input6_1 = load float, ptr getelementptr inbounds nuw (i8, ptr @input1_f32, i64 24), align 8
%input6_2 = load float, ptr getelementptr inbounds nuw (i8, ptr @input2_f32, i64 24), align 8
%output6 = tail call float @llvm.maximumnum.f32(float %input6_1, float %input6_2)
store float %output6, ptr getelementptr inbounds nuw (i8, ptr @output_f32, i64 24), align 8
%input7_1 = load float, ptr getelementptr inbounds nuw (i8, ptr @input1_f32, i64 28), align 4
%input7_2 = load float, ptr getelementptr inbounds nuw (i8, ptr @input2_f32, i64 28), align 4
%output7 = tail call float @llvm.maximumnum.f32(float %input7_1, float %input7_2)
store float %output7, ptr getelementptr inbounds nuw (i8, ptr @output_f32, i64 28), align 4
%input8_1 = load float, ptr getelementptr inbounds nuw (i8, ptr @input1_f32, i64 32), align 16
%input8_2 = load float, ptr getelementptr inbounds nuw (i8, ptr @input2_f32, i64 32), align 16
%output8 = tail call float @llvm.maximumnum.f32(float %input8_1, float %input8_2)
store float %output8, ptr getelementptr inbounds nuw (i8, ptr @output_f32, i64 32), align 16
ret void
}
; Scalar f32 maximumnum intrinsic exercised by the tests in this file.
declare float @llvm.maximumnum.f32(float, float)
; fmin64: fully unrolled element-wise minimumnum over the nine double elements
; of @input1_f64 / @input2_f64, storing results to @output_f64.
; The autogenerated CHECK lines show all nine @llvm.minimumnum.f64 calls are
; still scalar, i.e. the SLP vectorizer does not yet combine them.
define void @fmin64() {
; CHECK-LABEL: define void @fmin64() {
; CHECK-NEXT: [[ENTRY:.*:]]
; CHECK-NEXT: [[TMP0:%.*]] = load double, ptr @input1_f64, align 16
; CHECK-NEXT: [[TMP1:%.*]] = load double, ptr @input2_f64, align 16
; CHECK-NEXT: [[TMP2:%.*]] = tail call double @llvm.minimumnum.f64(double [[TMP0]], double [[TMP1]])
; CHECK-NEXT: store double [[TMP2]], ptr @output_f64, align 16
; CHECK-NEXT: [[TMP3:%.*]] = load double, ptr getelementptr inbounds nuw (i8, ptr @input1_f64, i64 8), align 8
; CHECK-NEXT: [[TMP4:%.*]] = load double, ptr getelementptr inbounds nuw (i8, ptr @input2_f64, i64 8), align 8
; CHECK-NEXT: [[TMP5:%.*]] = tail call double @llvm.minimumnum.f64(double [[TMP3]], double [[TMP4]])
; CHECK-NEXT: store double [[TMP5]], ptr getelementptr inbounds nuw (i8, ptr @output_f64, i64 8), align 8
; CHECK-NEXT: [[TMP6:%.*]] = load double, ptr getelementptr inbounds nuw (i8, ptr @input1_f64, i64 16), align 16
; CHECK-NEXT: [[TMP7:%.*]] = load double, ptr getelementptr inbounds nuw (i8, ptr @input2_f64, i64 16), align 16
; CHECK-NEXT: [[TMP8:%.*]] = tail call double @llvm.minimumnum.f64(double [[TMP6]], double [[TMP7]])
; CHECK-NEXT: store double [[TMP8]], ptr getelementptr inbounds nuw (i8, ptr @output_f64, i64 16), align 16
; CHECK-NEXT: [[TMP9:%.*]] = load double, ptr getelementptr inbounds nuw (i8, ptr @input1_f64, i64 24), align 8
; CHECK-NEXT: [[TMP10:%.*]] = load double, ptr getelementptr inbounds nuw (i8, ptr @input2_f64, i64 24), align 8
; CHECK-NEXT: [[TMP11:%.*]] = tail call double @llvm.minimumnum.f64(double [[TMP9]], double [[TMP10]])
; CHECK-NEXT: store double [[TMP11]], ptr getelementptr inbounds nuw (i8, ptr @output_f64, i64 24), align 8
; CHECK-NEXT: [[TMP12:%.*]] = load double, ptr getelementptr inbounds nuw (i8, ptr @input1_f64, i64 32), align 16
; CHECK-NEXT: [[TMP13:%.*]] = load double, ptr getelementptr inbounds nuw (i8, ptr @input2_f64, i64 32), align 16
; CHECK-NEXT: [[TMP14:%.*]] = tail call double @llvm.minimumnum.f64(double [[TMP12]], double [[TMP13]])
; CHECK-NEXT: store double [[TMP14]], ptr getelementptr inbounds nuw (i8, ptr @output_f64, i64 32), align 16
; CHECK-NEXT: [[TMP15:%.*]] = load double, ptr getelementptr inbounds nuw (i8, ptr @input1_f64, i64 40), align 8
; CHECK-NEXT: [[TMP16:%.*]] = load double, ptr getelementptr inbounds nuw (i8, ptr @input2_f64, i64 40), align 8
; CHECK-NEXT: [[TMP17:%.*]] = tail call double @llvm.minimumnum.f64(double [[TMP15]], double [[TMP16]])
; CHECK-NEXT: store double [[TMP17]], ptr getelementptr inbounds nuw (i8, ptr @output_f64, i64 40), align 8
; CHECK-NEXT: [[TMP18:%.*]] = load double, ptr getelementptr inbounds nuw (i8, ptr @input1_f64, i64 48), align 16
; CHECK-NEXT: [[TMP19:%.*]] = load double, ptr getelementptr inbounds nuw (i8, ptr @input2_f64, i64 48), align 16
; CHECK-NEXT: [[TMP20:%.*]] = tail call double @llvm.minimumnum.f64(double [[TMP18]], double [[TMP19]])
; CHECK-NEXT: store double [[TMP20]], ptr getelementptr inbounds nuw (i8, ptr @output_f64, i64 48), align 16
; CHECK-NEXT: [[TMP21:%.*]] = load double, ptr getelementptr inbounds nuw (i8, ptr @input1_f64, i64 56), align 8
; CHECK-NEXT: [[TMP22:%.*]] = load double, ptr getelementptr inbounds nuw (i8, ptr @input2_f64, i64 56), align 8
; CHECK-NEXT: [[TMP23:%.*]] = tail call double @llvm.minimumnum.f64(double [[TMP21]], double [[TMP22]])
; CHECK-NEXT: store double [[TMP23]], ptr getelementptr inbounds nuw (i8, ptr @output_f64, i64 56), align 8
; CHECK-NEXT: [[TMP24:%.*]] = load double, ptr getelementptr inbounds nuw (i8, ptr @input1_f64, i64 64), align 16
; CHECK-NEXT: [[TMP25:%.*]] = load double, ptr getelementptr inbounds nuw (i8, ptr @input2_f64, i64 64), align 16
; CHECK-NEXT: [[TMP26:%.*]] = tail call double @llvm.minimumnum.f64(double [[TMP24]], double [[TMP25]])
; CHECK-NEXT: store double [[TMP26]], ptr getelementptr inbounds nuw (i8, ptr @output_f64, i64 64), align 16
; CHECK-NEXT: ret void
;
entry:
; Unrolled loop body: output[i] = minimumnum(input1[i], input2[i]) for
; i = 0..8; byte offsets advance by 8 (sizeof(double)), and the alignment on
; each access alternates 16/8 against the arrays' 16-byte base alignment.
%input0_0 = load double, ptr @input1_f64, align 16
%input0_1 = load double, ptr @input2_f64, align 16
%output0 = tail call double @llvm.minimumnum.f64(double %input0_0, double %input0_1)
store double %output0, ptr @output_f64, align 16
%input1_1 = load double, ptr getelementptr inbounds nuw (i8, ptr @input1_f64, i64 8), align 8
%input1_2 = load double, ptr getelementptr inbounds nuw (i8, ptr @input2_f64, i64 8), align 8
%output1 = tail call double @llvm.minimumnum.f64(double %input1_1, double %input1_2)
store double %output1, ptr getelementptr inbounds nuw (i8, ptr @output_f64, i64 8), align 8
%input2_1 = load double, ptr getelementptr inbounds nuw (i8, ptr @input1_f64, i64 16), align 16
%input2_2 = load double, ptr getelementptr inbounds nuw (i8, ptr @input2_f64, i64 16), align 16
%output2 = tail call double @llvm.minimumnum.f64(double %input2_1, double %input2_2)
store double %output2, ptr getelementptr inbounds nuw (i8, ptr @output_f64, i64 16), align 16
%input3_1 = load double, ptr getelementptr inbounds nuw (i8, ptr @input1_f64, i64 24), align 8
%input3_2 = load double, ptr getelementptr inbounds nuw (i8, ptr @input2_f64, i64 24), align 8
%output3 = tail call double @llvm.minimumnum.f64(double %input3_1, double %input3_2)
store double %output3, ptr getelementptr inbounds nuw (i8, ptr @output_f64, i64 24), align 8
%input4_1 = load double, ptr getelementptr inbounds nuw (i8, ptr @input1_f64, i64 32), align 16
%input4_2 = load double, ptr getelementptr inbounds nuw (i8, ptr @input2_f64, i64 32), align 16
%output4 = tail call double @llvm.minimumnum.f64(double %input4_1, double %input4_2)
store double %output4, ptr getelementptr inbounds nuw (i8, ptr @output_f64, i64 32), align 16
%input5_1 = load double, ptr getelementptr inbounds nuw (i8, ptr @input1_f64, i64 40), align 8
%input5_2 = load double, ptr getelementptr inbounds nuw (i8, ptr @input2_f64, i64 40), align 8
%output5 = tail call double @llvm.minimumnum.f64(double %input5_1, double %input5_2)
store double %output5, ptr getelementptr inbounds nuw (i8, ptr @output_f64, i64 40), align 8
%input6_1 = load double, ptr getelementptr inbounds nuw (i8, ptr @input1_f64, i64 48), align 16
%input6_2 = load double, ptr getelementptr inbounds nuw (i8, ptr @input2_f64, i64 48), align 16
%output6 = tail call double @llvm.minimumnum.f64(double %input6_1, double %input6_2)
store double %output6, ptr getelementptr inbounds nuw (i8, ptr @output_f64, i64 48), align 16
%input7_1 = load double, ptr getelementptr inbounds nuw (i8, ptr @input1_f64, i64 56), align 8
%input7_2 = load double, ptr getelementptr inbounds nuw (i8, ptr @input2_f64, i64 56), align 8
%output7 = tail call double @llvm.minimumnum.f64(double %input7_1, double %input7_2)
store double %output7, ptr getelementptr inbounds nuw (i8, ptr @output_f64, i64 56), align 8
%input8_1 = load double, ptr getelementptr inbounds nuw (i8, ptr @input1_f64, i64 64), align 16
%input8_2 = load double, ptr getelementptr inbounds nuw (i8, ptr @input2_f64, i64 64), align 16
%output8 = tail call double @llvm.minimumnum.f64(double %input8_1, double %input8_2)
store double %output8, ptr getelementptr inbounds nuw (i8, ptr @output_f64, i64 64), align 16
ret void
}
declare double @llvm.minimumnum.f64(double, double)
; Straight-line, fully unrolled elementwise max over 9 consecutive doubles:
;   output_f64[i] = llvm.maximumnum(input1_f64[i], input2_f64[i]) for i = 0..8.
; The CHECK block below is autogenerated (update_test_checks.py) and currently
; shows every scalar maximumnum call surviving untouched, i.e. the vectorizer
; does not yet handle this intrinsic (see the FIXME at the top of the file).
; Regenerate the CHECK lines with UTC once vectorization support lands.
define void @fmax64() {
; CHECK-LABEL: define void @fmax64() {
; CHECK-NEXT:  [[ENTRY:.*:]]
; CHECK-NEXT:    [[TMP0:%.*]] = load double, ptr @input1_f64, align 16
; CHECK-NEXT:    [[TMP1:%.*]] = load double, ptr @input2_f64, align 16
; CHECK-NEXT:    [[TMP2:%.*]] = tail call double @llvm.maximumnum.f64(double [[TMP0]], double [[TMP1]])
; CHECK-NEXT:    store double [[TMP2]], ptr @output_f64, align 16
; CHECK-NEXT:    [[TMP3:%.*]] = load double, ptr getelementptr inbounds nuw (i8, ptr @input1_f64, i64 8), align 8
; CHECK-NEXT:    [[TMP4:%.*]] = load double, ptr getelementptr inbounds nuw (i8, ptr @input2_f64, i64 8), align 8
; CHECK-NEXT:    [[TMP5:%.*]] = tail call double @llvm.maximumnum.f64(double [[TMP3]], double [[TMP4]])
; CHECK-NEXT:    store double [[TMP5]], ptr getelementptr inbounds nuw (i8, ptr @output_f64, i64 8), align 8
; CHECK-NEXT:    [[TMP6:%.*]] = load double, ptr getelementptr inbounds nuw (i8, ptr @input1_f64, i64 16), align 16
; CHECK-NEXT:    [[TMP7:%.*]] = load double, ptr getelementptr inbounds nuw (i8, ptr @input2_f64, i64 16), align 16
; CHECK-NEXT:    [[TMP8:%.*]] = tail call double @llvm.maximumnum.f64(double [[TMP6]], double [[TMP7]])
; CHECK-NEXT:    store double [[TMP8]], ptr getelementptr inbounds nuw (i8, ptr @output_f64, i64 16), align 16
; CHECK-NEXT:    [[TMP9:%.*]] = load double, ptr getelementptr inbounds nuw (i8, ptr @input1_f64, i64 24), align 8
; CHECK-NEXT:    [[TMP10:%.*]] = load double, ptr getelementptr inbounds nuw (i8, ptr @input2_f64, i64 24), align 8
; CHECK-NEXT:    [[TMP11:%.*]] = tail call double @llvm.maximumnum.f64(double [[TMP9]], double [[TMP10]])
; CHECK-NEXT:    store double [[TMP11]], ptr getelementptr inbounds nuw (i8, ptr @output_f64, i64 24), align 8
; CHECK-NEXT:    [[TMP12:%.*]] = load double, ptr getelementptr inbounds nuw (i8, ptr @input1_f64, i64 32), align 16
; CHECK-NEXT:    [[TMP13:%.*]] = load double, ptr getelementptr inbounds nuw (i8, ptr @input2_f64, i64 32), align 16
; CHECK-NEXT:    [[TMP14:%.*]] = tail call double @llvm.maximumnum.f64(double [[TMP12]], double [[TMP13]])
; CHECK-NEXT:    store double [[TMP14]], ptr getelementptr inbounds nuw (i8, ptr @output_f64, i64 32), align 16
; CHECK-NEXT:    [[TMP15:%.*]] = load double, ptr getelementptr inbounds nuw (i8, ptr @input1_f64, i64 40), align 8
; CHECK-NEXT:    [[TMP16:%.*]] = load double, ptr getelementptr inbounds nuw (i8, ptr @input2_f64, i64 40), align 8
; CHECK-NEXT:    [[TMP17:%.*]] = tail call double @llvm.maximumnum.f64(double [[TMP15]], double [[TMP16]])
; CHECK-NEXT:    store double [[TMP17]], ptr getelementptr inbounds nuw (i8, ptr @output_f64, i64 40), align 8
; CHECK-NEXT:    [[TMP18:%.*]] = load double, ptr getelementptr inbounds nuw (i8, ptr @input1_f64, i64 48), align 16
; CHECK-NEXT:    [[TMP19:%.*]] = load double, ptr getelementptr inbounds nuw (i8, ptr @input2_f64, i64 48), align 16
; CHECK-NEXT:    [[TMP20:%.*]] = tail call double @llvm.maximumnum.f64(double [[TMP18]], double [[TMP19]])
; CHECK-NEXT:    store double [[TMP20]], ptr getelementptr inbounds nuw (i8, ptr @output_f64, i64 48), align 16
; CHECK-NEXT:    [[TMP21:%.*]] = load double, ptr getelementptr inbounds nuw (i8, ptr @input1_f64, i64 56), align 8
; CHECK-NEXT:    [[TMP22:%.*]] = load double, ptr getelementptr inbounds nuw (i8, ptr @input2_f64, i64 56), align 8
; CHECK-NEXT:    [[TMP23:%.*]] = tail call double @llvm.maximumnum.f64(double [[TMP21]], double [[TMP22]])
; CHECK-NEXT:    store double [[TMP23]], ptr getelementptr inbounds nuw (i8, ptr @output_f64, i64 56), align 8
; CHECK-NEXT:    [[TMP24:%.*]] = load double, ptr getelementptr inbounds nuw (i8, ptr @input1_f64, i64 64), align 16
; CHECK-NEXT:    [[TMP25:%.*]] = load double, ptr getelementptr inbounds nuw (i8, ptr @input2_f64, i64 64), align 16
; CHECK-NEXT:    [[TMP26:%.*]] = tail call double @llvm.maximumnum.f64(double [[TMP24]], double [[TMP25]])
; CHECK-NEXT:    store double [[TMP26]], ptr getelementptr inbounds nuw (i8, ptr @output_f64, i64 64), align 16
; CHECK-NEXT:    ret void
;
entry:
  ; Each triple below is: load lane i from both inputs, take maximumnum,
  ; store to the matching lane of the output. The load/store alignments
  ; alternate 16/8, consistent with 16-byte-aligned globals — the global
  ; definitions are outside this window, so confirm there if it matters.
  %input0_0 = load double, ptr @input1_f64, align 16
  %input0_1 = load double, ptr @input2_f64, align 16
  %output0 = tail call double @llvm.maximumnum.f64(double %input0_0, double %input0_1)
  store double %output0, ptr @output_f64, align 16
  %input1_1 = load double, ptr getelementptr inbounds nuw (i8, ptr @input1_f64, i64 8), align 8
  %input1_2 = load double, ptr getelementptr inbounds nuw (i8, ptr @input2_f64, i64 8), align 8
  %output1 = tail call double @llvm.maximumnum.f64(double %input1_1, double %input1_2)
  store double %output1, ptr getelementptr inbounds nuw (i8, ptr @output_f64, i64 8), align 8
  %input2_1 = load double, ptr getelementptr inbounds nuw (i8, ptr @input1_f64, i64 16), align 16
  %input2_2 = load double, ptr getelementptr inbounds nuw (i8, ptr @input2_f64, i64 16), align 16
  %output2 = tail call double @llvm.maximumnum.f64(double %input2_1, double %input2_2)
  store double %output2, ptr getelementptr inbounds nuw (i8, ptr @output_f64, i64 16), align 16
  %input3_1 = load double, ptr getelementptr inbounds nuw (i8, ptr @input1_f64, i64 24), align 8
  %input3_2 = load double, ptr getelementptr inbounds nuw (i8, ptr @input2_f64, i64 24), align 8
  %output3 = tail call double @llvm.maximumnum.f64(double %input3_1, double %input3_2)
  store double %output3, ptr getelementptr inbounds nuw (i8, ptr @output_f64, i64 24), align 8
  %input4_1 = load double, ptr getelementptr inbounds nuw (i8, ptr @input1_f64, i64 32), align 16
  %input4_2 = load double, ptr getelementptr inbounds nuw (i8, ptr @input2_f64, i64 32), align 16
  %output4 = tail call double @llvm.maximumnum.f64(double %input4_1, double %input4_2)
  store double %output4, ptr getelementptr inbounds nuw (i8, ptr @output_f64, i64 32), align 16
  %input5_1 = load double, ptr getelementptr inbounds nuw (i8, ptr @input1_f64, i64 40), align 8
  %input5_2 = load double, ptr getelementptr inbounds nuw (i8, ptr @input2_f64, i64 40), align 8
  %output5 = tail call double @llvm.maximumnum.f64(double %input5_1, double %input5_2)
  store double %output5, ptr getelementptr inbounds nuw (i8, ptr @output_f64, i64 40), align 8
  %input6_1 = load double, ptr getelementptr inbounds nuw (i8, ptr @input1_f64, i64 48), align 16
  %input6_2 = load double, ptr getelementptr inbounds nuw (i8, ptr @input2_f64, i64 48), align 16
  %output6 = tail call double @llvm.maximumnum.f64(double %input6_1, double %input6_2)
  store double %output6, ptr getelementptr inbounds nuw (i8, ptr @output_f64, i64 48), align 16
  %input7_1 = load double, ptr getelementptr inbounds nuw (i8, ptr @input1_f64, i64 56), align 8
  %input7_2 = load double, ptr getelementptr inbounds nuw (i8, ptr @input2_f64, i64 56), align 8
  %output7 = tail call double @llvm.maximumnum.f64(double %input7_1, double %input7_2)
  store double %output7, ptr getelementptr inbounds nuw (i8, ptr @output_f64, i64 56), align 8
  %input8_1 = load double, ptr getelementptr inbounds nuw (i8, ptr @input1_f64, i64 64), align 16
  %input8_2 = load double, ptr getelementptr inbounds nuw (i8, ptr @input2_f64, i64 64), align 16
  %output8 = tail call double @llvm.maximumnum.f64(double %input8_1, double %input8_2)
  store double %output8, ptr getelementptr inbounds nuw (i8, ptr @output_f64, i64 64), align 16
  ret void
}
declare double @llvm.maximumnum.f64(double, double)
; Straight-line, fully unrolled elementwise min over 9 consecutive halves:
;   output_f16[i] = llvm.minimumnum(input1_f16[i], input2_f16[i]) for i = 0..8.
; The CHECK block below is autogenerated (update_test_checks.py) and currently
; shows the scalar minimumnum calls surviving unchanged — the vectorizer does
; not yet support this intrinsic (see the FIXME at the top of the file).
; Regenerate the CHECK lines with UTC once vectorization support lands.
define void @fmin16() {
; CHECK-LABEL: define void @fmin16() {
; CHECK-NEXT:  [[ENTRY:.*:]]
; CHECK-NEXT:    [[TMP0:%.*]] = load half, ptr @input1_f16, align 16
; CHECK-NEXT:    [[TMP1:%.*]] = load half, ptr @input2_f16, align 16
; CHECK-NEXT:    [[TMP2:%.*]] = tail call half @llvm.minimumnum.f16(half [[TMP0]], half [[TMP1]])
; CHECK-NEXT:    store half [[TMP2]], ptr @output_f16, align 16
; CHECK-NEXT:    [[TMP3:%.*]] = load half, ptr getelementptr inbounds nuw (i8, ptr @input1_f16, i64 2), align 2
; CHECK-NEXT:    [[TMP4:%.*]] = load half, ptr getelementptr inbounds nuw (i8, ptr @input2_f16, i64 2), align 2
; CHECK-NEXT:    [[TMP5:%.*]] = tail call half @llvm.minimumnum.f16(half [[TMP3]], half [[TMP4]])
; CHECK-NEXT:    store half [[TMP5]], ptr getelementptr inbounds nuw (i8, ptr @output_f16, i64 2), align 2
; CHECK-NEXT:    [[TMP6:%.*]] = load half, ptr getelementptr inbounds nuw (i8, ptr @input1_f16, i64 4), align 4
; CHECK-NEXT:    [[TMP7:%.*]] = load half, ptr getelementptr inbounds nuw (i8, ptr @input2_f16, i64 4), align 4
; CHECK-NEXT:    [[TMP8:%.*]] = tail call half @llvm.minimumnum.f16(half [[TMP6]], half [[TMP7]])
; CHECK-NEXT:    store half [[TMP8]], ptr getelementptr inbounds nuw (i8, ptr @output_f16, i64 4), align 4
; CHECK-NEXT:    [[TMP9:%.*]] = load half, ptr getelementptr inbounds nuw (i8, ptr @input1_f16, i64 6), align 2
; CHECK-NEXT:    [[TMP10:%.*]] = load half, ptr getelementptr inbounds nuw (i8, ptr @input2_f16, i64 6), align 2
; CHECK-NEXT:    [[TMP11:%.*]] = tail call half @llvm.minimumnum.f16(half [[TMP9]], half [[TMP10]])
; CHECK-NEXT:    store half [[TMP11]], ptr getelementptr inbounds nuw (i8, ptr @output_f16, i64 6), align 2
; CHECK-NEXT:    [[TMP12:%.*]] = load half, ptr getelementptr inbounds nuw (i8, ptr @input1_f16, i64 8), align 8
; CHECK-NEXT:    [[TMP13:%.*]] = load half, ptr getelementptr inbounds nuw (i8, ptr @input2_f16, i64 8), align 8
; CHECK-NEXT:    [[TMP14:%.*]] = tail call half @llvm.minimumnum.f16(half [[TMP12]], half [[TMP13]])
; CHECK-NEXT:    store half [[TMP14]], ptr getelementptr inbounds nuw (i8, ptr @output_f16, i64 8), align 8
; CHECK-NEXT:    [[TMP15:%.*]] = load half, ptr getelementptr inbounds nuw (i8, ptr @input1_f16, i64 10), align 2
; CHECK-NEXT:    [[TMP16:%.*]] = load half, ptr getelementptr inbounds nuw (i8, ptr @input2_f16, i64 10), align 2
; CHECK-NEXT:    [[TMP17:%.*]] = tail call half @llvm.minimumnum.f16(half [[TMP15]], half [[TMP16]])
; CHECK-NEXT:    store half [[TMP17]], ptr getelementptr inbounds nuw (i8, ptr @output_f16, i64 10), align 2
; CHECK-NEXT:    [[TMP18:%.*]] = load half, ptr getelementptr inbounds nuw (i8, ptr @input1_f16, i64 12), align 4
; CHECK-NEXT:    [[TMP19:%.*]] = load half, ptr getelementptr inbounds nuw (i8, ptr @input2_f16, i64 12), align 4
; CHECK-NEXT:    [[TMP20:%.*]] = tail call half @llvm.minimumnum.f16(half [[TMP18]], half [[TMP19]])
; CHECK-NEXT:    store half [[TMP20]], ptr getelementptr inbounds nuw (i8, ptr @output_f16, i64 12), align 4
; CHECK-NEXT:    [[TMP21:%.*]] = load half, ptr getelementptr inbounds nuw (i8, ptr @input1_f16, i64 14), align 2
; CHECK-NEXT:    [[TMP22:%.*]] = load half, ptr getelementptr inbounds nuw (i8, ptr @input2_f16, i64 14), align 2
; CHECK-NEXT:    [[TMP23:%.*]] = tail call half @llvm.minimumnum.f16(half [[TMP21]], half [[TMP22]])
; CHECK-NEXT:    store half [[TMP23]], ptr getelementptr inbounds nuw (i8, ptr @output_f16, i64 14), align 2
; CHECK-NEXT:    [[TMP24:%.*]] = load half, ptr getelementptr inbounds nuw (i8, ptr @input1_f16, i64 16), align 16
; CHECK-NEXT:    [[TMP25:%.*]] = load half, ptr getelementptr inbounds nuw (i8, ptr @input2_f16, i64 16), align 16
; CHECK-NEXT:    [[TMP26:%.*]] = tail call half @llvm.minimumnum.f16(half [[TMP24]], half [[TMP25]])
; CHECK-NEXT:    store half [[TMP26]], ptr getelementptr inbounds nuw (i8, ptr @output_f16, i64 16), align 16
; CHECK-NEXT:    ret void
;
entry:
  ; Per lane: load both f16 inputs, take minimumnum, store the result.
  ; Byte offsets step by 2 (sizeof(half)); alignments cycle 2/4/8/16,
  ; consistent with 16-byte-aligned globals — the global definitions are
  ; outside this window, so confirm there if it matters.
  %input0_0 = load half, ptr @input1_f16, align 16
  %input0_1 = load half, ptr @input2_f16, align 16
  %output0 = tail call half @llvm.minimumnum.f16(half %input0_0, half %input0_1)
  store half %output0, ptr @output_f16, align 16
  %input1_1 = load half, ptr getelementptr inbounds nuw (i8, ptr @input1_f16, i64 2), align 2
  %input1_2 = load half, ptr getelementptr inbounds nuw (i8, ptr @input2_f16, i64 2), align 2
  %output1 = tail call half @llvm.minimumnum.f16(half %input1_1, half %input1_2)
  store half %output1, ptr getelementptr inbounds nuw (i8, ptr @output_f16, i64 2), align 2
  %input2_1 = load half, ptr getelementptr inbounds nuw (i8, ptr @input1_f16, i64 4), align 4
  %input2_2 = load half, ptr getelementptr inbounds nuw (i8, ptr @input2_f16, i64 4), align 4
  %output2 = tail call half @llvm.minimumnum.f16(half %input2_1, half %input2_2)
  store half %output2, ptr getelementptr inbounds nuw (i8, ptr @output_f16, i64 4), align 4
  %input3_1 = load half, ptr getelementptr inbounds nuw (i8, ptr @input1_f16, i64 6), align 2
  %input3_2 = load half, ptr getelementptr inbounds nuw (i8, ptr @input2_f16, i64 6), align 2
  %output3 = tail call half @llvm.minimumnum.f16(half %input3_1, half %input3_2)
  store half %output3, ptr getelementptr inbounds nuw (i8, ptr @output_f16, i64 6), align 2
  %input4_1 = load half, ptr getelementptr inbounds nuw (i8, ptr @input1_f16, i64 8), align 8
  %input4_2 = load half, ptr getelementptr inbounds nuw (i8, ptr @input2_f16, i64 8), align 8
  %output4 = tail call half @llvm.minimumnum.f16(half %input4_1, half %input4_2)
  store half %output4, ptr getelementptr inbounds nuw (i8, ptr @output_f16, i64 8), align 8
  %input5_1 = load half, ptr getelementptr inbounds nuw (i8, ptr @input1_f16, i64 10), align 2
  %input5_2 = load half, ptr getelementptr inbounds nuw (i8, ptr @input2_f16, i64 10), align 2
  %output5 = tail call half @llvm.minimumnum.f16(half %input5_1, half %input5_2)
  store half %output5, ptr getelementptr inbounds nuw (i8, ptr @output_f16, i64 10), align 2
  %input6_1 = load half, ptr getelementptr inbounds nuw (i8, ptr @input1_f16, i64 12), align 4
  %input6_2 = load half, ptr getelementptr inbounds nuw (i8, ptr @input2_f16, i64 12), align 4
  %output6 = tail call half @llvm.minimumnum.f16(half %input6_1, half %input6_2)
  store half %output6, ptr getelementptr inbounds nuw (i8, ptr @output_f16, i64 12), align 4
  %input7_1 = load half, ptr getelementptr inbounds nuw (i8, ptr @input1_f16, i64 14), align 2
  %input7_2 = load half, ptr getelementptr inbounds nuw (i8, ptr @input2_f16, i64 14), align 2
  %output7 = tail call half @llvm.minimumnum.f16(half %input7_1, half %input7_2)
  store half %output7, ptr getelementptr inbounds nuw (i8, ptr @output_f16, i64 14), align 2
  %input8_1 = load half, ptr getelementptr inbounds nuw (i8, ptr @input1_f16, i64 16), align 16
  %input8_2 = load half, ptr getelementptr inbounds nuw (i8, ptr @input2_f16, i64 16), align 16
  %output8 = tail call half @llvm.minimumnum.f16(half %input8_1, half %input8_2)
  store half %output8, ptr getelementptr inbounds nuw (i8, ptr @output_f16, i64 16), align 16
  ret void
}
declare half @llvm.minimumnum.f16(half, half)
; Straight-line, fully unrolled elementwise max over 9 consecutive halves:
;   output_f16[i] = llvm.maximumnum(input1_f16[i], input2_f16[i]) for i = 0..8.
; Mirrors @fmin16 with maximumnum substituted. The CHECK block below is
; autogenerated (update_test_checks.py) and currently shows the scalar calls
; surviving unchanged — the vectorizer does not yet support this intrinsic
; (see the FIXME at the top of the file). Regenerate with UTC once it does.
define void @fmax16() {
; CHECK-LABEL: define void @fmax16() {
; CHECK-NEXT:  [[ENTRY:.*:]]
; CHECK-NEXT:    [[TMP0:%.*]] = load half, ptr @input1_f16, align 16
; CHECK-NEXT:    [[TMP1:%.*]] = load half, ptr @input2_f16, align 16
; CHECK-NEXT:    [[TMP2:%.*]] = tail call half @llvm.maximumnum.f16(half [[TMP0]], half [[TMP1]])
; CHECK-NEXT:    store half [[TMP2]], ptr @output_f16, align 16
; CHECK-NEXT:    [[TMP3:%.*]] = load half, ptr getelementptr inbounds nuw (i8, ptr @input1_f16, i64 2), align 2
; CHECK-NEXT:    [[TMP4:%.*]] = load half, ptr getelementptr inbounds nuw (i8, ptr @input2_f16, i64 2), align 2
; CHECK-NEXT:    [[TMP5:%.*]] = tail call half @llvm.maximumnum.f16(half [[TMP3]], half [[TMP4]])
; CHECK-NEXT:    store half [[TMP5]], ptr getelementptr inbounds nuw (i8, ptr @output_f16, i64 2), align 2
; CHECK-NEXT:    [[TMP6:%.*]] = load half, ptr getelementptr inbounds nuw (i8, ptr @input1_f16, i64 4), align 4
; CHECK-NEXT:    [[TMP7:%.*]] = load half, ptr getelementptr inbounds nuw (i8, ptr @input2_f16, i64 4), align 4
; CHECK-NEXT:    [[TMP8:%.*]] = tail call half @llvm.maximumnum.f16(half [[TMP6]], half [[TMP7]])
; CHECK-NEXT:    store half [[TMP8]], ptr getelementptr inbounds nuw (i8, ptr @output_f16, i64 4), align 4
; CHECK-NEXT:    [[TMP9:%.*]] = load half, ptr getelementptr inbounds nuw (i8, ptr @input1_f16, i64 6), align 2
; CHECK-NEXT:    [[TMP10:%.*]] = load half, ptr getelementptr inbounds nuw (i8, ptr @input2_f16, i64 6), align 2
; CHECK-NEXT:    [[TMP11:%.*]] = tail call half @llvm.maximumnum.f16(half [[TMP9]], half [[TMP10]])
; CHECK-NEXT:    store half [[TMP11]], ptr getelementptr inbounds nuw (i8, ptr @output_f16, i64 6), align 2
; CHECK-NEXT:    [[TMP12:%.*]] = load half, ptr getelementptr inbounds nuw (i8, ptr @input1_f16, i64 8), align 8
; CHECK-NEXT:    [[TMP13:%.*]] = load half, ptr getelementptr inbounds nuw (i8, ptr @input2_f16, i64 8), align 8
; CHECK-NEXT:    [[TMP14:%.*]] = tail call half @llvm.maximumnum.f16(half [[TMP12]], half [[TMP13]])
; CHECK-NEXT:    store half [[TMP14]], ptr getelementptr inbounds nuw (i8, ptr @output_f16, i64 8), align 8
; CHECK-NEXT:    [[TMP15:%.*]] = load half, ptr getelementptr inbounds nuw (i8, ptr @input1_f16, i64 10), align 2
; CHECK-NEXT:    [[TMP16:%.*]] = load half, ptr getelementptr inbounds nuw (i8, ptr @input2_f16, i64 10), align 2
; CHECK-NEXT:    [[TMP17:%.*]] = tail call half @llvm.maximumnum.f16(half [[TMP15]], half [[TMP16]])
; CHECK-NEXT:    store half [[TMP17]], ptr getelementptr inbounds nuw (i8, ptr @output_f16, i64 10), align 2
; CHECK-NEXT:    [[TMP18:%.*]] = load half, ptr getelementptr inbounds nuw (i8, ptr @input1_f16, i64 12), align 4
; CHECK-NEXT:    [[TMP19:%.*]] = load half, ptr getelementptr inbounds nuw (i8, ptr @input2_f16, i64 12), align 4
; CHECK-NEXT:    [[TMP20:%.*]] = tail call half @llvm.maximumnum.f16(half [[TMP18]], half [[TMP19]])
; CHECK-NEXT:    store half [[TMP20]], ptr getelementptr inbounds nuw (i8, ptr @output_f16, i64 12), align 4
; CHECK-NEXT:    [[TMP21:%.*]] = load half, ptr getelementptr inbounds nuw (i8, ptr @input1_f16, i64 14), align 2
; CHECK-NEXT:    [[TMP22:%.*]] = load half, ptr getelementptr inbounds nuw (i8, ptr @input2_f16, i64 14), align 2
; CHECK-NEXT:    [[TMP23:%.*]] = tail call half @llvm.maximumnum.f16(half [[TMP21]], half [[TMP22]])
; CHECK-NEXT:    store half [[TMP23]], ptr getelementptr inbounds nuw (i8, ptr @output_f16, i64 14), align 2
; CHECK-NEXT:    [[TMP24:%.*]] = load half, ptr getelementptr inbounds nuw (i8, ptr @input1_f16, i64 16), align 16
; CHECK-NEXT:    [[TMP25:%.*]] = load half, ptr getelementptr inbounds nuw (i8, ptr @input2_f16, i64 16), align 16
; CHECK-NEXT:    [[TMP26:%.*]] = tail call half @llvm.maximumnum.f16(half [[TMP24]], half [[TMP25]])
; CHECK-NEXT:    store half [[TMP26]], ptr getelementptr inbounds nuw (i8, ptr @output_f16, i64 16), align 16
; CHECK-NEXT:    ret void
;
entry:
  ; Per lane: load both f16 inputs, take maximumnum, store the result.
  ; Byte offsets step by 2 (sizeof(half)); alignments cycle 2/4/8/16,
  ; consistent with 16-byte-aligned globals — the global definitions are
  ; outside this window, so confirm there if it matters.
  %input0_0 = load half, ptr @input1_f16, align 16
  %input0_1 = load half, ptr @input2_f16, align 16
  %output0 = tail call half @llvm.maximumnum.f16(half %input0_0, half %input0_1)
  store half %output0, ptr @output_f16, align 16
  %input1_1 = load half, ptr getelementptr inbounds nuw (i8, ptr @input1_f16, i64 2), align 2
  %input1_2 = load half, ptr getelementptr inbounds nuw (i8, ptr @input2_f16, i64 2), align 2
  %output1 = tail call half @llvm.maximumnum.f16(half %input1_1, half %input1_2)
  store half %output1, ptr getelementptr inbounds nuw (i8, ptr @output_f16, i64 2), align 2
  %input2_1 = load half, ptr getelementptr inbounds nuw (i8, ptr @input1_f16, i64 4), align 4
  %input2_2 = load half, ptr getelementptr inbounds nuw (i8, ptr @input2_f16, i64 4), align 4
  %output2 = tail call half @llvm.maximumnum.f16(half %input2_1, half %input2_2)
  store half %output2, ptr getelementptr inbounds nuw (i8, ptr @output_f16, i64 4), align 4
  %input3_1 = load half, ptr getelementptr inbounds nuw (i8, ptr @input1_f16, i64 6), align 2
  %input3_2 = load half, ptr getelementptr inbounds nuw (i8, ptr @input2_f16, i64 6), align 2
  %output3 = tail call half @llvm.maximumnum.f16(half %input3_1, half %input3_2)
  store half %output3, ptr getelementptr inbounds nuw (i8, ptr @output_f16, i64 6), align 2
  %input4_1 = load half, ptr getelementptr inbounds nuw (i8, ptr @input1_f16, i64 8), align 8
  %input4_2 = load half, ptr getelementptr inbounds nuw (i8, ptr @input2_f16, i64 8), align 8
  %output4 = tail call half @llvm.maximumnum.f16(half %input4_1, half %input4_2)
  store half %output4, ptr getelementptr inbounds nuw (i8, ptr @output_f16, i64 8), align 8
  %input5_1 = load half, ptr getelementptr inbounds nuw (i8, ptr @input1_f16, i64 10), align 2
  %input5_2 = load half, ptr getelementptr inbounds nuw (i8, ptr @input2_f16, i64 10), align 2
  %output5 = tail call half @llvm.maximumnum.f16(half %input5_1, half %input5_2)
  store half %output5, ptr getelementptr inbounds nuw (i8, ptr @output_f16, i64 10), align 2
  %input6_1 = load half, ptr getelementptr inbounds nuw (i8, ptr @input1_f16, i64 12), align 4
  %input6_2 = load half, ptr getelementptr inbounds nuw (i8, ptr @input2_f16, i64 12), align 4
  %output6 = tail call half @llvm.maximumnum.f16(half %input6_1, half %input6_2)
  store half %output6, ptr getelementptr inbounds nuw (i8, ptr @output_f16, i64 12), align 4
  %input7_1 = load half, ptr getelementptr inbounds nuw (i8, ptr @input1_f16, i64 14), align 2
  %input7_2 = load half, ptr getelementptr inbounds nuw (i8, ptr @input2_f16, i64 14), align 2
  %output7 = tail call half @llvm.maximumnum.f16(half %input7_1, half %input7_2)
  store half %output7, ptr getelementptr inbounds nuw (i8, ptr @output_f16, i64 14), align 2
  %input8_1 = load half, ptr getelementptr inbounds nuw (i8, ptr @input1_f16, i64 16), align 16
  %input8_2 = load half, ptr getelementptr inbounds nuw (i8, ptr @input2_f16, i64 16), align 16
  %output8 = tail call half @llvm.maximumnum.f16(half %input8_1, half %input8_2)
  store half %output8, ptr getelementptr inbounds nuw (i8, ptr @output_f16, i64 16), align 16
  ret void
}
declare half @llvm.maximumnum.f16(half, half)