; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 2
; RUN: opt -S -mtriple=amdgcn-- -mcpu=hawaii -passes=amdgpu-codegenprepare -denormal-fp-math-f32=ieee %s | FileCheck -check-prefixes=CHECK,IEEE %s
; RUN: opt -S -mtriple=amdgcn-- -mcpu=hawaii -passes=amdgpu-codegenprepare -denormal-fp-math-f32=dynamic %s | FileCheck -check-prefixes=CHECK,IEEE %s
; RUN: opt -S -mtriple=amdgcn-- -mcpu=hawaii -passes=amdgpu-codegenprepare -denormal-fp-math-f32=preserve-sign %s | FileCheck -check-prefixes=CHECK,DAZ %s

; Kernel carrying attribute group #0 (optnone noinline) with a single
; llvm.sqrt.f32 call tagged with !fpmath. The assertions expect the call and
; its volatile store to come out exactly as written.
; NOTE(review): the value is named md.25ulp but is tagged with !3, which this
; file defines as 3.0 ulp (2.5 ulp is !0) - confirm which one was intended.
define amdgpu_kernel void @noop_sqrt_fpmath(ptr addrspace(1) %out, float %x) #0 {
; CHECK-LABEL: define amdgpu_kernel void @noop_sqrt_fpmath
; CHECK-SAME: (ptr addrspace(1) [[OUT:%.*]], float [[X:%.*]]) #[[ATTR0:[0-9]+]] {
; CHECK-NEXT:    [[MD_25ULP:%.*]] = call float @llvm.sqrt.f32(float [[X]]), !fpmath !0
; CHECK-NEXT:    store volatile float [[MD_25ULP]], ptr addrspace(1) [[OUT]], align 4
; CHECK-NEXT:    ret void
;
  %md.25ulp = call float @llvm.sqrt.f32(float %x), !fpmath !3
  store volatile float %md.25ulp, ptr addrspace(1) %out, align 4
  ret void
}

; Scalar f32 sqrt on an unconstrained argument: once with no !fpmath and once
; each with 0.5, 1.0, 2.5, 3.0 and 2.0 ulp accuracy metadata. Every result is
; written to %out with a volatile store. The assertions use only the common
; prefix and expect all six calls to be left unchanged.
define amdgpu_kernel void @sqrt_fpmath_f32(ptr addrspace(1) %out, float %x) {
; CHECK-LABEL: define amdgpu_kernel void @sqrt_fpmath_f32
; CHECK-SAME: (ptr addrspace(1) [[OUT:%.*]], float [[X:%.*]]) #[[ATTR1:[0-9]+]] {
; CHECK-NEXT:    [[NO_MD:%.*]] = call float @llvm.sqrt.f32(float [[X]])
; CHECK-NEXT:    store volatile float [[NO_MD]], ptr addrspace(1) [[OUT]], align 4
; CHECK-NEXT:    [[MD_HALF_ULP:%.*]] = call float @llvm.sqrt.f32(float [[X]]), !fpmath !1
; CHECK-NEXT:    store volatile float [[MD_HALF_ULP]], ptr addrspace(1) [[OUT]], align 4
; CHECK-NEXT:    [[MD_1ULP:%.*]] = call float @llvm.sqrt.f32(float [[X]]), !fpmath !2
; CHECK-NEXT:    store volatile float [[MD_1ULP]], ptr addrspace(1) [[OUT]], align 4
; CHECK-NEXT:    [[MD_25ULP:%.*]] = call float @llvm.sqrt.f32(float [[X]]), !fpmath !3
; CHECK-NEXT:    store volatile float [[MD_25ULP]], ptr addrspace(1) [[OUT]], align 4
; CHECK-NEXT:    [[MD_3ULP:%.*]] = call float @llvm.sqrt.f32(float [[X]]), !fpmath !0
; CHECK-NEXT:    store volatile float [[MD_3ULP]], ptr addrspace(1) [[OUT]], align 4
; CHECK-NEXT:    [[MD_2ULP:%.*]] = call float @llvm.sqrt.f32(float [[X]]), !fpmath !4
; CHECK-NEXT:    store volatile float [[MD_2ULP]], ptr addrspace(1) [[OUT]], align 4
; CHECK-NEXT:    ret void
;
  %no.md = call float @llvm.sqrt.f32(float %x)
  store volatile float %no.md, ptr addrspace(1) %out, align 4

  %md.half.ulp = call float @llvm.sqrt.f32(float %x), !fpmath !1
  store volatile float %md.half.ulp, ptr addrspace(1) %out, align 4

  %md.1ulp = call float @llvm.sqrt.f32(float %x), !fpmath !2
  store volatile float %md.1ulp, ptr addrspace(1) %out, align 4

  %md.25ulp = call float @llvm.sqrt.f32(float %x), !fpmath !0
  store volatile float %md.25ulp, ptr addrspace(1) %out, align 4

  %md.3ulp = call float @llvm.sqrt.f32(float %x), !fpmath !3
  store volatile float %md.3ulp, ptr addrspace(1) %out, align 4

  %md.2ulp = call float @llvm.sqrt.f32(float %x), !fpmath !4
  store volatile float %md.2ulp, ptr addrspace(1) %out, align 4
  ret void
}

; <2 x float> variant of the scalar accuracy sweep: llvm.sqrt.v2f32 with no
; !fpmath and with 0.5, 1.0, 2.5, 3.0 and 2.0 ulp metadata, each result
; written to %out with a volatile store. The assertions expect the vector
; calls to be left unchanged.
define amdgpu_kernel void @sqrt_fpmath_v2f32(ptr addrspace(1) %out, <2 x float> %x) {
; CHECK-LABEL: define amdgpu_kernel void @sqrt_fpmath_v2f32
; CHECK-SAME: (ptr addrspace(1) [[OUT:%.*]], <2 x float> [[X:%.*]]) #[[ATTR1]] {
; CHECK-NEXT:    [[NO_MD:%.*]] = call <2 x float> @llvm.sqrt.v2f32(<2 x float> [[X]])
; CHECK-NEXT:    store volatile <2 x float> [[NO_MD]], ptr addrspace(1) [[OUT]], align 4
; CHECK-NEXT:    [[MD_HALF_ULP:%.*]] = call <2 x float> @llvm.sqrt.v2f32(<2 x float> [[X]]), !fpmath !1
; CHECK-NEXT:    store volatile <2 x float> [[MD_HALF_ULP]], ptr addrspace(1) [[OUT]], align 4
; CHECK-NEXT:    [[MD_1ULP:%.*]] = call <2 x float> @llvm.sqrt.v2f32(<2 x float> [[X]]), !fpmath !2
; CHECK-NEXT:    store volatile <2 x float> [[MD_1ULP]], ptr addrspace(1) [[OUT]], align 4
; CHECK-NEXT:    [[MD_25ULP:%.*]] = call <2 x float> @llvm.sqrt.v2f32(<2 x float> [[X]]), !fpmath !3
; CHECK-NEXT:    store volatile <2 x float> [[MD_25ULP]], ptr addrspace(1) [[OUT]], align 4
; CHECK-NEXT:    [[MD_3ULP:%.*]] = call <2 x float> @llvm.sqrt.v2f32(<2 x float> [[X]]), !fpmath !0
; CHECK-NEXT:    store volatile <2 x float> [[MD_3ULP]], ptr addrspace(1) [[OUT]], align 4
; CHECK-NEXT:    [[MD_2ULP:%.*]] = call <2 x float> @llvm.sqrt.v2f32(<2 x float> [[X]]), !fpmath !4
; CHECK-NEXT:    store volatile <2 x float> [[MD_2ULP]], ptr addrspace(1) [[OUT]], align 4
; CHECK-NEXT:    ret void
;
  %no.md = call <2 x float> @llvm.sqrt.v2f32(<2 x float> %x)
  store volatile <2 x float> %no.md, ptr addrspace(1) %out, align 4

  %md.half.ulp = call <2 x float> @llvm.sqrt.v2f32(<2 x float> %x), !fpmath !1
  store volatile <2 x float> %md.half.ulp, ptr addrspace(1) %out, align 4

  %md.1ulp = call <2 x float> @llvm.sqrt.v2f32(<2 x float> %x), !fpmath !2
  store volatile <2 x float> %md.1ulp, ptr addrspace(1) %out, align 4

  %md.25ulp = call <2 x float> @llvm.sqrt.v2f32(<2 x float> %x), !fpmath !0
  store volatile <2 x float> %md.25ulp, ptr addrspace(1) %out, align 4

  %md.3ulp = call <2 x float> @llvm.sqrt.v2f32(<2 x float> %x), !fpmath !3
  store volatile <2 x float> %md.3ulp, ptr addrspace(1) %out, align 4

  %md.2ulp = call <2 x float> @llvm.sqrt.v2f32(<2 x float> %x), !fpmath !4
  store volatile <2 x float> %md.2ulp, ptr addrspace(1) %out, align 4
  ret void
}

; Same accuracy sweep as the plain scalar test, but %x is declared
; nofpclass(sub), i.e. the argument is stated never to be a subnormal of either
; sign. The assertions expect all six sqrt calls to be left unchanged.
define amdgpu_kernel void @sqrt_fpmath_f32_known_nosub(ptr addrspace(1) %out, float nofpclass(sub) %x) {
; CHECK-LABEL: define amdgpu_kernel void @sqrt_fpmath_f32_known_nosub
; CHECK-SAME: (ptr addrspace(1) [[OUT:%.*]], float nofpclass(sub) [[X:%.*]]) #[[ATTR1]] {
; CHECK-NEXT:    [[NO_MD:%.*]] = call float @llvm.sqrt.f32(float [[X]])
; CHECK-NEXT:    store volatile float [[NO_MD]], ptr addrspace(1) [[OUT]], align 4
; CHECK-NEXT:    [[MD_HALF_ULP:%.*]] = call float @llvm.sqrt.f32(float [[X]]), !fpmath !1
; CHECK-NEXT:    store volatile float [[MD_HALF_ULP]], ptr addrspace(1) [[OUT]], align 4
; CHECK-NEXT:    [[MD_1ULP:%.*]] = call float @llvm.sqrt.f32(float [[X]]), !fpmath !2
; CHECK-NEXT:    store volatile float [[MD_1ULP]], ptr addrspace(1) [[OUT]], align 4
; CHECK-NEXT:    [[MD_25ULP:%.*]] = call float @llvm.sqrt.f32(float [[X]]), !fpmath !3
; CHECK-NEXT:    store volatile float [[MD_25ULP]], ptr addrspace(1) [[OUT]], align 4
; CHECK-NEXT:    [[MD_3ULP:%.*]] = call float @llvm.sqrt.f32(float [[X]]), !fpmath !0
; CHECK-NEXT:    store volatile float [[MD_3ULP]], ptr addrspace(1) [[OUT]], align 4
; CHECK-NEXT:    [[MD_2ULP:%.*]] = call float @llvm.sqrt.f32(float [[X]]), !fpmath !4
; CHECK-NEXT:    store volatile float [[MD_2ULP]], ptr addrspace(1) [[OUT]], align 4
; CHECK-NEXT:    ret void
;
  %no.md = call float @llvm.sqrt.f32(float %x)
  store volatile float %no.md, ptr addrspace(1) %out, align 4

  %md.half.ulp = call float @llvm.sqrt.f32(float %x), !fpmath !1
  store volatile float %md.half.ulp, ptr addrspace(1) %out, align 4

  %md.1ulp = call float @llvm.sqrt.f32(float %x), !fpmath !2
  store volatile float %md.1ulp, ptr addrspace(1) %out, align 4

  %md.25ulp = call float @llvm.sqrt.f32(float %x), !fpmath !0
  store volatile float %md.25ulp, ptr addrspace(1) %out, align 4

  %md.3ulp = call float @llvm.sqrt.f32(float %x), !fpmath !3
  store volatile float %md.3ulp, ptr addrspace(1) %out, align 4

  %md.2ulp = call float @llvm.sqrt.f32(float %x), !fpmath !4
  store volatile float %md.2ulp, ptr addrspace(1) %out, align 4
  ret void
}

; Accuracy sweep with %x declared nofpclass(nzero). The assertions expect all
; six sqrt calls to be left unchanged.
; NOTE(review): per the LangRef, nzero excludes only negative zero, while the
; function name reads "nonzero" - confirm whether nofpclass(zero) was intended.
define amdgpu_kernel void @sqrt_fpmath_f32_known_nonzero(ptr addrspace(1) %out, float nofpclass(nzero) %x) {
; CHECK-LABEL: define amdgpu_kernel void @sqrt_fpmath_f32_known_nonzero
; CHECK-SAME: (ptr addrspace(1) [[OUT:%.*]], float nofpclass(nzero) [[X:%.*]]) #[[ATTR1]] {
; CHECK-NEXT:    [[NO_MD:%.*]] = call float @llvm.sqrt.f32(float [[X]])
; CHECK-NEXT:    store volatile float [[NO_MD]], ptr addrspace(1) [[OUT]], align 4
; CHECK-NEXT:    [[MD_HALF_ULP:%.*]] = call float @llvm.sqrt.f32(float [[X]]), !fpmath !1
; CHECK-NEXT:    store volatile float [[MD_HALF_ULP]], ptr addrspace(1) [[OUT]], align 4
; CHECK-NEXT:    [[MD_1ULP:%.*]] = call float @llvm.sqrt.f32(float [[X]]), !fpmath !2
; CHECK-NEXT:    store volatile float [[MD_1ULP]], ptr addrspace(1) [[OUT]], align 4
; CHECK-NEXT:    [[MD_25ULP:%.*]] = call float @llvm.sqrt.f32(float [[X]]), !fpmath !3
; CHECK-NEXT:    store volatile float [[MD_25ULP]], ptr addrspace(1) [[OUT]], align 4
; CHECK-NEXT:    [[MD_3ULP:%.*]] = call float @llvm.sqrt.f32(float [[X]]), !fpmath !0
; CHECK-NEXT:    store volatile float [[MD_3ULP]], ptr addrspace(1) [[OUT]], align 4
; CHECK-NEXT:    [[MD_2ULP:%.*]] = call float @llvm.sqrt.f32(float [[X]]), !fpmath !4
; CHECK-NEXT:    store volatile float [[MD_2ULP]], ptr addrspace(1) [[OUT]], align 4
; CHECK-NEXT:    ret void
;
  %no.md = call float @llvm.sqrt.f32(float %x)
  store volatile float %no.md, ptr addrspace(1) %out, align 4

  %md.half.ulp = call float @llvm.sqrt.f32(float %x), !fpmath !1
  store volatile float %md.half.ulp, ptr addrspace(1) %out, align 4

  %md.1ulp = call float @llvm.sqrt.f32(float %x), !fpmath !2
  store volatile float %md.1ulp, ptr addrspace(1) %out, align 4

  %md.25ulp = call float @llvm.sqrt.f32(float %x), !fpmath !0
  store volatile float %md.25ulp, ptr addrspace(1) %out, align 4

  %md.3ulp = call float @llvm.sqrt.f32(float %x), !fpmath !3
  store volatile float %md.3ulp, ptr addrspace(1) %out, align 4

  %md.2ulp = call float @llvm.sqrt.f32(float %x), !fpmath !4
  store volatile float %md.2ulp, ptr addrspace(1) %out, align 4
  ret void
}

; Accuracy sweep with %x declared nofpclass(nzero nsub). The assertions expect
; all six sqrt calls to be left unchanged.
; NOTE(review): per the LangRef, nzero / nsub exclude only negative zero and
; negative subnormals, while the function name reads "nonzero_nonsub" - confirm
; whether nofpclass(zero sub) was intended.
define amdgpu_kernel void @sqrt_fpmath_f32_known_nonzero_nonsub(ptr addrspace(1) %out, float nofpclass(nzero nsub) %x) {
; CHECK-LABEL: define amdgpu_kernel void @sqrt_fpmath_f32_known_nonzero_nonsub
; CHECK-SAME: (ptr addrspace(1) [[OUT:%.*]], float nofpclass(nzero nsub) [[X:%.*]]) #[[ATTR1]] {
; CHECK-NEXT:    [[NO_MD:%.*]] = call float @llvm.sqrt.f32(float [[X]])
; CHECK-NEXT:    store volatile float [[NO_MD]], ptr addrspace(1) [[OUT]], align 4
; CHECK-NEXT:    [[MD_HALF_ULP:%.*]] = call float @llvm.sqrt.f32(float [[X]]), !fpmath !1
; CHECK-NEXT:    store volatile float [[MD_HALF_ULP]], ptr addrspace(1) [[OUT]], align 4
; CHECK-NEXT:    [[MD_1ULP:%.*]] = call float @llvm.sqrt.f32(float [[X]]), !fpmath !2
; CHECK-NEXT:    store volatile float [[MD_1ULP]], ptr addrspace(1) [[OUT]], align 4
; CHECK-NEXT:    [[MD_25ULP:%.*]] = call float @llvm.sqrt.f32(float [[X]]), !fpmath !3
; CHECK-NEXT:    store volatile float [[MD_25ULP]], ptr addrspace(1) [[OUT]], align 4
; CHECK-NEXT:    [[MD_3ULP:%.*]] = call float @llvm.sqrt.f32(float [[X]]), !fpmath !0
; CHECK-NEXT:    store volatile float [[MD_3ULP]], ptr addrspace(1) [[OUT]], align 4
; CHECK-NEXT:    [[MD_2ULP:%.*]] = call float @llvm.sqrt.f32(float [[X]]), !fpmath !4
; CHECK-NEXT:    store volatile float [[MD_2ULP]], ptr addrspace(1) [[OUT]], align 4
; CHECK-NEXT:    ret void
;
  %no.md = call float @llvm.sqrt.f32(float %x)
  store volatile float %no.md, ptr addrspace(1) %out, align 4

  %md.half.ulp = call float @llvm.sqrt.f32(float %x), !fpmath !1
  store volatile float %md.half.ulp, ptr addrspace(1) %out, align 4

  %md.1ulp = call float @llvm.sqrt.f32(float %x), !fpmath !2
  store volatile float %md.1ulp, ptr addrspace(1) %out, align 4

  %md.25ulp = call float @llvm.sqrt.f32(float %x), !fpmath !0
  store volatile float %md.25ulp, ptr addrspace(1) %out, align 4

  %md.3ulp = call float @llvm.sqrt.f32(float %x), !fpmath !3
  store volatile float %md.3ulp, ptr addrspace(1) %out, align 4

  %md.2ulp = call float @llvm.sqrt.f32(float %x), !fpmath !4
  store volatile float %md.2ulp, ptr addrspace(1) %out, align 4
  ret void
}

; Accuracy sweep with %x declared nofpclass(nzero nsub inf). The expected
; signature lists the same classes in the order "inf nzero nsub". The
; assertions expect all six sqrt calls to be left unchanged.
define amdgpu_kernel void @sqrt_fpmath_f32_known_nonzero_nonsub_noinf(ptr addrspace(1) %out, float nofpclass(nzero nsub inf) %x) {
; CHECK-LABEL: define amdgpu_kernel void @sqrt_fpmath_f32_known_nonzero_nonsub_noinf
; CHECK-SAME: (ptr addrspace(1) [[OUT:%.*]], float nofpclass(inf nzero nsub) [[X:%.*]]) #[[ATTR1]] {
; CHECK-NEXT:    [[NO_MD:%.*]] = call float @llvm.sqrt.f32(float [[X]])
; CHECK-NEXT:    store volatile float [[NO_MD]], ptr addrspace(1) [[OUT]], align 4
; CHECK-NEXT:    [[MD_HALF_ULP:%.*]] = call float @llvm.sqrt.f32(float [[X]]), !fpmath !1
; CHECK-NEXT:    store volatile float [[MD_HALF_ULP]], ptr addrspace(1) [[OUT]], align 4
; CHECK-NEXT:    [[MD_1ULP:%.*]] = call float @llvm.sqrt.f32(float [[X]]), !fpmath !2
; CHECK-NEXT:    store volatile float [[MD_1ULP]], ptr addrspace(1) [[OUT]], align 4
; CHECK-NEXT:    [[MD_25ULP:%.*]] = call float @llvm.sqrt.f32(float [[X]]), !fpmath !3
; CHECK-NEXT:    store volatile float [[MD_25ULP]], ptr addrspace(1) [[OUT]], align 4
; CHECK-NEXT:    [[MD_3ULP:%.*]] = call float @llvm.sqrt.f32(float [[X]]), !fpmath !0
; CHECK-NEXT:    store volatile float [[MD_3ULP]], ptr addrspace(1) [[OUT]], align 4
; CHECK-NEXT:    [[MD_2ULP:%.*]] = call float @llvm.sqrt.f32(float [[X]]), !fpmath !4
; CHECK-NEXT:    store volatile float [[MD_2ULP]], ptr addrspace(1) [[OUT]], align 4
; CHECK-NEXT:    ret void
;
  %no.md = call float @llvm.sqrt.f32(float %x)
  store volatile float %no.md, ptr addrspace(1) %out, align 4

  %md.half.ulp = call float @llvm.sqrt.f32(float %x), !fpmath !1
  store volatile float %md.half.ulp, ptr addrspace(1) %out, align 4

  %md.1ulp = call float @llvm.sqrt.f32(float %x), !fpmath !2
  store volatile float %md.1ulp, ptr addrspace(1) %out, align 4

  %md.25ulp = call float @llvm.sqrt.f32(float %x), !fpmath !0
  store volatile float %md.25ulp, ptr addrspace(1) %out, align 4

  %md.3ulp = call float @llvm.sqrt.f32(float %x), !fpmath !3
  store volatile float %md.3ulp, ptr addrspace(1) %out, align 4

  %md.2ulp = call float @llvm.sqrt.f32(float %x), !fpmath !4
  store volatile float %md.2ulp, ptr addrspace(1) %out, align 4
  ret void
}

; Accuracy sweep with %x declared nofpclass(psub), i.e. positive subnormals
; are excluded. The assertions expect all six sqrt calls to be left unchanged.
define amdgpu_kernel void @sqrt_fpmath_f32_known_nopsub(ptr addrspace(1) %out, float nofpclass(psub) %x) {
; CHECK-LABEL: define amdgpu_kernel void @sqrt_fpmath_f32_known_nopsub
; CHECK-SAME: (ptr addrspace(1) [[OUT:%.*]], float nofpclass(psub) [[X:%.*]]) #[[ATTR1]] {
; CHECK-NEXT:    [[NO_MD:%.*]] = call float @llvm.sqrt.f32(float [[X]])
; CHECK-NEXT:    store volatile float [[NO_MD]], ptr addrspace(1) [[OUT]], align 4
; CHECK-NEXT:    [[MD_HALF_ULP:%.*]] = call float @llvm.sqrt.f32(float [[X]]), !fpmath !1
; CHECK-NEXT:    store volatile float [[MD_HALF_ULP]], ptr addrspace(1) [[OUT]], align 4
; CHECK-NEXT:    [[MD_1ULP:%.*]] = call float @llvm.sqrt.f32(float [[X]]), !fpmath !2
; CHECK-NEXT:    store volatile float [[MD_1ULP]], ptr addrspace(1) [[OUT]], align 4
; CHECK-NEXT:    [[MD_25ULP:%.*]] = call float @llvm.sqrt.f32(float [[X]]), !fpmath !3
; CHECK-NEXT:    store volatile float [[MD_25ULP]], ptr addrspace(1) [[OUT]], align 4
; CHECK-NEXT:    [[MD_3ULP:%.*]] = call float @llvm.sqrt.f32(float [[X]]), !fpmath !0
; CHECK-NEXT:    store volatile float [[MD_3ULP]], ptr addrspace(1) [[OUT]], align 4
; CHECK-NEXT:    [[MD_2ULP:%.*]] = call float @llvm.sqrt.f32(float [[X]]), !fpmath !4
; CHECK-NEXT:    store volatile float [[MD_2ULP]], ptr addrspace(1) [[OUT]], align 4
; CHECK-NEXT:    ret void
;
  %no.md = call float @llvm.sqrt.f32(float %x)
  store volatile float %no.md, ptr addrspace(1) %out, align 4

  %md.half.ulp = call float @llvm.sqrt.f32(float %x), !fpmath !1
  store volatile float %md.half.ulp, ptr addrspace(1) %out, align 4

  %md.1ulp = call float @llvm.sqrt.f32(float %x), !fpmath !2
  store volatile float %md.1ulp, ptr addrspace(1) %out, align 4

  %md.25ulp = call float @llvm.sqrt.f32(float %x), !fpmath !0
  store volatile float %md.25ulp, ptr addrspace(1) %out, align 4

  %md.3ulp = call float @llvm.sqrt.f32(float %x), !fpmath !3
  store volatile float %md.3ulp, ptr addrspace(1) %out, align 4

  %md.2ulp = call float @llvm.sqrt.f32(float %x), !fpmath !4
  store volatile float %md.2ulp, ptr addrspace(1) %out, align 4
  ret void
}

; Accuracy sweep where every sqrt call carries the afn fast-math flag, with
; and without !fpmath metadata, on an unconstrained argument. The assertions
; expect all six calls, flag included, to be left unchanged.
define amdgpu_kernel void @sqrt_fpmath_f32_afn(ptr addrspace(1) %out, float %x) {
; CHECK-LABEL: define amdgpu_kernel void @sqrt_fpmath_f32_afn
; CHECK-SAME: (ptr addrspace(1) [[OUT:%.*]], float [[X:%.*]]) #[[ATTR1]] {
; CHECK-NEXT:    [[NO_MD:%.*]] = call afn float @llvm.sqrt.f32(float [[X]])
; CHECK-NEXT:    store volatile float [[NO_MD]], ptr addrspace(1) [[OUT]], align 4
; CHECK-NEXT:    [[MD_HALF_ULP:%.*]] = call afn float @llvm.sqrt.f32(float [[X]]), !fpmath !1
; CHECK-NEXT:    store volatile float [[MD_HALF_ULP]], ptr addrspace(1) [[OUT]], align 4
; CHECK-NEXT:    [[MD_1ULP:%.*]] = call afn float @llvm.sqrt.f32(float [[X]]), !fpmath !2
; CHECK-NEXT:    store volatile float [[MD_1ULP]], ptr addrspace(1) [[OUT]], align 4
; CHECK-NEXT:    [[MD_25ULP:%.*]] = call afn float @llvm.sqrt.f32(float [[X]]), !fpmath !3
; CHECK-NEXT:    store volatile float [[MD_25ULP]], ptr addrspace(1) [[OUT]], align 4
; CHECK-NEXT:    [[MD_3ULP:%.*]] = call afn float @llvm.sqrt.f32(float [[X]]), !fpmath !0
; CHECK-NEXT:    store volatile float [[MD_3ULP]], ptr addrspace(1) [[OUT]], align 4
; CHECK-NEXT:    [[MD_2ULP:%.*]] = call afn float @llvm.sqrt.f32(float [[X]]), !fpmath !4
; CHECK-NEXT:    store volatile float [[MD_2ULP]], ptr addrspace(1) [[OUT]], align 4
; CHECK-NEXT:    ret void
;
  %no.md = call afn float @llvm.sqrt.f32(float %x)
  store volatile float %no.md, ptr addrspace(1) %out, align 4

  %md.half.ulp = call afn float @llvm.sqrt.f32(float %x), !fpmath !1
  store volatile float %md.half.ulp, ptr addrspace(1) %out, align 4

  %md.1ulp = call afn float @llvm.sqrt.f32(float %x), !fpmath !2
  store volatile float %md.1ulp, ptr addrspace(1) %out, align 4

  %md.25ulp = call afn float @llvm.sqrt.f32(float %x), !fpmath !0
  store volatile float %md.25ulp, ptr addrspace(1) %out, align 4

  %md.3ulp = call afn float @llvm.sqrt.f32(float %x), !fpmath !3
  store volatile float %md.3ulp, ptr addrspace(1) %out, align 4

  %md.2ulp = call afn float @llvm.sqrt.f32(float %x), !fpmath !4
  store volatile float %md.2ulp, ptr addrspace(1) %out, align 4
  ret void
}

; Accuracy sweep where the "not subnormal" fact comes from llvm.assume rather
; than a parameter attribute: the kernel assumes
; fabs(%x) >= 0x3810000000000000, which is 2^-126, the smallest normal f32
; magnitude. After the usual six calls it also exercises two afn calls (3.0 ulp
; and no metadata). Each result is stored through its own volatile store, and
; the assertions expect every call to be left unchanged.
define amdgpu_kernel void @sqrt_fpmath_f32_assume_nosub(ptr addrspace(1) %out, float %x) {
; CHECK-LABEL: define amdgpu_kernel void @sqrt_fpmath_f32_assume_nosub
; CHECK-SAME: (ptr addrspace(1) [[OUT:%.*]], float [[X:%.*]]) #[[ATTR1]] {
; CHECK-NEXT:    [[FABS_X:%.*]] = call float @llvm.fabs.f32(float [[X]])
; CHECK-NEXT:    [[IS_NOT_SUBNORMAL:%.*]] = fcmp oge float [[FABS_X]], 0x3810000000000000
; CHECK-NEXT:    call void @llvm.assume(i1 [[IS_NOT_SUBNORMAL]])
; CHECK-NEXT:    [[NO_MD:%.*]] = call float @llvm.sqrt.f32(float [[X]])
; CHECK-NEXT:    store volatile float [[NO_MD]], ptr addrspace(1) [[OUT]], align 4
; CHECK-NEXT:    [[MD_HALF_ULP:%.*]] = call float @llvm.sqrt.f32(float [[X]]), !fpmath !1
; CHECK-NEXT:    store volatile float [[MD_HALF_ULP]], ptr addrspace(1) [[OUT]], align 4
; CHECK-NEXT:    [[MD_1ULP:%.*]] = call float @llvm.sqrt.f32(float [[X]]), !fpmath !2
; CHECK-NEXT:    store volatile float [[MD_1ULP]], ptr addrspace(1) [[OUT]], align 4
; CHECK-NEXT:    [[MD_25ULP:%.*]] = call float @llvm.sqrt.f32(float [[X]]), !fpmath !3
; CHECK-NEXT:    store volatile float [[MD_25ULP]], ptr addrspace(1) [[OUT]], align 4
; CHECK-NEXT:    [[MD_3ULP:%.*]] = call float @llvm.sqrt.f32(float [[X]]), !fpmath !0
; CHECK-NEXT:    store volatile float [[MD_3ULP]], ptr addrspace(1) [[OUT]], align 4
; CHECK-NEXT:    [[MD_2ULP:%.*]] = call float @llvm.sqrt.f32(float [[X]]), !fpmath !4
; CHECK-NEXT:    store volatile float [[MD_2ULP]], ptr addrspace(1) [[OUT]], align 4
; CHECK-NEXT:    [[MD_3ULP_AFN:%.*]] = call afn float @llvm.sqrt.f32(float [[X]]), !fpmath !0
; CHECK-NEXT:    store volatile float [[MD_3ULP_AFN]], ptr addrspace(1) [[OUT]], align 4
; CHECK-NEXT:    [[NO_MD_AFN:%.*]] = call afn float @llvm.sqrt.f32(float [[X]])
; CHECK-NEXT:    store volatile float [[NO_MD_AFN]], ptr addrspace(1) [[OUT]], align 4
; CHECK-NEXT:    ret void
;
  %fabs.x = call float @llvm.fabs.f32(float %x)
  %is.not.subnormal = fcmp oge float %fabs.x, 0x3810000000000000
  call void @llvm.assume(i1 %is.not.subnormal)

  %no.md = call float @llvm.sqrt.f32(float %x)
  store volatile float %no.md, ptr addrspace(1) %out, align 4

  %md.half.ulp = call float @llvm.sqrt.f32(float %x), !fpmath !1
  store volatile float %md.half.ulp, ptr addrspace(1) %out, align 4

  %md.1ulp = call float @llvm.sqrt.f32(float %x), !fpmath !2
  store volatile float %md.1ulp, ptr addrspace(1) %out, align 4

  %md.25ulp = call float @llvm.sqrt.f32(float %x), !fpmath !0
  store volatile float %md.25ulp, ptr addrspace(1) %out, align 4

  %md.3ulp = call float @llvm.sqrt.f32(float %x), !fpmath !3
  store volatile float %md.3ulp, ptr addrspace(1) %out, align 4

  %md.2ulp = call float @llvm.sqrt.f32(float %x), !fpmath !4
  store volatile float %md.2ulp, ptr addrspace(1) %out, align 4

  ; Store the afn result itself so this call has a user of its own.
  %md.3ulp.afn = call afn float @llvm.sqrt.f32(float %x), !fpmath !3
  store volatile float %md.3ulp.afn, ptr addrspace(1) %out, align 4

  %no.md.afn = call afn float @llvm.sqrt.f32(float %x)
  store volatile float %no.md.afn, ptr addrspace(1) %out, align 4

  ret void
}

; Intrinsics called by the kernels in this file.
declare float @llvm.sqrt.f32(float)
declare <2 x float> @llvm.sqrt.v2f32(<2 x float>)
declare float @llvm.fabs.f32(float)
declare void @llvm.assume(i1 noundef)

; Attribute group used only by @noop_sqrt_fpmath.
attributes #0 = { optnone noinline }

; !fpmath accuracy operands, in ulps. The assertions refer to the 2.5 and 3.0
; nodes by swapped indices (input !0 / !3 are expected as !3 / !0 in the
; output); !1, !2 and !4 keep their numbers.
!0 = !{float 2.500000e+00}
!1 = !{float 5.000000e-01}
!2 = !{float 1.000000e+00}
!3 = !{float 3.000000e+00}
!4 = !{float 2.000000e+00}
;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
; DAZ: {{.*}}
; IEEE: {{.*}}