Extends the new frexp scaled reciprocal to the general case. The reciprocal case is just the same thing when frexp of 1 is constant folded. Could probably clean up the code to rely on that constant folding. Improves results for the IEEE path for the default OpenCL division. We used to only emit the fdiv.fast intrinsic with a 2.5 ulp accuracy threshold with DAZ, which uses explicit range checks. This gives us a better fast option with the default IEEE behavior.
4304 lines
334 KiB
LLVM
4304 lines
334 KiB
LLVM
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 3
|
|
; RUN: opt -S -mtriple=amdgcn-- -mcpu=hawaii -passes=amdgpu-codegenprepare -denormal-fp-math-f32=ieee %s | FileCheck -check-prefixes=CHECK,IEEE,IEEE-GOODFREXP %s
|
|
; RUN: opt -S -mtriple=amdgcn-- -mcpu=tahiti -passes=amdgpu-codegenprepare -denormal-fp-math-f32=ieee %s | FileCheck -check-prefixes=CHECK,IEEE,IEEE-BADFREXP %s
|
|
; RUN: opt -S -mtriple=amdgcn-- -mcpu=hawaii -passes=amdgpu-codegenprepare -denormal-fp-math-f32=dynamic %s | FileCheck -check-prefixes=CHECK,IEEE,IEEE-GOODFREXP %s
|
|
; RUN: opt -S -mtriple=amdgcn-- -mcpu=hawaii -passes=amdgpu-codegenprepare -denormal-fp-math-f32=preserve-sign %s | FileCheck -check-prefixes=CHECK,DAZ %s
|
|
|
|
; Make sure this doesn't crash with no triple
|
|
; TODO: Delete when old PM deleted
|
|
; RUN: opt -amdgpu-codegenprepare -disable-output %s
|
|
|
|
|
|
define amdgpu_kernel void @noop_fdiv_fpmath(ptr addrspace(1) %out, float %a, float %b) #0 {
|
|
; CHECK-LABEL: define amdgpu_kernel void @noop_fdiv_fpmath
|
|
; CHECK-SAME: (ptr addrspace(1) [[OUT:%.*]], float [[A:%.*]], float [[B:%.*]]) #[[ATTR0:[0-9]+]] {
|
|
; CHECK-NEXT: [[MD_25ULP:%.*]] = fdiv float [[A]], [[B]], !fpmath !0
|
|
; CHECK-NEXT: store volatile float [[MD_25ULP]], ptr addrspace(1) [[OUT]], align 4
|
|
; CHECK-NEXT: ret void
|
|
;
|
|
%md.25ulp = fdiv float %a, %b, !fpmath !0
|
|
store volatile float %md.25ulp, ptr addrspace(1) %out, align 4
|
|
ret void
|
|
}
|
|
|
|
define amdgpu_kernel void @fdiv_fpmath_f32(ptr addrspace(1) %out, float %a, float %b) {
|
|
; IEEE-GOODFREXP-LABEL: define amdgpu_kernel void @fdiv_fpmath_f32
|
|
; IEEE-GOODFREXP-SAME: (ptr addrspace(1) [[OUT:%.*]], float [[A:%.*]], float [[B:%.*]]) #[[ATTR1:[0-9]+]] {
|
|
; IEEE-GOODFREXP-NEXT: [[NO_MD:%.*]] = fdiv float [[A]], [[B]]
|
|
; IEEE-GOODFREXP-NEXT: store volatile float [[NO_MD]], ptr addrspace(1) [[OUT]], align 4
|
|
; IEEE-GOODFREXP-NEXT: [[MD_HALF_ULP:%.*]] = fdiv float [[A]], [[B]], !fpmath !1
|
|
; IEEE-GOODFREXP-NEXT: store volatile float [[MD_HALF_ULP]], ptr addrspace(1) [[OUT]], align 4
|
|
; IEEE-GOODFREXP-NEXT: [[TMP1:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[B]])
|
|
; IEEE-GOODFREXP-NEXT: [[TMP2:%.*]] = extractvalue { float, i32 } [[TMP1]], 0
|
|
; IEEE-GOODFREXP-NEXT: [[TMP3:%.*]] = extractvalue { float, i32 } [[TMP1]], 1
|
|
; IEEE-GOODFREXP-NEXT: [[TMP4:%.*]] = call float @llvm.amdgcn.rcp.f32(float [[TMP2]])
|
|
; IEEE-GOODFREXP-NEXT: [[TMP5:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[A]])
|
|
; IEEE-GOODFREXP-NEXT: [[TMP6:%.*]] = extractvalue { float, i32 } [[TMP5]], 0
|
|
; IEEE-GOODFREXP-NEXT: [[TMP7:%.*]] = extractvalue { float, i32 } [[TMP5]], 1
|
|
; IEEE-GOODFREXP-NEXT: [[TMP8:%.*]] = fmul float [[TMP6]], [[TMP4]]
|
|
; IEEE-GOODFREXP-NEXT: [[TMP9:%.*]] = sub i32 [[TMP7]], [[TMP3]]
|
|
; IEEE-GOODFREXP-NEXT: [[MD_1ULP:%.*]] = call float @llvm.ldexp.f32.i32(float [[TMP8]], i32 [[TMP9]])
|
|
; IEEE-GOODFREXP-NEXT: store volatile float [[MD_1ULP]], ptr addrspace(1) [[OUT]], align 4
|
|
; IEEE-GOODFREXP-NEXT: [[TMP10:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[B]])
|
|
; IEEE-GOODFREXP-NEXT: [[TMP11:%.*]] = extractvalue { float, i32 } [[TMP10]], 0
|
|
; IEEE-GOODFREXP-NEXT: [[TMP12:%.*]] = extractvalue { float, i32 } [[TMP10]], 1
|
|
; IEEE-GOODFREXP-NEXT: [[TMP13:%.*]] = call float @llvm.amdgcn.rcp.f32(float [[TMP11]])
|
|
; IEEE-GOODFREXP-NEXT: [[TMP14:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[A]])
|
|
; IEEE-GOODFREXP-NEXT: [[TMP15:%.*]] = extractvalue { float, i32 } [[TMP14]], 0
|
|
; IEEE-GOODFREXP-NEXT: [[TMP16:%.*]] = extractvalue { float, i32 } [[TMP14]], 1
|
|
; IEEE-GOODFREXP-NEXT: [[TMP17:%.*]] = fmul float [[TMP15]], [[TMP13]]
|
|
; IEEE-GOODFREXP-NEXT: [[TMP18:%.*]] = sub i32 [[TMP16]], [[TMP12]]
|
|
; IEEE-GOODFREXP-NEXT: [[MD_25ULP:%.*]] = call float @llvm.ldexp.f32.i32(float [[TMP17]], i32 [[TMP18]])
|
|
; IEEE-GOODFREXP-NEXT: store volatile float [[MD_25ULP]], ptr addrspace(1) [[OUT]], align 4
|
|
; IEEE-GOODFREXP-NEXT: [[TMP19:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[B]])
|
|
; IEEE-GOODFREXP-NEXT: [[TMP20:%.*]] = extractvalue { float, i32 } [[TMP19]], 0
|
|
; IEEE-GOODFREXP-NEXT: [[TMP21:%.*]] = extractvalue { float, i32 } [[TMP19]], 1
|
|
; IEEE-GOODFREXP-NEXT: [[TMP22:%.*]] = call float @llvm.amdgcn.rcp.f32(float [[TMP20]])
|
|
; IEEE-GOODFREXP-NEXT: [[TMP23:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[A]])
|
|
; IEEE-GOODFREXP-NEXT: [[TMP24:%.*]] = extractvalue { float, i32 } [[TMP23]], 0
|
|
; IEEE-GOODFREXP-NEXT: [[TMP25:%.*]] = extractvalue { float, i32 } [[TMP23]], 1
|
|
; IEEE-GOODFREXP-NEXT: [[TMP26:%.*]] = fmul float [[TMP24]], [[TMP22]]
|
|
; IEEE-GOODFREXP-NEXT: [[TMP27:%.*]] = sub i32 [[TMP25]], [[TMP21]]
|
|
; IEEE-GOODFREXP-NEXT: [[MD_3ULP:%.*]] = call float @llvm.ldexp.f32.i32(float [[TMP26]], i32 [[TMP27]])
|
|
; IEEE-GOODFREXP-NEXT: store volatile float [[MD_3ULP]], ptr addrspace(1) [[OUT]], align 4
|
|
; IEEE-GOODFREXP-NEXT: [[FAST_MD_25ULP:%.*]] = fdiv fast float [[A]], [[B]], !fpmath !0
|
|
; IEEE-GOODFREXP-NEXT: store volatile float [[FAST_MD_25ULP]], ptr addrspace(1) [[OUT]], align 4
|
|
; IEEE-GOODFREXP-NEXT: [[AFN_MD_25ULP:%.*]] = fdiv afn float [[A]], [[B]], !fpmath !0
|
|
; IEEE-GOODFREXP-NEXT: store volatile float [[AFN_MD_25ULP]], ptr addrspace(1) [[OUT]], align 4
|
|
; IEEE-GOODFREXP-NEXT: [[NO_MD_ARCP:%.*]] = fdiv arcp float [[A]], [[B]]
|
|
; IEEE-GOODFREXP-NEXT: store volatile float [[NO_MD_ARCP]], ptr addrspace(1) [[OUT]], align 4
|
|
; IEEE-GOODFREXP-NEXT: [[TMP28:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[B]])
|
|
; IEEE-GOODFREXP-NEXT: [[TMP29:%.*]] = extractvalue { float, i32 } [[TMP28]], 0
|
|
; IEEE-GOODFREXP-NEXT: [[TMP30:%.*]] = extractvalue { float, i32 } [[TMP28]], 1
|
|
; IEEE-GOODFREXP-NEXT: [[TMP31:%.*]] = sub i32 0, [[TMP30]]
|
|
; IEEE-GOODFREXP-NEXT: [[TMP32:%.*]] = call arcp float @llvm.amdgcn.rcp.f32(float [[TMP29]])
|
|
; IEEE-GOODFREXP-NEXT: [[TMP33:%.*]] = call arcp float @llvm.ldexp.f32.i32(float [[TMP32]], i32 [[TMP31]])
|
|
; IEEE-GOODFREXP-NEXT: [[ARCP_MD_25ULP:%.*]] = fmul arcp float [[A]], [[TMP33]]
|
|
; IEEE-GOODFREXP-NEXT: store volatile float [[ARCP_MD_25ULP]], ptr addrspace(1) [[OUT]], align 4
|
|
; IEEE-GOODFREXP-NEXT: [[TMP34:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[B]])
|
|
; IEEE-GOODFREXP-NEXT: [[TMP35:%.*]] = extractvalue { float, i32 } [[TMP34]], 0
|
|
; IEEE-GOODFREXP-NEXT: [[TMP36:%.*]] = extractvalue { float, i32 } [[TMP34]], 1
|
|
; IEEE-GOODFREXP-NEXT: [[TMP37:%.*]] = sub i32 0, [[TMP36]]
|
|
; IEEE-GOODFREXP-NEXT: [[TMP38:%.*]] = call arcp float @llvm.amdgcn.rcp.f32(float [[TMP35]])
|
|
; IEEE-GOODFREXP-NEXT: [[TMP39:%.*]] = call arcp float @llvm.ldexp.f32.i32(float [[TMP38]], i32 [[TMP37]])
|
|
; IEEE-GOODFREXP-NEXT: [[ARCP_MD_1ULP:%.*]] = fmul arcp float [[A]], [[TMP39]]
|
|
; IEEE-GOODFREXP-NEXT: store volatile float [[ARCP_MD_1ULP]], ptr addrspace(1) [[OUT]], align 4
|
|
; IEEE-GOODFREXP-NEXT: ret void
|
|
;
|
|
; IEEE-BADFREXP-LABEL: define amdgpu_kernel void @fdiv_fpmath_f32
|
|
; IEEE-BADFREXP-SAME: (ptr addrspace(1) [[OUT:%.*]], float [[A:%.*]], float [[B:%.*]]) #[[ATTR1:[0-9]+]] {
|
|
; IEEE-BADFREXP-NEXT: [[NO_MD:%.*]] = fdiv float [[A]], [[B]]
|
|
; IEEE-BADFREXP-NEXT: store volatile float [[NO_MD]], ptr addrspace(1) [[OUT]], align 4
|
|
; IEEE-BADFREXP-NEXT: [[MD_HALF_ULP:%.*]] = fdiv float [[A]], [[B]], !fpmath !1
|
|
; IEEE-BADFREXP-NEXT: store volatile float [[MD_HALF_ULP]], ptr addrspace(1) [[OUT]], align 4
|
|
; IEEE-BADFREXP-NEXT: [[TMP1:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[B]])
|
|
; IEEE-BADFREXP-NEXT: [[TMP2:%.*]] = extractvalue { float, i32 } [[TMP1]], 0
|
|
; IEEE-BADFREXP-NEXT: [[TMP3:%.*]] = call i32 @llvm.amdgcn.frexp.exp.i32.f32(float [[B]])
|
|
; IEEE-BADFREXP-NEXT: [[TMP4:%.*]] = call float @llvm.amdgcn.rcp.f32(float [[TMP2]])
|
|
; IEEE-BADFREXP-NEXT: [[TMP5:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[A]])
|
|
; IEEE-BADFREXP-NEXT: [[TMP6:%.*]] = extractvalue { float, i32 } [[TMP5]], 0
|
|
; IEEE-BADFREXP-NEXT: [[TMP7:%.*]] = call i32 @llvm.amdgcn.frexp.exp.i32.f32(float [[A]])
|
|
; IEEE-BADFREXP-NEXT: [[TMP8:%.*]] = fmul float [[TMP6]], [[TMP4]]
|
|
; IEEE-BADFREXP-NEXT: [[TMP9:%.*]] = sub i32 [[TMP7]], [[TMP3]]
|
|
; IEEE-BADFREXP-NEXT: [[MD_1ULP:%.*]] = call float @llvm.ldexp.f32.i32(float [[TMP8]], i32 [[TMP9]])
|
|
; IEEE-BADFREXP-NEXT: store volatile float [[MD_1ULP]], ptr addrspace(1) [[OUT]], align 4
|
|
; IEEE-BADFREXP-NEXT: [[TMP10:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[B]])
|
|
; IEEE-BADFREXP-NEXT: [[TMP11:%.*]] = extractvalue { float, i32 } [[TMP10]], 0
|
|
; IEEE-BADFREXP-NEXT: [[TMP12:%.*]] = call i32 @llvm.amdgcn.frexp.exp.i32.f32(float [[B]])
|
|
; IEEE-BADFREXP-NEXT: [[TMP13:%.*]] = call float @llvm.amdgcn.rcp.f32(float [[TMP11]])
|
|
; IEEE-BADFREXP-NEXT: [[TMP14:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[A]])
|
|
; IEEE-BADFREXP-NEXT: [[TMP15:%.*]] = extractvalue { float, i32 } [[TMP14]], 0
|
|
; IEEE-BADFREXP-NEXT: [[TMP16:%.*]] = call i32 @llvm.amdgcn.frexp.exp.i32.f32(float [[A]])
|
|
; IEEE-BADFREXP-NEXT: [[TMP17:%.*]] = fmul float [[TMP15]], [[TMP13]]
|
|
; IEEE-BADFREXP-NEXT: [[TMP18:%.*]] = sub i32 [[TMP16]], [[TMP12]]
|
|
; IEEE-BADFREXP-NEXT: [[MD_25ULP:%.*]] = call float @llvm.ldexp.f32.i32(float [[TMP17]], i32 [[TMP18]])
|
|
; IEEE-BADFREXP-NEXT: store volatile float [[MD_25ULP]], ptr addrspace(1) [[OUT]], align 4
|
|
; IEEE-BADFREXP-NEXT: [[TMP19:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[B]])
|
|
; IEEE-BADFREXP-NEXT: [[TMP20:%.*]] = extractvalue { float, i32 } [[TMP19]], 0
|
|
; IEEE-BADFREXP-NEXT: [[TMP21:%.*]] = call i32 @llvm.amdgcn.frexp.exp.i32.f32(float [[B]])
|
|
; IEEE-BADFREXP-NEXT: [[TMP22:%.*]] = call float @llvm.amdgcn.rcp.f32(float [[TMP20]])
|
|
; IEEE-BADFREXP-NEXT: [[TMP23:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[A]])
|
|
; IEEE-BADFREXP-NEXT: [[TMP24:%.*]] = extractvalue { float, i32 } [[TMP23]], 0
|
|
; IEEE-BADFREXP-NEXT: [[TMP25:%.*]] = call i32 @llvm.amdgcn.frexp.exp.i32.f32(float [[A]])
|
|
; IEEE-BADFREXP-NEXT: [[TMP26:%.*]] = fmul float [[TMP24]], [[TMP22]]
|
|
; IEEE-BADFREXP-NEXT: [[TMP27:%.*]] = sub i32 [[TMP25]], [[TMP21]]
|
|
; IEEE-BADFREXP-NEXT: [[MD_3ULP:%.*]] = call float @llvm.ldexp.f32.i32(float [[TMP26]], i32 [[TMP27]])
|
|
; IEEE-BADFREXP-NEXT: store volatile float [[MD_3ULP]], ptr addrspace(1) [[OUT]], align 4
|
|
; IEEE-BADFREXP-NEXT: [[FAST_MD_25ULP:%.*]] = fdiv fast float [[A]], [[B]], !fpmath !0
|
|
; IEEE-BADFREXP-NEXT: store volatile float [[FAST_MD_25ULP]], ptr addrspace(1) [[OUT]], align 4
|
|
; IEEE-BADFREXP-NEXT: [[AFN_MD_25ULP:%.*]] = fdiv afn float [[A]], [[B]], !fpmath !0
|
|
; IEEE-BADFREXP-NEXT: store volatile float [[AFN_MD_25ULP]], ptr addrspace(1) [[OUT]], align 4
|
|
; IEEE-BADFREXP-NEXT: [[NO_MD_ARCP:%.*]] = fdiv arcp float [[A]], [[B]]
|
|
; IEEE-BADFREXP-NEXT: store volatile float [[NO_MD_ARCP]], ptr addrspace(1) [[OUT]], align 4
|
|
; IEEE-BADFREXP-NEXT: [[TMP28:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[B]])
|
|
; IEEE-BADFREXP-NEXT: [[TMP29:%.*]] = extractvalue { float, i32 } [[TMP28]], 0
|
|
; IEEE-BADFREXP-NEXT: [[TMP30:%.*]] = call i32 @llvm.amdgcn.frexp.exp.i32.f32(float [[B]])
|
|
; IEEE-BADFREXP-NEXT: [[TMP31:%.*]] = sub i32 0, [[TMP30]]
|
|
; IEEE-BADFREXP-NEXT: [[TMP32:%.*]] = call arcp float @llvm.amdgcn.rcp.f32(float [[TMP29]])
|
|
; IEEE-BADFREXP-NEXT: [[TMP33:%.*]] = call arcp float @llvm.ldexp.f32.i32(float [[TMP32]], i32 [[TMP31]])
|
|
; IEEE-BADFREXP-NEXT: [[ARCP_MD_25ULP:%.*]] = fmul arcp float [[A]], [[TMP33]]
|
|
; IEEE-BADFREXP-NEXT: store volatile float [[ARCP_MD_25ULP]], ptr addrspace(1) [[OUT]], align 4
|
|
; IEEE-BADFREXP-NEXT: [[TMP34:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[B]])
|
|
; IEEE-BADFREXP-NEXT: [[TMP35:%.*]] = extractvalue { float, i32 } [[TMP34]], 0
|
|
; IEEE-BADFREXP-NEXT: [[TMP36:%.*]] = call i32 @llvm.amdgcn.frexp.exp.i32.f32(float [[B]])
|
|
; IEEE-BADFREXP-NEXT: [[TMP37:%.*]] = sub i32 0, [[TMP36]]
|
|
; IEEE-BADFREXP-NEXT: [[TMP38:%.*]] = call arcp float @llvm.amdgcn.rcp.f32(float [[TMP35]])
|
|
; IEEE-BADFREXP-NEXT: [[TMP39:%.*]] = call arcp float @llvm.ldexp.f32.i32(float [[TMP38]], i32 [[TMP37]])
|
|
; IEEE-BADFREXP-NEXT: [[ARCP_MD_1ULP:%.*]] = fmul arcp float [[A]], [[TMP39]]
|
|
; IEEE-BADFREXP-NEXT: store volatile float [[ARCP_MD_1ULP]], ptr addrspace(1) [[OUT]], align 4
|
|
; IEEE-BADFREXP-NEXT: ret void
|
|
;
|
|
; DAZ-LABEL: define amdgpu_kernel void @fdiv_fpmath_f32
|
|
; DAZ-SAME: (ptr addrspace(1) [[OUT:%.*]], float [[A:%.*]], float [[B:%.*]]) #[[ATTR1:[0-9]+]] {
|
|
; DAZ-NEXT: [[NO_MD:%.*]] = fdiv float [[A]], [[B]]
|
|
; DAZ-NEXT: store volatile float [[NO_MD]], ptr addrspace(1) [[OUT]], align 4
|
|
; DAZ-NEXT: [[MD_HALF_ULP:%.*]] = fdiv float [[A]], [[B]], !fpmath !1
|
|
; DAZ-NEXT: store volatile float [[MD_HALF_ULP]], ptr addrspace(1) [[OUT]], align 4
|
|
; DAZ-NEXT: [[TMP1:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[B]])
|
|
; DAZ-NEXT: [[TMP2:%.*]] = extractvalue { float, i32 } [[TMP1]], 0
|
|
; DAZ-NEXT: [[TMP3:%.*]] = extractvalue { float, i32 } [[TMP1]], 1
|
|
; DAZ-NEXT: [[TMP4:%.*]] = call float @llvm.amdgcn.rcp.f32(float [[TMP2]])
|
|
; DAZ-NEXT: [[TMP5:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[A]])
|
|
; DAZ-NEXT: [[TMP6:%.*]] = extractvalue { float, i32 } [[TMP5]], 0
|
|
; DAZ-NEXT: [[TMP7:%.*]] = extractvalue { float, i32 } [[TMP5]], 1
|
|
; DAZ-NEXT: [[TMP8:%.*]] = fmul float [[TMP6]], [[TMP4]]
|
|
; DAZ-NEXT: [[TMP9:%.*]] = sub i32 [[TMP7]], [[TMP3]]
|
|
; DAZ-NEXT: [[MD_1ULP:%.*]] = call float @llvm.ldexp.f32.i32(float [[TMP8]], i32 [[TMP9]])
|
|
; DAZ-NEXT: store volatile float [[MD_1ULP]], ptr addrspace(1) [[OUT]], align 4
|
|
; DAZ-NEXT: [[MD_25ULP:%.*]] = call float @llvm.amdgcn.fdiv.fast(float [[A]], float [[B]])
|
|
; DAZ-NEXT: store volatile float [[MD_25ULP]], ptr addrspace(1) [[OUT]], align 4
|
|
; DAZ-NEXT: [[MD_3ULP:%.*]] = call float @llvm.amdgcn.fdiv.fast(float [[A]], float [[B]])
|
|
; DAZ-NEXT: store volatile float [[MD_3ULP]], ptr addrspace(1) [[OUT]], align 4
|
|
; DAZ-NEXT: [[FAST_MD_25ULP:%.*]] = fdiv fast float [[A]], [[B]], !fpmath !0
|
|
; DAZ-NEXT: store volatile float [[FAST_MD_25ULP]], ptr addrspace(1) [[OUT]], align 4
|
|
; DAZ-NEXT: [[AFN_MD_25ULP:%.*]] = fdiv afn float [[A]], [[B]], !fpmath !0
|
|
; DAZ-NEXT: store volatile float [[AFN_MD_25ULP]], ptr addrspace(1) [[OUT]], align 4
|
|
; DAZ-NEXT: [[NO_MD_ARCP:%.*]] = fdiv arcp float [[A]], [[B]]
|
|
; DAZ-NEXT: store volatile float [[NO_MD_ARCP]], ptr addrspace(1) [[OUT]], align 4
|
|
; DAZ-NEXT: [[TMP10:%.*]] = call arcp float @llvm.amdgcn.rcp.f32(float [[B]])
|
|
; DAZ-NEXT: [[ARCP_MD_25ULP:%.*]] = fmul arcp float [[A]], [[TMP10]]
|
|
; DAZ-NEXT: store volatile float [[ARCP_MD_25ULP]], ptr addrspace(1) [[OUT]], align 4
|
|
; DAZ-NEXT: [[TMP11:%.*]] = call arcp float @llvm.amdgcn.rcp.f32(float [[B]])
|
|
; DAZ-NEXT: [[ARCP_MD_1ULP:%.*]] = fmul arcp float [[A]], [[TMP11]]
|
|
; DAZ-NEXT: store volatile float [[ARCP_MD_1ULP]], ptr addrspace(1) [[OUT]], align 4
|
|
; DAZ-NEXT: ret void
|
|
;
|
|
%no.md = fdiv float %a, %b
|
|
store volatile float %no.md, ptr addrspace(1) %out, align 4
|
|
%md.half.ulp = fdiv float %a, %b, !fpmath !1
|
|
store volatile float %md.half.ulp, ptr addrspace(1) %out, align 4
|
|
%md.1ulp = fdiv float %a, %b, !fpmath !2
|
|
store volatile float %md.1ulp, ptr addrspace(1) %out, align 4
|
|
%md.25ulp = fdiv float %a, %b, !fpmath !0
|
|
store volatile float %md.25ulp, ptr addrspace(1) %out, align 4
|
|
%md.3ulp = fdiv float %a, %b, !fpmath !3
|
|
store volatile float %md.3ulp, ptr addrspace(1) %out, align 4
|
|
%fast.md.25ulp = fdiv fast float %a, %b, !fpmath !0
|
|
store volatile float %fast.md.25ulp, ptr addrspace(1) %out, align 4
|
|
%afn.md.25ulp = fdiv afn float %a, %b, !fpmath !0
|
|
store volatile float %afn.md.25ulp, ptr addrspace(1) %out, align 4
|
|
%no.md.arcp = fdiv arcp float %a, %b
|
|
store volatile float %no.md.arcp, ptr addrspace(1) %out, align 4
|
|
%arcp.md.25ulp = fdiv arcp float %a, %b, !fpmath !0
|
|
store volatile float %arcp.md.25ulp, ptr addrspace(1) %out, align 4
|
|
%arcp.md.1ulp = fdiv arcp float %a, %b, !fpmath !2
|
|
store volatile float %arcp.md.1ulp, ptr addrspace(1) %out, align 4
|
|
ret void
|
|
}
|
|
|
|
define amdgpu_kernel void @fdiv_fpmath_f32_flags(ptr addrspace(1) %out, float %a, float %b) {
|
|
; IEEE-GOODFREXP-LABEL: define amdgpu_kernel void @fdiv_fpmath_f32_flags
|
|
; IEEE-GOODFREXP-SAME: (ptr addrspace(1) [[OUT:%.*]], float [[A:%.*]], float [[B:%.*]]) #[[ATTR1]] {
|
|
; IEEE-GOODFREXP-NEXT: [[TMP1:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[B]])
|
|
; IEEE-GOODFREXP-NEXT: [[TMP2:%.*]] = extractvalue { float, i32 } [[TMP1]], 0
|
|
; IEEE-GOODFREXP-NEXT: [[TMP3:%.*]] = extractvalue { float, i32 } [[TMP1]], 1
|
|
; IEEE-GOODFREXP-NEXT: [[TMP4:%.*]] = call nnan ninf float @llvm.amdgcn.rcp.f32(float [[TMP2]])
|
|
; IEEE-GOODFREXP-NEXT: [[TMP5:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[A]])
|
|
; IEEE-GOODFREXP-NEXT: [[TMP6:%.*]] = extractvalue { float, i32 } [[TMP5]], 0
|
|
; IEEE-GOODFREXP-NEXT: [[TMP7:%.*]] = extractvalue { float, i32 } [[TMP5]], 1
|
|
; IEEE-GOODFREXP-NEXT: [[TMP8:%.*]] = fmul nnan ninf float [[TMP6]], [[TMP4]]
|
|
; IEEE-GOODFREXP-NEXT: [[TMP9:%.*]] = sub i32 [[TMP7]], [[TMP3]]
|
|
; IEEE-GOODFREXP-NEXT: [[MD_1ULP_NINF_NNAN:%.*]] = call nnan ninf float @llvm.ldexp.f32.i32(float [[TMP8]], i32 [[TMP9]])
|
|
; IEEE-GOODFREXP-NEXT: store volatile float [[MD_1ULP_NINF_NNAN]], ptr addrspace(1) [[OUT]], align 4
|
|
; IEEE-GOODFREXP-NEXT: [[TMP10:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[B]])
|
|
; IEEE-GOODFREXP-NEXT: [[TMP11:%.*]] = extractvalue { float, i32 } [[TMP10]], 0
|
|
; IEEE-GOODFREXP-NEXT: [[TMP12:%.*]] = extractvalue { float, i32 } [[TMP10]], 1
|
|
; IEEE-GOODFREXP-NEXT: [[TMP13:%.*]] = call nnan ninf float @llvm.amdgcn.rcp.f32(float [[TMP11]])
|
|
; IEEE-GOODFREXP-NEXT: [[TMP14:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[A]])
|
|
; IEEE-GOODFREXP-NEXT: [[TMP15:%.*]] = extractvalue { float, i32 } [[TMP14]], 0
|
|
; IEEE-GOODFREXP-NEXT: [[TMP16:%.*]] = extractvalue { float, i32 } [[TMP14]], 1
|
|
; IEEE-GOODFREXP-NEXT: [[TMP17:%.*]] = fmul nnan ninf float [[TMP15]], [[TMP13]]
|
|
; IEEE-GOODFREXP-NEXT: [[TMP18:%.*]] = sub i32 [[TMP16]], [[TMP12]]
|
|
; IEEE-GOODFREXP-NEXT: [[MD_25ULP_NINF_NNAN:%.*]] = call nnan ninf float @llvm.ldexp.f32.i32(float [[TMP17]], i32 [[TMP18]])
|
|
; IEEE-GOODFREXP-NEXT: store volatile float [[MD_25ULP_NINF_NNAN]], ptr addrspace(1) [[OUT]], align 4
|
|
; IEEE-GOODFREXP-NEXT: [[TMP19:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[B]])
|
|
; IEEE-GOODFREXP-NEXT: [[TMP20:%.*]] = extractvalue { float, i32 } [[TMP19]], 0
|
|
; IEEE-GOODFREXP-NEXT: [[TMP21:%.*]] = extractvalue { float, i32 } [[TMP19]], 1
|
|
; IEEE-GOODFREXP-NEXT: [[TMP22:%.*]] = call ninf float @llvm.amdgcn.rcp.f32(float [[TMP20]])
|
|
; IEEE-GOODFREXP-NEXT: [[TMP23:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[A]])
|
|
; IEEE-GOODFREXP-NEXT: [[TMP24:%.*]] = extractvalue { float, i32 } [[TMP23]], 0
|
|
; IEEE-GOODFREXP-NEXT: [[TMP25:%.*]] = extractvalue { float, i32 } [[TMP23]], 1
|
|
; IEEE-GOODFREXP-NEXT: [[TMP26:%.*]] = fmul ninf float [[TMP24]], [[TMP22]]
|
|
; IEEE-GOODFREXP-NEXT: [[TMP27:%.*]] = sub i32 [[TMP25]], [[TMP21]]
|
|
; IEEE-GOODFREXP-NEXT: [[MD_1ULP_NINF:%.*]] = call ninf float @llvm.ldexp.f32.i32(float [[TMP26]], i32 [[TMP27]])
|
|
; IEEE-GOODFREXP-NEXT: store volatile float [[MD_1ULP_NINF]], ptr addrspace(1) [[OUT]], align 4
|
|
; IEEE-GOODFREXP-NEXT: [[TMP28:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[B]])
|
|
; IEEE-GOODFREXP-NEXT: [[TMP29:%.*]] = extractvalue { float, i32 } [[TMP28]], 0
|
|
; IEEE-GOODFREXP-NEXT: [[TMP30:%.*]] = extractvalue { float, i32 } [[TMP28]], 1
|
|
; IEEE-GOODFREXP-NEXT: [[TMP31:%.*]] = call ninf float @llvm.amdgcn.rcp.f32(float [[TMP29]])
|
|
; IEEE-GOODFREXP-NEXT: [[TMP32:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[A]])
|
|
; IEEE-GOODFREXP-NEXT: [[TMP33:%.*]] = extractvalue { float, i32 } [[TMP32]], 0
|
|
; IEEE-GOODFREXP-NEXT: [[TMP34:%.*]] = extractvalue { float, i32 } [[TMP32]], 1
|
|
; IEEE-GOODFREXP-NEXT: [[TMP35:%.*]] = fmul ninf float [[TMP33]], [[TMP31]]
|
|
; IEEE-GOODFREXP-NEXT: [[TMP36:%.*]] = sub i32 [[TMP34]], [[TMP30]]
|
|
; IEEE-GOODFREXP-NEXT: [[MD_25ULP_NINF:%.*]] = call ninf float @llvm.ldexp.f32.i32(float [[TMP35]], i32 [[TMP36]])
|
|
; IEEE-GOODFREXP-NEXT: store volatile float [[MD_25ULP_NINF]], ptr addrspace(1) [[OUT]], align 4
|
|
; IEEE-GOODFREXP-NEXT: [[TMP37:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[B]])
|
|
; IEEE-GOODFREXP-NEXT: [[TMP38:%.*]] = extractvalue { float, i32 } [[TMP37]], 0
|
|
; IEEE-GOODFREXP-NEXT: [[TMP39:%.*]] = extractvalue { float, i32 } [[TMP37]], 1
|
|
; IEEE-GOODFREXP-NEXT: [[TMP40:%.*]] = call nnan float @llvm.amdgcn.rcp.f32(float [[TMP38]])
|
|
; IEEE-GOODFREXP-NEXT: [[TMP41:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[A]])
|
|
; IEEE-GOODFREXP-NEXT: [[TMP42:%.*]] = extractvalue { float, i32 } [[TMP41]], 0
|
|
; IEEE-GOODFREXP-NEXT: [[TMP43:%.*]] = extractvalue { float, i32 } [[TMP41]], 1
|
|
; IEEE-GOODFREXP-NEXT: [[TMP44:%.*]] = fmul nnan float [[TMP42]], [[TMP40]]
|
|
; IEEE-GOODFREXP-NEXT: [[TMP45:%.*]] = sub i32 [[TMP43]], [[TMP39]]
|
|
; IEEE-GOODFREXP-NEXT: [[MD_1ULP_NNAN:%.*]] = call nnan float @llvm.ldexp.f32.i32(float [[TMP44]], i32 [[TMP45]])
|
|
; IEEE-GOODFREXP-NEXT: store volatile float [[MD_1ULP_NNAN]], ptr addrspace(1) [[OUT]], align 4
|
|
; IEEE-GOODFREXP-NEXT: [[TMP46:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[B]])
|
|
; IEEE-GOODFREXP-NEXT: [[TMP47:%.*]] = extractvalue { float, i32 } [[TMP46]], 0
|
|
; IEEE-GOODFREXP-NEXT: [[TMP48:%.*]] = extractvalue { float, i32 } [[TMP46]], 1
|
|
; IEEE-GOODFREXP-NEXT: [[TMP49:%.*]] = call nnan float @llvm.amdgcn.rcp.f32(float [[TMP47]])
|
|
; IEEE-GOODFREXP-NEXT: [[TMP50:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[A]])
|
|
; IEEE-GOODFREXP-NEXT: [[TMP51:%.*]] = extractvalue { float, i32 } [[TMP50]], 0
|
|
; IEEE-GOODFREXP-NEXT: [[TMP52:%.*]] = extractvalue { float, i32 } [[TMP50]], 1
|
|
; IEEE-GOODFREXP-NEXT: [[TMP53:%.*]] = fmul nnan float [[TMP51]], [[TMP49]]
|
|
; IEEE-GOODFREXP-NEXT: [[TMP54:%.*]] = sub i32 [[TMP52]], [[TMP48]]
|
|
; IEEE-GOODFREXP-NEXT: [[MD_25ULP_NNAN:%.*]] = call nnan float @llvm.ldexp.f32.i32(float [[TMP53]], i32 [[TMP54]])
|
|
; IEEE-GOODFREXP-NEXT: store volatile float [[MD_25ULP_NNAN]], ptr addrspace(1) [[OUT]], align 4
|
|
; IEEE-GOODFREXP-NEXT: ret void
|
|
;
|
|
; IEEE-BADFREXP-LABEL: define amdgpu_kernel void @fdiv_fpmath_f32_flags
|
|
; IEEE-BADFREXP-SAME: (ptr addrspace(1) [[OUT:%.*]], float [[A:%.*]], float [[B:%.*]]) #[[ATTR1]] {
|
|
; IEEE-BADFREXP-NEXT: [[TMP1:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[B]])
|
|
; IEEE-BADFREXP-NEXT: [[TMP2:%.*]] = extractvalue { float, i32 } [[TMP1]], 0
|
|
; IEEE-BADFREXP-NEXT: [[TMP3:%.*]] = call i32 @llvm.amdgcn.frexp.exp.i32.f32(float [[B]])
|
|
; IEEE-BADFREXP-NEXT: [[TMP4:%.*]] = call nnan ninf float @llvm.amdgcn.rcp.f32(float [[TMP2]])
|
|
; IEEE-BADFREXP-NEXT: [[TMP5:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[A]])
|
|
; IEEE-BADFREXP-NEXT: [[TMP6:%.*]] = extractvalue { float, i32 } [[TMP5]], 0
|
|
; IEEE-BADFREXP-NEXT: [[TMP7:%.*]] = call i32 @llvm.amdgcn.frexp.exp.i32.f32(float [[A]])
|
|
; IEEE-BADFREXP-NEXT: [[TMP8:%.*]] = fmul nnan ninf float [[TMP6]], [[TMP4]]
|
|
; IEEE-BADFREXP-NEXT: [[TMP9:%.*]] = sub i32 [[TMP7]], [[TMP3]]
|
|
; IEEE-BADFREXP-NEXT: [[MD_1ULP_NINF_NNAN:%.*]] = call nnan ninf float @llvm.ldexp.f32.i32(float [[TMP8]], i32 [[TMP9]])
|
|
; IEEE-BADFREXP-NEXT: store volatile float [[MD_1ULP_NINF_NNAN]], ptr addrspace(1) [[OUT]], align 4
|
|
; IEEE-BADFREXP-NEXT: [[TMP10:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[B]])
|
|
; IEEE-BADFREXP-NEXT: [[TMP11:%.*]] = extractvalue { float, i32 } [[TMP10]], 0
|
|
; IEEE-BADFREXP-NEXT: [[TMP12:%.*]] = call i32 @llvm.amdgcn.frexp.exp.i32.f32(float [[B]])
|
|
; IEEE-BADFREXP-NEXT: [[TMP13:%.*]] = call nnan ninf float @llvm.amdgcn.rcp.f32(float [[TMP11]])
|
|
; IEEE-BADFREXP-NEXT: [[TMP14:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[A]])
|
|
; IEEE-BADFREXP-NEXT: [[TMP15:%.*]] = extractvalue { float, i32 } [[TMP14]], 0
|
|
; IEEE-BADFREXP-NEXT: [[TMP16:%.*]] = call i32 @llvm.amdgcn.frexp.exp.i32.f32(float [[A]])
|
|
; IEEE-BADFREXP-NEXT: [[TMP17:%.*]] = fmul nnan ninf float [[TMP15]], [[TMP13]]
|
|
; IEEE-BADFREXP-NEXT: [[TMP18:%.*]] = sub i32 [[TMP16]], [[TMP12]]
|
|
; IEEE-BADFREXP-NEXT: [[MD_25ULP_NINF_NNAN:%.*]] = call nnan ninf float @llvm.ldexp.f32.i32(float [[TMP17]], i32 [[TMP18]])
|
|
; IEEE-BADFREXP-NEXT: store volatile float [[MD_25ULP_NINF_NNAN]], ptr addrspace(1) [[OUT]], align 4
|
|
; IEEE-BADFREXP-NEXT: [[TMP19:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[B]])
|
|
; IEEE-BADFREXP-NEXT: [[TMP20:%.*]] = extractvalue { float, i32 } [[TMP19]], 0
|
|
; IEEE-BADFREXP-NEXT: [[TMP21:%.*]] = call i32 @llvm.amdgcn.frexp.exp.i32.f32(float [[B]])
|
|
; IEEE-BADFREXP-NEXT: [[TMP22:%.*]] = call ninf float @llvm.amdgcn.rcp.f32(float [[TMP20]])
|
|
; IEEE-BADFREXP-NEXT: [[TMP23:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[A]])
|
|
; IEEE-BADFREXP-NEXT: [[TMP24:%.*]] = extractvalue { float, i32 } [[TMP23]], 0
|
|
; IEEE-BADFREXP-NEXT: [[TMP25:%.*]] = call i32 @llvm.amdgcn.frexp.exp.i32.f32(float [[A]])
|
|
; IEEE-BADFREXP-NEXT: [[TMP26:%.*]] = fmul ninf float [[TMP24]], [[TMP22]]
|
|
; IEEE-BADFREXP-NEXT: [[TMP27:%.*]] = sub i32 [[TMP25]], [[TMP21]]
|
|
; IEEE-BADFREXP-NEXT: [[MD_1ULP_NINF:%.*]] = call ninf float @llvm.ldexp.f32.i32(float [[TMP26]], i32 [[TMP27]])
|
|
; IEEE-BADFREXP-NEXT: store volatile float [[MD_1ULP_NINF]], ptr addrspace(1) [[OUT]], align 4
|
|
; IEEE-BADFREXP-NEXT: [[TMP28:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[B]])
|
|
; IEEE-BADFREXP-NEXT: [[TMP29:%.*]] = extractvalue { float, i32 } [[TMP28]], 0
|
|
; IEEE-BADFREXP-NEXT: [[TMP30:%.*]] = call i32 @llvm.amdgcn.frexp.exp.i32.f32(float [[B]])
|
|
; IEEE-BADFREXP-NEXT: [[TMP31:%.*]] = call ninf float @llvm.amdgcn.rcp.f32(float [[TMP29]])
|
|
; IEEE-BADFREXP-NEXT: [[TMP32:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[A]])
|
|
; IEEE-BADFREXP-NEXT: [[TMP33:%.*]] = extractvalue { float, i32 } [[TMP32]], 0
|
|
; IEEE-BADFREXP-NEXT: [[TMP34:%.*]] = call i32 @llvm.amdgcn.frexp.exp.i32.f32(float [[A]])
|
|
; IEEE-BADFREXP-NEXT: [[TMP35:%.*]] = fmul ninf float [[TMP33]], [[TMP31]]
|
|
; IEEE-BADFREXP-NEXT: [[TMP36:%.*]] = sub i32 [[TMP34]], [[TMP30]]
|
|
; IEEE-BADFREXP-NEXT: [[MD_25ULP_NINF:%.*]] = call ninf float @llvm.ldexp.f32.i32(float [[TMP35]], i32 [[TMP36]])
|
|
; IEEE-BADFREXP-NEXT: store volatile float [[MD_25ULP_NINF]], ptr addrspace(1) [[OUT]], align 4
|
|
; IEEE-BADFREXP-NEXT: [[TMP37:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[B]])
|
|
; IEEE-BADFREXP-NEXT: [[TMP38:%.*]] = extractvalue { float, i32 } [[TMP37]], 0
|
|
; IEEE-BADFREXP-NEXT: [[TMP39:%.*]] = call i32 @llvm.amdgcn.frexp.exp.i32.f32(float [[B]])
|
|
; IEEE-BADFREXP-NEXT: [[TMP40:%.*]] = call nnan float @llvm.amdgcn.rcp.f32(float [[TMP38]])
|
|
; IEEE-BADFREXP-NEXT: [[TMP41:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[A]])
|
|
; IEEE-BADFREXP-NEXT: [[TMP42:%.*]] = extractvalue { float, i32 } [[TMP41]], 0
|
|
; IEEE-BADFREXP-NEXT: [[TMP43:%.*]] = call i32 @llvm.amdgcn.frexp.exp.i32.f32(float [[A]])
|
|
; IEEE-BADFREXP-NEXT: [[TMP44:%.*]] = fmul nnan float [[TMP42]], [[TMP40]]
|
|
; IEEE-BADFREXP-NEXT: [[TMP45:%.*]] = sub i32 [[TMP43]], [[TMP39]]
|
|
; IEEE-BADFREXP-NEXT: [[MD_1ULP_NNAN:%.*]] = call nnan float @llvm.ldexp.f32.i32(float [[TMP44]], i32 [[TMP45]])
|
|
; IEEE-BADFREXP-NEXT: store volatile float [[MD_1ULP_NNAN]], ptr addrspace(1) [[OUT]], align 4
|
|
; IEEE-BADFREXP-NEXT: [[TMP46:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[B]])
|
|
; IEEE-BADFREXP-NEXT: [[TMP47:%.*]] = extractvalue { float, i32 } [[TMP46]], 0
|
|
; IEEE-BADFREXP-NEXT: [[TMP48:%.*]] = call i32 @llvm.amdgcn.frexp.exp.i32.f32(float [[B]])
|
|
; IEEE-BADFREXP-NEXT: [[TMP49:%.*]] = call nnan float @llvm.amdgcn.rcp.f32(float [[TMP47]])
|
|
; IEEE-BADFREXP-NEXT: [[TMP50:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[A]])
|
|
; IEEE-BADFREXP-NEXT: [[TMP51:%.*]] = extractvalue { float, i32 } [[TMP50]], 0
|
|
; IEEE-BADFREXP-NEXT: [[TMP52:%.*]] = call i32 @llvm.amdgcn.frexp.exp.i32.f32(float [[A]])
|
|
; IEEE-BADFREXP-NEXT: [[TMP53:%.*]] = fmul nnan float [[TMP51]], [[TMP49]]
|
|
; IEEE-BADFREXP-NEXT: [[TMP54:%.*]] = sub i32 [[TMP52]], [[TMP48]]
|
|
; IEEE-BADFREXP-NEXT: [[MD_25ULP_NNAN:%.*]] = call nnan float @llvm.ldexp.f32.i32(float [[TMP53]], i32 [[TMP54]])
|
|
; IEEE-BADFREXP-NEXT: store volatile float [[MD_25ULP_NNAN]], ptr addrspace(1) [[OUT]], align 4
|
|
; IEEE-BADFREXP-NEXT: ret void
|
|
;
|
|
; DAZ-LABEL: define amdgpu_kernel void @fdiv_fpmath_f32_flags
|
|
; DAZ-SAME: (ptr addrspace(1) [[OUT:%.*]], float [[A:%.*]], float [[B:%.*]]) #[[ATTR1]] {
|
|
; DAZ-NEXT: [[TMP1:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[B]])
|
|
; DAZ-NEXT: [[TMP2:%.*]] = extractvalue { float, i32 } [[TMP1]], 0
|
|
; DAZ-NEXT: [[TMP3:%.*]] = extractvalue { float, i32 } [[TMP1]], 1
|
|
; DAZ-NEXT: [[TMP4:%.*]] = call nnan ninf float @llvm.amdgcn.rcp.f32(float [[TMP2]])
|
|
; DAZ-NEXT: [[TMP5:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[A]])
|
|
; DAZ-NEXT: [[TMP6:%.*]] = extractvalue { float, i32 } [[TMP5]], 0
|
|
; DAZ-NEXT: [[TMP7:%.*]] = extractvalue { float, i32 } [[TMP5]], 1
|
|
; DAZ-NEXT: [[TMP8:%.*]] = fmul nnan ninf float [[TMP6]], [[TMP4]]
|
|
; DAZ-NEXT: [[TMP9:%.*]] = sub i32 [[TMP7]], [[TMP3]]
|
|
; DAZ-NEXT: [[MD_1ULP_NINF_NNAN:%.*]] = call nnan ninf float @llvm.ldexp.f32.i32(float [[TMP8]], i32 [[TMP9]])
|
|
; DAZ-NEXT: store volatile float [[MD_1ULP_NINF_NNAN]], ptr addrspace(1) [[OUT]], align 4
|
|
; DAZ-NEXT: [[MD_25ULP_NINF_NNAN:%.*]] = call nnan ninf float @llvm.amdgcn.fdiv.fast(float [[A]], float [[B]])
|
|
; DAZ-NEXT: store volatile float [[MD_25ULP_NINF_NNAN]], ptr addrspace(1) [[OUT]], align 4
|
|
; DAZ-NEXT: [[TMP10:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[B]])
|
|
; DAZ-NEXT: [[TMP11:%.*]] = extractvalue { float, i32 } [[TMP10]], 0
|
|
; DAZ-NEXT: [[TMP12:%.*]] = extractvalue { float, i32 } [[TMP10]], 1
|
|
; DAZ-NEXT: [[TMP13:%.*]] = call ninf float @llvm.amdgcn.rcp.f32(float [[TMP11]])
|
|
; DAZ-NEXT: [[TMP14:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[A]])
|
|
; DAZ-NEXT: [[TMP15:%.*]] = extractvalue { float, i32 } [[TMP14]], 0
|
|
; DAZ-NEXT: [[TMP16:%.*]] = extractvalue { float, i32 } [[TMP14]], 1
|
|
; DAZ-NEXT: [[TMP17:%.*]] = fmul ninf float [[TMP15]], [[TMP13]]
|
|
; DAZ-NEXT: [[TMP18:%.*]] = sub i32 [[TMP16]], [[TMP12]]
|
|
; DAZ-NEXT: [[MD_1ULP_NINF:%.*]] = call ninf float @llvm.ldexp.f32.i32(float [[TMP17]], i32 [[TMP18]])
|
|
; DAZ-NEXT: store volatile float [[MD_1ULP_NINF]], ptr addrspace(1) [[OUT]], align 4
|
|
; DAZ-NEXT: [[MD_25ULP_NINF:%.*]] = call ninf float @llvm.amdgcn.fdiv.fast(float [[A]], float [[B]])
|
|
; DAZ-NEXT: store volatile float [[MD_25ULP_NINF]], ptr addrspace(1) [[OUT]], align 4
|
|
; DAZ-NEXT: [[TMP19:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[B]])
|
|
; DAZ-NEXT: [[TMP20:%.*]] = extractvalue { float, i32 } [[TMP19]], 0
|
|
; DAZ-NEXT: [[TMP21:%.*]] = extractvalue { float, i32 } [[TMP19]], 1
|
|
; DAZ-NEXT: [[TMP22:%.*]] = call nnan float @llvm.amdgcn.rcp.f32(float [[TMP20]])
|
|
; DAZ-NEXT: [[TMP23:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[A]])
|
|
; DAZ-NEXT: [[TMP24:%.*]] = extractvalue { float, i32 } [[TMP23]], 0
|
|
; DAZ-NEXT: [[TMP25:%.*]] = extractvalue { float, i32 } [[TMP23]], 1
|
|
; DAZ-NEXT: [[TMP26:%.*]] = fmul nnan float [[TMP24]], [[TMP22]]
|
|
; DAZ-NEXT: [[TMP27:%.*]] = sub i32 [[TMP25]], [[TMP21]]
|
|
; DAZ-NEXT: [[MD_1ULP_NNAN:%.*]] = call nnan float @llvm.ldexp.f32.i32(float [[TMP26]], i32 [[TMP27]])
|
|
; DAZ-NEXT: store volatile float [[MD_1ULP_NNAN]], ptr addrspace(1) [[OUT]], align 4
|
|
; DAZ-NEXT: [[MD_25ULP_NNAN:%.*]] = call nnan float @llvm.amdgcn.fdiv.fast(float [[A]], float [[B]])
|
|
; DAZ-NEXT: store volatile float [[MD_25ULP_NNAN]], ptr addrspace(1) [[OUT]], align 4
|
|
; DAZ-NEXT: ret void
|
|
;
|
|
%md.1ulp.ninf.nnan = fdiv ninf nnan float %a, %b, !fpmath !2
|
|
store volatile float %md.1ulp.ninf.nnan, ptr addrspace(1) %out, align 4
|
|
|
|
%md.25ulp.ninf.nnan = fdiv ninf nnan float %a, %b, !fpmath !0
|
|
store volatile float %md.25ulp.ninf.nnan, ptr addrspace(1) %out, align 4
|
|
|
|
%md.1ulp.ninf = fdiv ninf float %a, %b, !fpmath !2
|
|
store volatile float %md.1ulp.ninf, ptr addrspace(1) %out, align 4
|
|
|
|
%md.25ulp.ninf = fdiv ninf float %a, %b, !fpmath !0
|
|
store volatile float %md.25ulp.ninf, ptr addrspace(1) %out, align 4
|
|
|
|
%md.1ulp.nnan = fdiv nnan float %a, %b, !fpmath !2
|
|
store volatile float %md.1ulp.nnan, ptr addrspace(1) %out, align 4
|
|
|
|
%md.25ulp.nnan = fdiv nnan float %a, %b, !fpmath !0
|
|
store volatile float %md.25ulp.nnan, ptr addrspace(1) %out, align 4
|
|
|
|
ret void
|
|
}
|
|
|
|
define amdgpu_kernel void @rcp_fdiv_f32_fpmath(ptr addrspace(1) %out, float %x) {
|
|
; IEEE-GOODFREXP-LABEL: define amdgpu_kernel void @rcp_fdiv_f32_fpmath
|
|
; IEEE-GOODFREXP-SAME: (ptr addrspace(1) [[OUT:%.*]], float [[X:%.*]]) #[[ATTR1]] {
|
|
; IEEE-GOODFREXP-NEXT: [[NO_MD:%.*]] = fdiv float 1.000000e+00, [[X]]
|
|
; IEEE-GOODFREXP-NEXT: store volatile float [[NO_MD]], ptr addrspace(1) [[OUT]], align 4
|
|
; IEEE-GOODFREXP-NEXT: [[TMP1:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[X]])
|
|
; IEEE-GOODFREXP-NEXT: [[TMP2:%.*]] = extractvalue { float, i32 } [[TMP1]], 0
|
|
; IEEE-GOODFREXP-NEXT: [[TMP3:%.*]] = extractvalue { float, i32 } [[TMP1]], 1
|
|
; IEEE-GOODFREXP-NEXT: [[TMP4:%.*]] = sub i32 0, [[TMP3]]
|
|
; IEEE-GOODFREXP-NEXT: [[TMP5:%.*]] = call float @llvm.amdgcn.rcp.f32(float [[TMP2]])
|
|
; IEEE-GOODFREXP-NEXT: [[MD_1ULP:%.*]] = call float @llvm.ldexp.f32.i32(float [[TMP5]], i32 [[TMP4]])
|
|
; IEEE-GOODFREXP-NEXT: store volatile float [[MD_1ULP]], ptr addrspace(1) [[OUT]], align 4
|
|
; IEEE-GOODFREXP-NEXT: [[TMP6:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[X]])
|
|
; IEEE-GOODFREXP-NEXT: [[TMP7:%.*]] = extractvalue { float, i32 } [[TMP6]], 0
|
|
; IEEE-GOODFREXP-NEXT: [[TMP8:%.*]] = extractvalue { float, i32 } [[TMP6]], 1
|
|
; IEEE-GOODFREXP-NEXT: [[TMP9:%.*]] = sub i32 0, [[TMP8]]
|
|
; IEEE-GOODFREXP-NEXT: [[TMP10:%.*]] = call float @llvm.amdgcn.rcp.f32(float [[TMP7]])
|
|
; IEEE-GOODFREXP-NEXT: [[MD_25ULP:%.*]] = call float @llvm.ldexp.f32.i32(float [[TMP10]], i32 [[TMP9]])
|
|
; IEEE-GOODFREXP-NEXT: store volatile float [[MD_25ULP]], ptr addrspace(1) [[OUT]], align 4
|
|
; IEEE-GOODFREXP-NEXT: [[MD_HALF_ULP:%.*]] = fdiv float 1.000000e+00, [[X]], !fpmath !1
|
|
; IEEE-GOODFREXP-NEXT: store volatile float [[MD_HALF_ULP]], ptr addrspace(1) [[OUT]], align 4
|
|
; IEEE-GOODFREXP-NEXT: [[AFN_NO_MD:%.*]] = fdiv afn float 1.000000e+00, [[X]]
|
|
; IEEE-GOODFREXP-NEXT: store volatile float [[AFN_NO_MD]], ptr addrspace(1) [[OUT]], align 4
|
|
; IEEE-GOODFREXP-NEXT: [[AFN_25ULP:%.*]] = fdiv afn float 1.000000e+00, [[X]], !fpmath !0
|
|
; IEEE-GOODFREXP-NEXT: store volatile float [[AFN_25ULP]], ptr addrspace(1) [[OUT]], align 4
|
|
; IEEE-GOODFREXP-NEXT: [[FAST_NO_MD:%.*]] = fdiv fast float 1.000000e+00, [[X]]
|
|
; IEEE-GOODFREXP-NEXT: store volatile float [[FAST_NO_MD]], ptr addrspace(1) [[OUT]], align 4
|
|
; IEEE-GOODFREXP-NEXT: [[FAST_25ULP:%.*]] = fdiv fast float 1.000000e+00, [[X]], !fpmath !0
|
|
; IEEE-GOODFREXP-NEXT: store volatile float [[FAST_25ULP]], ptr addrspace(1) [[OUT]], align 4
|
|
; IEEE-GOODFREXP-NEXT: [[TMP11:%.*]] = fneg float [[X]]
|
|
; IEEE-GOODFREXP-NEXT: [[TMP12:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[TMP11]])
|
|
; IEEE-GOODFREXP-NEXT: [[TMP13:%.*]] = extractvalue { float, i32 } [[TMP12]], 0
|
|
; IEEE-GOODFREXP-NEXT: [[TMP14:%.*]] = extractvalue { float, i32 } [[TMP12]], 1
|
|
; IEEE-GOODFREXP-NEXT: [[TMP15:%.*]] = sub i32 0, [[TMP14]]
|
|
; IEEE-GOODFREXP-NEXT: [[TMP16:%.*]] = call float @llvm.amdgcn.rcp.f32(float [[TMP13]])
|
|
; IEEE-GOODFREXP-NEXT: [[NEG_MD_1ULP:%.*]] = call float @llvm.ldexp.f32.i32(float [[TMP16]], i32 [[TMP15]])
|
|
; IEEE-GOODFREXP-NEXT: store volatile float [[NEG_MD_1ULP]], ptr addrspace(1) [[OUT]], align 4
|
|
; IEEE-GOODFREXP-NEXT: [[TMP17:%.*]] = fneg float [[X]]
|
|
; IEEE-GOODFREXP-NEXT: [[TMP18:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[TMP17]])
|
|
; IEEE-GOODFREXP-NEXT: [[TMP19:%.*]] = extractvalue { float, i32 } [[TMP18]], 0
|
|
; IEEE-GOODFREXP-NEXT: [[TMP20:%.*]] = extractvalue { float, i32 } [[TMP18]], 1
|
|
; IEEE-GOODFREXP-NEXT: [[TMP21:%.*]] = sub i32 0, [[TMP20]]
|
|
; IEEE-GOODFREXP-NEXT: [[TMP22:%.*]] = call float @llvm.amdgcn.rcp.f32(float [[TMP19]])
|
|
; IEEE-GOODFREXP-NEXT: [[NEG_MD_25ULP:%.*]] = call float @llvm.ldexp.f32.i32(float [[TMP22]], i32 [[TMP21]])
|
|
; IEEE-GOODFREXP-NEXT: store volatile float [[NEG_MD_25ULP]], ptr addrspace(1) [[OUT]], align 4
|
|
; IEEE-GOODFREXP-NEXT: [[NEG_AFN_NO_MD:%.*]] = fdiv afn float -1.000000e+00, [[X]]
|
|
; IEEE-GOODFREXP-NEXT: store volatile float [[NEG_AFN_NO_MD]], ptr addrspace(1) [[OUT]], align 4
|
|
; IEEE-GOODFREXP-NEXT: [[NEG_AFN_25ULP:%.*]] = fdiv afn float -1.000000e+00, [[X]], !fpmath !0
|
|
; IEEE-GOODFREXP-NEXT: store volatile float [[NEG_AFN_25ULP]], ptr addrspace(1) [[OUT]], align 4
|
|
; IEEE-GOODFREXP-NEXT: [[NEG_FAST_NO_MD:%.*]] = fdiv fast float -1.000000e+00, [[X]]
|
|
; IEEE-GOODFREXP-NEXT: store volatile float [[NEG_FAST_NO_MD]], ptr addrspace(1) [[OUT]], align 4
|
|
; IEEE-GOODFREXP-NEXT: ret void
|
|
;
|
|
; IEEE-BADFREXP-LABEL: define amdgpu_kernel void @rcp_fdiv_f32_fpmath
|
|
; IEEE-BADFREXP-SAME: (ptr addrspace(1) [[OUT:%.*]], float [[X:%.*]]) #[[ATTR1]] {
|
|
; IEEE-BADFREXP-NEXT: [[NO_MD:%.*]] = fdiv float 1.000000e+00, [[X]]
|
|
; IEEE-BADFREXP-NEXT: store volatile float [[NO_MD]], ptr addrspace(1) [[OUT]], align 4
|
|
; IEEE-BADFREXP-NEXT: [[TMP1:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[X]])
|
|
; IEEE-BADFREXP-NEXT: [[TMP2:%.*]] = extractvalue { float, i32 } [[TMP1]], 0
|
|
; IEEE-BADFREXP-NEXT: [[TMP3:%.*]] = call i32 @llvm.amdgcn.frexp.exp.i32.f32(float [[X]])
|
|
; IEEE-BADFREXP-NEXT: [[TMP4:%.*]] = sub i32 0, [[TMP3]]
|
|
; IEEE-BADFREXP-NEXT: [[TMP5:%.*]] = call float @llvm.amdgcn.rcp.f32(float [[TMP2]])
|
|
; IEEE-BADFREXP-NEXT: [[MD_1ULP:%.*]] = call float @llvm.ldexp.f32.i32(float [[TMP5]], i32 [[TMP4]])
|
|
; IEEE-BADFREXP-NEXT: store volatile float [[MD_1ULP]], ptr addrspace(1) [[OUT]], align 4
|
|
; IEEE-BADFREXP-NEXT: [[TMP6:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[X]])
|
|
; IEEE-BADFREXP-NEXT: [[TMP7:%.*]] = extractvalue { float, i32 } [[TMP6]], 0
|
|
; IEEE-BADFREXP-NEXT: [[TMP8:%.*]] = call i32 @llvm.amdgcn.frexp.exp.i32.f32(float [[X]])
|
|
; IEEE-BADFREXP-NEXT: [[TMP9:%.*]] = sub i32 0, [[TMP8]]
|
|
; IEEE-BADFREXP-NEXT: [[TMP10:%.*]] = call float @llvm.amdgcn.rcp.f32(float [[TMP7]])
|
|
; IEEE-BADFREXP-NEXT: [[MD_25ULP:%.*]] = call float @llvm.ldexp.f32.i32(float [[TMP10]], i32 [[TMP9]])
|
|
; IEEE-BADFREXP-NEXT: store volatile float [[MD_25ULP]], ptr addrspace(1) [[OUT]], align 4
|
|
; IEEE-BADFREXP-NEXT: [[MD_HALF_ULP:%.*]] = fdiv float 1.000000e+00, [[X]], !fpmath !1
|
|
; IEEE-BADFREXP-NEXT: store volatile float [[MD_HALF_ULP]], ptr addrspace(1) [[OUT]], align 4
|
|
; IEEE-BADFREXP-NEXT: [[AFN_NO_MD:%.*]] = fdiv afn float 1.000000e+00, [[X]]
|
|
; IEEE-BADFREXP-NEXT: store volatile float [[AFN_NO_MD]], ptr addrspace(1) [[OUT]], align 4
|
|
; IEEE-BADFREXP-NEXT: [[AFN_25ULP:%.*]] = fdiv afn float 1.000000e+00, [[X]], !fpmath !0
|
|
; IEEE-BADFREXP-NEXT: store volatile float [[AFN_25ULP]], ptr addrspace(1) [[OUT]], align 4
|
|
; IEEE-BADFREXP-NEXT: [[FAST_NO_MD:%.*]] = fdiv fast float 1.000000e+00, [[X]]
|
|
; IEEE-BADFREXP-NEXT: store volatile float [[FAST_NO_MD]], ptr addrspace(1) [[OUT]], align 4
|
|
; IEEE-BADFREXP-NEXT: [[FAST_25ULP:%.*]] = fdiv fast float 1.000000e+00, [[X]], !fpmath !0
|
|
; IEEE-BADFREXP-NEXT: store volatile float [[FAST_25ULP]], ptr addrspace(1) [[OUT]], align 4
|
|
; IEEE-BADFREXP-NEXT: [[TMP11:%.*]] = fneg float [[X]]
|
|
; IEEE-BADFREXP-NEXT: [[TMP12:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[TMP11]])
|
|
; IEEE-BADFREXP-NEXT: [[TMP13:%.*]] = extractvalue { float, i32 } [[TMP12]], 0
|
|
; IEEE-BADFREXP-NEXT: [[TMP14:%.*]] = call i32 @llvm.amdgcn.frexp.exp.i32.f32(float [[TMP11]])
|
|
; IEEE-BADFREXP-NEXT: [[TMP15:%.*]] = sub i32 0, [[TMP14]]
|
|
; IEEE-BADFREXP-NEXT: [[TMP16:%.*]] = call float @llvm.amdgcn.rcp.f32(float [[TMP13]])
|
|
; IEEE-BADFREXP-NEXT: [[NEG_MD_1ULP:%.*]] = call float @llvm.ldexp.f32.i32(float [[TMP16]], i32 [[TMP15]])
|
|
; IEEE-BADFREXP-NEXT: store volatile float [[NEG_MD_1ULP]], ptr addrspace(1) [[OUT]], align 4
|
|
; IEEE-BADFREXP-NEXT: [[TMP17:%.*]] = fneg float [[X]]
|
|
; IEEE-BADFREXP-NEXT: [[TMP18:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[TMP17]])
|
|
; IEEE-BADFREXP-NEXT: [[TMP19:%.*]] = extractvalue { float, i32 } [[TMP18]], 0
|
|
; IEEE-BADFREXP-NEXT: [[TMP20:%.*]] = call i32 @llvm.amdgcn.frexp.exp.i32.f32(float [[TMP17]])
|
|
; IEEE-BADFREXP-NEXT: [[TMP21:%.*]] = sub i32 0, [[TMP20]]
|
|
; IEEE-BADFREXP-NEXT: [[TMP22:%.*]] = call float @llvm.amdgcn.rcp.f32(float [[TMP19]])
|
|
; IEEE-BADFREXP-NEXT: [[NEG_MD_25ULP:%.*]] = call float @llvm.ldexp.f32.i32(float [[TMP22]], i32 [[TMP21]])
|
|
; IEEE-BADFREXP-NEXT: store volatile float [[NEG_MD_25ULP]], ptr addrspace(1) [[OUT]], align 4
|
|
; IEEE-BADFREXP-NEXT: [[NEG_AFN_NO_MD:%.*]] = fdiv afn float -1.000000e+00, [[X]]
|
|
; IEEE-BADFREXP-NEXT: store volatile float [[NEG_AFN_NO_MD]], ptr addrspace(1) [[OUT]], align 4
|
|
; IEEE-BADFREXP-NEXT: [[NEG_AFN_25ULP:%.*]] = fdiv afn float -1.000000e+00, [[X]], !fpmath !0
|
|
; IEEE-BADFREXP-NEXT: store volatile float [[NEG_AFN_25ULP]], ptr addrspace(1) [[OUT]], align 4
|
|
; IEEE-BADFREXP-NEXT: [[NEG_FAST_NO_MD:%.*]] = fdiv fast float -1.000000e+00, [[X]]
|
|
; IEEE-BADFREXP-NEXT: store volatile float [[NEG_FAST_NO_MD]], ptr addrspace(1) [[OUT]], align 4
|
|
; IEEE-BADFREXP-NEXT: ret void
|
|
;
|
|
; DAZ-LABEL: define amdgpu_kernel void @rcp_fdiv_f32_fpmath
|
|
; DAZ-SAME: (ptr addrspace(1) [[OUT:%.*]], float [[X:%.*]]) #[[ATTR1]] {
|
|
; DAZ-NEXT: [[NO_MD:%.*]] = fdiv float 1.000000e+00, [[X]]
|
|
; DAZ-NEXT: store volatile float [[NO_MD]], ptr addrspace(1) [[OUT]], align 4
|
|
; DAZ-NEXT: [[MD_1ULP:%.*]] = call float @llvm.amdgcn.rcp.f32(float [[X]])
|
|
; DAZ-NEXT: store volatile float [[MD_1ULP]], ptr addrspace(1) [[OUT]], align 4
|
|
; DAZ-NEXT: [[MD_25ULP:%.*]] = call float @llvm.amdgcn.rcp.f32(float [[X]])
|
|
; DAZ-NEXT: store volatile float [[MD_25ULP]], ptr addrspace(1) [[OUT]], align 4
|
|
; DAZ-NEXT: [[MD_HALF_ULP:%.*]] = fdiv float 1.000000e+00, [[X]], !fpmath !1
|
|
; DAZ-NEXT: store volatile float [[MD_HALF_ULP]], ptr addrspace(1) [[OUT]], align 4
|
|
; DAZ-NEXT: [[AFN_NO_MD:%.*]] = fdiv afn float 1.000000e+00, [[X]]
|
|
; DAZ-NEXT: store volatile float [[AFN_NO_MD]], ptr addrspace(1) [[OUT]], align 4
|
|
; DAZ-NEXT: [[AFN_25ULP:%.*]] = fdiv afn float 1.000000e+00, [[X]], !fpmath !0
|
|
; DAZ-NEXT: store volatile float [[AFN_25ULP]], ptr addrspace(1) [[OUT]], align 4
|
|
; DAZ-NEXT: [[FAST_NO_MD:%.*]] = fdiv fast float 1.000000e+00, [[X]]
|
|
; DAZ-NEXT: store volatile float [[FAST_NO_MD]], ptr addrspace(1) [[OUT]], align 4
|
|
; DAZ-NEXT: [[FAST_25ULP:%.*]] = fdiv fast float 1.000000e+00, [[X]], !fpmath !0
|
|
; DAZ-NEXT: store volatile float [[FAST_25ULP]], ptr addrspace(1) [[OUT]], align 4
|
|
; DAZ-NEXT: [[TMP1:%.*]] = fneg float [[X]]
|
|
; DAZ-NEXT: [[NEG_MD_1ULP:%.*]] = call float @llvm.amdgcn.rcp.f32(float [[TMP1]])
|
|
; DAZ-NEXT: store volatile float [[NEG_MD_1ULP]], ptr addrspace(1) [[OUT]], align 4
|
|
; DAZ-NEXT: [[TMP2:%.*]] = fneg float [[X]]
|
|
; DAZ-NEXT: [[NEG_MD_25ULP:%.*]] = call float @llvm.amdgcn.rcp.f32(float [[TMP2]])
|
|
; DAZ-NEXT: store volatile float [[NEG_MD_25ULP]], ptr addrspace(1) [[OUT]], align 4
|
|
; DAZ-NEXT: [[NEG_AFN_NO_MD:%.*]] = fdiv afn float -1.000000e+00, [[X]]
|
|
; DAZ-NEXT: store volatile float [[NEG_AFN_NO_MD]], ptr addrspace(1) [[OUT]], align 4
|
|
; DAZ-NEXT: [[NEG_AFN_25ULP:%.*]] = fdiv afn float -1.000000e+00, [[X]], !fpmath !0
|
|
; DAZ-NEXT: store volatile float [[NEG_AFN_25ULP]], ptr addrspace(1) [[OUT]], align 4
|
|
; DAZ-NEXT: [[NEG_FAST_NO_MD:%.*]] = fdiv fast float -1.000000e+00, [[X]]
|
|
; DAZ-NEXT: store volatile float [[NEG_FAST_NO_MD]], ptr addrspace(1) [[OUT]], align 4
|
|
; DAZ-NEXT: ret void
|
|
;
|
|
%no.md = fdiv float 1.000000e+00, %x
|
|
store volatile float %no.md, ptr addrspace(1) %out, align 4
|
|
%md.1ulp = fdiv float 1.000000e+00, %x, !fpmath !2
|
|
store volatile float %md.1ulp, ptr addrspace(1) %out, align 4
|
|
%md.25ulp = fdiv float 1.000000e+00, %x, !fpmath !0
|
|
store volatile float %md.25ulp, ptr addrspace(1) %out, align 4
|
|
%md.half.ulp = fdiv float 1.000000e+00, %x, !fpmath !1
|
|
store volatile float %md.half.ulp, ptr addrspace(1) %out, align 4
|
|
%afn.no.md = fdiv afn float 1.000000e+00, %x
|
|
store volatile float %afn.no.md, ptr addrspace(1) %out, align 4
|
|
%afn.25ulp = fdiv afn float 1.000000e+00, %x, !fpmath !0
|
|
store volatile float %afn.25ulp, ptr addrspace(1) %out, align 4
|
|
%fast.no.md = fdiv fast float 1.000000e+00, %x
|
|
store volatile float %fast.no.md, ptr addrspace(1) %out, align 4
|
|
%fast.25ulp = fdiv fast float 1.000000e+00, %x, !fpmath !0
|
|
store volatile float %fast.25ulp, ptr addrspace(1) %out, align 4
|
|
%neg.md.1ulp = fdiv float -1.000000e+00, %x, !fpmath !2
|
|
store volatile float %neg.md.1ulp, ptr addrspace(1) %out, align 4
|
|
%neg.md.25ulp = fdiv float -1.000000e+00, %x, !fpmath !0
|
|
store volatile float %neg.md.25ulp, ptr addrspace(1) %out, align 4
|
|
%neg.afn.no.md = fdiv afn float -1.000000e+00, %x
|
|
store volatile float %neg.afn.no.md, ptr addrspace(1) %out, align 4
|
|
%neg.afn.25ulp = fdiv afn float -1.000000e+00, %x, !fpmath !0
|
|
store volatile float %neg.afn.25ulp, ptr addrspace(1) %out, align 4
|
|
%neg.fast.no.md = fdiv fast float -1.000000e+00, %x
|
|
store volatile float %neg.fast.no.md, ptr addrspace(1) %out, align 4
|
|
ret void
|
|
}
|
|
|
|
define amdgpu_kernel void @rcp_fdiv_f32_fpmath_flags(ptr addrspace(1) %out, float %x) {
|
|
; IEEE-GOODFREXP-LABEL: define amdgpu_kernel void @rcp_fdiv_f32_fpmath_flags
|
|
; IEEE-GOODFREXP-SAME: (ptr addrspace(1) [[OUT:%.*]], float [[X:%.*]]) #[[ATTR1]] {
|
|
; IEEE-GOODFREXP-NEXT: [[TMP1:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[X]])
|
|
; IEEE-GOODFREXP-NEXT: [[TMP2:%.*]] = extractvalue { float, i32 } [[TMP1]], 0
|
|
; IEEE-GOODFREXP-NEXT: [[TMP3:%.*]] = extractvalue { float, i32 } [[TMP1]], 1
|
|
; IEEE-GOODFREXP-NEXT: [[TMP4:%.*]] = sub i32 0, [[TMP3]]
|
|
; IEEE-GOODFREXP-NEXT: [[TMP5:%.*]] = call nnan ninf float @llvm.amdgcn.rcp.f32(float [[TMP2]])
|
|
; IEEE-GOODFREXP-NEXT: [[MD_1ULP_NINF_NNAN:%.*]] = call nnan ninf float @llvm.ldexp.f32.i32(float [[TMP5]], i32 [[TMP4]])
|
|
; IEEE-GOODFREXP-NEXT: store volatile float [[MD_1ULP_NINF_NNAN]], ptr addrspace(1) [[OUT]], align 4
|
|
; IEEE-GOODFREXP-NEXT: [[TMP6:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[X]])
|
|
; IEEE-GOODFREXP-NEXT: [[TMP7:%.*]] = extractvalue { float, i32 } [[TMP6]], 0
|
|
; IEEE-GOODFREXP-NEXT: [[TMP8:%.*]] = extractvalue { float, i32 } [[TMP6]], 1
|
|
; IEEE-GOODFREXP-NEXT: [[TMP9:%.*]] = sub i32 0, [[TMP8]]
|
|
; IEEE-GOODFREXP-NEXT: [[TMP10:%.*]] = call ninf float @llvm.amdgcn.rcp.f32(float [[TMP7]])
|
|
; IEEE-GOODFREXP-NEXT: [[MD_1ULP_NINF:%.*]] = call ninf float @llvm.ldexp.f32.i32(float [[TMP10]], i32 [[TMP9]])
|
|
; IEEE-GOODFREXP-NEXT: store volatile float [[MD_1ULP_NINF]], ptr addrspace(1) [[OUT]], align 4
|
|
; IEEE-GOODFREXP-NEXT: [[TMP11:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[X]])
|
|
; IEEE-GOODFREXP-NEXT: [[TMP12:%.*]] = extractvalue { float, i32 } [[TMP11]], 0
|
|
; IEEE-GOODFREXP-NEXT: [[TMP13:%.*]] = extractvalue { float, i32 } [[TMP11]], 1
|
|
; IEEE-GOODFREXP-NEXT: [[TMP14:%.*]] = sub i32 0, [[TMP13]]
|
|
; IEEE-GOODFREXP-NEXT: [[TMP15:%.*]] = call nnan float @llvm.amdgcn.rcp.f32(float [[TMP12]])
|
|
; IEEE-GOODFREXP-NEXT: [[MD_1ULP_NNAN:%.*]] = call nnan float @llvm.ldexp.f32.i32(float [[TMP15]], i32 [[TMP14]])
|
|
; IEEE-GOODFREXP-NEXT: store volatile float [[MD_1ULP_NNAN]], ptr addrspace(1) [[OUT]], align 4
|
|
; IEEE-GOODFREXP-NEXT: [[TMP16:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[X]])
|
|
; IEEE-GOODFREXP-NEXT: [[TMP17:%.*]] = extractvalue { float, i32 } [[TMP16]], 0
|
|
; IEEE-GOODFREXP-NEXT: [[TMP18:%.*]] = extractvalue { float, i32 } [[TMP16]], 1
|
|
; IEEE-GOODFREXP-NEXT: [[TMP19:%.*]] = sub i32 0, [[TMP18]]
|
|
; IEEE-GOODFREXP-NEXT: [[TMP20:%.*]] = call nsz float @llvm.amdgcn.rcp.f32(float [[TMP17]])
|
|
; IEEE-GOODFREXP-NEXT: [[MD_1ULP_NSZ:%.*]] = call nsz float @llvm.ldexp.f32.i32(float [[TMP20]], i32 [[TMP19]])
|
|
; IEEE-GOODFREXP-NEXT: store volatile float [[MD_1ULP_NSZ]], ptr addrspace(1) [[OUT]], align 4
|
|
; IEEE-GOODFREXP-NEXT: ret void
|
|
;
|
|
; IEEE-BADFREXP-LABEL: define amdgpu_kernel void @rcp_fdiv_f32_fpmath_flags
|
|
; IEEE-BADFREXP-SAME: (ptr addrspace(1) [[OUT:%.*]], float [[X:%.*]]) #[[ATTR1]] {
|
|
; IEEE-BADFREXP-NEXT: [[TMP1:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[X]])
|
|
; IEEE-BADFREXP-NEXT: [[TMP2:%.*]] = extractvalue { float, i32 } [[TMP1]], 0
|
|
; IEEE-BADFREXP-NEXT: [[TMP3:%.*]] = call i32 @llvm.amdgcn.frexp.exp.i32.f32(float [[X]])
|
|
; IEEE-BADFREXP-NEXT: [[TMP4:%.*]] = sub i32 0, [[TMP3]]
|
|
; IEEE-BADFREXP-NEXT: [[TMP5:%.*]] = call nnan ninf float @llvm.amdgcn.rcp.f32(float [[TMP2]])
|
|
; IEEE-BADFREXP-NEXT: [[MD_1ULP_NINF_NNAN:%.*]] = call nnan ninf float @llvm.ldexp.f32.i32(float [[TMP5]], i32 [[TMP4]])
|
|
; IEEE-BADFREXP-NEXT: store volatile float [[MD_1ULP_NINF_NNAN]], ptr addrspace(1) [[OUT]], align 4
|
|
; IEEE-BADFREXP-NEXT: [[TMP6:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[X]])
|
|
; IEEE-BADFREXP-NEXT: [[TMP7:%.*]] = extractvalue { float, i32 } [[TMP6]], 0
|
|
; IEEE-BADFREXP-NEXT: [[TMP8:%.*]] = call i32 @llvm.amdgcn.frexp.exp.i32.f32(float [[X]])
|
|
; IEEE-BADFREXP-NEXT: [[TMP9:%.*]] = sub i32 0, [[TMP8]]
|
|
; IEEE-BADFREXP-NEXT: [[TMP10:%.*]] = call ninf float @llvm.amdgcn.rcp.f32(float [[TMP7]])
|
|
; IEEE-BADFREXP-NEXT: [[MD_1ULP_NINF:%.*]] = call ninf float @llvm.ldexp.f32.i32(float [[TMP10]], i32 [[TMP9]])
|
|
; IEEE-BADFREXP-NEXT: store volatile float [[MD_1ULP_NINF]], ptr addrspace(1) [[OUT]], align 4
|
|
; IEEE-BADFREXP-NEXT: [[TMP11:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[X]])
|
|
; IEEE-BADFREXP-NEXT: [[TMP12:%.*]] = extractvalue { float, i32 } [[TMP11]], 0
|
|
; IEEE-BADFREXP-NEXT: [[TMP13:%.*]] = call i32 @llvm.amdgcn.frexp.exp.i32.f32(float [[X]])
|
|
; IEEE-BADFREXP-NEXT: [[TMP14:%.*]] = sub i32 0, [[TMP13]]
|
|
; IEEE-BADFREXP-NEXT: [[TMP15:%.*]] = call nnan float @llvm.amdgcn.rcp.f32(float [[TMP12]])
|
|
; IEEE-BADFREXP-NEXT: [[MD_1ULP_NNAN:%.*]] = call nnan float @llvm.ldexp.f32.i32(float [[TMP15]], i32 [[TMP14]])
|
|
; IEEE-BADFREXP-NEXT: store volatile float [[MD_1ULP_NNAN]], ptr addrspace(1) [[OUT]], align 4
|
|
; IEEE-BADFREXP-NEXT: [[TMP16:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[X]])
|
|
; IEEE-BADFREXP-NEXT: [[TMP17:%.*]] = extractvalue { float, i32 } [[TMP16]], 0
|
|
; IEEE-BADFREXP-NEXT: [[TMP18:%.*]] = call i32 @llvm.amdgcn.frexp.exp.i32.f32(float [[X]])
|
|
; IEEE-BADFREXP-NEXT: [[TMP19:%.*]] = sub i32 0, [[TMP18]]
|
|
; IEEE-BADFREXP-NEXT: [[TMP20:%.*]] = call nsz float @llvm.amdgcn.rcp.f32(float [[TMP17]])
|
|
; IEEE-BADFREXP-NEXT: [[MD_1ULP_NSZ:%.*]] = call nsz float @llvm.ldexp.f32.i32(float [[TMP20]], i32 [[TMP19]])
|
|
; IEEE-BADFREXP-NEXT: store volatile float [[MD_1ULP_NSZ]], ptr addrspace(1) [[OUT]], align 4
|
|
; IEEE-BADFREXP-NEXT: ret void
|
|
;
|
|
; DAZ-LABEL: define amdgpu_kernel void @rcp_fdiv_f32_fpmath_flags
|
|
; DAZ-SAME: (ptr addrspace(1) [[OUT:%.*]], float [[X:%.*]]) #[[ATTR1]] {
|
|
; DAZ-NEXT: [[MD_1ULP_NINF_NNAN:%.*]] = call nnan ninf float @llvm.amdgcn.rcp.f32(float [[X]])
|
|
; DAZ-NEXT: store volatile float [[MD_1ULP_NINF_NNAN]], ptr addrspace(1) [[OUT]], align 4
|
|
; DAZ-NEXT: [[MD_1ULP_NINF:%.*]] = call ninf float @llvm.amdgcn.rcp.f32(float [[X]])
|
|
; DAZ-NEXT: store volatile float [[MD_1ULP_NINF]], ptr addrspace(1) [[OUT]], align 4
|
|
; DAZ-NEXT: [[MD_1ULP_NNAN:%.*]] = call nnan float @llvm.amdgcn.rcp.f32(float [[X]])
|
|
; DAZ-NEXT: store volatile float [[MD_1ULP_NNAN]], ptr addrspace(1) [[OUT]], align 4
|
|
; DAZ-NEXT: [[MD_1ULP_NSZ:%.*]] = call nsz float @llvm.amdgcn.rcp.f32(float [[X]])
|
|
; DAZ-NEXT: store volatile float [[MD_1ULP_NSZ]], ptr addrspace(1) [[OUT]], align 4
|
|
; DAZ-NEXT: ret void
|
|
;
|
|
%md.1ulp.ninf.nnan = fdiv ninf nnan float 1.000000e+00, %x, !fpmath !2
|
|
store volatile float %md.1ulp.ninf.nnan, ptr addrspace(1) %out, align 4
|
|
|
|
%md.1ulp.ninf = fdiv ninf float 1.000000e+00, %x, !fpmath !2
|
|
store volatile float %md.1ulp.ninf, ptr addrspace(1) %out, align 4
|
|
|
|
%md.1ulp.nnan = fdiv nnan float 1.000000e+00, %x, !fpmath !2
|
|
store volatile float %md.1ulp.nnan, ptr addrspace(1) %out, align 4
|
|
|
|
%md.1ulp.nsz = fdiv nsz float 1.000000e+00, %x, !fpmath !2
|
|
store volatile float %md.1ulp.nsz, ptr addrspace(1) %out, align 4
|
|
|
|
ret void
|
|
}
|
|
|
|
define amdgpu_kernel void @rcp_fdiv_f32_knownfinite(ptr addrspace(1) %out,
|
|
; IEEE-GOODFREXP-LABEL: define amdgpu_kernel void @rcp_fdiv_f32_knownfinite
|
|
; IEEE-GOODFREXP-SAME: (ptr addrspace(1) [[OUT:%.*]], float nofpclass(nan) [[NO_NAN:%.*]], float nofpclass(nan) [[NO_INF:%.*]], float nofpclass(nan inf) [[NO_INF_NAN:%.*]]) #[[ATTR1]] {
|
|
; IEEE-GOODFREXP-NEXT: [[TMP1:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[NO_NAN]])
|
|
; IEEE-GOODFREXP-NEXT: [[TMP2:%.*]] = extractvalue { float, i32 } [[TMP1]], 0
|
|
; IEEE-GOODFREXP-NEXT: [[TMP3:%.*]] = extractvalue { float, i32 } [[TMP1]], 1
|
|
; IEEE-GOODFREXP-NEXT: [[TMP4:%.*]] = sub i32 0, [[TMP3]]
|
|
; IEEE-GOODFREXP-NEXT: [[TMP5:%.*]] = call float @llvm.amdgcn.rcp.f32(float [[TMP2]])
|
|
; IEEE-GOODFREXP-NEXT: [[MD_1ULP_NO_NAN:%.*]] = call float @llvm.ldexp.f32.i32(float [[TMP5]], i32 [[TMP4]])
|
|
; IEEE-GOODFREXP-NEXT: store volatile float [[MD_1ULP_NO_NAN]], ptr addrspace(1) [[OUT]], align 4
|
|
; IEEE-GOODFREXP-NEXT: [[TMP6:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[NO_INF]])
|
|
; IEEE-GOODFREXP-NEXT: [[TMP7:%.*]] = extractvalue { float, i32 } [[TMP6]], 0
|
|
; IEEE-GOODFREXP-NEXT: [[TMP8:%.*]] = extractvalue { float, i32 } [[TMP6]], 1
|
|
; IEEE-GOODFREXP-NEXT: [[TMP9:%.*]] = sub i32 0, [[TMP8]]
|
|
; IEEE-GOODFREXP-NEXT: [[TMP10:%.*]] = call float @llvm.amdgcn.rcp.f32(float [[TMP7]])
|
|
; IEEE-GOODFREXP-NEXT: [[MD_1ULP_NO_INF:%.*]] = call float @llvm.ldexp.f32.i32(float [[TMP10]], i32 [[TMP9]])
|
|
; IEEE-GOODFREXP-NEXT: store volatile float [[MD_1ULP_NO_INF]], ptr addrspace(1) [[OUT]], align 4
|
|
; IEEE-GOODFREXP-NEXT: [[TMP11:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[NO_INF_NAN]])
|
|
; IEEE-GOODFREXP-NEXT: [[TMP12:%.*]] = extractvalue { float, i32 } [[TMP11]], 0
|
|
; IEEE-GOODFREXP-NEXT: [[TMP13:%.*]] = extractvalue { float, i32 } [[TMP11]], 1
|
|
; IEEE-GOODFREXP-NEXT: [[TMP14:%.*]] = sub i32 0, [[TMP13]]
|
|
; IEEE-GOODFREXP-NEXT: [[TMP15:%.*]] = call float @llvm.amdgcn.rcp.f32(float [[TMP12]])
|
|
; IEEE-GOODFREXP-NEXT: [[MD_1ULP_NO_INF_NAN:%.*]] = call float @llvm.ldexp.f32.i32(float [[TMP15]], i32 [[TMP14]])
|
|
; IEEE-GOODFREXP-NEXT: store volatile float [[MD_1ULP_NO_INF_NAN]], ptr addrspace(1) [[OUT]], align 4
|
|
; IEEE-GOODFREXP-NEXT: ret void
|
|
;
|
|
; IEEE-BADFREXP-LABEL: define amdgpu_kernel void @rcp_fdiv_f32_knownfinite
|
|
; IEEE-BADFREXP-SAME: (ptr addrspace(1) [[OUT:%.*]], float nofpclass(nan) [[NO_NAN:%.*]], float nofpclass(nan) [[NO_INF:%.*]], float nofpclass(nan inf) [[NO_INF_NAN:%.*]]) #[[ATTR1]] {
|
|
; IEEE-BADFREXP-NEXT: [[TMP1:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[NO_NAN]])
|
|
; IEEE-BADFREXP-NEXT: [[TMP2:%.*]] = extractvalue { float, i32 } [[TMP1]], 0
|
|
; IEEE-BADFREXP-NEXT: [[TMP3:%.*]] = call i32 @llvm.amdgcn.frexp.exp.i32.f32(float [[NO_NAN]])
|
|
; IEEE-BADFREXP-NEXT: [[TMP4:%.*]] = sub i32 0, [[TMP3]]
|
|
; IEEE-BADFREXP-NEXT: [[TMP5:%.*]] = call float @llvm.amdgcn.rcp.f32(float [[TMP2]])
|
|
; IEEE-BADFREXP-NEXT: [[MD_1ULP_NO_NAN:%.*]] = call float @llvm.ldexp.f32.i32(float [[TMP5]], i32 [[TMP4]])
|
|
; IEEE-BADFREXP-NEXT: store volatile float [[MD_1ULP_NO_NAN]], ptr addrspace(1) [[OUT]], align 4
|
|
; IEEE-BADFREXP-NEXT: [[TMP6:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[NO_INF]])
|
|
; IEEE-BADFREXP-NEXT: [[TMP7:%.*]] = extractvalue { float, i32 } [[TMP6]], 0
|
|
; IEEE-BADFREXP-NEXT: [[TMP8:%.*]] = call i32 @llvm.amdgcn.frexp.exp.i32.f32(float [[NO_INF]])
|
|
; IEEE-BADFREXP-NEXT: [[TMP9:%.*]] = sub i32 0, [[TMP8]]
|
|
; IEEE-BADFREXP-NEXT: [[TMP10:%.*]] = call float @llvm.amdgcn.rcp.f32(float [[TMP7]])
|
|
; IEEE-BADFREXP-NEXT: [[MD_1ULP_NO_INF:%.*]] = call float @llvm.ldexp.f32.i32(float [[TMP10]], i32 [[TMP9]])
|
|
; IEEE-BADFREXP-NEXT: store volatile float [[MD_1ULP_NO_INF]], ptr addrspace(1) [[OUT]], align 4
|
|
; IEEE-BADFREXP-NEXT: [[TMP11:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[NO_INF_NAN]])
|
|
; IEEE-BADFREXP-NEXT: [[TMP12:%.*]] = extractvalue { float, i32 } [[TMP11]], 0
|
|
; IEEE-BADFREXP-NEXT: [[TMP13:%.*]] = call i32 @llvm.amdgcn.frexp.exp.i32.f32(float [[NO_INF_NAN]])
|
|
; IEEE-BADFREXP-NEXT: [[TMP14:%.*]] = sub i32 0, [[TMP13]]
|
|
; IEEE-BADFREXP-NEXT: [[TMP15:%.*]] = call float @llvm.amdgcn.rcp.f32(float [[TMP12]])
|
|
; IEEE-BADFREXP-NEXT: [[MD_1ULP_NO_INF_NAN:%.*]] = call float @llvm.ldexp.f32.i32(float [[TMP15]], i32 [[TMP14]])
|
|
; IEEE-BADFREXP-NEXT: store volatile float [[MD_1ULP_NO_INF_NAN]], ptr addrspace(1) [[OUT]], align 4
|
|
; IEEE-BADFREXP-NEXT: ret void
|
|
;
|
|
; DAZ-LABEL: define amdgpu_kernel void @rcp_fdiv_f32_knownfinite
|
|
; DAZ-SAME: (ptr addrspace(1) [[OUT:%.*]], float nofpclass(nan) [[NO_NAN:%.*]], float nofpclass(nan) [[NO_INF:%.*]], float nofpclass(nan inf) [[NO_INF_NAN:%.*]]) #[[ATTR1]] {
|
|
; DAZ-NEXT: [[MD_1ULP_NO_NAN:%.*]] = call float @llvm.amdgcn.rcp.f32(float [[NO_NAN]])
|
|
; DAZ-NEXT: store volatile float [[MD_1ULP_NO_NAN]], ptr addrspace(1) [[OUT]], align 4
|
|
; DAZ-NEXT: [[MD_1ULP_NO_INF:%.*]] = call float @llvm.amdgcn.rcp.f32(float [[NO_INF]])
|
|
; DAZ-NEXT: store volatile float [[MD_1ULP_NO_INF]], ptr addrspace(1) [[OUT]], align 4
|
|
; DAZ-NEXT: [[MD_1ULP_NO_INF_NAN:%.*]] = call float @llvm.amdgcn.rcp.f32(float [[NO_INF_NAN]])
|
|
; DAZ-NEXT: store volatile float [[MD_1ULP_NO_INF_NAN]], ptr addrspace(1) [[OUT]], align 4
|
|
; DAZ-NEXT: ret void
|
|
;
|
|
float nofpclass(nan) %no.nan,
|
|
float nofpclass(nan) %no.inf,
|
|
float nofpclass(inf nan) %no.inf.nan) {
|
|
%md.1ulp.no.nan = fdiv float 1.000000e+00, %no.nan, !fpmath !2
|
|
store volatile float %md.1ulp.no.nan, ptr addrspace(1) %out, align 4
|
|
|
|
%md.1ulp.no.inf = fdiv float 1.000000e+00, %no.inf, !fpmath !2
|
|
store volatile float %md.1ulp.no.inf, ptr addrspace(1) %out, align 4
|
|
|
|
%md.1ulp.no.inf.nan = fdiv float 1.000000e+00, %no.inf.nan, !fpmath !2
|
|
store volatile float %md.1ulp.no.inf.nan, ptr addrspace(1) %out, align 4
|
|
|
|
ret void
|
|
}
|
|
|
|
define amdgpu_kernel void @rcp_fdiv_f32_nozero(ptr addrspace(1) %out,
|
|
; IEEE-GOODFREXP-LABEL: define amdgpu_kernel void @rcp_fdiv_f32_nozero
|
|
; IEEE-GOODFREXP-SAME: (ptr addrspace(1) [[OUT:%.*]], float nofpclass(zero) [[NO_ZERO:%.*]], float nofpclass(zero sub) [[NO_ZERO_SUB:%.*]]) #[[ATTR1]] {
|
|
; IEEE-GOODFREXP-NEXT: [[TMP1:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[NO_ZERO]])
|
|
; IEEE-GOODFREXP-NEXT: [[TMP2:%.*]] = extractvalue { float, i32 } [[TMP1]], 0
|
|
; IEEE-GOODFREXP-NEXT: [[TMP3:%.*]] = extractvalue { float, i32 } [[TMP1]], 1
|
|
; IEEE-GOODFREXP-NEXT: [[TMP4:%.*]] = sub i32 0, [[TMP3]]
|
|
; IEEE-GOODFREXP-NEXT: [[TMP5:%.*]] = call float @llvm.amdgcn.rcp.f32(float [[TMP2]])
|
|
; IEEE-GOODFREXP-NEXT: [[MD_1ULP_NO_ZERO:%.*]] = call float @llvm.ldexp.f32.i32(float [[TMP5]], i32 [[TMP4]])
|
|
; IEEE-GOODFREXP-NEXT: store volatile float [[MD_1ULP_NO_ZERO]], ptr addrspace(1) [[OUT]], align 4
|
|
; IEEE-GOODFREXP-NEXT: [[TMP6:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[NO_ZERO_SUB]])
|
|
; IEEE-GOODFREXP-NEXT: [[TMP7:%.*]] = extractvalue { float, i32 } [[TMP6]], 0
|
|
; IEEE-GOODFREXP-NEXT: [[TMP8:%.*]] = extractvalue { float, i32 } [[TMP6]], 1
|
|
; IEEE-GOODFREXP-NEXT: [[TMP9:%.*]] = sub i32 0, [[TMP8]]
|
|
; IEEE-GOODFREXP-NEXT: [[TMP10:%.*]] = call float @llvm.amdgcn.rcp.f32(float [[TMP7]])
|
|
; IEEE-GOODFREXP-NEXT: [[MD_1ULP_NO_ZERO_SUB:%.*]] = call float @llvm.ldexp.f32.i32(float [[TMP10]], i32 [[TMP9]])
|
|
; IEEE-GOODFREXP-NEXT: store volatile float [[MD_1ULP_NO_ZERO_SUB]], ptr addrspace(1) [[OUT]], align 4
|
|
; IEEE-GOODFREXP-NEXT: ret void
|
|
;
|
|
; IEEE-BADFREXP-LABEL: define amdgpu_kernel void @rcp_fdiv_f32_nozero
|
|
; IEEE-BADFREXP-SAME: (ptr addrspace(1) [[OUT:%.*]], float nofpclass(zero) [[NO_ZERO:%.*]], float nofpclass(zero sub) [[NO_ZERO_SUB:%.*]]) #[[ATTR1]] {
|
|
; IEEE-BADFREXP-NEXT: [[TMP1:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[NO_ZERO]])
|
|
; IEEE-BADFREXP-NEXT: [[TMP2:%.*]] = extractvalue { float, i32 } [[TMP1]], 0
|
|
; IEEE-BADFREXP-NEXT: [[TMP3:%.*]] = call i32 @llvm.amdgcn.frexp.exp.i32.f32(float [[NO_ZERO]])
|
|
; IEEE-BADFREXP-NEXT: [[TMP4:%.*]] = sub i32 0, [[TMP3]]
|
|
; IEEE-BADFREXP-NEXT: [[TMP5:%.*]] = call float @llvm.amdgcn.rcp.f32(float [[TMP2]])
|
|
; IEEE-BADFREXP-NEXT: [[MD_1ULP_NO_ZERO:%.*]] = call float @llvm.ldexp.f32.i32(float [[TMP5]], i32 [[TMP4]])
|
|
; IEEE-BADFREXP-NEXT: store volatile float [[MD_1ULP_NO_ZERO]], ptr addrspace(1) [[OUT]], align 4
|
|
; IEEE-BADFREXP-NEXT: [[TMP6:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[NO_ZERO_SUB]])
|
|
; IEEE-BADFREXP-NEXT: [[TMP7:%.*]] = extractvalue { float, i32 } [[TMP6]], 0
|
|
; IEEE-BADFREXP-NEXT: [[TMP8:%.*]] = call i32 @llvm.amdgcn.frexp.exp.i32.f32(float [[NO_ZERO_SUB]])
|
|
; IEEE-BADFREXP-NEXT: [[TMP9:%.*]] = sub i32 0, [[TMP8]]
|
|
; IEEE-BADFREXP-NEXT: [[TMP10:%.*]] = call float @llvm.amdgcn.rcp.f32(float [[TMP7]])
|
|
; IEEE-BADFREXP-NEXT: [[MD_1ULP_NO_ZERO_SUB:%.*]] = call float @llvm.ldexp.f32.i32(float [[TMP10]], i32 [[TMP9]])
|
|
; IEEE-BADFREXP-NEXT: store volatile float [[MD_1ULP_NO_ZERO_SUB]], ptr addrspace(1) [[OUT]], align 4
|
|
; IEEE-BADFREXP-NEXT: ret void
|
|
;
|
|
; DAZ-LABEL: define amdgpu_kernel void @rcp_fdiv_f32_nozero
|
|
; DAZ-SAME: (ptr addrspace(1) [[OUT:%.*]], float nofpclass(zero) [[NO_ZERO:%.*]], float nofpclass(zero sub) [[NO_ZERO_SUB:%.*]]) #[[ATTR1]] {
|
|
; DAZ-NEXT: [[MD_1ULP_NO_ZERO:%.*]] = call float @llvm.amdgcn.rcp.f32(float [[NO_ZERO]])
|
|
; DAZ-NEXT: store volatile float [[MD_1ULP_NO_ZERO]], ptr addrspace(1) [[OUT]], align 4
|
|
; DAZ-NEXT: [[MD_1ULP_NO_ZERO_SUB:%.*]] = call float @llvm.amdgcn.rcp.f32(float [[NO_ZERO_SUB]])
|
|
; DAZ-NEXT: store volatile float [[MD_1ULP_NO_ZERO_SUB]], ptr addrspace(1) [[OUT]], align 4
|
|
; DAZ-NEXT: ret void
|
|
;
|
|
float nofpclass(zero) %no.zero,
|
|
float nofpclass(zero sub) %no.zero.sub) {
|
|
%md.1ulp.no.zero = fdiv float 1.000000e+00, %no.zero, !fpmath !2
|
|
store volatile float %md.1ulp.no.zero, ptr addrspace(1) %out, align 4
|
|
|
|
%md.1ulp.no.zero.sub = fdiv float 1.000000e+00, %no.zero.sub, !fpmath !2
|
|
store volatile float %md.1ulp.no.zero.sub, ptr addrspace(1) %out, align 4
|
|
ret void
|
|
}
|
|
|
|
define amdgpu_kernel void @rcp_fdiv_f32_nosub(ptr addrspace(1) %out,
|
|
; IEEE-GOODFREXP-LABEL: define amdgpu_kernel void @rcp_fdiv_f32_nosub
|
|
; IEEE-GOODFREXP-SAME: (ptr addrspace(1) [[OUT:%.*]], float nofpclass(sub) [[NO_SUB:%.*]], float nofpclass(nsub) [[NO_NSUB:%.*]], float nofpclass(psub) [[NO_PSUB:%.*]]) #[[ATTR1]] {
|
|
; IEEE-GOODFREXP-NEXT: [[TMP1:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[NO_SUB]])
|
|
; IEEE-GOODFREXP-NEXT: [[TMP2:%.*]] = extractvalue { float, i32 } [[TMP1]], 0
|
|
; IEEE-GOODFREXP-NEXT: [[TMP3:%.*]] = extractvalue { float, i32 } [[TMP1]], 1
|
|
; IEEE-GOODFREXP-NEXT: [[TMP4:%.*]] = sub i32 0, [[TMP3]]
|
|
; IEEE-GOODFREXP-NEXT: [[TMP5:%.*]] = call float @llvm.amdgcn.rcp.f32(float [[TMP2]])
|
|
; IEEE-GOODFREXP-NEXT: [[MD_1ULP_NO_SUB:%.*]] = call float @llvm.ldexp.f32.i32(float [[TMP5]], i32 [[TMP4]])
|
|
; IEEE-GOODFREXP-NEXT: store volatile float [[MD_1ULP_NO_SUB]], ptr addrspace(1) [[OUT]], align 4
|
|
; IEEE-GOODFREXP-NEXT: [[TMP6:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[NO_NSUB]])
|
|
; IEEE-GOODFREXP-NEXT: [[TMP7:%.*]] = extractvalue { float, i32 } [[TMP6]], 0
|
|
; IEEE-GOODFREXP-NEXT: [[TMP8:%.*]] = extractvalue { float, i32 } [[TMP6]], 1
|
|
; IEEE-GOODFREXP-NEXT: [[TMP9:%.*]] = sub i32 0, [[TMP8]]
|
|
; IEEE-GOODFREXP-NEXT: [[TMP10:%.*]] = call float @llvm.amdgcn.rcp.f32(float [[TMP7]])
|
|
; IEEE-GOODFREXP-NEXT: [[MD_1ULP_NO_NSUB:%.*]] = call float @llvm.ldexp.f32.i32(float [[TMP10]], i32 [[TMP9]])
|
|
; IEEE-GOODFREXP-NEXT: store volatile float [[MD_1ULP_NO_NSUB]], ptr addrspace(1) [[OUT]], align 4
|
|
; IEEE-GOODFREXP-NEXT: [[TMP11:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[NO_PSUB]])
|
|
; IEEE-GOODFREXP-NEXT: [[TMP12:%.*]] = extractvalue { float, i32 } [[TMP11]], 0
|
|
; IEEE-GOODFREXP-NEXT: [[TMP13:%.*]] = extractvalue { float, i32 } [[TMP11]], 1
|
|
; IEEE-GOODFREXP-NEXT: [[TMP14:%.*]] = sub i32 0, [[TMP13]]
|
|
; IEEE-GOODFREXP-NEXT: [[TMP15:%.*]] = call float @llvm.amdgcn.rcp.f32(float [[TMP12]])
|
|
; IEEE-GOODFREXP-NEXT: [[MD_1ULP_NO_PSUB:%.*]] = call float @llvm.ldexp.f32.i32(float [[TMP15]], i32 [[TMP14]])
|
|
; IEEE-GOODFREXP-NEXT: store volatile float [[MD_1ULP_NO_PSUB]], ptr addrspace(1) [[OUT]], align 4
|
|
; IEEE-GOODFREXP-NEXT: ret void
|
|
;
|
|
; IEEE-BADFREXP-LABEL: define amdgpu_kernel void @rcp_fdiv_f32_nosub
|
|
; IEEE-BADFREXP-SAME: (ptr addrspace(1) [[OUT:%.*]], float nofpclass(sub) [[NO_SUB:%.*]], float nofpclass(nsub) [[NO_NSUB:%.*]], float nofpclass(psub) [[NO_PSUB:%.*]]) #[[ATTR1]] {
|
|
; IEEE-BADFREXP-NEXT: [[TMP1:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[NO_SUB]])
|
|
; IEEE-BADFREXP-NEXT: [[TMP2:%.*]] = extractvalue { float, i32 } [[TMP1]], 0
|
|
; IEEE-BADFREXP-NEXT: [[TMP3:%.*]] = call i32 @llvm.amdgcn.frexp.exp.i32.f32(float [[NO_SUB]])
|
|
; IEEE-BADFREXP-NEXT: [[TMP4:%.*]] = sub i32 0, [[TMP3]]
|
|
; IEEE-BADFREXP-NEXT: [[TMP5:%.*]] = call float @llvm.amdgcn.rcp.f32(float [[TMP2]])
|
|
; IEEE-BADFREXP-NEXT: [[MD_1ULP_NO_SUB:%.*]] = call float @llvm.ldexp.f32.i32(float [[TMP5]], i32 [[TMP4]])
|
|
; IEEE-BADFREXP-NEXT: store volatile float [[MD_1ULP_NO_SUB]], ptr addrspace(1) [[OUT]], align 4
|
|
; IEEE-BADFREXP-NEXT: [[TMP6:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[NO_NSUB]])
|
|
; IEEE-BADFREXP-NEXT: [[TMP7:%.*]] = extractvalue { float, i32 } [[TMP6]], 0
|
|
; IEEE-BADFREXP-NEXT: [[TMP8:%.*]] = call i32 @llvm.amdgcn.frexp.exp.i32.f32(float [[NO_NSUB]])
|
|
; IEEE-BADFREXP-NEXT: [[TMP9:%.*]] = sub i32 0, [[TMP8]]
|
|
; IEEE-BADFREXP-NEXT: [[TMP10:%.*]] = call float @llvm.amdgcn.rcp.f32(float [[TMP7]])
|
|
; IEEE-BADFREXP-NEXT: [[MD_1ULP_NO_NSUB:%.*]] = call float @llvm.ldexp.f32.i32(float [[TMP10]], i32 [[TMP9]])
|
|
; IEEE-BADFREXP-NEXT: store volatile float [[MD_1ULP_NO_NSUB]], ptr addrspace(1) [[OUT]], align 4
|
|
; IEEE-BADFREXP-NEXT: [[TMP11:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[NO_PSUB]])
|
|
; IEEE-BADFREXP-NEXT: [[TMP12:%.*]] = extractvalue { float, i32 } [[TMP11]], 0
|
|
; IEEE-BADFREXP-NEXT: [[TMP13:%.*]] = call i32 @llvm.amdgcn.frexp.exp.i32.f32(float [[NO_PSUB]])
|
|
; IEEE-BADFREXP-NEXT: [[TMP14:%.*]] = sub i32 0, [[TMP13]]
|
|
; IEEE-BADFREXP-NEXT: [[TMP15:%.*]] = call float @llvm.amdgcn.rcp.f32(float [[TMP12]])
|
|
; IEEE-BADFREXP-NEXT: [[MD_1ULP_NO_PSUB:%.*]] = call float @llvm.ldexp.f32.i32(float [[TMP15]], i32 [[TMP14]])
|
|
; IEEE-BADFREXP-NEXT: store volatile float [[MD_1ULP_NO_PSUB]], ptr addrspace(1) [[OUT]], align 4
|
|
; IEEE-BADFREXP-NEXT: ret void
|
|
;
|
|
; DAZ-LABEL: define amdgpu_kernel void @rcp_fdiv_f32_nosub
|
|
; DAZ-SAME: (ptr addrspace(1) [[OUT:%.*]], float nofpclass(sub) [[NO_SUB:%.*]], float nofpclass(nsub) [[NO_NSUB:%.*]], float nofpclass(psub) [[NO_PSUB:%.*]]) #[[ATTR1]] {
|
|
; DAZ-NEXT: [[MD_1ULP_NO_SUB:%.*]] = call float @llvm.amdgcn.rcp.f32(float [[NO_SUB]])
|
|
; DAZ-NEXT: store volatile float [[MD_1ULP_NO_SUB]], ptr addrspace(1) [[OUT]], align 4
|
|
; DAZ-NEXT: [[MD_1ULP_NO_NSUB:%.*]] = call float @llvm.amdgcn.rcp.f32(float [[NO_NSUB]])
|
|
; DAZ-NEXT: store volatile float [[MD_1ULP_NO_NSUB]], ptr addrspace(1) [[OUT]], align 4
|
|
; DAZ-NEXT: [[MD_1ULP_NO_PSUB:%.*]] = call float @llvm.amdgcn.rcp.f32(float [[NO_PSUB]])
|
|
; DAZ-NEXT: store volatile float [[MD_1ULP_NO_PSUB]], ptr addrspace(1) [[OUT]], align 4
|
|
; DAZ-NEXT: ret void
|
|
;
|
|
float nofpclass(sub) %no.sub,
|
|
float nofpclass(nsub) %no.nsub,
|
|
float nofpclass(psub) %no.psub) {
|
|
%md.1ulp.no.sub = fdiv float 1.000000e+00, %no.sub, !fpmath !2
|
|
store volatile float %md.1ulp.no.sub, ptr addrspace(1) %out, align 4
|
|
|
|
%md.1ulp.no.nsub = fdiv float 1.000000e+00, %no.nsub, !fpmath !2
|
|
store volatile float %md.1ulp.no.nsub, ptr addrspace(1) %out, align 4
|
|
|
|
%md.1ulp.no.psub = fdiv float 1.000000e+00, %no.psub, !fpmath !2
|
|
store volatile float %md.1ulp.no.psub, ptr addrspace(1) %out, align 4
|
|
|
|
ret void
|
|
}
|
|
|
|
define amdgpu_kernel void @rcp_fdiv_f32_assume_nosub(ptr addrspace(1) %out, float %x) {
|
|
; IEEE-GOODFREXP-LABEL: define amdgpu_kernel void @rcp_fdiv_f32_assume_nosub
|
|
; IEEE-GOODFREXP-SAME: (ptr addrspace(1) [[OUT:%.*]], float [[X:%.*]]) #[[ATTR1]] {
|
|
; IEEE-GOODFREXP-NEXT: [[FABS_X:%.*]] = call float @llvm.fabs.f32(float [[X]])
|
|
; IEEE-GOODFREXP-NEXT: [[IS_NOT_SUBNORMAL:%.*]] = fcmp oge float [[FABS_X]], 0x3810000000000000
|
|
; IEEE-GOODFREXP-NEXT: call void @llvm.assume(i1 [[IS_NOT_SUBNORMAL]])
|
|
; IEEE-GOODFREXP-NEXT: [[TMP1:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[X]])
|
|
; IEEE-GOODFREXP-NEXT: [[TMP2:%.*]] = extractvalue { float, i32 } [[TMP1]], 0
|
|
; IEEE-GOODFREXP-NEXT: [[TMP3:%.*]] = extractvalue { float, i32 } [[TMP1]], 1
|
|
; IEEE-GOODFREXP-NEXT: [[TMP4:%.*]] = sub i32 0, [[TMP3]]
|
|
; IEEE-GOODFREXP-NEXT: [[TMP5:%.*]] = call float @llvm.amdgcn.rcp.f32(float [[TMP2]])
|
|
; IEEE-GOODFREXP-NEXT: [[MD_1ULP_NO_SUB:%.*]] = call float @llvm.ldexp.f32.i32(float [[TMP5]], i32 [[TMP4]])
|
|
; IEEE-GOODFREXP-NEXT: store volatile float [[MD_1ULP_NO_SUB]], ptr addrspace(1) [[OUT]], align 4
|
|
; IEEE-GOODFREXP-NEXT: ret void
|
|
;
|
|
; IEEE-BADFREXP-LABEL: define amdgpu_kernel void @rcp_fdiv_f32_assume_nosub
|
|
; IEEE-BADFREXP-SAME: (ptr addrspace(1) [[OUT:%.*]], float [[X:%.*]]) #[[ATTR1]] {
|
|
; IEEE-BADFREXP-NEXT: [[FABS_X:%.*]] = call float @llvm.fabs.f32(float [[X]])
|
|
; IEEE-BADFREXP-NEXT: [[IS_NOT_SUBNORMAL:%.*]] = fcmp oge float [[FABS_X]], 0x3810000000000000
|
|
; IEEE-BADFREXP-NEXT: call void @llvm.assume(i1 [[IS_NOT_SUBNORMAL]])
|
|
; IEEE-BADFREXP-NEXT: [[TMP1:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[X]])
|
|
; IEEE-BADFREXP-NEXT: [[TMP2:%.*]] = extractvalue { float, i32 } [[TMP1]], 0
|
|
; IEEE-BADFREXP-NEXT: [[TMP3:%.*]] = call i32 @llvm.amdgcn.frexp.exp.i32.f32(float [[X]])
|
|
; IEEE-BADFREXP-NEXT: [[TMP4:%.*]] = sub i32 0, [[TMP3]]
|
|
; IEEE-BADFREXP-NEXT: [[TMP5:%.*]] = call float @llvm.amdgcn.rcp.f32(float [[TMP2]])
|
|
; IEEE-BADFREXP-NEXT: [[MD_1ULP_NO_SUB:%.*]] = call float @llvm.ldexp.f32.i32(float [[TMP5]], i32 [[TMP4]])
|
|
; IEEE-BADFREXP-NEXT: store volatile float [[MD_1ULP_NO_SUB]], ptr addrspace(1) [[OUT]], align 4
|
|
; IEEE-BADFREXP-NEXT: ret void
|
|
;
|
|
; DAZ-LABEL: define amdgpu_kernel void @rcp_fdiv_f32_assume_nosub
|
|
; DAZ-SAME: (ptr addrspace(1) [[OUT:%.*]], float [[X:%.*]]) #[[ATTR1]] {
|
|
; DAZ-NEXT: [[FABS_X:%.*]] = call float @llvm.fabs.f32(float [[X]])
|
|
; DAZ-NEXT: [[IS_NOT_SUBNORMAL:%.*]] = fcmp oge float [[FABS_X]], 0x3810000000000000
|
|
; DAZ-NEXT: call void @llvm.assume(i1 [[IS_NOT_SUBNORMAL]])
|
|
; DAZ-NEXT: [[MD_1ULP_NO_SUB:%.*]] = call float @llvm.amdgcn.rcp.f32(float [[X]])
|
|
; DAZ-NEXT: store volatile float [[MD_1ULP_NO_SUB]], ptr addrspace(1) [[OUT]], align 4
|
|
; DAZ-NEXT: ret void
|
|
;
|
|
%fabs.x = call float @llvm.fabs.f32(float %x)
|
|
%is.not.subnormal = fcmp oge float %fabs.x, 0x3810000000000000
|
|
call void @llvm.assume(i1 %is.not.subnormal)
|
|
%md.1ulp.no.sub = fdiv float 1.000000e+00, %x, !fpmath !2
|
|
store volatile float %md.1ulp.no.sub, ptr addrspace(1) %out, align 4
|
|
ret void
|
|
}
|
|
|
|
; Test if we have an assumption on the output that it's not denormal.
|
|
define amdgpu_kernel void @rcp_fdiv_f32_assume_nosub_assume_result_nosub(ptr addrspace(1) %out, float %x) {
|
|
; IEEE-GOODFREXP-LABEL: define amdgpu_kernel void @rcp_fdiv_f32_assume_nosub_assume_result_nosub
|
|
; IEEE-GOODFREXP-SAME: (ptr addrspace(1) [[OUT:%.*]], float [[X:%.*]]) #[[ATTR1]] {
|
|
; IEEE-GOODFREXP-NEXT: [[FABS_X:%.*]] = call float @llvm.fabs.f32(float [[X]])
|
|
; IEEE-GOODFREXP-NEXT: [[IS_NOT_SUBNORMAL:%.*]] = fcmp oge float [[FABS_X]], 0x3810000000000000
|
|
; IEEE-GOODFREXP-NEXT: call void @llvm.assume(i1 [[IS_NOT_SUBNORMAL]])
|
|
; IEEE-GOODFREXP-NEXT: [[TMP1:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[X]])
|
|
; IEEE-GOODFREXP-NEXT: [[TMP2:%.*]] = extractvalue { float, i32 } [[TMP1]], 0
|
|
; IEEE-GOODFREXP-NEXT: [[TMP3:%.*]] = extractvalue { float, i32 } [[TMP1]], 1
|
|
; IEEE-GOODFREXP-NEXT: [[TMP4:%.*]] = sub i32 0, [[TMP3]]
|
|
; IEEE-GOODFREXP-NEXT: [[TMP5:%.*]] = call float @llvm.amdgcn.rcp.f32(float [[TMP2]])
|
|
; IEEE-GOODFREXP-NEXT: [[MD_1ULP_NO_SUB:%.*]] = call float @llvm.ldexp.f32.i32(float [[TMP5]], i32 [[TMP4]])
|
|
; IEEE-GOODFREXP-NEXT: [[FABS_RESULT:%.*]] = call float @llvm.fabs.f32(float [[MD_1ULP_NO_SUB]])
|
|
; IEEE-GOODFREXP-NEXT: [[RESULT_IS_NOT_SUBNORMAL:%.*]] = fcmp oge float [[FABS_RESULT]], 0x3810000000000000
|
|
; IEEE-GOODFREXP-NEXT: call void @llvm.assume(i1 [[RESULT_IS_NOT_SUBNORMAL]])
|
|
; IEEE-GOODFREXP-NEXT: store volatile float [[MD_1ULP_NO_SUB]], ptr addrspace(1) [[OUT]], align 4
|
|
; IEEE-GOODFREXP-NEXT: ret void
|
|
;
|
|
; IEEE-BADFREXP-LABEL: define amdgpu_kernel void @rcp_fdiv_f32_assume_nosub_assume_result_nosub
|
|
; IEEE-BADFREXP-SAME: (ptr addrspace(1) [[OUT:%.*]], float [[X:%.*]]) #[[ATTR1]] {
|
|
; IEEE-BADFREXP-NEXT: [[FABS_X:%.*]] = call float @llvm.fabs.f32(float [[X]])
|
|
; IEEE-BADFREXP-NEXT: [[IS_NOT_SUBNORMAL:%.*]] = fcmp oge float [[FABS_X]], 0x3810000000000000
|
|
; IEEE-BADFREXP-NEXT: call void @llvm.assume(i1 [[IS_NOT_SUBNORMAL]])
|
|
; IEEE-BADFREXP-NEXT: [[TMP1:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[X]])
|
|
; IEEE-BADFREXP-NEXT: [[TMP2:%.*]] = extractvalue { float, i32 } [[TMP1]], 0
|
|
; IEEE-BADFREXP-NEXT: [[TMP3:%.*]] = call i32 @llvm.amdgcn.frexp.exp.i32.f32(float [[X]])
|
|
; IEEE-BADFREXP-NEXT: [[TMP4:%.*]] = sub i32 0, [[TMP3]]
|
|
; IEEE-BADFREXP-NEXT: [[TMP5:%.*]] = call float @llvm.amdgcn.rcp.f32(float [[TMP2]])
|
|
; IEEE-BADFREXP-NEXT: [[MD_1ULP_NO_SUB:%.*]] = call float @llvm.ldexp.f32.i32(float [[TMP5]], i32 [[TMP4]])
|
|
; IEEE-BADFREXP-NEXT: [[FABS_RESULT:%.*]] = call float @llvm.fabs.f32(float [[MD_1ULP_NO_SUB]])
|
|
; IEEE-BADFREXP-NEXT: [[RESULT_IS_NOT_SUBNORMAL:%.*]] = fcmp oge float [[FABS_RESULT]], 0x3810000000000000
|
|
; IEEE-BADFREXP-NEXT: call void @llvm.assume(i1 [[RESULT_IS_NOT_SUBNORMAL]])
|
|
; IEEE-BADFREXP-NEXT: store volatile float [[MD_1ULP_NO_SUB]], ptr addrspace(1) [[OUT]], align 4
|
|
; IEEE-BADFREXP-NEXT: ret void
|
|
;
|
|
; DAZ-LABEL: define amdgpu_kernel void @rcp_fdiv_f32_assume_nosub_assume_result_nosub
|
|
; DAZ-SAME: (ptr addrspace(1) [[OUT:%.*]], float [[X:%.*]]) #[[ATTR1]] {
|
|
; DAZ-NEXT: [[FABS_X:%.*]] = call float @llvm.fabs.f32(float [[X]])
|
|
; DAZ-NEXT: [[IS_NOT_SUBNORMAL:%.*]] = fcmp oge float [[FABS_X]], 0x3810000000000000
|
|
; DAZ-NEXT: call void @llvm.assume(i1 [[IS_NOT_SUBNORMAL]])
|
|
; DAZ-NEXT: [[MD_1ULP_NO_SUB:%.*]] = call float @llvm.amdgcn.rcp.f32(float [[X]])
|
|
; DAZ-NEXT: [[FABS_RESULT:%.*]] = call float @llvm.fabs.f32(float [[MD_1ULP_NO_SUB]])
|
|
; DAZ-NEXT: [[RESULT_IS_NOT_SUBNORMAL:%.*]] = fcmp oge float [[FABS_RESULT]], 0x3810000000000000
|
|
; DAZ-NEXT: call void @llvm.assume(i1 [[RESULT_IS_NOT_SUBNORMAL]])
|
|
; DAZ-NEXT: store volatile float [[MD_1ULP_NO_SUB]], ptr addrspace(1) [[OUT]], align 4
|
|
; DAZ-NEXT: ret void
|
|
;
|
|
%fabs.x = call float @llvm.fabs.f32(float %x)
|
|
%is.not.subnormal = fcmp oge float %fabs.x, 0x3810000000000000
|
|
call void @llvm.assume(i1 %is.not.subnormal)
|
|
%md.1ulp.no.sub = fdiv float 1.000000e+00, %x, !fpmath !2
|
|
|
|
%fabs.result = call float @llvm.fabs.f32(float %md.1ulp.no.sub)
|
|
%result.is.not.subnormal = fcmp oge float %fabs.result, 0x3810000000000000
|
|
call void @llvm.assume(i1 %result.is.not.subnormal)
|
|
store volatile float %md.1ulp.no.sub, ptr addrspace(1) %out, align 4
|
|
ret void
|
|
}
|
|
|
|
define amdgpu_kernel void @rcp_fdiv_f32_vector_fpmath_flags(ptr addrspace(1) %out, <2 x float> %x) {
|
|
; IEEE-GOODFREXP-LABEL: define amdgpu_kernel void @rcp_fdiv_f32_vector_fpmath_flags
|
|
; IEEE-GOODFREXP-SAME: (ptr addrspace(1) [[OUT:%.*]], <2 x float> [[X:%.*]]) #[[ATTR1]] {
|
|
; IEEE-GOODFREXP-NEXT: [[TMP1:%.*]] = extractelement <2 x float> [[X]], i64 0
|
|
; IEEE-GOODFREXP-NEXT: [[TMP2:%.*]] = extractelement <2 x float> [[X]], i64 1
|
|
; IEEE-GOODFREXP-NEXT: [[TMP3:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[TMP1]])
|
|
; IEEE-GOODFREXP-NEXT: [[TMP4:%.*]] = extractvalue { float, i32 } [[TMP3]], 0
|
|
; IEEE-GOODFREXP-NEXT: [[TMP5:%.*]] = extractvalue { float, i32 } [[TMP3]], 1
|
|
; IEEE-GOODFREXP-NEXT: [[TMP6:%.*]] = sub i32 0, [[TMP5]]
|
|
; IEEE-GOODFREXP-NEXT: [[TMP7:%.*]] = call nnan ninf float @llvm.amdgcn.rcp.f32(float [[TMP4]])
|
|
; IEEE-GOODFREXP-NEXT: [[TMP8:%.*]] = call nnan ninf float @llvm.ldexp.f32.i32(float [[TMP7]], i32 [[TMP6]])
|
|
; IEEE-GOODFREXP-NEXT: [[TMP9:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[TMP2]])
|
|
; IEEE-GOODFREXP-NEXT: [[TMP10:%.*]] = extractvalue { float, i32 } [[TMP9]], 0
|
|
; IEEE-GOODFREXP-NEXT: [[TMP11:%.*]] = extractvalue { float, i32 } [[TMP9]], 1
|
|
; IEEE-GOODFREXP-NEXT: [[TMP12:%.*]] = sub i32 0, [[TMP11]]
|
|
; IEEE-GOODFREXP-NEXT: [[TMP13:%.*]] = call nnan ninf float @llvm.amdgcn.rcp.f32(float [[TMP10]])
|
|
; IEEE-GOODFREXP-NEXT: [[TMP14:%.*]] = call nnan ninf float @llvm.ldexp.f32.i32(float [[TMP13]], i32 [[TMP12]])
|
|
; IEEE-GOODFREXP-NEXT: [[TMP15:%.*]] = insertelement <2 x float> poison, float [[TMP8]], i64 0
|
|
; IEEE-GOODFREXP-NEXT: [[MD_1ULP_NINF_NNAN:%.*]] = insertelement <2 x float> [[TMP15]], float [[TMP14]], i64 1
|
|
; IEEE-GOODFREXP-NEXT: store volatile <2 x float> [[MD_1ULP_NINF_NNAN]], ptr addrspace(1) [[OUT]], align 4
|
|
; IEEE-GOODFREXP-NEXT: [[TMP16:%.*]] = extractelement <2 x float> [[X]], i64 0
|
|
; IEEE-GOODFREXP-NEXT: [[TMP17:%.*]] = extractelement <2 x float> [[X]], i64 1
|
|
; IEEE-GOODFREXP-NEXT: [[TMP18:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[TMP16]])
|
|
; IEEE-GOODFREXP-NEXT: [[TMP19:%.*]] = extractvalue { float, i32 } [[TMP18]], 0
|
|
; IEEE-GOODFREXP-NEXT: [[TMP20:%.*]] = extractvalue { float, i32 } [[TMP18]], 1
|
|
; IEEE-GOODFREXP-NEXT: [[TMP21:%.*]] = sub i32 0, [[TMP20]]
|
|
; IEEE-GOODFREXP-NEXT: [[TMP22:%.*]] = call ninf float @llvm.amdgcn.rcp.f32(float [[TMP19]])
|
|
; IEEE-GOODFREXP-NEXT: [[TMP23:%.*]] = call ninf float @llvm.ldexp.f32.i32(float [[TMP22]], i32 [[TMP21]])
|
|
; IEEE-GOODFREXP-NEXT: [[TMP24:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[TMP17]])
|
|
; IEEE-GOODFREXP-NEXT: [[TMP25:%.*]] = extractvalue { float, i32 } [[TMP24]], 0
|
|
; IEEE-GOODFREXP-NEXT: [[TMP26:%.*]] = extractvalue { float, i32 } [[TMP24]], 1
|
|
; IEEE-GOODFREXP-NEXT: [[TMP27:%.*]] = sub i32 0, [[TMP26]]
|
|
; IEEE-GOODFREXP-NEXT: [[TMP28:%.*]] = call ninf float @llvm.amdgcn.rcp.f32(float [[TMP25]])
|
|
; IEEE-GOODFREXP-NEXT: [[TMP29:%.*]] = call ninf float @llvm.ldexp.f32.i32(float [[TMP28]], i32 [[TMP27]])
|
|
; IEEE-GOODFREXP-NEXT: [[TMP30:%.*]] = insertelement <2 x float> poison, float [[TMP23]], i64 0
|
|
; IEEE-GOODFREXP-NEXT: [[MD_1ULP_NINF:%.*]] = insertelement <2 x float> [[TMP30]], float [[TMP29]], i64 1
|
|
; IEEE-GOODFREXP-NEXT: store volatile <2 x float> [[MD_1ULP_NINF]], ptr addrspace(1) [[OUT]], align 4
|
|
; IEEE-GOODFREXP-NEXT: [[TMP31:%.*]] = extractelement <2 x float> [[X]], i64 0
|
|
; IEEE-GOODFREXP-NEXT: [[TMP32:%.*]] = extractelement <2 x float> [[X]], i64 1
|
|
; IEEE-GOODFREXP-NEXT: [[TMP33:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[TMP31]])
|
|
; IEEE-GOODFREXP-NEXT: [[TMP34:%.*]] = extractvalue { float, i32 } [[TMP33]], 0
|
|
; IEEE-GOODFREXP-NEXT: [[TMP35:%.*]] = extractvalue { float, i32 } [[TMP33]], 1
|
|
; IEEE-GOODFREXP-NEXT: [[TMP36:%.*]] = sub i32 0, [[TMP35]]
|
|
; IEEE-GOODFREXP-NEXT: [[TMP37:%.*]] = call nnan float @llvm.amdgcn.rcp.f32(float [[TMP34]])
|
|
; IEEE-GOODFREXP-NEXT: [[TMP38:%.*]] = call nnan float @llvm.ldexp.f32.i32(float [[TMP37]], i32 [[TMP36]])
|
|
; IEEE-GOODFREXP-NEXT: [[TMP39:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[TMP32]])
|
|
; IEEE-GOODFREXP-NEXT: [[TMP40:%.*]] = extractvalue { float, i32 } [[TMP39]], 0
|
|
; IEEE-GOODFREXP-NEXT: [[TMP41:%.*]] = extractvalue { float, i32 } [[TMP39]], 1
|
|
; IEEE-GOODFREXP-NEXT: [[TMP42:%.*]] = sub i32 0, [[TMP41]]
|
|
; IEEE-GOODFREXP-NEXT: [[TMP43:%.*]] = call nnan float @llvm.amdgcn.rcp.f32(float [[TMP40]])
|
|
; IEEE-GOODFREXP-NEXT: [[TMP44:%.*]] = call nnan float @llvm.ldexp.f32.i32(float [[TMP43]], i32 [[TMP42]])
|
|
; IEEE-GOODFREXP-NEXT: [[TMP45:%.*]] = insertelement <2 x float> poison, float [[TMP38]], i64 0
|
|
; IEEE-GOODFREXP-NEXT: [[MD_1ULP_NNAN:%.*]] = insertelement <2 x float> [[TMP45]], float [[TMP44]], i64 1
|
|
; IEEE-GOODFREXP-NEXT: store volatile <2 x float> [[MD_1ULP_NNAN]], ptr addrspace(1) [[OUT]], align 4
|
|
; IEEE-GOODFREXP-NEXT: [[TMP46:%.*]] = extractelement <2 x float> [[X]], i64 0
|
|
; IEEE-GOODFREXP-NEXT: [[TMP47:%.*]] = extractelement <2 x float> [[X]], i64 1
|
|
; IEEE-GOODFREXP-NEXT: [[TMP48:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[TMP46]])
|
|
; IEEE-GOODFREXP-NEXT: [[TMP49:%.*]] = extractvalue { float, i32 } [[TMP48]], 0
|
|
; IEEE-GOODFREXP-NEXT: [[TMP50:%.*]] = extractvalue { float, i32 } [[TMP48]], 1
|
|
; IEEE-GOODFREXP-NEXT: [[TMP51:%.*]] = sub i32 0, [[TMP50]]
|
|
; IEEE-GOODFREXP-NEXT: [[TMP52:%.*]] = call nsz float @llvm.amdgcn.rcp.f32(float [[TMP49]])
|
|
; IEEE-GOODFREXP-NEXT: [[TMP53:%.*]] = call nsz float @llvm.ldexp.f32.i32(float [[TMP52]], i32 [[TMP51]])
|
|
; IEEE-GOODFREXP-NEXT: [[TMP54:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[TMP47]])
|
|
; IEEE-GOODFREXP-NEXT: [[TMP55:%.*]] = extractvalue { float, i32 } [[TMP54]], 0
|
|
; IEEE-GOODFREXP-NEXT: [[TMP56:%.*]] = extractvalue { float, i32 } [[TMP54]], 1
|
|
; IEEE-GOODFREXP-NEXT: [[TMP57:%.*]] = sub i32 0, [[TMP56]]
|
|
; IEEE-GOODFREXP-NEXT: [[TMP58:%.*]] = call nsz float @llvm.amdgcn.rcp.f32(float [[TMP55]])
|
|
; IEEE-GOODFREXP-NEXT: [[TMP59:%.*]] = call nsz float @llvm.ldexp.f32.i32(float [[TMP58]], i32 [[TMP57]])
|
|
; IEEE-GOODFREXP-NEXT: [[TMP60:%.*]] = insertelement <2 x float> poison, float [[TMP53]], i64 0
|
|
; IEEE-GOODFREXP-NEXT: [[MD_1ULP_NSZ:%.*]] = insertelement <2 x float> [[TMP60]], float [[TMP59]], i64 1
|
|
; IEEE-GOODFREXP-NEXT: store volatile <2 x float> [[MD_1ULP_NSZ]], ptr addrspace(1) [[OUT]], align 4
|
|
; IEEE-GOODFREXP-NEXT: ret void
|
|
;
|
|
; IEEE-BADFREXP-LABEL: define amdgpu_kernel void @rcp_fdiv_f32_vector_fpmath_flags
|
|
; IEEE-BADFREXP-SAME: (ptr addrspace(1) [[OUT:%.*]], <2 x float> [[X:%.*]]) #[[ATTR1]] {
|
|
; IEEE-BADFREXP-NEXT: [[TMP1:%.*]] = extractelement <2 x float> [[X]], i64 0
|
|
; IEEE-BADFREXP-NEXT: [[TMP2:%.*]] = extractelement <2 x float> [[X]], i64 1
|
|
; IEEE-BADFREXP-NEXT: [[TMP3:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[TMP1]])
|
|
; IEEE-BADFREXP-NEXT: [[TMP4:%.*]] = extractvalue { float, i32 } [[TMP3]], 0
|
|
; IEEE-BADFREXP-NEXT: [[TMP5:%.*]] = call i32 @llvm.amdgcn.frexp.exp.i32.f32(float [[TMP1]])
|
|
; IEEE-BADFREXP-NEXT: [[TMP6:%.*]] = sub i32 0, [[TMP5]]
|
|
; IEEE-BADFREXP-NEXT: [[TMP7:%.*]] = call nnan ninf float @llvm.amdgcn.rcp.f32(float [[TMP4]])
|
|
; IEEE-BADFREXP-NEXT: [[TMP8:%.*]] = call nnan ninf float @llvm.ldexp.f32.i32(float [[TMP7]], i32 [[TMP6]])
|
|
; IEEE-BADFREXP-NEXT: [[TMP9:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[TMP2]])
|
|
; IEEE-BADFREXP-NEXT: [[TMP10:%.*]] = extractvalue { float, i32 } [[TMP9]], 0
|
|
; IEEE-BADFREXP-NEXT: [[TMP11:%.*]] = call i32 @llvm.amdgcn.frexp.exp.i32.f32(float [[TMP2]])
|
|
; IEEE-BADFREXP-NEXT: [[TMP12:%.*]] = sub i32 0, [[TMP11]]
|
|
; IEEE-BADFREXP-NEXT: [[TMP13:%.*]] = call nnan ninf float @llvm.amdgcn.rcp.f32(float [[TMP10]])
|
|
; IEEE-BADFREXP-NEXT: [[TMP14:%.*]] = call nnan ninf float @llvm.ldexp.f32.i32(float [[TMP13]], i32 [[TMP12]])
|
|
; IEEE-BADFREXP-NEXT: [[TMP15:%.*]] = insertelement <2 x float> poison, float [[TMP8]], i64 0
|
|
; IEEE-BADFREXP-NEXT: [[MD_1ULP_NINF_NNAN:%.*]] = insertelement <2 x float> [[TMP15]], float [[TMP14]], i64 1
|
|
; IEEE-BADFREXP-NEXT: store volatile <2 x float> [[MD_1ULP_NINF_NNAN]], ptr addrspace(1) [[OUT]], align 4
|
|
; IEEE-BADFREXP-NEXT: [[TMP16:%.*]] = extractelement <2 x float> [[X]], i64 0
|
|
; IEEE-BADFREXP-NEXT: [[TMP17:%.*]] = extractelement <2 x float> [[X]], i64 1
|
|
; IEEE-BADFREXP-NEXT: [[TMP18:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[TMP16]])
|
|
; IEEE-BADFREXP-NEXT: [[TMP19:%.*]] = extractvalue { float, i32 } [[TMP18]], 0
|
|
; IEEE-BADFREXP-NEXT: [[TMP20:%.*]] = call i32 @llvm.amdgcn.frexp.exp.i32.f32(float [[TMP16]])
|
|
; IEEE-BADFREXP-NEXT: [[TMP21:%.*]] = sub i32 0, [[TMP20]]
|
|
; IEEE-BADFREXP-NEXT: [[TMP22:%.*]] = call ninf float @llvm.amdgcn.rcp.f32(float [[TMP19]])
|
|
; IEEE-BADFREXP-NEXT: [[TMP23:%.*]] = call ninf float @llvm.ldexp.f32.i32(float [[TMP22]], i32 [[TMP21]])
|
|
; IEEE-BADFREXP-NEXT: [[TMP24:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[TMP17]])
|
|
; IEEE-BADFREXP-NEXT: [[TMP25:%.*]] = extractvalue { float, i32 } [[TMP24]], 0
|
|
; IEEE-BADFREXP-NEXT: [[TMP26:%.*]] = call i32 @llvm.amdgcn.frexp.exp.i32.f32(float [[TMP17]])
|
|
; IEEE-BADFREXP-NEXT: [[TMP27:%.*]] = sub i32 0, [[TMP26]]
|
|
; IEEE-BADFREXP-NEXT: [[TMP28:%.*]] = call ninf float @llvm.amdgcn.rcp.f32(float [[TMP25]])
|
|
; IEEE-BADFREXP-NEXT: [[TMP29:%.*]] = call ninf float @llvm.ldexp.f32.i32(float [[TMP28]], i32 [[TMP27]])
|
|
; IEEE-BADFREXP-NEXT: [[TMP30:%.*]] = insertelement <2 x float> poison, float [[TMP23]], i64 0
|
|
; IEEE-BADFREXP-NEXT: [[MD_1ULP_NINF:%.*]] = insertelement <2 x float> [[TMP30]], float [[TMP29]], i64 1
|
|
; IEEE-BADFREXP-NEXT: store volatile <2 x float> [[MD_1ULP_NINF]], ptr addrspace(1) [[OUT]], align 4
|
|
; IEEE-BADFREXP-NEXT: [[TMP31:%.*]] = extractelement <2 x float> [[X]], i64 0
|
|
; IEEE-BADFREXP-NEXT: [[TMP32:%.*]] = extractelement <2 x float> [[X]], i64 1
|
|
; IEEE-BADFREXP-NEXT: [[TMP33:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[TMP31]])
|
|
; IEEE-BADFREXP-NEXT: [[TMP34:%.*]] = extractvalue { float, i32 } [[TMP33]], 0
|
|
; IEEE-BADFREXP-NEXT: [[TMP35:%.*]] = call i32 @llvm.amdgcn.frexp.exp.i32.f32(float [[TMP31]])
|
|
; IEEE-BADFREXP-NEXT: [[TMP36:%.*]] = sub i32 0, [[TMP35]]
|
|
; IEEE-BADFREXP-NEXT: [[TMP37:%.*]] = call nnan float @llvm.amdgcn.rcp.f32(float [[TMP34]])
|
|
; IEEE-BADFREXP-NEXT: [[TMP38:%.*]] = call nnan float @llvm.ldexp.f32.i32(float [[TMP37]], i32 [[TMP36]])
|
|
; IEEE-BADFREXP-NEXT: [[TMP39:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[TMP32]])
|
|
; IEEE-BADFREXP-NEXT: [[TMP40:%.*]] = extractvalue { float, i32 } [[TMP39]], 0
|
|
; IEEE-BADFREXP-NEXT: [[TMP41:%.*]] = call i32 @llvm.amdgcn.frexp.exp.i32.f32(float [[TMP32]])
|
|
; IEEE-BADFREXP-NEXT: [[TMP42:%.*]] = sub i32 0, [[TMP41]]
|
|
; IEEE-BADFREXP-NEXT: [[TMP43:%.*]] = call nnan float @llvm.amdgcn.rcp.f32(float [[TMP40]])
|
|
; IEEE-BADFREXP-NEXT: [[TMP44:%.*]] = call nnan float @llvm.ldexp.f32.i32(float [[TMP43]], i32 [[TMP42]])
|
|
; IEEE-BADFREXP-NEXT: [[TMP45:%.*]] = insertelement <2 x float> poison, float [[TMP38]], i64 0
|
|
; IEEE-BADFREXP-NEXT: [[MD_1ULP_NNAN:%.*]] = insertelement <2 x float> [[TMP45]], float [[TMP44]], i64 1
|
|
; IEEE-BADFREXP-NEXT: store volatile <2 x float> [[MD_1ULP_NNAN]], ptr addrspace(1) [[OUT]], align 4
|
|
; IEEE-BADFREXP-NEXT: [[TMP46:%.*]] = extractelement <2 x float> [[X]], i64 0
|
|
; IEEE-BADFREXP-NEXT: [[TMP47:%.*]] = extractelement <2 x float> [[X]], i64 1
|
|
; IEEE-BADFREXP-NEXT: [[TMP48:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[TMP46]])
|
|
; IEEE-BADFREXP-NEXT: [[TMP49:%.*]] = extractvalue { float, i32 } [[TMP48]], 0
|
|
; IEEE-BADFREXP-NEXT: [[TMP50:%.*]] = call i32 @llvm.amdgcn.frexp.exp.i32.f32(float [[TMP46]])
|
|
; IEEE-BADFREXP-NEXT: [[TMP51:%.*]] = sub i32 0, [[TMP50]]
|
|
; IEEE-BADFREXP-NEXT: [[TMP52:%.*]] = call nsz float @llvm.amdgcn.rcp.f32(float [[TMP49]])
|
|
; IEEE-BADFREXP-NEXT: [[TMP53:%.*]] = call nsz float @llvm.ldexp.f32.i32(float [[TMP52]], i32 [[TMP51]])
|
|
; IEEE-BADFREXP-NEXT: [[TMP54:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[TMP47]])
|
|
; IEEE-BADFREXP-NEXT: [[TMP55:%.*]] = extractvalue { float, i32 } [[TMP54]], 0
|
|
; IEEE-BADFREXP-NEXT: [[TMP56:%.*]] = call i32 @llvm.amdgcn.frexp.exp.i32.f32(float [[TMP47]])
|
|
; IEEE-BADFREXP-NEXT: [[TMP57:%.*]] = sub i32 0, [[TMP56]]
|
|
; IEEE-BADFREXP-NEXT: [[TMP58:%.*]] = call nsz float @llvm.amdgcn.rcp.f32(float [[TMP55]])
|
|
; IEEE-BADFREXP-NEXT: [[TMP59:%.*]] = call nsz float @llvm.ldexp.f32.i32(float [[TMP58]], i32 [[TMP57]])
|
|
; IEEE-BADFREXP-NEXT: [[TMP60:%.*]] = insertelement <2 x float> poison, float [[TMP53]], i64 0
|
|
; IEEE-BADFREXP-NEXT: [[MD_1ULP_NSZ:%.*]] = insertelement <2 x float> [[TMP60]], float [[TMP59]], i64 1
|
|
; IEEE-BADFREXP-NEXT: store volatile <2 x float> [[MD_1ULP_NSZ]], ptr addrspace(1) [[OUT]], align 4
|
|
; IEEE-BADFREXP-NEXT: ret void
|
|
;
|
|
; DAZ-LABEL: define amdgpu_kernel void @rcp_fdiv_f32_vector_fpmath_flags
|
|
; DAZ-SAME: (ptr addrspace(1) [[OUT:%.*]], <2 x float> [[X:%.*]]) #[[ATTR1]] {
|
|
; DAZ-NEXT: [[TMP1:%.*]] = extractelement <2 x float> [[X]], i64 0
|
|
; DAZ-NEXT: [[TMP2:%.*]] = extractelement <2 x float> [[X]], i64 1
|
|
; DAZ-NEXT: [[TMP3:%.*]] = call nnan ninf float @llvm.amdgcn.rcp.f32(float [[TMP1]])
|
|
; DAZ-NEXT: [[TMP4:%.*]] = call nnan ninf float @llvm.amdgcn.rcp.f32(float [[TMP2]])
|
|
; DAZ-NEXT: [[TMP5:%.*]] = insertelement <2 x float> poison, float [[TMP3]], i64 0
|
|
; DAZ-NEXT: [[MD_1ULP_NINF_NNAN:%.*]] = insertelement <2 x float> [[TMP5]], float [[TMP4]], i64 1
|
|
; DAZ-NEXT: store volatile <2 x float> [[MD_1ULP_NINF_NNAN]], ptr addrspace(1) [[OUT]], align 4
|
|
; DAZ-NEXT: [[TMP6:%.*]] = extractelement <2 x float> [[X]], i64 0
|
|
; DAZ-NEXT: [[TMP7:%.*]] = extractelement <2 x float> [[X]], i64 1
|
|
; DAZ-NEXT: [[TMP8:%.*]] = call ninf float @llvm.amdgcn.rcp.f32(float [[TMP6]])
|
|
; DAZ-NEXT: [[TMP9:%.*]] = call ninf float @llvm.amdgcn.rcp.f32(float [[TMP7]])
|
|
; DAZ-NEXT: [[TMP10:%.*]] = insertelement <2 x float> poison, float [[TMP8]], i64 0
|
|
; DAZ-NEXT: [[MD_1ULP_NINF:%.*]] = insertelement <2 x float> [[TMP10]], float [[TMP9]], i64 1
|
|
; DAZ-NEXT: store volatile <2 x float> [[MD_1ULP_NINF]], ptr addrspace(1) [[OUT]], align 4
|
|
; DAZ-NEXT: [[TMP11:%.*]] = extractelement <2 x float> [[X]], i64 0
|
|
; DAZ-NEXT: [[TMP12:%.*]] = extractelement <2 x float> [[X]], i64 1
|
|
; DAZ-NEXT: [[TMP13:%.*]] = call nnan float @llvm.amdgcn.rcp.f32(float [[TMP11]])
|
|
; DAZ-NEXT: [[TMP14:%.*]] = call nnan float @llvm.amdgcn.rcp.f32(float [[TMP12]])
|
|
; DAZ-NEXT: [[TMP15:%.*]] = insertelement <2 x float> poison, float [[TMP13]], i64 0
|
|
; DAZ-NEXT: [[MD_1ULP_NNAN:%.*]] = insertelement <2 x float> [[TMP15]], float [[TMP14]], i64 1
|
|
; DAZ-NEXT: store volatile <2 x float> [[MD_1ULP_NNAN]], ptr addrspace(1) [[OUT]], align 4
|
|
; DAZ-NEXT: [[TMP16:%.*]] = extractelement <2 x float> [[X]], i64 0
|
|
; DAZ-NEXT: [[TMP17:%.*]] = extractelement <2 x float> [[X]], i64 1
|
|
; DAZ-NEXT: [[TMP18:%.*]] = call nsz float @llvm.amdgcn.rcp.f32(float [[TMP16]])
|
|
; DAZ-NEXT: [[TMP19:%.*]] = call nsz float @llvm.amdgcn.rcp.f32(float [[TMP17]])
|
|
; DAZ-NEXT: [[TMP20:%.*]] = insertelement <2 x float> poison, float [[TMP18]], i64 0
|
|
; DAZ-NEXT: [[MD_1ULP_NSZ:%.*]] = insertelement <2 x float> [[TMP20]], float [[TMP19]], i64 1
|
|
; DAZ-NEXT: store volatile <2 x float> [[MD_1ULP_NSZ]], ptr addrspace(1) [[OUT]], align 4
|
|
; DAZ-NEXT: ret void
|
|
;
|
|
%md.1ulp.ninf.nnan = fdiv ninf nnan <2 x float> <float 1.0, float 1.0>, %x, !fpmath !2
|
|
store volatile <2 x float> %md.1ulp.ninf.nnan, ptr addrspace(1) %out, align 4
|
|
|
|
%md.1ulp.ninf = fdiv ninf <2 x float> <float 1.0, float 1.0>, %x, !fpmath !2
|
|
store volatile <2 x float> %md.1ulp.ninf, ptr addrspace(1) %out, align 4
|
|
|
|
%md.1ulp.nnan = fdiv nnan <2 x float> <float 1.0, float 1.0>, %x, !fpmath !2
|
|
store volatile <2 x float> %md.1ulp.nnan, ptr addrspace(1) %out, align 4
|
|
|
|
%md.1ulp.nsz = fdiv nsz <2 x float> <float 1.0, float 1.0>, %x, !fpmath !2
|
|
store volatile <2 x float> %md.1ulp.nsz, ptr addrspace(1) %out, align 4
|
|
|
|
ret void
|
|
}
|
|
|
|
define amdgpu_kernel void @fdiv_fpmath_f32_vector(ptr addrspace(1) %out, <2 x float> %a, <2 x float> %b) {
|
|
; IEEE-GOODFREXP-LABEL: define amdgpu_kernel void @fdiv_fpmath_f32_vector
|
|
; IEEE-GOODFREXP-SAME: (ptr addrspace(1) [[OUT:%.*]], <2 x float> [[A:%.*]], <2 x float> [[B:%.*]]) #[[ATTR1]] {
|
|
; IEEE-GOODFREXP-NEXT: [[NO_MD:%.*]] = fdiv <2 x float> [[A]], [[B]]
|
|
; IEEE-GOODFREXP-NEXT: store volatile <2 x float> [[NO_MD]], ptr addrspace(1) [[OUT]], align 8
|
|
; IEEE-GOODFREXP-NEXT: [[MD_HALF_ULP:%.*]] = fdiv <2 x float> [[A]], [[B]], !fpmath !1
|
|
; IEEE-GOODFREXP-NEXT: store volatile <2 x float> [[MD_HALF_ULP]], ptr addrspace(1) [[OUT]], align 8
|
|
; IEEE-GOODFREXP-NEXT: [[TMP1:%.*]] = extractelement <2 x float> [[A]], i64 0
|
|
; IEEE-GOODFREXP-NEXT: [[TMP2:%.*]] = extractelement <2 x float> [[A]], i64 1
|
|
; IEEE-GOODFREXP-NEXT: [[TMP3:%.*]] = extractelement <2 x float> [[B]], i64 0
|
|
; IEEE-GOODFREXP-NEXT: [[TMP4:%.*]] = extractelement <2 x float> [[B]], i64 1
|
|
; IEEE-GOODFREXP-NEXT: [[TMP5:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[TMP3]])
|
|
; IEEE-GOODFREXP-NEXT: [[TMP6:%.*]] = extractvalue { float, i32 } [[TMP5]], 0
|
|
; IEEE-GOODFREXP-NEXT: [[TMP7:%.*]] = extractvalue { float, i32 } [[TMP5]], 1
|
|
; IEEE-GOODFREXP-NEXT: [[TMP8:%.*]] = call float @llvm.amdgcn.rcp.f32(float [[TMP6]])
|
|
; IEEE-GOODFREXP-NEXT: [[TMP9:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[TMP1]])
|
|
; IEEE-GOODFREXP-NEXT: [[TMP10:%.*]] = extractvalue { float, i32 } [[TMP9]], 0
|
|
; IEEE-GOODFREXP-NEXT: [[TMP11:%.*]] = extractvalue { float, i32 } [[TMP9]], 1
|
|
; IEEE-GOODFREXP-NEXT: [[TMP12:%.*]] = fmul float [[TMP10]], [[TMP8]]
|
|
; IEEE-GOODFREXP-NEXT: [[TMP13:%.*]] = sub i32 [[TMP11]], [[TMP7]]
|
|
; IEEE-GOODFREXP-NEXT: [[TMP14:%.*]] = call float @llvm.ldexp.f32.i32(float [[TMP12]], i32 [[TMP13]])
|
|
; IEEE-GOODFREXP-NEXT: [[TMP15:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[TMP4]])
|
|
; IEEE-GOODFREXP-NEXT: [[TMP16:%.*]] = extractvalue { float, i32 } [[TMP15]], 0
|
|
; IEEE-GOODFREXP-NEXT: [[TMP17:%.*]] = extractvalue { float, i32 } [[TMP15]], 1
|
|
; IEEE-GOODFREXP-NEXT: [[TMP18:%.*]] = call float @llvm.amdgcn.rcp.f32(float [[TMP16]])
|
|
; IEEE-GOODFREXP-NEXT: [[TMP19:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[TMP2]])
|
|
; IEEE-GOODFREXP-NEXT: [[TMP20:%.*]] = extractvalue { float, i32 } [[TMP19]], 0
|
|
; IEEE-GOODFREXP-NEXT: [[TMP21:%.*]] = extractvalue { float, i32 } [[TMP19]], 1
|
|
; IEEE-GOODFREXP-NEXT: [[TMP22:%.*]] = fmul float [[TMP20]], [[TMP18]]
|
|
; IEEE-GOODFREXP-NEXT: [[TMP23:%.*]] = sub i32 [[TMP21]], [[TMP17]]
|
|
; IEEE-GOODFREXP-NEXT: [[TMP24:%.*]] = call float @llvm.ldexp.f32.i32(float [[TMP22]], i32 [[TMP23]])
|
|
; IEEE-GOODFREXP-NEXT: [[TMP25:%.*]] = insertelement <2 x float> poison, float [[TMP14]], i64 0
|
|
; IEEE-GOODFREXP-NEXT: [[MD_1ULP:%.*]] = insertelement <2 x float> [[TMP25]], float [[TMP24]], i64 1
|
|
; IEEE-GOODFREXP-NEXT: store volatile <2 x float> [[MD_1ULP]], ptr addrspace(1) [[OUT]], align 8
|
|
; IEEE-GOODFREXP-NEXT: [[TMP26:%.*]] = extractelement <2 x float> [[A]], i64 0
|
|
; IEEE-GOODFREXP-NEXT: [[TMP27:%.*]] = extractelement <2 x float> [[A]], i64 1
|
|
; IEEE-GOODFREXP-NEXT: [[TMP28:%.*]] = extractelement <2 x float> [[B]], i64 0
|
|
; IEEE-GOODFREXP-NEXT: [[TMP29:%.*]] = extractelement <2 x float> [[B]], i64 1
|
|
; IEEE-GOODFREXP-NEXT: [[TMP30:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[TMP28]])
|
|
; IEEE-GOODFREXP-NEXT: [[TMP31:%.*]] = extractvalue { float, i32 } [[TMP30]], 0
|
|
; IEEE-GOODFREXP-NEXT: [[TMP32:%.*]] = extractvalue { float, i32 } [[TMP30]], 1
|
|
; IEEE-GOODFREXP-NEXT: [[TMP33:%.*]] = call float @llvm.amdgcn.rcp.f32(float [[TMP31]])
|
|
; IEEE-GOODFREXP-NEXT: [[TMP34:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[TMP26]])
|
|
; IEEE-GOODFREXP-NEXT: [[TMP35:%.*]] = extractvalue { float, i32 } [[TMP34]], 0
|
|
; IEEE-GOODFREXP-NEXT: [[TMP36:%.*]] = extractvalue { float, i32 } [[TMP34]], 1
|
|
; IEEE-GOODFREXP-NEXT: [[TMP37:%.*]] = fmul float [[TMP35]], [[TMP33]]
|
|
; IEEE-GOODFREXP-NEXT: [[TMP38:%.*]] = sub i32 [[TMP36]], [[TMP32]]
|
|
; IEEE-GOODFREXP-NEXT: [[TMP39:%.*]] = call float @llvm.ldexp.f32.i32(float [[TMP37]], i32 [[TMP38]])
|
|
; IEEE-GOODFREXP-NEXT: [[TMP40:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[TMP29]])
|
|
; IEEE-GOODFREXP-NEXT: [[TMP41:%.*]] = extractvalue { float, i32 } [[TMP40]], 0
|
|
; IEEE-GOODFREXP-NEXT: [[TMP42:%.*]] = extractvalue { float, i32 } [[TMP40]], 1
|
|
; IEEE-GOODFREXP-NEXT: [[TMP43:%.*]] = call float @llvm.amdgcn.rcp.f32(float [[TMP41]])
|
|
; IEEE-GOODFREXP-NEXT: [[TMP44:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[TMP27]])
|
|
; IEEE-GOODFREXP-NEXT: [[TMP45:%.*]] = extractvalue { float, i32 } [[TMP44]], 0
|
|
; IEEE-GOODFREXP-NEXT: [[TMP46:%.*]] = extractvalue { float, i32 } [[TMP44]], 1
|
|
; IEEE-GOODFREXP-NEXT: [[TMP47:%.*]] = fmul float [[TMP45]], [[TMP43]]
|
|
; IEEE-GOODFREXP-NEXT: [[TMP48:%.*]] = sub i32 [[TMP46]], [[TMP42]]
|
|
; IEEE-GOODFREXP-NEXT: [[TMP49:%.*]] = call float @llvm.ldexp.f32.i32(float [[TMP47]], i32 [[TMP48]])
|
|
; IEEE-GOODFREXP-NEXT: [[TMP50:%.*]] = insertelement <2 x float> poison, float [[TMP39]], i64 0
|
|
; IEEE-GOODFREXP-NEXT: [[MD_25ULP:%.*]] = insertelement <2 x float> [[TMP50]], float [[TMP49]], i64 1
|
|
; IEEE-GOODFREXP-NEXT: store volatile <2 x float> [[MD_25ULP]], ptr addrspace(1) [[OUT]], align 8
|
|
; IEEE-GOODFREXP-NEXT: ret void
|
|
;
|
|
; IEEE-BADFREXP-LABEL: define amdgpu_kernel void @fdiv_fpmath_f32_vector
|
|
; IEEE-BADFREXP-SAME: (ptr addrspace(1) [[OUT:%.*]], <2 x float> [[A:%.*]], <2 x float> [[B:%.*]]) #[[ATTR1]] {
|
|
; IEEE-BADFREXP-NEXT: [[NO_MD:%.*]] = fdiv <2 x float> [[A]], [[B]]
|
|
; IEEE-BADFREXP-NEXT: store volatile <2 x float> [[NO_MD]], ptr addrspace(1) [[OUT]], align 8
|
|
; IEEE-BADFREXP-NEXT: [[MD_HALF_ULP:%.*]] = fdiv <2 x float> [[A]], [[B]], !fpmath !1
|
|
; IEEE-BADFREXP-NEXT: store volatile <2 x float> [[MD_HALF_ULP]], ptr addrspace(1) [[OUT]], align 8
|
|
; IEEE-BADFREXP-NEXT: [[TMP1:%.*]] = extractelement <2 x float> [[A]], i64 0
|
|
; IEEE-BADFREXP-NEXT: [[TMP2:%.*]] = extractelement <2 x float> [[A]], i64 1
|
|
; IEEE-BADFREXP-NEXT: [[TMP3:%.*]] = extractelement <2 x float> [[B]], i64 0
|
|
; IEEE-BADFREXP-NEXT: [[TMP4:%.*]] = extractelement <2 x float> [[B]], i64 1
|
|
; IEEE-BADFREXP-NEXT: [[TMP5:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[TMP3]])
|
|
; IEEE-BADFREXP-NEXT: [[TMP6:%.*]] = extractvalue { float, i32 } [[TMP5]], 0
|
|
; IEEE-BADFREXP-NEXT: [[TMP7:%.*]] = call i32 @llvm.amdgcn.frexp.exp.i32.f32(float [[TMP3]])
|
|
; IEEE-BADFREXP-NEXT: [[TMP8:%.*]] = call float @llvm.amdgcn.rcp.f32(float [[TMP6]])
|
|
; IEEE-BADFREXP-NEXT: [[TMP9:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[TMP1]])
|
|
; IEEE-BADFREXP-NEXT: [[TMP10:%.*]] = extractvalue { float, i32 } [[TMP9]], 0
|
|
; IEEE-BADFREXP-NEXT: [[TMP11:%.*]] = call i32 @llvm.amdgcn.frexp.exp.i32.f32(float [[TMP1]])
|
|
; IEEE-BADFREXP-NEXT: [[TMP12:%.*]] = fmul float [[TMP10]], [[TMP8]]
|
|
; IEEE-BADFREXP-NEXT: [[TMP13:%.*]] = sub i32 [[TMP11]], [[TMP7]]
|
|
; IEEE-BADFREXP-NEXT: [[TMP14:%.*]] = call float @llvm.ldexp.f32.i32(float [[TMP12]], i32 [[TMP13]])
|
|
; IEEE-BADFREXP-NEXT: [[TMP15:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[TMP4]])
|
|
; IEEE-BADFREXP-NEXT: [[TMP16:%.*]] = extractvalue { float, i32 } [[TMP15]], 0
|
|
; IEEE-BADFREXP-NEXT: [[TMP17:%.*]] = call i32 @llvm.amdgcn.frexp.exp.i32.f32(float [[TMP4]])
|
|
; IEEE-BADFREXP-NEXT: [[TMP18:%.*]] = call float @llvm.amdgcn.rcp.f32(float [[TMP16]])
|
|
; IEEE-BADFREXP-NEXT: [[TMP19:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[TMP2]])
|
|
; IEEE-BADFREXP-NEXT: [[TMP20:%.*]] = extractvalue { float, i32 } [[TMP19]], 0
|
|
; IEEE-BADFREXP-NEXT: [[TMP21:%.*]] = call i32 @llvm.amdgcn.frexp.exp.i32.f32(float [[TMP2]])
|
|
; IEEE-BADFREXP-NEXT: [[TMP22:%.*]] = fmul float [[TMP20]], [[TMP18]]
|
|
; IEEE-BADFREXP-NEXT: [[TMP23:%.*]] = sub i32 [[TMP21]], [[TMP17]]
|
|
; IEEE-BADFREXP-NEXT: [[TMP24:%.*]] = call float @llvm.ldexp.f32.i32(float [[TMP22]], i32 [[TMP23]])
|
|
; IEEE-BADFREXP-NEXT: [[TMP25:%.*]] = insertelement <2 x float> poison, float [[TMP14]], i64 0
|
|
; IEEE-BADFREXP-NEXT: [[MD_1ULP:%.*]] = insertelement <2 x float> [[TMP25]], float [[TMP24]], i64 1
|
|
; IEEE-BADFREXP-NEXT: store volatile <2 x float> [[MD_1ULP]], ptr addrspace(1) [[OUT]], align 8
|
|
; IEEE-BADFREXP-NEXT: [[TMP26:%.*]] = extractelement <2 x float> [[A]], i64 0
|
|
; IEEE-BADFREXP-NEXT: [[TMP27:%.*]] = extractelement <2 x float> [[A]], i64 1
|
|
; IEEE-BADFREXP-NEXT: [[TMP28:%.*]] = extractelement <2 x float> [[B]], i64 0
|
|
; IEEE-BADFREXP-NEXT: [[TMP29:%.*]] = extractelement <2 x float> [[B]], i64 1
|
|
; IEEE-BADFREXP-NEXT: [[TMP30:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[TMP28]])
|
|
; IEEE-BADFREXP-NEXT: [[TMP31:%.*]] = extractvalue { float, i32 } [[TMP30]], 0
|
|
; IEEE-BADFREXP-NEXT: [[TMP32:%.*]] = call i32 @llvm.amdgcn.frexp.exp.i32.f32(float [[TMP28]])
|
|
; IEEE-BADFREXP-NEXT: [[TMP33:%.*]] = call float @llvm.amdgcn.rcp.f32(float [[TMP31]])
|
|
; IEEE-BADFREXP-NEXT: [[TMP34:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[TMP26]])
|
|
; IEEE-BADFREXP-NEXT: [[TMP35:%.*]] = extractvalue { float, i32 } [[TMP34]], 0
|
|
; IEEE-BADFREXP-NEXT: [[TMP36:%.*]] = call i32 @llvm.amdgcn.frexp.exp.i32.f32(float [[TMP26]])
|
|
; IEEE-BADFREXP-NEXT: [[TMP37:%.*]] = fmul float [[TMP35]], [[TMP33]]
|
|
; IEEE-BADFREXP-NEXT: [[TMP38:%.*]] = sub i32 [[TMP36]], [[TMP32]]
|
|
; IEEE-BADFREXP-NEXT: [[TMP39:%.*]] = call float @llvm.ldexp.f32.i32(float [[TMP37]], i32 [[TMP38]])
|
|
; IEEE-BADFREXP-NEXT: [[TMP40:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[TMP29]])
|
|
; IEEE-BADFREXP-NEXT: [[TMP41:%.*]] = extractvalue { float, i32 } [[TMP40]], 0
|
|
; IEEE-BADFREXP-NEXT: [[TMP42:%.*]] = call i32 @llvm.amdgcn.frexp.exp.i32.f32(float [[TMP29]])
|
|
; IEEE-BADFREXP-NEXT: [[TMP43:%.*]] = call float @llvm.amdgcn.rcp.f32(float [[TMP41]])
|
|
; IEEE-BADFREXP-NEXT: [[TMP44:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[TMP27]])
|
|
; IEEE-BADFREXP-NEXT: [[TMP45:%.*]] = extractvalue { float, i32 } [[TMP44]], 0
|
|
; IEEE-BADFREXP-NEXT: [[TMP46:%.*]] = call i32 @llvm.amdgcn.frexp.exp.i32.f32(float [[TMP27]])
|
|
; IEEE-BADFREXP-NEXT: [[TMP47:%.*]] = fmul float [[TMP45]], [[TMP43]]
|
|
; IEEE-BADFREXP-NEXT: [[TMP48:%.*]] = sub i32 [[TMP46]], [[TMP42]]
|
|
; IEEE-BADFREXP-NEXT: [[TMP49:%.*]] = call float @llvm.ldexp.f32.i32(float [[TMP47]], i32 [[TMP48]])
|
|
; IEEE-BADFREXP-NEXT: [[TMP50:%.*]] = insertelement <2 x float> poison, float [[TMP39]], i64 0
|
|
; IEEE-BADFREXP-NEXT: [[MD_25ULP:%.*]] = insertelement <2 x float> [[TMP50]], float [[TMP49]], i64 1
|
|
; IEEE-BADFREXP-NEXT: store volatile <2 x float> [[MD_25ULP]], ptr addrspace(1) [[OUT]], align 8
|
|
; IEEE-BADFREXP-NEXT: ret void
|
|
;
|
|
; DAZ-LABEL: define amdgpu_kernel void @fdiv_fpmath_f32_vector
|
|
; DAZ-SAME: (ptr addrspace(1) [[OUT:%.*]], <2 x float> [[A:%.*]], <2 x float> [[B:%.*]]) #[[ATTR1]] {
|
|
; DAZ-NEXT: [[NO_MD:%.*]] = fdiv <2 x float> [[A]], [[B]]
|
|
; DAZ-NEXT: store volatile <2 x float> [[NO_MD]], ptr addrspace(1) [[OUT]], align 8
|
|
; DAZ-NEXT: [[MD_HALF_ULP:%.*]] = fdiv <2 x float> [[A]], [[B]], !fpmath !1
|
|
; DAZ-NEXT: store volatile <2 x float> [[MD_HALF_ULP]], ptr addrspace(1) [[OUT]], align 8
|
|
; DAZ-NEXT: [[TMP1:%.*]] = extractelement <2 x float> [[A]], i64 0
|
|
; DAZ-NEXT: [[TMP2:%.*]] = extractelement <2 x float> [[A]], i64 1
|
|
; DAZ-NEXT: [[TMP3:%.*]] = extractelement <2 x float> [[B]], i64 0
|
|
; DAZ-NEXT: [[TMP4:%.*]] = extractelement <2 x float> [[B]], i64 1
|
|
; DAZ-NEXT: [[TMP5:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[TMP3]])
|
|
; DAZ-NEXT: [[TMP6:%.*]] = extractvalue { float, i32 } [[TMP5]], 0
|
|
; DAZ-NEXT: [[TMP7:%.*]] = extractvalue { float, i32 } [[TMP5]], 1
|
|
; DAZ-NEXT: [[TMP8:%.*]] = call float @llvm.amdgcn.rcp.f32(float [[TMP6]])
|
|
; DAZ-NEXT: [[TMP9:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[TMP1]])
|
|
; DAZ-NEXT: [[TMP10:%.*]] = extractvalue { float, i32 } [[TMP9]], 0
|
|
; DAZ-NEXT: [[TMP11:%.*]] = extractvalue { float, i32 } [[TMP9]], 1
|
|
; DAZ-NEXT: [[TMP12:%.*]] = fmul float [[TMP10]], [[TMP8]]
|
|
; DAZ-NEXT: [[TMP13:%.*]] = sub i32 [[TMP11]], [[TMP7]]
|
|
; DAZ-NEXT: [[TMP14:%.*]] = call float @llvm.ldexp.f32.i32(float [[TMP12]], i32 [[TMP13]])
|
|
; DAZ-NEXT: [[TMP15:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[TMP4]])
|
|
; DAZ-NEXT: [[TMP16:%.*]] = extractvalue { float, i32 } [[TMP15]], 0
|
|
; DAZ-NEXT: [[TMP17:%.*]] = extractvalue { float, i32 } [[TMP15]], 1
|
|
; DAZ-NEXT: [[TMP18:%.*]] = call float @llvm.amdgcn.rcp.f32(float [[TMP16]])
|
|
; DAZ-NEXT: [[TMP19:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[TMP2]])
|
|
; DAZ-NEXT: [[TMP20:%.*]] = extractvalue { float, i32 } [[TMP19]], 0
|
|
; DAZ-NEXT: [[TMP21:%.*]] = extractvalue { float, i32 } [[TMP19]], 1
|
|
; DAZ-NEXT: [[TMP22:%.*]] = fmul float [[TMP20]], [[TMP18]]
|
|
; DAZ-NEXT: [[TMP23:%.*]] = sub i32 [[TMP21]], [[TMP17]]
|
|
; DAZ-NEXT: [[TMP24:%.*]] = call float @llvm.ldexp.f32.i32(float [[TMP22]], i32 [[TMP23]])
|
|
; DAZ-NEXT: [[TMP25:%.*]] = insertelement <2 x float> poison, float [[TMP14]], i64 0
|
|
; DAZ-NEXT: [[MD_1ULP:%.*]] = insertelement <2 x float> [[TMP25]], float [[TMP24]], i64 1
|
|
; DAZ-NEXT: store volatile <2 x float> [[MD_1ULP]], ptr addrspace(1) [[OUT]], align 8
|
|
; DAZ-NEXT: [[TMP26:%.*]] = extractelement <2 x float> [[A]], i64 0
|
|
; DAZ-NEXT: [[TMP27:%.*]] = extractelement <2 x float> [[A]], i64 1
|
|
; DAZ-NEXT: [[TMP28:%.*]] = extractelement <2 x float> [[B]], i64 0
|
|
; DAZ-NEXT: [[TMP29:%.*]] = extractelement <2 x float> [[B]], i64 1
|
|
; DAZ-NEXT: [[TMP30:%.*]] = call float @llvm.amdgcn.fdiv.fast(float [[TMP26]], float [[TMP28]])
|
|
; DAZ-NEXT: [[TMP31:%.*]] = call float @llvm.amdgcn.fdiv.fast(float [[TMP27]], float [[TMP29]])
|
|
; DAZ-NEXT: [[TMP32:%.*]] = insertelement <2 x float> poison, float [[TMP30]], i64 0
|
|
; DAZ-NEXT: [[MD_25ULP:%.*]] = insertelement <2 x float> [[TMP32]], float [[TMP31]], i64 1
|
|
; DAZ-NEXT: store volatile <2 x float> [[MD_25ULP]], ptr addrspace(1) [[OUT]], align 8
|
|
; DAZ-NEXT: ret void
|
|
;
|
|
%no.md = fdiv <2 x float> %a, %b
|
|
store volatile <2 x float> %no.md, ptr addrspace(1) %out, align 8
|
|
%md.half.ulp = fdiv <2 x float> %a, %b, !fpmath !1
|
|
store volatile <2 x float> %md.half.ulp, ptr addrspace(1) %out, align 8
|
|
%md.1ulp = fdiv <2 x float> %a, %b, !fpmath !2
|
|
store volatile <2 x float> %md.1ulp, ptr addrspace(1) %out, align 8
|
|
%md.25ulp = fdiv <2 x float> %a, %b, !fpmath !0
|
|
store volatile <2 x float> %md.25ulp, ptr addrspace(1) %out, align 8
|
|
ret void
|
|
}
|
|
|
|
define amdgpu_kernel void @rcp_fdiv_f32_vector_fpmath(ptr addrspace(1) %out, <2 x float> %x) {
|
|
; CHECK-LABEL: define amdgpu_kernel void @rcp_fdiv_f32_vector_fpmath
|
|
; CHECK-SAME: (ptr addrspace(1) [[OUT:%.*]], <2 x float> [[X:%.*]]) #[[ATTR1:[0-9]+]] {
|
|
; CHECK-NEXT: [[NO_MD:%.*]] = fdiv <2 x float> <float 1.000000e+00, float 1.000000e+00>, [[X]]
|
|
; CHECK-NEXT: store volatile <2 x float> [[NO_MD]], ptr addrspace(1) [[OUT]], align 8
|
|
; CHECK-NEXT: [[MD_HALF_ULP:%.*]] = fdiv <2 x float> <float 1.000000e+00, float 1.000000e+00>, [[X]], !fpmath !1
|
|
; CHECK-NEXT: store volatile <2 x float> [[MD_HALF_ULP]], ptr addrspace(1) [[OUT]], align 8
|
|
; CHECK-NEXT: [[AFN_NO_MD:%.*]] = fdiv afn <2 x float> <float 1.000000e+00, float 1.000000e+00>, [[X]]
|
|
; CHECK-NEXT: store volatile <2 x float> [[AFN_NO_MD]], ptr addrspace(1) [[OUT]], align 8
|
|
; CHECK-NEXT: [[FAST_NO_MD:%.*]] = fdiv fast <2 x float> <float 1.000000e+00, float 1.000000e+00>, [[X]]
|
|
; CHECK-NEXT: store volatile <2 x float> [[FAST_NO_MD]], ptr addrspace(1) [[OUT]], align 8
|
|
; CHECK-NEXT: [[AFN_25ULP:%.*]] = fdiv afn <2 x float> <float 1.000000e+00, float 1.000000e+00>, [[X]], !fpmath !0
|
|
; CHECK-NEXT: store volatile <2 x float> [[AFN_25ULP]], ptr addrspace(1) [[OUT]], align 8
|
|
; CHECK-NEXT: [[FAST_25ULP:%.*]] = fdiv fast <2 x float> <float 1.000000e+00, float 1.000000e+00>, [[X]], !fpmath !0
|
|
; CHECK-NEXT: store volatile <2 x float> [[FAST_25ULP]], ptr addrspace(1) [[OUT]], align 8
|
|
; CHECK-NEXT: ret void
|
|
;
|
|
%no.md = fdiv <2 x float> <float 1.000000e+00, float 1.000000e+00>, %x
|
|
store volatile <2 x float> %no.md, ptr addrspace(1) %out, align 8
|
|
%md.half.ulp = fdiv <2 x float> <float 1.000000e+00, float 1.000000e+00>, %x, !fpmath !1
|
|
store volatile <2 x float> %md.half.ulp, ptr addrspace(1) %out, align 8
|
|
%afn.no.md = fdiv afn <2 x float> <float 1.000000e+00, float 1.000000e+00>, %x
|
|
store volatile <2 x float> %afn.no.md, ptr addrspace(1) %out, align 8
|
|
%fast.no.md = fdiv fast <2 x float> <float 1.000000e+00, float 1.000000e+00>, %x
|
|
store volatile <2 x float> %fast.no.md, ptr addrspace(1) %out, align 8
|
|
%afn.25ulp = fdiv afn <2 x float> <float 1.000000e+00, float 1.000000e+00>, %x, !fpmath !0
|
|
store volatile <2 x float> %afn.25ulp, ptr addrspace(1) %out, align 8
|
|
%fast.25ulp = fdiv fast <2 x float> <float 1.000000e+00, float 1.000000e+00>, %x, !fpmath !0
|
|
store volatile <2 x float> %fast.25ulp, ptr addrspace(1) %out, align 8
|
|
ret void
|
|
}
|
|
|
|
define amdgpu_kernel void @rcp_fdiv_f32_fpmath_vector_nonsplat(ptr addrspace(1) %out, <2 x float> %x) {
|
|
; CHECK-LABEL: define amdgpu_kernel void @rcp_fdiv_f32_fpmath_vector_nonsplat
|
|
; CHECK-SAME: (ptr addrspace(1) [[OUT:%.*]], <2 x float> [[X:%.*]]) #[[ATTR1]] {
|
|
; CHECK-NEXT: [[NO_MD:%.*]] = fdiv <2 x float> <float 1.000000e+00, float 2.000000e+00>, [[X]]
|
|
; CHECK-NEXT: store volatile <2 x float> [[NO_MD]], ptr addrspace(1) [[OUT]], align 8
|
|
; CHECK-NEXT: [[AFN_NO_MD:%.*]] = fdiv afn <2 x float> <float 1.000000e+00, float 2.000000e+00>, [[X]]
|
|
; CHECK-NEXT: store volatile <2 x float> [[AFN_NO_MD]], ptr addrspace(1) [[OUT]], align 8
|
|
; CHECK-NEXT: [[FAST_NO_MD:%.*]] = fdiv fast <2 x float> <float 1.000000e+00, float 2.000000e+00>, [[X]]
|
|
; CHECK-NEXT: store volatile <2 x float> [[FAST_NO_MD]], ptr addrspace(1) [[OUT]], align 8
|
|
; CHECK-NEXT: [[AFN_25ULP:%.*]] = fdiv afn <2 x float> <float 1.000000e+00, float 2.000000e+00>, [[X]], !fpmath !0
|
|
; CHECK-NEXT: store volatile <2 x float> [[AFN_25ULP]], ptr addrspace(1) [[OUT]], align 8
|
|
; CHECK-NEXT: [[FAST_25ULP:%.*]] = fdiv fast <2 x float> <float 1.000000e+00, float 2.000000e+00>, [[X]], !fpmath !0
|
|
; CHECK-NEXT: store volatile <2 x float> [[FAST_25ULP]], ptr addrspace(1) [[OUT]], align 8
|
|
; CHECK-NEXT: ret void
|
|
;
|
|
%no.md = fdiv <2 x float> <float 1.000000e+00, float 2.000000e+00>, %x
|
|
store volatile <2 x float> %no.md, ptr addrspace(1) %out, align 8
|
|
%afn.no.md = fdiv afn <2 x float> <float 1.000000e+00, float 2.000000e+00>, %x
|
|
store volatile <2 x float> %afn.no.md, ptr addrspace(1) %out, align 8
|
|
%fast.no.md = fdiv fast <2 x float> <float 1.000000e+00, float 2.000000e+00>, %x
|
|
store volatile <2 x float> %fast.no.md, ptr addrspace(1) %out, align 8
|
|
%afn.25ulp = fdiv afn <2 x float> <float 1.000000e+00, float 2.000000e+00>, %x, !fpmath !0
|
|
store volatile <2 x float> %afn.25ulp, ptr addrspace(1) %out, align 8
|
|
%fast.25ulp = fdiv fast <2 x float> <float 1.000000e+00, float 2.000000e+00>, %x, !fpmath !0
|
|
store volatile <2 x float> %fast.25ulp, ptr addrspace(1) %out, align 8
|
|
ret void
|
|
}
|
|
|
|
define amdgpu_kernel void @rcp_fdiv_f32_vector_fpmath_partial_constant(ptr addrspace(1) %out, <2 x float> %x, <2 x float> %y) {
|
|
; CHECK-LABEL: define amdgpu_kernel void @rcp_fdiv_f32_vector_fpmath_partial_constant
|
|
; CHECK-SAME: (ptr addrspace(1) [[OUT:%.*]], <2 x float> [[X:%.*]], <2 x float> [[Y:%.*]]) #[[ATTR1]] {
|
|
; CHECK-NEXT: [[X_INSERT:%.*]] = insertelement <2 x float> [[X]], float 1.000000e+00, i32 0
|
|
; CHECK-NEXT: [[AFN_25ULP:%.*]] = fdiv afn <2 x float> [[X_INSERT]], [[Y]], !fpmath !0
|
|
; CHECK-NEXT: store volatile <2 x float> [[AFN_25ULP]], ptr addrspace(1) [[OUT]], align 8
|
|
; CHECK-NEXT: [[FAST_25ULP:%.*]] = fdiv fast <2 x float> [[X_INSERT]], [[Y]], !fpmath !0
|
|
; CHECK-NEXT: store volatile <2 x float> [[FAST_25ULP]], ptr addrspace(1) [[OUT]], align 8
|
|
; CHECK-NEXT: ret void
|
|
;
|
|
%x.insert = insertelement <2 x float> %x, float 1.000000e+00, i32 0
|
|
%afn.25ulp = fdiv afn <2 x float> %x.insert, %y, !fpmath !0
|
|
store volatile <2 x float> %afn.25ulp, ptr addrspace(1) %out, align 8
|
|
%fast.25ulp = fdiv fast <2 x float> %x.insert, %y, !fpmath !0
|
|
store volatile <2 x float> %fast.25ulp, ptr addrspace(1) %out, align 8
|
|
ret void
|
|
}
|
|
|
|
define amdgpu_kernel void @rcp_fdiv_f32_vector_fpmath_partial_constant_arcp(ptr addrspace(1) %out, <2 x float> %x, <2 x float> %y) {
|
|
; IEEE-GOODFREXP-LABEL: define amdgpu_kernel void @rcp_fdiv_f32_vector_fpmath_partial_constant_arcp
|
|
; IEEE-GOODFREXP-SAME: (ptr addrspace(1) [[OUT:%.*]], <2 x float> [[X:%.*]], <2 x float> [[Y:%.*]]) #[[ATTR1]] {
|
|
; IEEE-GOODFREXP-NEXT: [[X_INSERT:%.*]] = insertelement <2 x float> [[X]], float 1.000000e+00, i32 0
|
|
; IEEE-GOODFREXP-NEXT: [[TMP1:%.*]] = extractelement <2 x float> [[X_INSERT]], i64 0
|
|
; IEEE-GOODFREXP-NEXT: [[TMP2:%.*]] = extractelement <2 x float> [[X_INSERT]], i64 1
|
|
; IEEE-GOODFREXP-NEXT: [[TMP3:%.*]] = extractelement <2 x float> [[Y]], i64 0
|
|
; IEEE-GOODFREXP-NEXT: [[TMP4:%.*]] = extractelement <2 x float> [[Y]], i64 1
|
|
; IEEE-GOODFREXP-NEXT: [[TMP5:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[TMP3]])
|
|
; IEEE-GOODFREXP-NEXT: [[TMP6:%.*]] = extractvalue { float, i32 } [[TMP5]], 0
|
|
; IEEE-GOODFREXP-NEXT: [[TMP7:%.*]] = extractvalue { float, i32 } [[TMP5]], 1
|
|
; IEEE-GOODFREXP-NEXT: [[TMP8:%.*]] = sub i32 0, [[TMP7]]
|
|
; IEEE-GOODFREXP-NEXT: [[TMP9:%.*]] = call arcp float @llvm.amdgcn.rcp.f32(float [[TMP6]])
|
|
; IEEE-GOODFREXP-NEXT: [[TMP10:%.*]] = call arcp float @llvm.ldexp.f32.i32(float [[TMP9]], i32 [[TMP8]])
|
|
; IEEE-GOODFREXP-NEXT: [[TMP11:%.*]] = fmul arcp float [[TMP1]], [[TMP10]]
|
|
; IEEE-GOODFREXP-NEXT: [[TMP12:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[TMP4]])
|
|
; IEEE-GOODFREXP-NEXT: [[TMP13:%.*]] = extractvalue { float, i32 } [[TMP12]], 0
|
|
; IEEE-GOODFREXP-NEXT: [[TMP14:%.*]] = extractvalue { float, i32 } [[TMP12]], 1
|
|
; IEEE-GOODFREXP-NEXT: [[TMP15:%.*]] = sub i32 0, [[TMP14]]
|
|
; IEEE-GOODFREXP-NEXT: [[TMP16:%.*]] = call arcp float @llvm.amdgcn.rcp.f32(float [[TMP13]])
|
|
; IEEE-GOODFREXP-NEXT: [[TMP17:%.*]] = call arcp float @llvm.ldexp.f32.i32(float [[TMP16]], i32 [[TMP15]])
|
|
; IEEE-GOODFREXP-NEXT: [[TMP18:%.*]] = fmul arcp float [[TMP2]], [[TMP17]]
|
|
; IEEE-GOODFREXP-NEXT: [[TMP19:%.*]] = insertelement <2 x float> poison, float [[TMP11]], i64 0
|
|
; IEEE-GOODFREXP-NEXT: [[ARCP_25ULP:%.*]] = insertelement <2 x float> [[TMP19]], float [[TMP18]], i64 1
|
|
; IEEE-GOODFREXP-NEXT: store volatile <2 x float> [[ARCP_25ULP]], ptr addrspace(1) [[OUT]], align 8
|
|
; IEEE-GOODFREXP-NEXT: ret void
|
|
;
|
|
; IEEE-BADFREXP-LABEL: define amdgpu_kernel void @rcp_fdiv_f32_vector_fpmath_partial_constant_arcp
|
|
; IEEE-BADFREXP-SAME: (ptr addrspace(1) [[OUT:%.*]], <2 x float> [[X:%.*]], <2 x float> [[Y:%.*]]) #[[ATTR1]] {
|
|
; IEEE-BADFREXP-NEXT: [[X_INSERT:%.*]] = insertelement <2 x float> [[X]], float 1.000000e+00, i32 0
|
|
; IEEE-BADFREXP-NEXT: [[TMP1:%.*]] = extractelement <2 x float> [[X_INSERT]], i64 0
|
|
; IEEE-BADFREXP-NEXT: [[TMP2:%.*]] = extractelement <2 x float> [[X_INSERT]], i64 1
|
|
; IEEE-BADFREXP-NEXT: [[TMP3:%.*]] = extractelement <2 x float> [[Y]], i64 0
|
|
; IEEE-BADFREXP-NEXT: [[TMP4:%.*]] = extractelement <2 x float> [[Y]], i64 1
|
|
; IEEE-BADFREXP-NEXT: [[TMP5:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[TMP3]])
|
|
; IEEE-BADFREXP-NEXT: [[TMP6:%.*]] = extractvalue { float, i32 } [[TMP5]], 0
|
|
; IEEE-BADFREXP-NEXT: [[TMP7:%.*]] = call i32 @llvm.amdgcn.frexp.exp.i32.f32(float [[TMP3]])
|
|
; IEEE-BADFREXP-NEXT: [[TMP8:%.*]] = sub i32 0, [[TMP7]]
|
|
; IEEE-BADFREXP-NEXT: [[TMP9:%.*]] = call arcp float @llvm.amdgcn.rcp.f32(float [[TMP6]])
|
|
; IEEE-BADFREXP-NEXT: [[TMP10:%.*]] = call arcp float @llvm.ldexp.f32.i32(float [[TMP9]], i32 [[TMP8]])
|
|
; IEEE-BADFREXP-NEXT: [[TMP11:%.*]] = fmul arcp float [[TMP1]], [[TMP10]]
|
|
; IEEE-BADFREXP-NEXT: [[TMP12:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[TMP4]])
|
|
; IEEE-BADFREXP-NEXT: [[TMP13:%.*]] = extractvalue { float, i32 } [[TMP12]], 0
|
|
; IEEE-BADFREXP-NEXT: [[TMP14:%.*]] = call i32 @llvm.amdgcn.frexp.exp.i32.f32(float [[TMP4]])
|
|
; IEEE-BADFREXP-NEXT: [[TMP15:%.*]] = sub i32 0, [[TMP14]]
|
|
; IEEE-BADFREXP-NEXT: [[TMP16:%.*]] = call arcp float @llvm.amdgcn.rcp.f32(float [[TMP13]])
|
|
; IEEE-BADFREXP-NEXT: [[TMP17:%.*]] = call arcp float @llvm.ldexp.f32.i32(float [[TMP16]], i32 [[TMP15]])
|
|
; IEEE-BADFREXP-NEXT: [[TMP18:%.*]] = fmul arcp float [[TMP2]], [[TMP17]]
|
|
; IEEE-BADFREXP-NEXT: [[TMP19:%.*]] = insertelement <2 x float> poison, float [[TMP11]], i64 0
|
|
; IEEE-BADFREXP-NEXT: [[ARCP_25ULP:%.*]] = insertelement <2 x float> [[TMP19]], float [[TMP18]], i64 1
|
|
; IEEE-BADFREXP-NEXT: store volatile <2 x float> [[ARCP_25ULP]], ptr addrspace(1) [[OUT]], align 8
|
|
; IEEE-BADFREXP-NEXT: ret void
|
|
;
|
|
; DAZ-LABEL: define amdgpu_kernel void @rcp_fdiv_f32_vector_fpmath_partial_constant_arcp
|
|
; DAZ-SAME: (ptr addrspace(1) [[OUT:%.*]], <2 x float> [[X:%.*]], <2 x float> [[Y:%.*]]) #[[ATTR1]] {
|
|
; DAZ-NEXT: [[X_INSERT:%.*]] = insertelement <2 x float> [[X]], float 1.000000e+00, i32 0
|
|
; DAZ-NEXT: [[TMP1:%.*]] = extractelement <2 x float> [[X_INSERT]], i64 0
|
|
; DAZ-NEXT: [[TMP2:%.*]] = extractelement <2 x float> [[X_INSERT]], i64 1
|
|
; DAZ-NEXT: [[TMP3:%.*]] = extractelement <2 x float> [[Y]], i64 0
|
|
; DAZ-NEXT: [[TMP4:%.*]] = extractelement <2 x float> [[Y]], i64 1
|
|
; DAZ-NEXT: [[TMP5:%.*]] = call arcp float @llvm.amdgcn.rcp.f32(float [[TMP3]])
|
|
; DAZ-NEXT: [[TMP6:%.*]] = fmul arcp float [[TMP1]], [[TMP5]]
|
|
; DAZ-NEXT: [[TMP7:%.*]] = call arcp float @llvm.amdgcn.rcp.f32(float [[TMP4]])
|
|
; DAZ-NEXT: [[TMP8:%.*]] = fmul arcp float [[TMP2]], [[TMP7]]
|
|
; DAZ-NEXT: [[TMP9:%.*]] = insertelement <2 x float> poison, float [[TMP6]], i64 0
|
|
; DAZ-NEXT: [[ARCP_25ULP:%.*]] = insertelement <2 x float> [[TMP9]], float [[TMP8]], i64 1
|
|
; DAZ-NEXT: store volatile <2 x float> [[ARCP_25ULP]], ptr addrspace(1) [[OUT]], align 8
|
|
; DAZ-NEXT: ret void
|
|
;
|
|
%x.insert = insertelement <2 x float> %x, float 1.000000e+00, i32 0
|
|
%arcp.25ulp = fdiv arcp <2 x float> %x.insert, %y, !fpmath !0
|
|
store volatile <2 x float> %arcp.25ulp, ptr addrspace(1) %out, align 8
|
|
ret void
|
|
}
|
|
|
|
define amdgpu_kernel void @rsq_f32_fpmath(ptr addrspace(1) %out, float %x) {
|
|
; IEEE-GOODFREXP-LABEL: define amdgpu_kernel void @rsq_f32_fpmath
|
|
; IEEE-GOODFREXP-SAME: (ptr addrspace(1) [[OUT:%.*]], float [[X:%.*]]) #[[ATTR1]] {
|
|
; IEEE-GOODFREXP-NEXT: [[SQRT_X_NO_MD:%.*]] = call contract float @llvm.sqrt.f32(float [[X]])
|
|
; IEEE-GOODFREXP-NEXT: [[NO_MD:%.*]] = fdiv contract float 1.000000e+00, [[SQRT_X_NO_MD]]
|
|
; IEEE-GOODFREXP-NEXT: store volatile float [[NO_MD]], ptr addrspace(1) [[OUT]], align 4
|
|
; IEEE-GOODFREXP-NEXT: [[TMP1:%.*]] = fcmp contract olt float [[X]], 0x3810000000000000
|
|
; IEEE-GOODFREXP-NEXT: [[TMP2:%.*]] = select contract i1 [[TMP1]], float 0x4170000000000000, float 1.000000e+00
|
|
; IEEE-GOODFREXP-NEXT: [[TMP3:%.*]] = fmul contract float [[X]], [[TMP2]]
|
|
; IEEE-GOODFREXP-NEXT: [[TMP4:%.*]] = call contract float @llvm.amdgcn.rsq.f32(float [[TMP3]])
|
|
; IEEE-GOODFREXP-NEXT: [[TMP5:%.*]] = select contract i1 [[TMP1]], float 4.096000e+03, float 1.000000e+00
|
|
; IEEE-GOODFREXP-NEXT: [[MD_1ULP:%.*]] = fmul contract float [[TMP4]], [[TMP5]]
|
|
; IEEE-GOODFREXP-NEXT: store volatile float [[MD_1ULP]], ptr addrspace(1) [[OUT]], align 4
|
|
; IEEE-GOODFREXP-NEXT: [[SQRT_MD_1ULP_MULTI_USE:%.*]] = call contract float @llvm.sqrt.f32(float [[X]]), !fpmath !2
|
|
; IEEE-GOODFREXP-NEXT: store volatile float [[SQRT_MD_1ULP_MULTI_USE]], ptr addrspace(1) [[OUT]], align 4
|
|
; IEEE-GOODFREXP-NEXT: [[TMP6:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[SQRT_MD_1ULP_MULTI_USE]])
|
|
; IEEE-GOODFREXP-NEXT: [[TMP7:%.*]] = extractvalue { float, i32 } [[TMP6]], 0
|
|
; IEEE-GOODFREXP-NEXT: [[TMP8:%.*]] = extractvalue { float, i32 } [[TMP6]], 1
|
|
; IEEE-GOODFREXP-NEXT: [[TMP9:%.*]] = sub i32 0, [[TMP8]]
|
|
; IEEE-GOODFREXP-NEXT: [[TMP10:%.*]] = call contract float @llvm.amdgcn.rcp.f32(float [[TMP7]])
|
|
; IEEE-GOODFREXP-NEXT: [[MD_1ULP_MULTI_USE:%.*]] = call contract float @llvm.ldexp.f32.i32(float [[TMP10]], i32 [[TMP9]])
|
|
; IEEE-GOODFREXP-NEXT: store volatile float [[MD_1ULP_MULTI_USE]], ptr addrspace(1) [[OUT]], align 4
|
|
; IEEE-GOODFREXP-NEXT: [[TMP11:%.*]] = fcmp contract olt float [[X]], 0x3810000000000000
|
|
; IEEE-GOODFREXP-NEXT: [[TMP12:%.*]] = select contract i1 [[TMP11]], float 0x4170000000000000, float 1.000000e+00
|
|
; IEEE-GOODFREXP-NEXT: [[TMP13:%.*]] = fmul contract float [[X]], [[TMP12]]
|
|
; IEEE-GOODFREXP-NEXT: [[TMP14:%.*]] = call contract float @llvm.amdgcn.rsq.f32(float [[TMP13]])
|
|
; IEEE-GOODFREXP-NEXT: [[TMP15:%.*]] = select contract i1 [[TMP11]], float 4.096000e+03, float 1.000000e+00
|
|
; IEEE-GOODFREXP-NEXT: [[MD_25ULP:%.*]] = fmul contract float [[TMP14]], [[TMP15]]
|
|
; IEEE-GOODFREXP-NEXT: store volatile float [[MD_25ULP]], ptr addrspace(1) [[OUT]], align 4
|
|
; IEEE-GOODFREXP-NEXT: [[SQRT_MD_HALF_ULP:%.*]] = call contract float @llvm.sqrt.f32(float [[X]]), !fpmath !1
|
|
; IEEE-GOODFREXP-NEXT: [[MD_HALF_ULP:%.*]] = fdiv contract float 1.000000e+00, [[SQRT_MD_HALF_ULP]], !fpmath !1
|
|
; IEEE-GOODFREXP-NEXT: store volatile float [[MD_HALF_ULP]], ptr addrspace(1) [[OUT]], align 4
|
|
; IEEE-GOODFREXP-NEXT: [[SQRT_X_AFN_NO_MD:%.*]] = call contract afn float @llvm.sqrt.f32(float [[X]])
|
|
; IEEE-GOODFREXP-NEXT: [[AFN_NO_MD:%.*]] = fdiv contract afn float 1.000000e+00, [[SQRT_X_AFN_NO_MD]]
|
|
; IEEE-GOODFREXP-NEXT: store volatile float [[AFN_NO_MD]], ptr addrspace(1) [[OUT]], align 4
|
|
; IEEE-GOODFREXP-NEXT: [[SQRT_X_AFN_25ULP:%.*]] = call contract afn float @llvm.sqrt.f32(float [[X]]), !fpmath !0
|
|
; IEEE-GOODFREXP-NEXT: [[AFN_25ULP:%.*]] = fdiv contract afn float 1.000000e+00, [[SQRT_X_AFN_25ULP]], !fpmath !0
|
|
; IEEE-GOODFREXP-NEXT: store volatile float [[AFN_25ULP]], ptr addrspace(1) [[OUT]], align 4
|
|
; IEEE-GOODFREXP-NEXT: [[SQRT_X_FAST_NO_MD:%.*]] = call fast float @llvm.sqrt.f32(float [[X]])
|
|
; IEEE-GOODFREXP-NEXT: [[FAST_NO_MD:%.*]] = fdiv fast float 1.000000e+00, [[SQRT_X_FAST_NO_MD]]
|
|
; IEEE-GOODFREXP-NEXT: store volatile float [[FAST_NO_MD]], ptr addrspace(1) [[OUT]], align 4
|
|
; IEEE-GOODFREXP-NEXT: [[SQRT_X_FAST_25ULP:%.*]] = call fast float @llvm.sqrt.f32(float [[X]]), !fpmath !0
|
|
; IEEE-GOODFREXP-NEXT: [[FAST_25ULP:%.*]] = fdiv fast float 1.000000e+00, [[SQRT_X_FAST_25ULP]], !fpmath !0
|
|
; IEEE-GOODFREXP-NEXT: store volatile float [[FAST_25ULP]], ptr addrspace(1) [[OUT]], align 4
|
|
; IEEE-GOODFREXP-NEXT: [[TMP16:%.*]] = fcmp contract olt float [[X]], 0x3810000000000000
|
|
; IEEE-GOODFREXP-NEXT: [[TMP17:%.*]] = select contract i1 [[TMP16]], float 0x4170000000000000, float 1.000000e+00
|
|
; IEEE-GOODFREXP-NEXT: [[TMP18:%.*]] = fmul contract float [[X]], [[TMP17]]
|
|
; IEEE-GOODFREXP-NEXT: [[TMP19:%.*]] = call contract float @llvm.amdgcn.rsq.f32(float [[TMP18]])
|
|
; IEEE-GOODFREXP-NEXT: [[TMP20:%.*]] = select contract i1 [[TMP16]], float 4.096000e+03, float 1.000000e+00
|
|
; IEEE-GOODFREXP-NEXT: [[FDIV_OPENCL:%.*]] = fmul contract float [[TMP19]], [[TMP20]]
|
|
; IEEE-GOODFREXP-NEXT: store volatile float [[FDIV_OPENCL]], ptr addrspace(1) [[OUT]], align 4
|
|
; IEEE-GOODFREXP-NEXT: [[TMP21:%.*]] = fcmp contract olt float [[X]], 0x3810000000000000
|
|
; IEEE-GOODFREXP-NEXT: [[TMP22:%.*]] = select contract i1 [[TMP21]], float 0x4170000000000000, float 1.000000e+00
|
|
; IEEE-GOODFREXP-NEXT: [[TMP23:%.*]] = fmul contract float [[X]], [[TMP22]]
|
|
; IEEE-GOODFREXP-NEXT: [[TMP24:%.*]] = call contract float @llvm.amdgcn.rsq.f32(float [[TMP23]])
|
|
; IEEE-GOODFREXP-NEXT: [[TMP25:%.*]] = select contract i1 [[TMP21]], float -4.096000e+03, float -1.000000e+00
|
|
; IEEE-GOODFREXP-NEXT: [[NEG_FDIV_OPENCL:%.*]] = fmul contract float [[TMP24]], [[TMP25]]
|
|
; IEEE-GOODFREXP-NEXT: store volatile float [[NEG_FDIV_OPENCL]], ptr addrspace(1) [[OUT]], align 4
|
|
; IEEE-GOODFREXP-NEXT: [[SQRT_X_HALF_ULP:%.*]] = call contract float @llvm.sqrt.f32(float [[X]]), !fpmath !1
|
|
; IEEE-GOODFREXP-NEXT: [[TMP26:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[SQRT_X_HALF_ULP]])
|
|
; IEEE-GOODFREXP-NEXT: [[TMP27:%.*]] = extractvalue { float, i32 } [[TMP26]], 0
|
|
; IEEE-GOODFREXP-NEXT: [[TMP28:%.*]] = extractvalue { float, i32 } [[TMP26]], 1
|
|
; IEEE-GOODFREXP-NEXT: [[TMP29:%.*]] = sub i32 0, [[TMP28]]
|
|
; IEEE-GOODFREXP-NEXT: [[TMP30:%.*]] = call contract float @llvm.amdgcn.rcp.f32(float [[TMP27]])
|
|
; IEEE-GOODFREXP-NEXT: [[FDIV_SQRT_MISMATCH_MD0:%.*]] = call contract float @llvm.ldexp.f32.i32(float [[TMP30]], i32 [[TMP29]])
|
|
; IEEE-GOODFREXP-NEXT: store volatile float [[FDIV_SQRT_MISMATCH_MD0]], ptr addrspace(1) [[OUT]], align 4
|
|
; IEEE-GOODFREXP-NEXT: [[SQRT_MISMATCH_MD1:%.*]] = call afn float @llvm.sqrt.f32(float [[X]])
|
|
; IEEE-GOODFREXP-NEXT: [[TMP31:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[SQRT_MISMATCH_MD1]])
|
|
; IEEE-GOODFREXP-NEXT: [[TMP32:%.*]] = extractvalue { float, i32 } [[TMP31]], 0
|
|
; IEEE-GOODFREXP-NEXT: [[TMP33:%.*]] = extractvalue { float, i32 } [[TMP31]], 1
|
|
; IEEE-GOODFREXP-NEXT: [[TMP34:%.*]] = sub i32 0, [[TMP33]]
|
|
; IEEE-GOODFREXP-NEXT: [[TMP35:%.*]] = call contract float @llvm.amdgcn.rcp.f32(float [[TMP32]])
|
|
; IEEE-GOODFREXP-NEXT: [[FDIV_SQRT_MISMATCH_MD1:%.*]] = call contract float @llvm.ldexp.f32.i32(float [[TMP35]], i32 [[TMP34]])
|
|
; IEEE-GOODFREXP-NEXT: store volatile float [[FDIV_SQRT_MISMATCH_MD1]], ptr addrspace(1) [[OUT]], align 4
|
|
; IEEE-GOODFREXP-NEXT: [[SQRT_MISMATCH_MD2:%.*]] = call contract float @llvm.sqrt.f32(float [[X]]), !fpmath !3
|
|
; IEEE-GOODFREXP-NEXT: [[FDIV_SQRT_MISMATCH_MD2:%.*]] = fdiv contract afn float 1.000000e+00, [[SQRT_MISMATCH_MD2]]
|
|
; IEEE-GOODFREXP-NEXT: store volatile float [[FDIV_SQRT_MISMATCH_MD2]], ptr addrspace(1) [[OUT]], align 4
|
|
; IEEE-GOODFREXP-NEXT: ret void
|
|
;
|
|
; IEEE-BADFREXP-LABEL: define amdgpu_kernel void @rsq_f32_fpmath
|
|
; IEEE-BADFREXP-SAME: (ptr addrspace(1) [[OUT:%.*]], float [[X:%.*]]) #[[ATTR1]] {
|
|
; IEEE-BADFREXP-NEXT: [[SQRT_X_NO_MD:%.*]] = call contract float @llvm.sqrt.f32(float [[X]])
|
|
; IEEE-BADFREXP-NEXT: [[NO_MD:%.*]] = fdiv contract float 1.000000e+00, [[SQRT_X_NO_MD]]
|
|
; IEEE-BADFREXP-NEXT: store volatile float [[NO_MD]], ptr addrspace(1) [[OUT]], align 4
|
|
; IEEE-BADFREXP-NEXT: [[TMP1:%.*]] = fcmp contract olt float [[X]], 0x3810000000000000
|
|
; IEEE-BADFREXP-NEXT: [[TMP2:%.*]] = select contract i1 [[TMP1]], float 0x4170000000000000, float 1.000000e+00
|
|
; IEEE-BADFREXP-NEXT: [[TMP3:%.*]] = fmul contract float [[X]], [[TMP2]]
|
|
; IEEE-BADFREXP-NEXT: [[TMP4:%.*]] = call contract float @llvm.amdgcn.rsq.f32(float [[TMP3]])
|
|
; IEEE-BADFREXP-NEXT: [[TMP5:%.*]] = select contract i1 [[TMP1]], float 4.096000e+03, float 1.000000e+00
|
|
; IEEE-BADFREXP-NEXT: [[MD_1ULP:%.*]] = fmul contract float [[TMP4]], [[TMP5]]
|
|
; IEEE-BADFREXP-NEXT: store volatile float [[MD_1ULP]], ptr addrspace(1) [[OUT]], align 4
|
|
; IEEE-BADFREXP-NEXT: [[SQRT_MD_1ULP_MULTI_USE:%.*]] = call contract float @llvm.sqrt.f32(float [[X]]), !fpmath !2
|
|
; IEEE-BADFREXP-NEXT: store volatile float [[SQRT_MD_1ULP_MULTI_USE]], ptr addrspace(1) [[OUT]], align 4
|
|
; IEEE-BADFREXP-NEXT: [[TMP6:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[SQRT_MD_1ULP_MULTI_USE]])
|
|
; IEEE-BADFREXP-NEXT: [[TMP7:%.*]] = extractvalue { float, i32 } [[TMP6]], 0
|
|
; IEEE-BADFREXP-NEXT: [[TMP8:%.*]] = call i32 @llvm.amdgcn.frexp.exp.i32.f32(float [[SQRT_MD_1ULP_MULTI_USE]])
|
|
; IEEE-BADFREXP-NEXT: [[TMP9:%.*]] = sub i32 0, [[TMP8]]
|
|
; IEEE-BADFREXP-NEXT: [[TMP10:%.*]] = call contract float @llvm.amdgcn.rcp.f32(float [[TMP7]])
|
|
; IEEE-BADFREXP-NEXT: [[MD_1ULP_MULTI_USE:%.*]] = call contract float @llvm.ldexp.f32.i32(float [[TMP10]], i32 [[TMP9]])
|
|
; IEEE-BADFREXP-NEXT: store volatile float [[MD_1ULP_MULTI_USE]], ptr addrspace(1) [[OUT]], align 4
|
|
; IEEE-BADFREXP-NEXT: [[TMP11:%.*]] = fcmp contract olt float [[X]], 0x3810000000000000
|
|
; IEEE-BADFREXP-NEXT: [[TMP12:%.*]] = select contract i1 [[TMP11]], float 0x4170000000000000, float 1.000000e+00
|
|
; IEEE-BADFREXP-NEXT: [[TMP13:%.*]] = fmul contract float [[X]], [[TMP12]]
|
|
; IEEE-BADFREXP-NEXT: [[TMP14:%.*]] = call contract float @llvm.amdgcn.rsq.f32(float [[TMP13]])
|
|
; IEEE-BADFREXP-NEXT: [[TMP15:%.*]] = select contract i1 [[TMP11]], float 4.096000e+03, float 1.000000e+00
|
|
; IEEE-BADFREXP-NEXT: [[MD_25ULP:%.*]] = fmul contract float [[TMP14]], [[TMP15]]
|
|
; IEEE-BADFREXP-NEXT: store volatile float [[MD_25ULP]], ptr addrspace(1) [[OUT]], align 4
|
|
; IEEE-BADFREXP-NEXT: [[SQRT_MD_HALF_ULP:%.*]] = call contract float @llvm.sqrt.f32(float [[X]]), !fpmath !1
|
|
; IEEE-BADFREXP-NEXT: [[MD_HALF_ULP:%.*]] = fdiv contract float 1.000000e+00, [[SQRT_MD_HALF_ULP]], !fpmath !1
|
|
; IEEE-BADFREXP-NEXT: store volatile float [[MD_HALF_ULP]], ptr addrspace(1) [[OUT]], align 4
|
|
; IEEE-BADFREXP-NEXT: [[SQRT_X_AFN_NO_MD:%.*]] = call contract afn float @llvm.sqrt.f32(float [[X]])
|
|
; IEEE-BADFREXP-NEXT: [[AFN_NO_MD:%.*]] = fdiv contract afn float 1.000000e+00, [[SQRT_X_AFN_NO_MD]]
|
|
; IEEE-BADFREXP-NEXT: store volatile float [[AFN_NO_MD]], ptr addrspace(1) [[OUT]], align 4
|
|
; IEEE-BADFREXP-NEXT: [[SQRT_X_AFN_25ULP:%.*]] = call contract afn float @llvm.sqrt.f32(float [[X]]), !fpmath !0
|
|
; IEEE-BADFREXP-NEXT: [[AFN_25ULP:%.*]] = fdiv contract afn float 1.000000e+00, [[SQRT_X_AFN_25ULP]], !fpmath !0
|
|
; IEEE-BADFREXP-NEXT: store volatile float [[AFN_25ULP]], ptr addrspace(1) [[OUT]], align 4
|
|
; IEEE-BADFREXP-NEXT: [[SQRT_X_FAST_NO_MD:%.*]] = call fast float @llvm.sqrt.f32(float [[X]])
|
|
; IEEE-BADFREXP-NEXT: [[FAST_NO_MD:%.*]] = fdiv fast float 1.000000e+00, [[SQRT_X_FAST_NO_MD]]
|
|
; IEEE-BADFREXP-NEXT: store volatile float [[FAST_NO_MD]], ptr addrspace(1) [[OUT]], align 4
|
|
; IEEE-BADFREXP-NEXT: [[SQRT_X_FAST_25ULP:%.*]] = call fast float @llvm.sqrt.f32(float [[X]]), !fpmath !0
|
|
; IEEE-BADFREXP-NEXT: [[FAST_25ULP:%.*]] = fdiv fast float 1.000000e+00, [[SQRT_X_FAST_25ULP]], !fpmath !0
|
|
; IEEE-BADFREXP-NEXT: store volatile float [[FAST_25ULP]], ptr addrspace(1) [[OUT]], align 4
|
|
; IEEE-BADFREXP-NEXT: [[TMP16:%.*]] = fcmp contract olt float [[X]], 0x3810000000000000
|
|
; IEEE-BADFREXP-NEXT: [[TMP17:%.*]] = select contract i1 [[TMP16]], float 0x4170000000000000, float 1.000000e+00
|
|
; IEEE-BADFREXP-NEXT: [[TMP18:%.*]] = fmul contract float [[X]], [[TMP17]]
|
|
; IEEE-BADFREXP-NEXT: [[TMP19:%.*]] = call contract float @llvm.amdgcn.rsq.f32(float [[TMP18]])
|
|
; IEEE-BADFREXP-NEXT: [[TMP20:%.*]] = select contract i1 [[TMP16]], float 4.096000e+03, float 1.000000e+00
|
|
; IEEE-BADFREXP-NEXT: [[FDIV_OPENCL:%.*]] = fmul contract float [[TMP19]], [[TMP20]]
|
|
; IEEE-BADFREXP-NEXT: store volatile float [[FDIV_OPENCL]], ptr addrspace(1) [[OUT]], align 4
|
|
; IEEE-BADFREXP-NEXT: [[TMP21:%.*]] = fcmp contract olt float [[X]], 0x3810000000000000
|
|
; IEEE-BADFREXP-NEXT: [[TMP22:%.*]] = select contract i1 [[TMP21]], float 0x4170000000000000, float 1.000000e+00
|
|
; IEEE-BADFREXP-NEXT: [[TMP23:%.*]] = fmul contract float [[X]], [[TMP22]]
|
|
; IEEE-BADFREXP-NEXT: [[TMP24:%.*]] = call contract float @llvm.amdgcn.rsq.f32(float [[TMP23]])
|
|
; IEEE-BADFREXP-NEXT: [[TMP25:%.*]] = select contract i1 [[TMP21]], float -4.096000e+03, float -1.000000e+00
|
|
; IEEE-BADFREXP-NEXT: [[NEG_FDIV_OPENCL:%.*]] = fmul contract float [[TMP24]], [[TMP25]]
|
|
; IEEE-BADFREXP-NEXT: store volatile float [[NEG_FDIV_OPENCL]], ptr addrspace(1) [[OUT]], align 4
|
|
; IEEE-BADFREXP-NEXT: [[SQRT_X_HALF_ULP:%.*]] = call contract float @llvm.sqrt.f32(float [[X]]), !fpmath !1
|
|
; IEEE-BADFREXP-NEXT: [[TMP26:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[SQRT_X_HALF_ULP]])
|
|
; IEEE-BADFREXP-NEXT: [[TMP27:%.*]] = extractvalue { float, i32 } [[TMP26]], 0
|
|
; IEEE-BADFREXP-NEXT: [[TMP28:%.*]] = call i32 @llvm.amdgcn.frexp.exp.i32.f32(float [[SQRT_X_HALF_ULP]])
|
|
; IEEE-BADFREXP-NEXT: [[TMP29:%.*]] = sub i32 0, [[TMP28]]
|
|
; IEEE-BADFREXP-NEXT: [[TMP30:%.*]] = call contract float @llvm.amdgcn.rcp.f32(float [[TMP27]])
|
|
; IEEE-BADFREXP-NEXT: [[FDIV_SQRT_MISMATCH_MD0:%.*]] = call contract float @llvm.ldexp.f32.i32(float [[TMP30]], i32 [[TMP29]])
|
|
; IEEE-BADFREXP-NEXT: store volatile float [[FDIV_SQRT_MISMATCH_MD0]], ptr addrspace(1) [[OUT]], align 4
|
|
; IEEE-BADFREXP-NEXT: [[SQRT_MISMATCH_MD1:%.*]] = call afn float @llvm.sqrt.f32(float [[X]])
|
|
; IEEE-BADFREXP-NEXT: [[TMP31:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[SQRT_MISMATCH_MD1]])
|
|
; IEEE-BADFREXP-NEXT: [[TMP32:%.*]] = extractvalue { float, i32 } [[TMP31]], 0
|
|
; IEEE-BADFREXP-NEXT: [[TMP33:%.*]] = call i32 @llvm.amdgcn.frexp.exp.i32.f32(float [[SQRT_MISMATCH_MD1]])
|
|
; IEEE-BADFREXP-NEXT: [[TMP34:%.*]] = sub i32 0, [[TMP33]]
|
|
; IEEE-BADFREXP-NEXT: [[TMP35:%.*]] = call contract float @llvm.amdgcn.rcp.f32(float [[TMP32]])
|
|
; IEEE-BADFREXP-NEXT: [[FDIV_SQRT_MISMATCH_MD1:%.*]] = call contract float @llvm.ldexp.f32.i32(float [[TMP35]], i32 [[TMP34]])
|
|
; IEEE-BADFREXP-NEXT: store volatile float [[FDIV_SQRT_MISMATCH_MD1]], ptr addrspace(1) [[OUT]], align 4
|
|
; IEEE-BADFREXP-NEXT: [[SQRT_MISMATCH_MD2:%.*]] = call contract float @llvm.sqrt.f32(float [[X]]), !fpmath !3
|
|
; IEEE-BADFREXP-NEXT: [[FDIV_SQRT_MISMATCH_MD2:%.*]] = fdiv contract afn float 1.000000e+00, [[SQRT_MISMATCH_MD2]]
|
|
; IEEE-BADFREXP-NEXT: store volatile float [[FDIV_SQRT_MISMATCH_MD2]], ptr addrspace(1) [[OUT]], align 4
|
|
; IEEE-BADFREXP-NEXT: ret void
|
|
;
|
|
; DAZ-LABEL: define amdgpu_kernel void @rsq_f32_fpmath
|
|
; DAZ-SAME: (ptr addrspace(1) [[OUT:%.*]], float [[X:%.*]]) #[[ATTR1]] {
|
|
; DAZ-NEXT: [[SQRT_X_NO_MD:%.*]] = call contract float @llvm.sqrt.f32(float [[X]])
|
|
; DAZ-NEXT: [[NO_MD:%.*]] = fdiv contract float 1.000000e+00, [[SQRT_X_NO_MD]]
|
|
; DAZ-NEXT: store volatile float [[NO_MD]], ptr addrspace(1) [[OUT]], align 4
|
|
; DAZ-NEXT: [[MD_1ULP:%.*]] = call contract float @llvm.amdgcn.rsq.f32(float [[X]])
|
|
; DAZ-NEXT: store volatile float [[MD_1ULP]], ptr addrspace(1) [[OUT]], align 4
|
|
; DAZ-NEXT: [[SQRT_MD_1ULP_MULTI_USE:%.*]] = call contract float @llvm.sqrt.f32(float [[X]]), !fpmath !2
|
|
; DAZ-NEXT: store volatile float [[SQRT_MD_1ULP_MULTI_USE]], ptr addrspace(1) [[OUT]], align 4
|
|
; DAZ-NEXT: [[MD_1ULP_MULTI_USE:%.*]] = call contract float @llvm.amdgcn.rcp.f32(float [[SQRT_MD_1ULP_MULTI_USE]])
|
|
; DAZ-NEXT: store volatile float [[MD_1ULP_MULTI_USE]], ptr addrspace(1) [[OUT]], align 4
|
|
; DAZ-NEXT: [[MD_25ULP:%.*]] = call contract float @llvm.amdgcn.rsq.f32(float [[X]])
|
|
; DAZ-NEXT: store volatile float [[MD_25ULP]], ptr addrspace(1) [[OUT]], align 4
|
|
; DAZ-NEXT: [[SQRT_MD_HALF_ULP:%.*]] = call contract float @llvm.sqrt.f32(float [[X]]), !fpmath !1
|
|
; DAZ-NEXT: [[MD_HALF_ULP:%.*]] = fdiv contract float 1.000000e+00, [[SQRT_MD_HALF_ULP]], !fpmath !1
|
|
; DAZ-NEXT: store volatile float [[MD_HALF_ULP]], ptr addrspace(1) [[OUT]], align 4
|
|
; DAZ-NEXT: [[SQRT_X_AFN_NO_MD:%.*]] = call contract afn float @llvm.sqrt.f32(float [[X]])
|
|
; DAZ-NEXT: [[AFN_NO_MD:%.*]] = fdiv contract afn float 1.000000e+00, [[SQRT_X_AFN_NO_MD]]
|
|
; DAZ-NEXT: store volatile float [[AFN_NO_MD]], ptr addrspace(1) [[OUT]], align 4
|
|
; DAZ-NEXT: [[SQRT_X_AFN_25ULP:%.*]] = call contract afn float @llvm.sqrt.f32(float [[X]]), !fpmath !0
|
|
; DAZ-NEXT: [[AFN_25ULP:%.*]] = fdiv contract afn float 1.000000e+00, [[SQRT_X_AFN_25ULP]], !fpmath !0
|
|
; DAZ-NEXT: store volatile float [[AFN_25ULP]], ptr addrspace(1) [[OUT]], align 4
|
|
; DAZ-NEXT: [[SQRT_X_FAST_NO_MD:%.*]] = call fast float @llvm.sqrt.f32(float [[X]])
|
|
; DAZ-NEXT: [[FAST_NO_MD:%.*]] = fdiv fast float 1.000000e+00, [[SQRT_X_FAST_NO_MD]]
|
|
; DAZ-NEXT: store volatile float [[FAST_NO_MD]], ptr addrspace(1) [[OUT]], align 4
|
|
; DAZ-NEXT: [[SQRT_X_FAST_25ULP:%.*]] = call fast float @llvm.sqrt.f32(float [[X]]), !fpmath !0
|
|
; DAZ-NEXT: [[FAST_25ULP:%.*]] = fdiv fast float 1.000000e+00, [[SQRT_X_FAST_25ULP]], !fpmath !0
|
|
; DAZ-NEXT: store volatile float [[FAST_25ULP]], ptr addrspace(1) [[OUT]], align 4
|
|
; DAZ-NEXT: [[FDIV_OPENCL:%.*]] = call contract float @llvm.amdgcn.rsq.f32(float [[X]])
|
|
; DAZ-NEXT: store volatile float [[FDIV_OPENCL]], ptr addrspace(1) [[OUT]], align 4
|
|
; DAZ-NEXT: [[TMP1:%.*]] = call contract float @llvm.amdgcn.rsq.f32(float [[X]])
|
|
; DAZ-NEXT: [[NEG_FDIV_OPENCL:%.*]] = fneg contract float [[TMP1]]
|
|
; DAZ-NEXT: store volatile float [[NEG_FDIV_OPENCL]], ptr addrspace(1) [[OUT]], align 4
|
|
; DAZ-NEXT: [[SQRT_X_HALF_ULP:%.*]] = call contract float @llvm.sqrt.f32(float [[X]]), !fpmath !1
|
|
; DAZ-NEXT: [[FDIV_SQRT_MISMATCH_MD0:%.*]] = call contract float @llvm.amdgcn.rcp.f32(float [[SQRT_X_HALF_ULP]])
|
|
; DAZ-NEXT: store volatile float [[FDIV_SQRT_MISMATCH_MD0]], ptr addrspace(1) [[OUT]], align 4
|
|
; DAZ-NEXT: [[SQRT_MISMATCH_MD1:%.*]] = call afn float @llvm.sqrt.f32(float [[X]])
|
|
; DAZ-NEXT: [[FDIV_SQRT_MISMATCH_MD1:%.*]] = call contract float @llvm.amdgcn.rcp.f32(float [[SQRT_MISMATCH_MD1]])
|
|
; DAZ-NEXT: store volatile float [[FDIV_SQRT_MISMATCH_MD1]], ptr addrspace(1) [[OUT]], align 4
|
|
; DAZ-NEXT: [[SQRT_MISMATCH_MD2:%.*]] = call contract float @llvm.sqrt.f32(float [[X]]), !fpmath !3
|
|
; DAZ-NEXT: [[FDIV_SQRT_MISMATCH_MD2:%.*]] = fdiv contract afn float 1.000000e+00, [[SQRT_MISMATCH_MD2]]
|
|
; DAZ-NEXT: store volatile float [[FDIV_SQRT_MISMATCH_MD2]], ptr addrspace(1) [[OUT]], align 4
|
|
; DAZ-NEXT: ret void
|
|
;
|
|
%sqrt.x.no.md = call contract float @llvm.sqrt.f32(float %x)
|
|
%no.md = fdiv contract float 1.000000e+00, %sqrt.x.no.md
|
|
store volatile float %no.md, ptr addrspace(1) %out, align 4
|
|
|
|
; Matches the rsq instruction accuracy
|
|
%sqrt.md.1ulp = call contract float @llvm.sqrt.f32(float %x), !fpmath !2
|
|
%md.1ulp = fdiv contract float 1.000000e+00, %sqrt.md.1ulp, !fpmath !2
|
|
store volatile float %md.1ulp, ptr addrspace(1) %out, align 4
|
|
|
|
%sqrt.md.1ulp.multi.use = call contract float @llvm.sqrt.f32(float %x), !fpmath !2
|
|
store volatile float %sqrt.md.1ulp.multi.use, ptr addrspace(1) %out, align 4
|
|
%md.1ulp.multi.use = fdiv contract float 1.000000e+00, %sqrt.md.1ulp.multi.use, !fpmath !2
|
|
store volatile float %md.1ulp.multi.use, ptr addrspace(1) %out, align 4
|
|
|
|
%sqrt.md.25ulp = call contract float @llvm.sqrt.f32(float %x), !fpmath !0
|
|
%md.25ulp = fdiv contract float 1.000000e+00, %sqrt.md.25ulp, !fpmath !0
|
|
store volatile float %md.25ulp, ptr addrspace(1) %out, align 4
|
|
|
|
%sqrt.md.half.ulp = call contract float @llvm.sqrt.f32(float %x), !fpmath !1
|
|
%md.half.ulp = fdiv contract float 1.000000e+00, %sqrt.md.half.ulp, !fpmath !1
|
|
store volatile float %md.half.ulp, ptr addrspace(1) %out, align 4
|
|
|
|
%sqrt.x.afn.no.md = call contract afn float @llvm.sqrt.f32(float %x)
|
|
%afn.no.md = fdiv contract afn float 1.000000e+00, %sqrt.x.afn.no.md
|
|
store volatile float %afn.no.md, ptr addrspace(1) %out, align 4
|
|
|
|
%sqrt.x.afn.25ulp = call contract afn float @llvm.sqrt.f32(float %x), !fpmath !0
|
|
%afn.25ulp = fdiv contract afn float 1.000000e+00, %sqrt.x.afn.25ulp, !fpmath !0
|
|
store volatile float %afn.25ulp, ptr addrspace(1) %out, align 4
|
|
|
|
%sqrt.x.fast.no.md = call fast float @llvm.sqrt.f32(float %x)
|
|
%fast.no.md = fdiv fast float 1.000000e+00, %sqrt.x.fast.no.md
|
|
store volatile float %fast.no.md, ptr addrspace(1) %out, align 4
|
|
|
|
%sqrt.x.fast.25ulp = call fast float @llvm.sqrt.f32(float %x), !fpmath !0
|
|
%fast.25ulp = fdiv fast float 1.000000e+00, %sqrt.x.fast.25ulp, !fpmath !0
|
|
store volatile float %fast.25ulp, ptr addrspace(1) %out, align 4
|
|
|
|
|
|
; Test mismatched metadata/flags between the sqrt and fdiv
|
|
|
|
; Test the expected opencl default pattern
|
|
%sqrt.x.3ulp = call contract float @llvm.sqrt.f32(float %x), !fpmath !3 ; OpenCL default requires 3 for sqrt and 2.5 for fdiv
|
|
%fdiv.opencl = fdiv contract float 1.0, %sqrt.x.3ulp, !fpmath !0
|
|
store volatile float %fdiv.opencl, ptr addrspace(1) %out, align 4
|
|
|
|
%neg.sqrt.x.3ulp = call contract float @llvm.sqrt.f32(float %x), !fpmath !3 ; OpenCL default requires 3 for sqrt and 2.5 for fdiv
|
|
%neg.fdiv.opencl = fdiv contract float -1.0, %neg.sqrt.x.3ulp, !fpmath !0
|
|
store volatile float %neg.fdiv.opencl, ptr addrspace(1) %out, align 4
|
|
|
|
; sqrt demands higher precision than fdiv
|
|
%sqrt.x.half.ulp = call contract float @llvm.sqrt.f32(float %x), !fpmath !1
|
|
%fdiv.sqrt.mismatch.md0 = fdiv contract float 1.0, %sqrt.x.half.ulp, !fpmath !0
|
|
store volatile float %fdiv.sqrt.mismatch.md0, ptr addrspace(1) %out, align 4
|
|
|
|
; sqrt demands full precision but has afn
|
|
%sqrt.mismatch.md1 = call afn float @llvm.sqrt.f32(float %x)
|
|
%fdiv.sqrt.mismatch.md1 = fdiv contract float 1.0, %sqrt.mismatch.md1, !fpmath !0
|
|
store volatile float %fdiv.sqrt.mismatch.md1, ptr addrspace(1) %out, align 4
|
|
|
|
; sqrt has relaxed precision fdiv has afn only
|
|
%sqrt.mismatch.md2 = call contract float @llvm.sqrt.f32(float %x), !fpmath !3
|
|
%fdiv.sqrt.mismatch.md2 = fdiv contract afn float 1.0, %sqrt.mismatch.md2
|
|
store volatile float %fdiv.sqrt.mismatch.md2, ptr addrspace(1) %out, align 4
|
|
|
|
ret void
|
|
}
|
|
|
|
define amdgpu_kernel void @rsq_f32_fpmath_flags(ptr addrspace(1) %out, float %x) {
|
|
; IEEE-LABEL: define amdgpu_kernel void @rsq_f32_fpmath_flags
|
|
; IEEE-SAME: (ptr addrspace(1) [[OUT:%.*]], float [[X:%.*]]) #[[ATTR1]] {
|
|
; IEEE-NEXT: [[TMP1:%.*]] = fcmp nnan ninf contract olt float [[X]], 0x3810000000000000
|
|
; IEEE-NEXT: [[TMP2:%.*]] = select nnan ninf contract i1 [[TMP1]], float 0x4170000000000000, float 1.000000e+00
|
|
; IEEE-NEXT: [[TMP3:%.*]] = fmul nnan ninf contract float [[X]], [[TMP2]]
|
|
; IEEE-NEXT: [[TMP4:%.*]] = call nnan ninf contract float @llvm.amdgcn.rsq.f32(float [[TMP3]])
|
|
; IEEE-NEXT: [[TMP5:%.*]] = select nnan ninf contract i1 [[TMP1]], float 4.096000e+03, float 1.000000e+00
|
|
; IEEE-NEXT: [[FDIV_OPENCL_NINF_NNAN:%.*]] = fmul nnan ninf contract float [[TMP4]], [[TMP5]]
|
|
; IEEE-NEXT: store volatile float [[FDIV_OPENCL_NINF_NNAN]], ptr addrspace(1) [[OUT]], align 4
|
|
; IEEE-NEXT: [[TMP6:%.*]] = fcmp ninf contract olt float [[X]], 0x3810000000000000
|
|
; IEEE-NEXT: [[TMP7:%.*]] = select ninf contract i1 [[TMP6]], float 0x4170000000000000, float 1.000000e+00
|
|
; IEEE-NEXT: [[TMP8:%.*]] = fmul ninf contract float [[X]], [[TMP7]]
|
|
; IEEE-NEXT: [[TMP9:%.*]] = call ninf contract float @llvm.amdgcn.rsq.f32(float [[TMP8]])
|
|
; IEEE-NEXT: [[TMP10:%.*]] = select ninf contract i1 [[TMP6]], float 4.096000e+03, float 1.000000e+00
|
|
; IEEE-NEXT: [[FDIV_OPENCL_NINF:%.*]] = fmul ninf contract float [[TMP9]], [[TMP10]]
|
|
; IEEE-NEXT: store volatile float [[FDIV_OPENCL_NINF]], ptr addrspace(1) [[OUT]], align 4
|
|
; IEEE-NEXT: [[TMP11:%.*]] = fcmp nnan contract olt float [[X]], 0x3810000000000000
|
|
; IEEE-NEXT: [[TMP12:%.*]] = select nnan contract i1 [[TMP11]], float 0x4170000000000000, float 1.000000e+00
|
|
; IEEE-NEXT: [[TMP13:%.*]] = fmul nnan contract float [[X]], [[TMP12]]
|
|
; IEEE-NEXT: [[TMP14:%.*]] = call nnan contract float @llvm.amdgcn.rsq.f32(float [[TMP13]])
|
|
; IEEE-NEXT: [[TMP15:%.*]] = select nnan contract i1 [[TMP11]], float 4.096000e+03, float 1.000000e+00
|
|
; IEEE-NEXT: [[FDIV_OPENCL_NNAN:%.*]] = fmul nnan contract float [[TMP14]], [[TMP15]]
|
|
; IEEE-NEXT: store volatile float [[FDIV_OPENCL_NNAN]], ptr addrspace(1) [[OUT]], align 4
|
|
; IEEE-NEXT: [[TMP16:%.*]] = fcmp nsz contract olt float [[X]], 0x3810000000000000
|
|
; IEEE-NEXT: [[TMP17:%.*]] = select nsz contract i1 [[TMP16]], float 0x4170000000000000, float 1.000000e+00
|
|
; IEEE-NEXT: [[TMP18:%.*]] = fmul nsz contract float [[X]], [[TMP17]]
|
|
; IEEE-NEXT: [[TMP19:%.*]] = call nsz contract float @llvm.amdgcn.rsq.f32(float [[TMP18]])
|
|
; IEEE-NEXT: [[TMP20:%.*]] = select nsz contract i1 [[TMP16]], float 4.096000e+03, float 1.000000e+00
|
|
; IEEE-NEXT: [[FDIV_OPENCL_NSZ:%.*]] = fmul nsz contract float [[TMP19]], [[TMP20]]
|
|
; IEEE-NEXT: store volatile float [[FDIV_OPENCL_NSZ]], ptr addrspace(1) [[OUT]], align 4
|
|
; IEEE-NEXT: [[TMP21:%.*]] = fcmp nnan ninf contract olt float [[X]], 0x3810000000000000
|
|
; IEEE-NEXT: [[TMP22:%.*]] = select nnan ninf contract i1 [[TMP21]], float 0x4170000000000000, float 1.000000e+00
|
|
; IEEE-NEXT: [[TMP23:%.*]] = fmul nnan ninf contract float [[X]], [[TMP22]]
|
|
; IEEE-NEXT: [[TMP24:%.*]] = call nnan ninf contract float @llvm.amdgcn.rsq.f32(float [[TMP23]])
|
|
; IEEE-NEXT: [[TMP25:%.*]] = select nnan ninf contract i1 [[TMP21]], float 4.096000e+03, float 1.000000e+00
|
|
; IEEE-NEXT: [[FDIV_OPENCL_NNAN_MIX0:%.*]] = fmul nnan ninf contract float [[TMP24]], [[TMP25]]
|
|
; IEEE-NEXT: store volatile float [[FDIV_OPENCL_NNAN_MIX0]], ptr addrspace(1) [[OUT]], align 4
|
|
; IEEE-NEXT: [[TMP26:%.*]] = fcmp nnan ninf contract olt float [[X]], 0x3810000000000000
|
|
; IEEE-NEXT: [[TMP27:%.*]] = select nnan ninf contract i1 [[TMP26]], float 0x4170000000000000, float 1.000000e+00
|
|
; IEEE-NEXT: [[TMP28:%.*]] = fmul nnan ninf contract float [[X]], [[TMP27]]
|
|
; IEEE-NEXT: [[TMP29:%.*]] = call nnan ninf contract float @llvm.amdgcn.rsq.f32(float [[TMP28]])
|
|
; IEEE-NEXT: [[TMP30:%.*]] = select nnan ninf contract i1 [[TMP26]], float 4.096000e+03, float 1.000000e+00
|
|
; IEEE-NEXT: [[FDIV_OPENCL_NNAN_MIX1:%.*]] = fmul nnan ninf contract float [[TMP29]], [[TMP30]]
|
|
; IEEE-NEXT: store volatile float [[FDIV_OPENCL_NNAN_MIX1]], ptr addrspace(1) [[OUT]], align 4
|
|
; IEEE-NEXT: ret void
|
|
;
|
|
; DAZ-LABEL: define amdgpu_kernel void @rsq_f32_fpmath_flags
|
|
; DAZ-SAME: (ptr addrspace(1) [[OUT:%.*]], float [[X:%.*]]) #[[ATTR1]] {
|
|
; DAZ-NEXT: [[FDIV_OPENCL_NINF_NNAN:%.*]] = call nnan ninf contract float @llvm.amdgcn.rsq.f32(float [[X]])
|
|
; DAZ-NEXT: store volatile float [[FDIV_OPENCL_NINF_NNAN]], ptr addrspace(1) [[OUT]], align 4
|
|
; DAZ-NEXT: [[FDIV_OPENCL_NINF:%.*]] = call ninf contract float @llvm.amdgcn.rsq.f32(float [[X]])
|
|
; DAZ-NEXT: store volatile float [[FDIV_OPENCL_NINF]], ptr addrspace(1) [[OUT]], align 4
|
|
; DAZ-NEXT: [[FDIV_OPENCL_NNAN:%.*]] = call nnan contract float @llvm.amdgcn.rsq.f32(float [[X]])
|
|
; DAZ-NEXT: store volatile float [[FDIV_OPENCL_NNAN]], ptr addrspace(1) [[OUT]], align 4
|
|
; DAZ-NEXT: [[FDIV_OPENCL_NSZ:%.*]] = call nsz contract float @llvm.amdgcn.rsq.f32(float [[X]])
|
|
; DAZ-NEXT: store volatile float [[FDIV_OPENCL_NSZ]], ptr addrspace(1) [[OUT]], align 4
|
|
; DAZ-NEXT: [[FDIV_OPENCL_NNAN_MIX0:%.*]] = call nnan ninf contract float @llvm.amdgcn.rsq.f32(float [[X]])
|
|
; DAZ-NEXT: store volatile float [[FDIV_OPENCL_NNAN_MIX0]], ptr addrspace(1) [[OUT]], align 4
|
|
; DAZ-NEXT: [[FDIV_OPENCL_NNAN_MIX1:%.*]] = call nnan ninf contract float @llvm.amdgcn.rsq.f32(float [[X]])
|
|
; DAZ-NEXT: store volatile float [[FDIV_OPENCL_NNAN_MIX1]], ptr addrspace(1) [[OUT]], align 4
|
|
; DAZ-NEXT: ret void
|
|
;
|
|
%sqrt.x.3ulp.ninf.nnan = call contract ninf nnan float @llvm.sqrt.f32(float %x), !fpmath !3 ; OpenCL default requires 3 for sqrt and 2.5 for fdiv
|
|
%fdiv.opencl.ninf.nnan = fdiv contract ninf nnan float 1.0, %sqrt.x.3ulp.ninf.nnan, !fpmath !0
|
|
store volatile float %fdiv.opencl.ninf.nnan, ptr addrspace(1) %out, align 4
|
|
|
|
%sqrt.x.3ulp.ninf = call contract ninf float @llvm.sqrt.f32(float %x), !fpmath !3 ; OpenCL default requires 3 for sqrt and 2.5 for fdiv
|
|
%fdiv.opencl.ninf = fdiv contract ninf float 1.0, %sqrt.x.3ulp.ninf, !fpmath !0
|
|
store volatile float %fdiv.opencl.ninf, ptr addrspace(1) %out, align 4
|
|
|
|
%sqrt.x.3ulp.nnan = call contract nnan float @llvm.sqrt.f32(float %x), !fpmath !3 ; OpenCL default requires 3 for sqrt and 2.5 for fdiv
|
|
%fdiv.opencl.nnan = fdiv contract nnan float 1.0, %sqrt.x.3ulp.nnan, !fpmath !0
|
|
store volatile float %fdiv.opencl.nnan, ptr addrspace(1) %out, align 4
|
|
|
|
%sqrt.x.3ulp.nsz = call contract nsz float @llvm.sqrt.f32(float %x), !fpmath !3 ; OpenCL default requires 3 for sqrt and 2.5 for fdiv
|
|
%fdiv.opencl.nsz = fdiv contract nsz float 1.0, %sqrt.x.3ulp.nsz, !fpmath !0
|
|
store volatile float %fdiv.opencl.nsz, ptr addrspace(1) %out, align 4
|
|
|
|
%sqrt.x.3ulp.ninf.mix0 = call contract ninf float @llvm.sqrt.f32(float %x), !fpmath !3
|
|
%fdiv.opencl.nnan.mix0 = fdiv contract nnan float 1.0, %sqrt.x.3ulp.ninf.mix0, !fpmath !0
|
|
store volatile float %fdiv.opencl.nnan.mix0, ptr addrspace(1) %out, align 4
|
|
|
|
%sqrt.x.3ulp.ninf.mix1 = call contract ninf float @llvm.sqrt.f32(float %x), !fpmath !3
|
|
%fdiv.opencl.nnan.mix1 = fdiv contract nnan float 1.0, %sqrt.x.3ulp.ninf.mix1, !fpmath !0
|
|
store volatile float %fdiv.opencl.nnan.mix1, ptr addrspace(1) %out, align 4
|
|
|
|
ret void
|
|
}
|
|
|
|
define float @rsq_f32_missing_contract0(float %x) {
|
|
; IEEE-GOODFREXP-LABEL: define float @rsq_f32_missing_contract0
|
|
; IEEE-GOODFREXP-SAME: (float [[X:%.*]]) #[[ATTR1]] {
|
|
; IEEE-GOODFREXP-NEXT: [[SQRT_X_3ULP:%.*]] = call float @llvm.sqrt.f32(float [[X]]), !fpmath !2
|
|
; IEEE-GOODFREXP-NEXT: [[TMP1:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[SQRT_X_3ULP]])
|
|
; IEEE-GOODFREXP-NEXT: [[TMP2:%.*]] = extractvalue { float, i32 } [[TMP1]], 0
|
|
; IEEE-GOODFREXP-NEXT: [[TMP3:%.*]] = extractvalue { float, i32 } [[TMP1]], 1
|
|
; IEEE-GOODFREXP-NEXT: [[TMP4:%.*]] = sub i32 0, [[TMP3]]
|
|
; IEEE-GOODFREXP-NEXT: [[TMP5:%.*]] = call contract float @llvm.amdgcn.rcp.f32(float [[TMP2]])
|
|
; IEEE-GOODFREXP-NEXT: [[FDIV_OPENCL:%.*]] = call contract float @llvm.ldexp.f32.i32(float [[TMP5]], i32 [[TMP4]])
|
|
; IEEE-GOODFREXP-NEXT: ret float [[FDIV_OPENCL]]
|
|
;
|
|
; IEEE-BADFREXP-LABEL: define float @rsq_f32_missing_contract0
|
|
; IEEE-BADFREXP-SAME: (float [[X:%.*]]) #[[ATTR1]] {
|
|
; IEEE-BADFREXP-NEXT: [[SQRT_X_3ULP:%.*]] = call float @llvm.sqrt.f32(float [[X]]), !fpmath !2
|
|
; IEEE-BADFREXP-NEXT: [[TMP1:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[SQRT_X_3ULP]])
|
|
; IEEE-BADFREXP-NEXT: [[TMP2:%.*]] = extractvalue { float, i32 } [[TMP1]], 0
|
|
; IEEE-BADFREXP-NEXT: [[TMP3:%.*]] = call i32 @llvm.amdgcn.frexp.exp.i32.f32(float [[SQRT_X_3ULP]])
|
|
; IEEE-BADFREXP-NEXT: [[TMP4:%.*]] = sub i32 0, [[TMP3]]
|
|
; IEEE-BADFREXP-NEXT: [[TMP5:%.*]] = call contract float @llvm.amdgcn.rcp.f32(float [[TMP2]])
|
|
; IEEE-BADFREXP-NEXT: [[FDIV_OPENCL:%.*]] = call contract float @llvm.ldexp.f32.i32(float [[TMP5]], i32 [[TMP4]])
|
|
; IEEE-BADFREXP-NEXT: ret float [[FDIV_OPENCL]]
|
|
;
|
|
; DAZ-LABEL: define float @rsq_f32_missing_contract0
|
|
; DAZ-SAME: (float [[X:%.*]]) #[[ATTR1]] {
|
|
; DAZ-NEXT: [[SQRT_X_3ULP:%.*]] = call float @llvm.sqrt.f32(float [[X]]), !fpmath !2
|
|
; DAZ-NEXT: [[FDIV_OPENCL:%.*]] = call contract float @llvm.amdgcn.rcp.f32(float [[SQRT_X_3ULP]])
|
|
; DAZ-NEXT: ret float [[FDIV_OPENCL]]
|
|
;
|
|
%sqrt.x.3ulp = call float @llvm.sqrt.f32(float %x), !fpmath !2
|
|
%fdiv.opencl = fdiv contract float 1.0, %sqrt.x.3ulp, !fpmath !2
|
|
ret float %fdiv.opencl
|
|
}
|
|
|
|
define float @rsq_f32_missing_contract1(float %x) {
|
|
; IEEE-GOODFREXP-LABEL: define float @rsq_f32_missing_contract1
|
|
; IEEE-GOODFREXP-SAME: (float [[X:%.*]]) #[[ATTR1]] {
|
|
; IEEE-GOODFREXP-NEXT: [[SQRT_X_3ULP:%.*]] = call contract float @llvm.sqrt.f32(float [[X]]), !fpmath !2
|
|
; IEEE-GOODFREXP-NEXT: [[TMP1:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[SQRT_X_3ULP]])
|
|
; IEEE-GOODFREXP-NEXT: [[TMP2:%.*]] = extractvalue { float, i32 } [[TMP1]], 0
|
|
; IEEE-GOODFREXP-NEXT: [[TMP3:%.*]] = extractvalue { float, i32 } [[TMP1]], 1
|
|
; IEEE-GOODFREXP-NEXT: [[TMP4:%.*]] = sub i32 0, [[TMP3]]
|
|
; IEEE-GOODFREXP-NEXT: [[TMP5:%.*]] = call float @llvm.amdgcn.rcp.f32(float [[TMP2]])
|
|
; IEEE-GOODFREXP-NEXT: [[FDIV_OPENCL:%.*]] = call float @llvm.ldexp.f32.i32(float [[TMP5]], i32 [[TMP4]])
|
|
; IEEE-GOODFREXP-NEXT: ret float [[FDIV_OPENCL]]
|
|
;
|
|
; IEEE-BADFREXP-LABEL: define float @rsq_f32_missing_contract1
|
|
; IEEE-BADFREXP-SAME: (float [[X:%.*]]) #[[ATTR1]] {
|
|
; IEEE-BADFREXP-NEXT: [[SQRT_X_3ULP:%.*]] = call contract float @llvm.sqrt.f32(float [[X]]), !fpmath !2
|
|
; IEEE-BADFREXP-NEXT: [[TMP1:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[SQRT_X_3ULP]])
|
|
; IEEE-BADFREXP-NEXT: [[TMP2:%.*]] = extractvalue { float, i32 } [[TMP1]], 0
|
|
; IEEE-BADFREXP-NEXT: [[TMP3:%.*]] = call i32 @llvm.amdgcn.frexp.exp.i32.f32(float [[SQRT_X_3ULP]])
|
|
; IEEE-BADFREXP-NEXT: [[TMP4:%.*]] = sub i32 0, [[TMP3]]
|
|
; IEEE-BADFREXP-NEXT: [[TMP5:%.*]] = call float @llvm.amdgcn.rcp.f32(float [[TMP2]])
|
|
; IEEE-BADFREXP-NEXT: [[FDIV_OPENCL:%.*]] = call float @llvm.ldexp.f32.i32(float [[TMP5]], i32 [[TMP4]])
|
|
; IEEE-BADFREXP-NEXT: ret float [[FDIV_OPENCL]]
|
|
;
|
|
; DAZ-LABEL: define float @rsq_f32_missing_contract1
|
|
; DAZ-SAME: (float [[X:%.*]]) #[[ATTR1]] {
|
|
; DAZ-NEXT: [[SQRT_X_3ULP:%.*]] = call contract float @llvm.sqrt.f32(float [[X]]), !fpmath !2
|
|
; DAZ-NEXT: [[FDIV_OPENCL:%.*]] = call float @llvm.amdgcn.rcp.f32(float [[SQRT_X_3ULP]])
|
|
; DAZ-NEXT: ret float [[FDIV_OPENCL]]
|
|
;
|
|
%sqrt.x.3ulp = call contract float @llvm.sqrt.f32(float %x), !fpmath !2
|
|
%fdiv.opencl = fdiv float 1.0, %sqrt.x.3ulp, !fpmath !2
|
|
ret float %fdiv.opencl
|
|
}
|
|
|
|
define float @rsq_f32_flag_merge(float %x) {
|
|
; IEEE-LABEL: define float @rsq_f32_flag_merge
|
|
; IEEE-SAME: (float [[X:%.*]]) #[[ATTR1]] {
|
|
; IEEE-NEXT: [[TMP1:%.*]] = fcmp ninf nsz contract olt float [[X]], 0x3810000000000000
|
|
; IEEE-NEXT: [[TMP2:%.*]] = select ninf nsz contract i1 [[TMP1]], float 0x4170000000000000, float 1.000000e+00
|
|
; IEEE-NEXT: [[TMP3:%.*]] = fmul ninf nsz contract float [[X]], [[TMP2]]
|
|
; IEEE-NEXT: [[TMP4:%.*]] = call ninf nsz contract float @llvm.amdgcn.rsq.f32(float [[TMP3]])
|
|
; IEEE-NEXT: [[TMP5:%.*]] = select ninf nsz contract i1 [[TMP1]], float 4.096000e+03, float 1.000000e+00
|
|
; IEEE-NEXT: [[FDIV_OPENCL:%.*]] = fmul ninf nsz contract float [[TMP4]], [[TMP5]]
|
|
; IEEE-NEXT: ret float [[FDIV_OPENCL]]
|
|
;
|
|
; DAZ-LABEL: define float @rsq_f32_flag_merge
|
|
; DAZ-SAME: (float [[X:%.*]]) #[[ATTR1]] {
|
|
; DAZ-NEXT: [[FDIV_OPENCL:%.*]] = call ninf nsz contract float @llvm.amdgcn.rsq.f32(float [[X]])
|
|
; DAZ-NEXT: ret float [[FDIV_OPENCL]]
|
|
;
|
|
%sqrt.x.3ulp = call contract ninf float @llvm.sqrt.f32(float %x), !fpmath !2
|
|
%fdiv.opencl = fdiv contract nsz float 1.0, %sqrt.x.3ulp, !fpmath !2
|
|
ret float %fdiv.opencl
|
|
}
|
|
|
|
define amdgpu_kernel void @rsq_f32_knownfinite(ptr addrspace(1) %out, float nofpclass(nan) %no.nan,
|
|
; IEEE-LABEL: define amdgpu_kernel void @rsq_f32_knownfinite
|
|
; IEEE-SAME: (ptr addrspace(1) [[OUT:%.*]], float nofpclass(nan) [[NO_NAN:%.*]], float nofpclass(nan) [[NO_INF:%.*]], float nofpclass(nan inf) [[NO_INF_NAN:%.*]]) #[[ATTR1]] {
|
|
; IEEE-NEXT: [[TMP1:%.*]] = fcmp contract olt float [[NO_NAN]], 0x3810000000000000
|
|
; IEEE-NEXT: [[TMP2:%.*]] = select contract i1 [[TMP1]], float 0x4170000000000000, float 1.000000e+00
|
|
; IEEE-NEXT: [[TMP3:%.*]] = fmul contract float [[NO_NAN]], [[TMP2]]
|
|
; IEEE-NEXT: [[TMP4:%.*]] = call contract float @llvm.amdgcn.rsq.f32(float [[TMP3]])
|
|
; IEEE-NEXT: [[TMP5:%.*]] = select contract i1 [[TMP1]], float 4.096000e+03, float 1.000000e+00
|
|
; IEEE-NEXT: [[FDIV_OPENCL_NO_NAN:%.*]] = fmul contract float [[TMP4]], [[TMP5]]
|
|
; IEEE-NEXT: store volatile float [[FDIV_OPENCL_NO_NAN]], ptr addrspace(1) [[OUT]], align 4
|
|
; IEEE-NEXT: [[TMP6:%.*]] = fcmp contract olt float [[NO_INF]], 0x3810000000000000
|
|
; IEEE-NEXT: [[TMP7:%.*]] = select contract i1 [[TMP6]], float 0x4170000000000000, float 1.000000e+00
|
|
; IEEE-NEXT: [[TMP8:%.*]] = fmul contract float [[NO_INF]], [[TMP7]]
|
|
; IEEE-NEXT: [[TMP9:%.*]] = call contract float @llvm.amdgcn.rsq.f32(float [[TMP8]])
|
|
; IEEE-NEXT: [[TMP10:%.*]] = select contract i1 [[TMP6]], float 4.096000e+03, float 1.000000e+00
|
|
; IEEE-NEXT: [[FDIV_OPENCL_NO_INF:%.*]] = fmul contract float [[TMP9]], [[TMP10]]
|
|
; IEEE-NEXT: store volatile float [[FDIV_OPENCL_NO_INF]], ptr addrspace(1) [[OUT]], align 4
|
|
; IEEE-NEXT: [[TMP11:%.*]] = fcmp contract olt float [[NO_INF_NAN]], 0x3810000000000000
|
|
; IEEE-NEXT: [[TMP12:%.*]] = select contract i1 [[TMP11]], float 0x4170000000000000, float 1.000000e+00
|
|
; IEEE-NEXT: [[TMP13:%.*]] = fmul contract float [[NO_INF_NAN]], [[TMP12]]
|
|
; IEEE-NEXT: [[TMP14:%.*]] = call contract float @llvm.amdgcn.rsq.f32(float [[TMP13]])
|
|
; IEEE-NEXT: [[TMP15:%.*]] = select contract i1 [[TMP11]], float 4.096000e+03, float 1.000000e+00
|
|
; IEEE-NEXT: [[FDIV_OPENCL_NO_INF_NAN:%.*]] = fmul contract float [[TMP14]], [[TMP15]]
|
|
; IEEE-NEXT: store volatile float [[FDIV_OPENCL_NO_INF_NAN]], ptr addrspace(1) [[OUT]], align 4
|
|
; IEEE-NEXT: ret void
|
|
;
|
|
; DAZ-LABEL: define amdgpu_kernel void @rsq_f32_knownfinite
|
|
; DAZ-SAME: (ptr addrspace(1) [[OUT:%.*]], float nofpclass(nan) [[NO_NAN:%.*]], float nofpclass(nan) [[NO_INF:%.*]], float nofpclass(nan inf) [[NO_INF_NAN:%.*]]) #[[ATTR1]] {
|
|
; DAZ-NEXT: [[FDIV_OPENCL_NO_NAN:%.*]] = call contract float @llvm.amdgcn.rsq.f32(float [[NO_NAN]])
|
|
; DAZ-NEXT: store volatile float [[FDIV_OPENCL_NO_NAN]], ptr addrspace(1) [[OUT]], align 4
|
|
; DAZ-NEXT: [[FDIV_OPENCL_NO_INF:%.*]] = call contract float @llvm.amdgcn.rsq.f32(float [[NO_INF]])
|
|
; DAZ-NEXT: store volatile float [[FDIV_OPENCL_NO_INF]], ptr addrspace(1) [[OUT]], align 4
|
|
; DAZ-NEXT: [[FDIV_OPENCL_NO_INF_NAN:%.*]] = call contract float @llvm.amdgcn.rsq.f32(float [[NO_INF_NAN]])
|
|
; DAZ-NEXT: store volatile float [[FDIV_OPENCL_NO_INF_NAN]], ptr addrspace(1) [[OUT]], align 4
|
|
; DAZ-NEXT: ret void
|
|
;
|
|
float nofpclass(nan) %no.inf,
|
|
float nofpclass(inf nan) %no.inf.nan) {
|
|
%sqrt.x.3ulp.no.nan = call contract float @llvm.sqrt.f32(float %no.nan), !fpmath !3
|
|
%fdiv.opencl.no.nan = fdiv contract float 1.0, %sqrt.x.3ulp.no.nan, !fpmath !0
|
|
store volatile float %fdiv.opencl.no.nan, ptr addrspace(1) %out, align 4
|
|
|
|
%sqrt.x.3ulp.no.inf = call contract float @llvm.sqrt.f32(float %no.inf), !fpmath !3
|
|
%fdiv.opencl.no.inf = fdiv contract float 1.0, %sqrt.x.3ulp.no.inf, !fpmath !0
|
|
store volatile float %fdiv.opencl.no.inf, ptr addrspace(1) %out, align 4
|
|
|
|
%sqrt.x.3ulp.no.inf.nan = call contract float @llvm.sqrt.f32(float %no.inf.nan), !fpmath !3
|
|
%fdiv.opencl.no.inf.nan = fdiv contract float 1.0, %sqrt.x.3ulp.no.inf.nan, !fpmath !0
|
|
store volatile float %fdiv.opencl.no.inf.nan, ptr addrspace(1) %out, align 4
|
|
|
|
ret void
|
|
}
|
|
|
|
define amdgpu_kernel void @rsq_f32_known_nozero(ptr addrspace(1) %out, float nofpclass(zero) %no.zero, float nofpclass(zero sub) %no.zero.sub) {
|
|
; IEEE-LABEL: define amdgpu_kernel void @rsq_f32_known_nozero
|
|
; IEEE-SAME: (ptr addrspace(1) [[OUT:%.*]], float nofpclass(zero) [[NO_ZERO:%.*]], float nofpclass(zero sub) [[NO_ZERO_SUB:%.*]]) #[[ATTR1]] {
|
|
; IEEE-NEXT: [[TMP1:%.*]] = fcmp contract olt float [[NO_ZERO]], 0x3810000000000000
|
|
; IEEE-NEXT: [[TMP2:%.*]] = select contract i1 [[TMP1]], float 0x4170000000000000, float 1.000000e+00
|
|
; IEEE-NEXT: [[TMP3:%.*]] = fmul contract float [[NO_ZERO]], [[TMP2]]
|
|
; IEEE-NEXT: [[TMP4:%.*]] = call contract float @llvm.amdgcn.rsq.f32(float [[TMP3]])
|
|
; IEEE-NEXT: [[TMP5:%.*]] = select contract i1 [[TMP1]], float 4.096000e+03, float 1.000000e+00
|
|
; IEEE-NEXT: [[FDIV_OPENCL_NO_ZERO:%.*]] = fmul contract float [[TMP4]], [[TMP5]]
|
|
; IEEE-NEXT: store volatile float [[FDIV_OPENCL_NO_ZERO]], ptr addrspace(1) [[OUT]], align 4
|
|
; IEEE-NEXT: [[FDIV_OPENCL_NO_ZERO_SUB:%.*]] = call contract float @llvm.amdgcn.rsq.f32(float [[NO_ZERO_SUB]])
|
|
; IEEE-NEXT: store volatile float [[FDIV_OPENCL_NO_ZERO_SUB]], ptr addrspace(1) [[OUT]], align 4
|
|
; IEEE-NEXT: ret void
|
|
;
|
|
; DAZ-LABEL: define amdgpu_kernel void @rsq_f32_known_nozero
|
|
; DAZ-SAME: (ptr addrspace(1) [[OUT:%.*]], float nofpclass(zero) [[NO_ZERO:%.*]], float nofpclass(zero sub) [[NO_ZERO_SUB:%.*]]) #[[ATTR1]] {
|
|
; DAZ-NEXT: [[FDIV_OPENCL_NO_ZERO:%.*]] = call contract float @llvm.amdgcn.rsq.f32(float [[NO_ZERO]])
|
|
; DAZ-NEXT: store volatile float [[FDIV_OPENCL_NO_ZERO]], ptr addrspace(1) [[OUT]], align 4
|
|
; DAZ-NEXT: [[FDIV_OPENCL_NO_ZERO_SUB:%.*]] = call contract float @llvm.amdgcn.rsq.f32(float [[NO_ZERO_SUB]])
|
|
; DAZ-NEXT: store volatile float [[FDIV_OPENCL_NO_ZERO_SUB]], ptr addrspace(1) [[OUT]], align 4
|
|
; DAZ-NEXT: ret void
|
|
;
|
|
%sqrt.x.3ulp.no.zero = call contract float @llvm.sqrt.f32(float %no.zero), !fpmath !3
|
|
%fdiv.opencl.no.zero = fdiv contract float 1.0, %sqrt.x.3ulp.no.zero, !fpmath !0
|
|
store volatile float %fdiv.opencl.no.zero, ptr addrspace(1) %out, align 4
|
|
|
|
%sqrt.x.3ulp.no.zero.sub = call contract float @llvm.sqrt.f32(float %no.zero.sub), !fpmath !3
|
|
%fdiv.opencl.no.zero.sub = fdiv contract float 1.0, %sqrt.x.3ulp.no.zero.sub, !fpmath !0
|
|
store volatile float %fdiv.opencl.no.zero.sub, ptr addrspace(1) %out, align 4
|
|
|
|
ret void
|
|
}
|
|
|
|
define amdgpu_kernel void @rsq_f32_known_nosub(ptr addrspace(1) %out, float nofpclass(sub) %no.sub, float nofpclass(psub) %no.psub, float nofpclass(nsub) %no.nsub) {
|
|
; IEEE-LABEL: define amdgpu_kernel void @rsq_f32_known_nosub
|
|
; IEEE-SAME: (ptr addrspace(1) [[OUT:%.*]], float nofpclass(sub) [[NO_SUB:%.*]], float nofpclass(psub) [[NO_PSUB:%.*]], float nofpclass(nsub) [[NO_NSUB:%.*]]) #[[ATTR1]] {
|
|
; IEEE-NEXT: [[FDIV_OPENCL_NO_SUB:%.*]] = call contract float @llvm.amdgcn.rsq.f32(float [[NO_SUB]])
|
|
; IEEE-NEXT: store volatile float [[FDIV_OPENCL_NO_SUB]], ptr addrspace(1) [[OUT]], align 4
|
|
; IEEE-NEXT: [[TMP1:%.*]] = fcmp contract olt float [[NO_PSUB]], 0x3810000000000000
|
|
; IEEE-NEXT: [[TMP2:%.*]] = select contract i1 [[TMP1]], float 0x4170000000000000, float 1.000000e+00
|
|
; IEEE-NEXT: [[TMP3:%.*]] = fmul contract float [[NO_PSUB]], [[TMP2]]
|
|
; IEEE-NEXT: [[TMP4:%.*]] = call contract float @llvm.amdgcn.rsq.f32(float [[TMP3]])
|
|
; IEEE-NEXT: [[TMP5:%.*]] = select contract i1 [[TMP1]], float 4.096000e+03, float 1.000000e+00
|
|
; IEEE-NEXT: [[FDIV_OPENCL_NO_PSUB:%.*]] = fmul contract float [[TMP4]], [[TMP5]]
|
|
; IEEE-NEXT: store volatile float [[FDIV_OPENCL_NO_PSUB]], ptr addrspace(1) [[OUT]], align 4
|
|
; IEEE-NEXT: [[TMP6:%.*]] = fcmp contract olt float [[NO_NSUB]], 0x3810000000000000
|
|
; IEEE-NEXT: [[TMP7:%.*]] = select contract i1 [[TMP6]], float 0x4170000000000000, float 1.000000e+00
|
|
; IEEE-NEXT: [[TMP8:%.*]] = fmul contract float [[NO_NSUB]], [[TMP7]]
|
|
; IEEE-NEXT: [[TMP9:%.*]] = call contract float @llvm.amdgcn.rsq.f32(float [[TMP8]])
|
|
; IEEE-NEXT: [[TMP10:%.*]] = select contract i1 [[TMP6]], float 4.096000e+03, float 1.000000e+00
|
|
; IEEE-NEXT: [[FDIV_OPENCL_NO_NSUB:%.*]] = fmul contract float [[TMP9]], [[TMP10]]
|
|
; IEEE-NEXT: store volatile float [[FDIV_OPENCL_NO_NSUB]], ptr addrspace(1) [[OUT]], align 4
|
|
; IEEE-NEXT: ret void
|
|
;
|
|
; DAZ-LABEL: define amdgpu_kernel void @rsq_f32_known_nosub
|
|
; DAZ-SAME: (ptr addrspace(1) [[OUT:%.*]], float nofpclass(sub) [[NO_SUB:%.*]], float nofpclass(psub) [[NO_PSUB:%.*]], float nofpclass(nsub) [[NO_NSUB:%.*]]) #[[ATTR1]] {
|
|
; DAZ-NEXT: [[FDIV_OPENCL_NO_SUB:%.*]] = call contract float @llvm.amdgcn.rsq.f32(float [[NO_SUB]])
|
|
; DAZ-NEXT: store volatile float [[FDIV_OPENCL_NO_SUB]], ptr addrspace(1) [[OUT]], align 4
|
|
; DAZ-NEXT: [[FDIV_OPENCL_NO_PSUB:%.*]] = call contract float @llvm.amdgcn.rsq.f32(float [[NO_PSUB]])
|
|
; DAZ-NEXT: store volatile float [[FDIV_OPENCL_NO_PSUB]], ptr addrspace(1) [[OUT]], align 4
|
|
; DAZ-NEXT: [[FDIV_OPENCL_NO_NSUB:%.*]] = call contract float @llvm.amdgcn.rsq.f32(float [[NO_NSUB]])
|
|
; DAZ-NEXT: store volatile float [[FDIV_OPENCL_NO_NSUB]], ptr addrspace(1) [[OUT]], align 4
|
|
; DAZ-NEXT: ret void
|
|
;
|
|
%sqrt.x.3ulp.no.sub = call contract float @llvm.sqrt.f32(float %no.sub), !fpmath !3
|
|
%fdiv.opencl.no.sub = fdiv contract float 1.0, %sqrt.x.3ulp.no.sub, !fpmath !0
|
|
store volatile float %fdiv.opencl.no.sub, ptr addrspace(1) %out, align 4
|
|
|
|
%sqrt.x.3ulp.no.psub = call contract float @llvm.sqrt.f32(float %no.psub), !fpmath !3
|
|
%fdiv.opencl.no.psub = fdiv contract float 1.0, %sqrt.x.3ulp.no.psub, !fpmath !0
|
|
store volatile float %fdiv.opencl.no.psub, ptr addrspace(1) %out, align 4
|
|
|
|
%sqrt.x.3ulp.no.nsub = call contract float @llvm.sqrt.f32(float %no.nsub), !fpmath !3
|
|
%fdiv.opencl.no.nsub = fdiv contract float 1.0, %sqrt.x.3ulp.no.nsub, !fpmath !0
|
|
store volatile float %fdiv.opencl.no.nsub, ptr addrspace(1) %out, align 4
|
|
|
|
ret void
|
|
}
|
|
|
|
define amdgpu_kernel void @rsq_f32_assume_nosub(ptr addrspace(1) %out, float %x) {
|
|
; CHECK-LABEL: define amdgpu_kernel void @rsq_f32_assume_nosub
|
|
; CHECK-SAME: (ptr addrspace(1) [[OUT:%.*]], float [[X:%.*]]) #[[ATTR1]] {
|
|
; CHECK-NEXT: [[FABS_X:%.*]] = call float @llvm.fabs.f32(float [[X]])
|
|
; CHECK-NEXT: [[IS_NOT_SUBNORMAL:%.*]] = fcmp oge float [[FABS_X]], 0x3810000000000000
|
|
; CHECK-NEXT: call void @llvm.assume(i1 [[IS_NOT_SUBNORMAL]])
|
|
; CHECK-NEXT: [[FDIV_OPENCL_NO_SUB:%.*]] = call contract float @llvm.amdgcn.rsq.f32(float [[X]])
|
|
; CHECK-NEXT: store volatile float [[FDIV_OPENCL_NO_SUB]], ptr addrspace(1) [[OUT]], align 4
|
|
; CHECK-NEXT: ret void
|
|
;
|
|
%fabs.x = call float @llvm.fabs.f32(float %x)
|
|
%is.not.subnormal = fcmp oge float %fabs.x, 0x3810000000000000
|
|
call void @llvm.assume(i1 %is.not.subnormal)
|
|
%sqrt.x.3ulp.no.sub = call contract float @llvm.sqrt.f32(float %x), !fpmath !3
|
|
%fdiv.opencl.no.sub = fdiv contract float 1.0, %sqrt.x.3ulp.no.sub, !fpmath !0
|
|
store volatile float %fdiv.opencl.no.sub, ptr addrspace(1) %out, align 4
|
|
ret void
|
|
}
|
|
|
|
define amdgpu_kernel void @rsq_f32_vector_fpmath(ptr addrspace(1) %out, <2 x float> %x) {
|
|
; IEEE-GOODFREXP-LABEL: define amdgpu_kernel void @rsq_f32_vector_fpmath
|
|
; IEEE-GOODFREXP-SAME: (ptr addrspace(1) [[OUT:%.*]], <2 x float> [[X:%.*]]) #[[ATTR1]] {
|
|
; IEEE-GOODFREXP-NEXT: [[SQRT_X_NO_MD:%.*]] = call contract <2 x float> @llvm.sqrt.v2f32(<2 x float> [[X]])
|
|
; IEEE-GOODFREXP-NEXT: [[NO_MD:%.*]] = fdiv contract <2 x float> <float 1.000000e+00, float 1.000000e+00>, [[SQRT_X_NO_MD]]
|
|
; IEEE-GOODFREXP-NEXT: store volatile <2 x float> [[NO_MD]], ptr addrspace(1) [[OUT]], align 4
|
|
; IEEE-GOODFREXP-NEXT: [[SQRT_MD_1ULP:%.*]] = call contract <2 x float> @llvm.sqrt.v2f32(<2 x float> [[X]]), !fpmath !2
|
|
; IEEE-GOODFREXP-NEXT: [[TMP1:%.*]] = extractelement <2 x float> [[SQRT_MD_1ULP]], i64 0
|
|
; IEEE-GOODFREXP-NEXT: [[TMP2:%.*]] = extractelement <2 x float> [[SQRT_MD_1ULP]], i64 1
|
|
; IEEE-GOODFREXP-NEXT: [[TMP3:%.*]] = extractelement <2 x float> [[X]], i64 0
|
|
; IEEE-GOODFREXP-NEXT: [[TMP4:%.*]] = extractelement <2 x float> [[X]], i64 1
|
|
; IEEE-GOODFREXP-NEXT: [[TMP5:%.*]] = fcmp contract olt float [[TMP3]], 0x3810000000000000
|
|
; IEEE-GOODFREXP-NEXT: [[TMP6:%.*]] = select contract i1 [[TMP5]], float 0x4170000000000000, float 1.000000e+00
|
|
; IEEE-GOODFREXP-NEXT: [[TMP7:%.*]] = fmul contract float [[TMP3]], [[TMP6]]
|
|
; IEEE-GOODFREXP-NEXT: [[TMP8:%.*]] = call contract float @llvm.amdgcn.rsq.f32(float [[TMP7]])
|
|
; IEEE-GOODFREXP-NEXT: [[TMP9:%.*]] = select contract i1 [[TMP5]], float 4.096000e+03, float 1.000000e+00
|
|
; IEEE-GOODFREXP-NEXT: [[TMP10:%.*]] = fmul contract float [[TMP8]], [[TMP9]]
|
|
; IEEE-GOODFREXP-NEXT: [[TMP11:%.*]] = fcmp contract olt float [[TMP4]], 0x3810000000000000
|
|
; IEEE-GOODFREXP-NEXT: [[TMP12:%.*]] = select contract i1 [[TMP11]], float 0x4170000000000000, float 1.000000e+00
|
|
; IEEE-GOODFREXP-NEXT: [[TMP13:%.*]] = fmul contract float [[TMP4]], [[TMP12]]
|
|
; IEEE-GOODFREXP-NEXT: [[TMP14:%.*]] = call contract float @llvm.amdgcn.rsq.f32(float [[TMP13]])
|
|
; IEEE-GOODFREXP-NEXT: [[TMP15:%.*]] = select contract i1 [[TMP11]], float 4.096000e+03, float 1.000000e+00
|
|
; IEEE-GOODFREXP-NEXT: [[TMP16:%.*]] = fmul contract float [[TMP14]], [[TMP15]]
|
|
; IEEE-GOODFREXP-NEXT: [[TMP17:%.*]] = insertelement <2 x float> poison, float [[TMP10]], i64 0
|
|
; IEEE-GOODFREXP-NEXT: [[MD_1ULP:%.*]] = insertelement <2 x float> [[TMP17]], float [[TMP16]], i64 1
|
|
; IEEE-GOODFREXP-NEXT: store volatile <2 x float> [[MD_1ULP]], ptr addrspace(1) [[OUT]], align 4
|
|
; IEEE-GOODFREXP-NEXT: [[SQRT_MD_1ULP_UNDEF:%.*]] = call contract <2 x float> @llvm.sqrt.v2f32(<2 x float> [[X]]), !fpmath !2
|
|
; IEEE-GOODFREXP-NEXT: [[TMP18:%.*]] = extractelement <2 x float> [[SQRT_MD_1ULP_UNDEF]], i64 0
|
|
; IEEE-GOODFREXP-NEXT: [[TMP19:%.*]] = extractelement <2 x float> [[SQRT_MD_1ULP_UNDEF]], i64 1
|
|
; IEEE-GOODFREXP-NEXT: [[TMP20:%.*]] = extractelement <2 x float> [[X]], i64 0
|
|
; IEEE-GOODFREXP-NEXT: [[TMP21:%.*]] = extractelement <2 x float> [[X]], i64 1
|
|
; IEEE-GOODFREXP-NEXT: [[TMP22:%.*]] = fcmp contract olt float [[TMP20]], 0x3810000000000000
|
|
; IEEE-GOODFREXP-NEXT: [[TMP23:%.*]] = select contract i1 [[TMP22]], float 0x4170000000000000, float 1.000000e+00
|
|
; IEEE-GOODFREXP-NEXT: [[TMP24:%.*]] = fmul contract float [[TMP20]], [[TMP23]]
|
|
; IEEE-GOODFREXP-NEXT: [[TMP25:%.*]] = call contract float @llvm.amdgcn.rsq.f32(float [[TMP24]])
|
|
; IEEE-GOODFREXP-NEXT: [[TMP26:%.*]] = select contract i1 [[TMP22]], float 4.096000e+03, float 1.000000e+00
|
|
; IEEE-GOODFREXP-NEXT: [[TMP27:%.*]] = fmul contract float [[TMP25]], [[TMP26]]
|
|
; IEEE-GOODFREXP-NEXT: [[TMP28:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[TMP19]])
|
|
; IEEE-GOODFREXP-NEXT: [[TMP29:%.*]] = extractvalue { float, i32 } [[TMP28]], 0
|
|
; IEEE-GOODFREXP-NEXT: [[TMP30:%.*]] = extractvalue { float, i32 } [[TMP28]], 1
|
|
; IEEE-GOODFREXP-NEXT: [[TMP31:%.*]] = call contract float @llvm.amdgcn.rcp.f32(float [[TMP29]])
|
|
; IEEE-GOODFREXP-NEXT: [[TMP32:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float undef)
|
|
; IEEE-GOODFREXP-NEXT: [[TMP33:%.*]] = extractvalue { float, i32 } [[TMP32]], 0
|
|
; IEEE-GOODFREXP-NEXT: [[TMP34:%.*]] = extractvalue { float, i32 } [[TMP32]], 1
|
|
; IEEE-GOODFREXP-NEXT: [[TMP35:%.*]] = fmul contract float [[TMP33]], [[TMP31]]
|
|
; IEEE-GOODFREXP-NEXT: [[TMP36:%.*]] = sub i32 [[TMP34]], [[TMP30]]
|
|
; IEEE-GOODFREXP-NEXT: [[TMP37:%.*]] = call contract float @llvm.ldexp.f32.i32(float [[TMP35]], i32 [[TMP36]])
|
|
; IEEE-GOODFREXP-NEXT: [[TMP38:%.*]] = insertelement <2 x float> poison, float [[TMP27]], i64 0
|
|
; IEEE-GOODFREXP-NEXT: [[MD_1ULP_UNDEF:%.*]] = insertelement <2 x float> [[TMP38]], float [[TMP37]], i64 1
|
|
; IEEE-GOODFREXP-NEXT: store volatile <2 x float> [[MD_1ULP_UNDEF]], ptr addrspace(1) [[OUT]], align 4
|
|
; IEEE-GOODFREXP-NEXT: [[SQRT_X_3ULP:%.*]] = call contract <2 x float> @llvm.sqrt.v2f32(<2 x float> [[X]]), !fpmath !3
|
|
; IEEE-GOODFREXP-NEXT: [[TMP39:%.*]] = extractelement <2 x float> [[SQRT_X_3ULP]], i64 0
|
|
; IEEE-GOODFREXP-NEXT: [[TMP40:%.*]] = extractelement <2 x float> [[SQRT_X_3ULP]], i64 1
|
|
; IEEE-GOODFREXP-NEXT: [[TMP41:%.*]] = extractelement <2 x float> [[X]], i64 0
|
|
; IEEE-GOODFREXP-NEXT: [[TMP42:%.*]] = extractelement <2 x float> [[X]], i64 1
|
|
; IEEE-GOODFREXP-NEXT: [[TMP43:%.*]] = fcmp contract olt float [[TMP41]], 0x3810000000000000
|
|
; IEEE-GOODFREXP-NEXT: [[TMP44:%.*]] = select contract i1 [[TMP43]], float 0x4170000000000000, float 1.000000e+00
|
|
; IEEE-GOODFREXP-NEXT: [[TMP45:%.*]] = fmul contract float [[TMP41]], [[TMP44]]
|
|
; IEEE-GOODFREXP-NEXT: [[TMP46:%.*]] = call contract float @llvm.amdgcn.rsq.f32(float [[TMP45]])
|
|
; IEEE-GOODFREXP-NEXT: [[TMP47:%.*]] = select contract i1 [[TMP43]], float 4.096000e+03, float 1.000000e+00
|
|
; IEEE-GOODFREXP-NEXT: [[TMP48:%.*]] = fmul contract float [[TMP46]], [[TMP47]]
|
|
; IEEE-GOODFREXP-NEXT: [[TMP49:%.*]] = fcmp contract olt float [[TMP42]], 0x3810000000000000
|
|
; IEEE-GOODFREXP-NEXT: [[TMP50:%.*]] = select contract i1 [[TMP49]], float 0x4170000000000000, float 1.000000e+00
|
|
; IEEE-GOODFREXP-NEXT: [[TMP51:%.*]] = fmul contract float [[TMP42]], [[TMP50]]
|
|
; IEEE-GOODFREXP-NEXT: [[TMP52:%.*]] = call contract float @llvm.amdgcn.rsq.f32(float [[TMP51]])
|
|
; IEEE-GOODFREXP-NEXT: [[TMP53:%.*]] = select contract i1 [[TMP49]], float 4.096000e+03, float 1.000000e+00
|
|
; IEEE-GOODFREXP-NEXT: [[TMP54:%.*]] = fmul contract float [[TMP52]], [[TMP53]]
|
|
; IEEE-GOODFREXP-NEXT: [[TMP55:%.*]] = insertelement <2 x float> poison, float [[TMP48]], i64 0
|
|
; IEEE-GOODFREXP-NEXT: [[FDIV_OPENCL:%.*]] = insertelement <2 x float> [[TMP55]], float [[TMP54]], i64 1
|
|
; IEEE-GOODFREXP-NEXT: store volatile <2 x float> [[FDIV_OPENCL]], ptr addrspace(1) [[OUT]], align 4
|
|
; IEEE-GOODFREXP-NEXT: ret void
|
|
;
|
|
; IEEE-BADFREXP-LABEL: define amdgpu_kernel void @rsq_f32_vector_fpmath
|
|
; IEEE-BADFREXP-SAME: (ptr addrspace(1) [[OUT:%.*]], <2 x float> [[X:%.*]]) #[[ATTR1]] {
|
|
; IEEE-BADFREXP-NEXT: [[SQRT_X_NO_MD:%.*]] = call contract <2 x float> @llvm.sqrt.v2f32(<2 x float> [[X]])
|
|
; IEEE-BADFREXP-NEXT: [[NO_MD:%.*]] = fdiv contract <2 x float> <float 1.000000e+00, float 1.000000e+00>, [[SQRT_X_NO_MD]]
|
|
; IEEE-BADFREXP-NEXT: store volatile <2 x float> [[NO_MD]], ptr addrspace(1) [[OUT]], align 4
|
|
; IEEE-BADFREXP-NEXT: [[SQRT_MD_1ULP:%.*]] = call contract <2 x float> @llvm.sqrt.v2f32(<2 x float> [[X]]), !fpmath !2
|
|
; IEEE-BADFREXP-NEXT: [[TMP1:%.*]] = extractelement <2 x float> [[SQRT_MD_1ULP]], i64 0
|
|
; IEEE-BADFREXP-NEXT: [[TMP2:%.*]] = extractelement <2 x float> [[SQRT_MD_1ULP]], i64 1
|
|
; IEEE-BADFREXP-NEXT: [[TMP3:%.*]] = extractelement <2 x float> [[X]], i64 0
|
|
; IEEE-BADFREXP-NEXT: [[TMP4:%.*]] = extractelement <2 x float> [[X]], i64 1
|
|
; IEEE-BADFREXP-NEXT: [[TMP5:%.*]] = fcmp contract olt float [[TMP3]], 0x3810000000000000
|
|
; IEEE-BADFREXP-NEXT: [[TMP6:%.*]] = select contract i1 [[TMP5]], float 0x4170000000000000, float 1.000000e+00
|
|
; IEEE-BADFREXP-NEXT: [[TMP7:%.*]] = fmul contract float [[TMP3]], [[TMP6]]
|
|
; IEEE-BADFREXP-NEXT: [[TMP8:%.*]] = call contract float @llvm.amdgcn.rsq.f32(float [[TMP7]])
|
|
; IEEE-BADFREXP-NEXT: [[TMP9:%.*]] = select contract i1 [[TMP5]], float 4.096000e+03, float 1.000000e+00
|
|
; IEEE-BADFREXP-NEXT: [[TMP10:%.*]] = fmul contract float [[TMP8]], [[TMP9]]
|
|
; IEEE-BADFREXP-NEXT: [[TMP11:%.*]] = fcmp contract olt float [[TMP4]], 0x3810000000000000
|
|
; IEEE-BADFREXP-NEXT: [[TMP12:%.*]] = select contract i1 [[TMP11]], float 0x4170000000000000, float 1.000000e+00
|
|
; IEEE-BADFREXP-NEXT: [[TMP13:%.*]] = fmul contract float [[TMP4]], [[TMP12]]
|
|
; IEEE-BADFREXP-NEXT: [[TMP14:%.*]] = call contract float @llvm.amdgcn.rsq.f32(float [[TMP13]])
|
|
; IEEE-BADFREXP-NEXT: [[TMP15:%.*]] = select contract i1 [[TMP11]], float 4.096000e+03, float 1.000000e+00
|
|
; IEEE-BADFREXP-NEXT: [[TMP16:%.*]] = fmul contract float [[TMP14]], [[TMP15]]
|
|
; IEEE-BADFREXP-NEXT: [[TMP17:%.*]] = insertelement <2 x float> poison, float [[TMP10]], i64 0
|
|
; IEEE-BADFREXP-NEXT: [[MD_1ULP:%.*]] = insertelement <2 x float> [[TMP17]], float [[TMP16]], i64 1
|
|
; IEEE-BADFREXP-NEXT: store volatile <2 x float> [[MD_1ULP]], ptr addrspace(1) [[OUT]], align 4
|
|
; IEEE-BADFREXP-NEXT: [[SQRT_MD_1ULP_UNDEF:%.*]] = call contract <2 x float> @llvm.sqrt.v2f32(<2 x float> [[X]]), !fpmath !2
|
|
; IEEE-BADFREXP-NEXT: [[TMP18:%.*]] = extractelement <2 x float> [[SQRT_MD_1ULP_UNDEF]], i64 0
|
|
; IEEE-BADFREXP-NEXT: [[TMP19:%.*]] = extractelement <2 x float> [[SQRT_MD_1ULP_UNDEF]], i64 1
|
|
; IEEE-BADFREXP-NEXT: [[TMP20:%.*]] = extractelement <2 x float> [[X]], i64 0
|
|
; IEEE-BADFREXP-NEXT: [[TMP21:%.*]] = extractelement <2 x float> [[X]], i64 1
|
|
; IEEE-BADFREXP-NEXT: [[TMP22:%.*]] = fcmp contract olt float [[TMP20]], 0x3810000000000000
|
|
; IEEE-BADFREXP-NEXT: [[TMP23:%.*]] = select contract i1 [[TMP22]], float 0x4170000000000000, float 1.000000e+00
|
|
; IEEE-BADFREXP-NEXT: [[TMP24:%.*]] = fmul contract float [[TMP20]], [[TMP23]]
|
|
; IEEE-BADFREXP-NEXT: [[TMP25:%.*]] = call contract float @llvm.amdgcn.rsq.f32(float [[TMP24]])
|
|
; IEEE-BADFREXP-NEXT: [[TMP26:%.*]] = select contract i1 [[TMP22]], float 4.096000e+03, float 1.000000e+00
|
|
; IEEE-BADFREXP-NEXT: [[TMP27:%.*]] = fmul contract float [[TMP25]], [[TMP26]]
|
|
; IEEE-BADFREXP-NEXT: [[TMP28:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[TMP19]])
|
|
; IEEE-BADFREXP-NEXT: [[TMP29:%.*]] = extractvalue { float, i32 } [[TMP28]], 0
|
|
; IEEE-BADFREXP-NEXT: [[TMP30:%.*]] = call i32 @llvm.amdgcn.frexp.exp.i32.f32(float [[TMP19]])
|
|
; IEEE-BADFREXP-NEXT: [[TMP31:%.*]] = call contract float @llvm.amdgcn.rcp.f32(float [[TMP29]])
|
|
; IEEE-BADFREXP-NEXT: [[TMP32:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float undef)
|
|
; IEEE-BADFREXP-NEXT: [[TMP33:%.*]] = extractvalue { float, i32 } [[TMP32]], 0
|
|
; IEEE-BADFREXP-NEXT: [[TMP34:%.*]] = call i32 @llvm.amdgcn.frexp.exp.i32.f32(float undef)
|
|
; IEEE-BADFREXP-NEXT: [[TMP35:%.*]] = fmul contract float [[TMP33]], [[TMP31]]
|
|
; IEEE-BADFREXP-NEXT: [[TMP36:%.*]] = sub i32 [[TMP34]], [[TMP30]]
|
|
; IEEE-BADFREXP-NEXT: [[TMP37:%.*]] = call contract float @llvm.ldexp.f32.i32(float [[TMP35]], i32 [[TMP36]])
|
|
; IEEE-BADFREXP-NEXT: [[TMP38:%.*]] = insertelement <2 x float> poison, float [[TMP27]], i64 0
|
|
; IEEE-BADFREXP-NEXT: [[MD_1ULP_UNDEF:%.*]] = insertelement <2 x float> [[TMP38]], float [[TMP37]], i64 1
|
|
; IEEE-BADFREXP-NEXT: store volatile <2 x float> [[MD_1ULP_UNDEF]], ptr addrspace(1) [[OUT]], align 4
|
|
; IEEE-BADFREXP-NEXT: [[SQRT_X_3ULP:%.*]] = call contract <2 x float> @llvm.sqrt.v2f32(<2 x float> [[X]]), !fpmath !3
|
|
; IEEE-BADFREXP-NEXT: [[TMP39:%.*]] = extractelement <2 x float> [[SQRT_X_3ULP]], i64 0
|
|
; IEEE-BADFREXP-NEXT: [[TMP40:%.*]] = extractelement <2 x float> [[SQRT_X_3ULP]], i64 1
|
|
; IEEE-BADFREXP-NEXT: [[TMP41:%.*]] = extractelement <2 x float> [[X]], i64 0
|
|
; IEEE-BADFREXP-NEXT: [[TMP42:%.*]] = extractelement <2 x float> [[X]], i64 1
|
|
; IEEE-BADFREXP-NEXT: [[TMP43:%.*]] = fcmp contract olt float [[TMP41]], 0x3810000000000000
|
|
; IEEE-BADFREXP-NEXT: [[TMP44:%.*]] = select contract i1 [[TMP43]], float 0x4170000000000000, float 1.000000e+00
|
|
; IEEE-BADFREXP-NEXT: [[TMP45:%.*]] = fmul contract float [[TMP41]], [[TMP44]]
|
|
; IEEE-BADFREXP-NEXT: [[TMP46:%.*]] = call contract float @llvm.amdgcn.rsq.f32(float [[TMP45]])
|
|
; IEEE-BADFREXP-NEXT: [[TMP47:%.*]] = select contract i1 [[TMP43]], float 4.096000e+03, float 1.000000e+00
|
|
; IEEE-BADFREXP-NEXT: [[TMP48:%.*]] = fmul contract float [[TMP46]], [[TMP47]]
|
|
; IEEE-BADFREXP-NEXT: [[TMP49:%.*]] = fcmp contract olt float [[TMP42]], 0x3810000000000000
|
|
; IEEE-BADFREXP-NEXT: [[TMP50:%.*]] = select contract i1 [[TMP49]], float 0x4170000000000000, float 1.000000e+00
|
|
; IEEE-BADFREXP-NEXT: [[TMP51:%.*]] = fmul contract float [[TMP42]], [[TMP50]]
|
|
; IEEE-BADFREXP-NEXT: [[TMP52:%.*]] = call contract float @llvm.amdgcn.rsq.f32(float [[TMP51]])
|
|
; IEEE-BADFREXP-NEXT: [[TMP53:%.*]] = select contract i1 [[TMP49]], float 4.096000e+03, float 1.000000e+00
|
|
; IEEE-BADFREXP-NEXT: [[TMP54:%.*]] = fmul contract float [[TMP52]], [[TMP53]]
|
|
; IEEE-BADFREXP-NEXT: [[TMP55:%.*]] = insertelement <2 x float> poison, float [[TMP48]], i64 0
|
|
; IEEE-BADFREXP-NEXT: [[FDIV_OPENCL:%.*]] = insertelement <2 x float> [[TMP55]], float [[TMP54]], i64 1
|
|
; IEEE-BADFREXP-NEXT: store volatile <2 x float> [[FDIV_OPENCL]], ptr addrspace(1) [[OUT]], align 4
|
|
; IEEE-BADFREXP-NEXT: ret void
|
|
;
|
|
; DAZ-LABEL: define amdgpu_kernel void @rsq_f32_vector_fpmath
|
|
; DAZ-SAME: (ptr addrspace(1) [[OUT:%.*]], <2 x float> [[X:%.*]]) #[[ATTR1]] {
|
|
; DAZ-NEXT: [[SQRT_X_NO_MD:%.*]] = call contract <2 x float> @llvm.sqrt.v2f32(<2 x float> [[X]])
|
|
; DAZ-NEXT: [[NO_MD:%.*]] = fdiv contract <2 x float> <float 1.000000e+00, float 1.000000e+00>, [[SQRT_X_NO_MD]]
|
|
; DAZ-NEXT: store volatile <2 x float> [[NO_MD]], ptr addrspace(1) [[OUT]], align 4
|
|
; DAZ-NEXT: [[SQRT_MD_1ULP:%.*]] = call contract <2 x float> @llvm.sqrt.v2f32(<2 x float> [[X]]), !fpmath !2
|
|
; DAZ-NEXT: [[TMP1:%.*]] = extractelement <2 x float> [[SQRT_MD_1ULP]], i64 0
|
|
; DAZ-NEXT: [[TMP2:%.*]] = extractelement <2 x float> [[SQRT_MD_1ULP]], i64 1
|
|
; DAZ-NEXT: [[TMP3:%.*]] = extractelement <2 x float> [[X]], i64 0
|
|
; DAZ-NEXT: [[TMP4:%.*]] = extractelement <2 x float> [[X]], i64 1
|
|
; DAZ-NEXT: [[TMP5:%.*]] = call contract float @llvm.amdgcn.rsq.f32(float [[TMP3]])
|
|
; DAZ-NEXT: [[TMP6:%.*]] = call contract float @llvm.amdgcn.rsq.f32(float [[TMP4]])
|
|
; DAZ-NEXT: [[TMP7:%.*]] = insertelement <2 x float> poison, float [[TMP5]], i64 0
|
|
; DAZ-NEXT: [[MD_1ULP:%.*]] = insertelement <2 x float> [[TMP7]], float [[TMP6]], i64 1
|
|
; DAZ-NEXT: store volatile <2 x float> [[MD_1ULP]], ptr addrspace(1) [[OUT]], align 4
|
|
; DAZ-NEXT: [[SQRT_MD_1ULP_UNDEF:%.*]] = call contract <2 x float> @llvm.sqrt.v2f32(<2 x float> [[X]]), !fpmath !2
|
|
; DAZ-NEXT: [[TMP8:%.*]] = extractelement <2 x float> [[SQRT_MD_1ULP_UNDEF]], i64 0
|
|
; DAZ-NEXT: [[TMP9:%.*]] = extractelement <2 x float> [[SQRT_MD_1ULP_UNDEF]], i64 1
|
|
; DAZ-NEXT: [[TMP10:%.*]] = extractelement <2 x float> [[X]], i64 0
|
|
; DAZ-NEXT: [[TMP11:%.*]] = extractelement <2 x float> [[X]], i64 1
|
|
; DAZ-NEXT: [[TMP12:%.*]] = call contract float @llvm.amdgcn.rsq.f32(float [[TMP10]])
|
|
; DAZ-NEXT: [[TMP13:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[TMP9]])
|
|
; DAZ-NEXT: [[TMP14:%.*]] = extractvalue { float, i32 } [[TMP13]], 0
|
|
; DAZ-NEXT: [[TMP15:%.*]] = extractvalue { float, i32 } [[TMP13]], 1
|
|
; DAZ-NEXT: [[TMP16:%.*]] = call contract float @llvm.amdgcn.rcp.f32(float [[TMP14]])
|
|
; DAZ-NEXT: [[TMP17:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float undef)
|
|
; DAZ-NEXT: [[TMP18:%.*]] = extractvalue { float, i32 } [[TMP17]], 0
|
|
; DAZ-NEXT: [[TMP19:%.*]] = extractvalue { float, i32 } [[TMP17]], 1
|
|
; DAZ-NEXT: [[TMP20:%.*]] = fmul contract float [[TMP18]], [[TMP16]]
|
|
; DAZ-NEXT: [[TMP21:%.*]] = sub i32 [[TMP19]], [[TMP15]]
|
|
; DAZ-NEXT: [[TMP22:%.*]] = call contract float @llvm.ldexp.f32.i32(float [[TMP20]], i32 [[TMP21]])
|
|
; DAZ-NEXT: [[TMP23:%.*]] = insertelement <2 x float> poison, float [[TMP12]], i64 0
|
|
; DAZ-NEXT: [[MD_1ULP_UNDEF:%.*]] = insertelement <2 x float> [[TMP23]], float [[TMP22]], i64 1
|
|
; DAZ-NEXT: store volatile <2 x float> [[MD_1ULP_UNDEF]], ptr addrspace(1) [[OUT]], align 4
|
|
; DAZ-NEXT: [[SQRT_X_3ULP:%.*]] = call contract <2 x float> @llvm.sqrt.v2f32(<2 x float> [[X]]), !fpmath !3
|
|
; DAZ-NEXT: [[TMP24:%.*]] = extractelement <2 x float> [[SQRT_X_3ULP]], i64 0
|
|
; DAZ-NEXT: [[TMP25:%.*]] = extractelement <2 x float> [[SQRT_X_3ULP]], i64 1
|
|
; DAZ-NEXT: [[TMP26:%.*]] = extractelement <2 x float> [[X]], i64 0
|
|
; DAZ-NEXT: [[TMP27:%.*]] = extractelement <2 x float> [[X]], i64 1
|
|
; DAZ-NEXT: [[TMP28:%.*]] = call contract float @llvm.amdgcn.rsq.f32(float [[TMP26]])
|
|
; DAZ-NEXT: [[TMP29:%.*]] = call contract float @llvm.amdgcn.rsq.f32(float [[TMP27]])
|
|
; DAZ-NEXT: [[TMP30:%.*]] = insertelement <2 x float> poison, float [[TMP28]], i64 0
|
|
; DAZ-NEXT: [[FDIV_OPENCL:%.*]] = insertelement <2 x float> [[TMP30]], float [[TMP29]], i64 1
|
|
; DAZ-NEXT: store volatile <2 x float> [[FDIV_OPENCL]], ptr addrspace(1) [[OUT]], align 4
|
|
; DAZ-NEXT: ret void
|
|
;
|
|
%sqrt.x.no.md = call contract <2 x float> @llvm.sqrt.v2f32(<2 x float> %x)
|
|
%no.md = fdiv contract <2 x float> <float 1.0, float 1.0>, %sqrt.x.no.md
|
|
store volatile <2 x float> %no.md, ptr addrspace(1) %out, align 4
|
|
|
|
; Matches the rsq instruction accuracy
|
|
%sqrt.md.1ulp = call contract <2 x float> @llvm.sqrt.v2f32(<2 x float> %x), !fpmath !2
|
|
%md.1ulp = fdiv contract <2 x float> <float 1.0, float 1.0>, %sqrt.md.1ulp, !fpmath !2
|
|
store volatile <2 x float> %md.1ulp, ptr addrspace(1) %out, align 4
|
|
|
|
; Matches the rsq instruction accuracy
|
|
%sqrt.md.1ulp.undef = call contract <2 x float> @llvm.sqrt.v2f32(<2 x float> %x), !fpmath !2
|
|
%md.1ulp.undef = fdiv contract <2 x float> <float 1.0, float undef>, %sqrt.md.1ulp.undef, !fpmath !2
|
|
store volatile <2 x float> %md.1ulp.undef, ptr addrspace(1) %out, align 4
|
|
|
|
; Test mismatched metadata/flags between the sqrt and fdiv
|
|
|
|
; Test the expected opencl default pattern
|
|
%sqrt.x.3ulp = call contract <2 x float> @llvm.sqrt.v2f32(<2 x float> %x), !fpmath !3 ; OpenCL default requires 3 for sqrt and 2.5 for fdiv
|
|
%fdiv.opencl = fdiv contract <2 x float> <float 1.0, float 1.0>, %sqrt.x.3ulp, !fpmath !0
|
|
store volatile <2 x float> %fdiv.opencl, ptr addrspace(1) %out, align 4
|
|
|
|
ret void
|
|
}
|
|
|
|
define amdgpu_kernel void @multiple_arcp_fdiv_denom_nomd(ptr addrspace(1) %out, float %x, float %y, float %denom) {
|
|
; CHECK-LABEL: define amdgpu_kernel void @multiple_arcp_fdiv_denom_nomd
|
|
; CHECK-SAME: (ptr addrspace(1) [[OUT:%.*]], float [[X:%.*]], float [[Y:%.*]], float [[DENOM:%.*]]) #[[ATTR1]] {
|
|
; CHECK-NEXT: [[ARCP0:%.*]] = fdiv arcp float [[X]], [[DENOM]]
|
|
; CHECK-NEXT: [[ARCP1:%.*]] = fdiv arcp float [[Y]], [[DENOM]]
|
|
; CHECK-NEXT: store volatile float [[ARCP0]], ptr addrspace(1) [[OUT]], align 4
|
|
; CHECK-NEXT: store volatile float [[ARCP1]], ptr addrspace(1) [[OUT]], align 4
|
|
; CHECK-NEXT: ret void
|
|
;
|
|
%arcp0 = fdiv arcp float %x, %denom
|
|
%arcp1 = fdiv arcp float %y, %denom
|
|
store volatile float %arcp0, ptr addrspace(1) %out
|
|
store volatile float %arcp1, ptr addrspace(1) %out
|
|
ret void
|
|
}
|
|
|
|
define amdgpu_kernel void @multiple_arcp_fdiv_denom_25ulp(ptr addrspace(1) %out, float %x, float %y, float %denom) {
|
|
; IEEE-GOODFREXP-LABEL: define amdgpu_kernel void @multiple_arcp_fdiv_denom_25ulp
|
|
; IEEE-GOODFREXP-SAME: (ptr addrspace(1) [[OUT:%.*]], float [[X:%.*]], float [[Y:%.*]], float [[DENOM:%.*]]) #[[ATTR1]] {
|
|
; IEEE-GOODFREXP-NEXT: [[TMP1:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[DENOM]])
|
|
; IEEE-GOODFREXP-NEXT: [[TMP2:%.*]] = extractvalue { float, i32 } [[TMP1]], 0
|
|
; IEEE-GOODFREXP-NEXT: [[TMP3:%.*]] = extractvalue { float, i32 } [[TMP1]], 1
|
|
; IEEE-GOODFREXP-NEXT: [[TMP4:%.*]] = sub i32 0, [[TMP3]]
|
|
; IEEE-GOODFREXP-NEXT: [[TMP5:%.*]] = call arcp float @llvm.amdgcn.rcp.f32(float [[TMP2]])
|
|
; IEEE-GOODFREXP-NEXT: [[TMP6:%.*]] = call arcp float @llvm.ldexp.f32.i32(float [[TMP5]], i32 [[TMP4]])
|
|
; IEEE-GOODFREXP-NEXT: [[ARCP0:%.*]] = fmul arcp float [[X]], [[TMP6]]
|
|
; IEEE-GOODFREXP-NEXT: [[TMP7:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[DENOM]])
|
|
; IEEE-GOODFREXP-NEXT: [[TMP8:%.*]] = extractvalue { float, i32 } [[TMP7]], 0
|
|
; IEEE-GOODFREXP-NEXT: [[TMP9:%.*]] = extractvalue { float, i32 } [[TMP7]], 1
|
|
; IEEE-GOODFREXP-NEXT: [[TMP10:%.*]] = sub i32 0, [[TMP9]]
|
|
; IEEE-GOODFREXP-NEXT: [[TMP11:%.*]] = call arcp float @llvm.amdgcn.rcp.f32(float [[TMP8]])
|
|
; IEEE-GOODFREXP-NEXT: [[TMP12:%.*]] = call arcp float @llvm.ldexp.f32.i32(float [[TMP11]], i32 [[TMP10]])
|
|
; IEEE-GOODFREXP-NEXT: [[ARCP1:%.*]] = fmul arcp float [[Y]], [[TMP12]]
|
|
; IEEE-GOODFREXP-NEXT: store volatile float [[ARCP0]], ptr addrspace(1) [[OUT]], align 4
|
|
; IEEE-GOODFREXP-NEXT: store volatile float [[ARCP1]], ptr addrspace(1) [[OUT]], align 4
|
|
; IEEE-GOODFREXP-NEXT: ret void
|
|
;
|
|
; IEEE-BADFREXP-LABEL: define amdgpu_kernel void @multiple_arcp_fdiv_denom_25ulp
|
|
; IEEE-BADFREXP-SAME: (ptr addrspace(1) [[OUT:%.*]], float [[X:%.*]], float [[Y:%.*]], float [[DENOM:%.*]]) #[[ATTR1]] {
|
|
; IEEE-BADFREXP-NEXT: [[TMP1:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[DENOM]])
|
|
; IEEE-BADFREXP-NEXT: [[TMP2:%.*]] = extractvalue { float, i32 } [[TMP1]], 0
|
|
; IEEE-BADFREXP-NEXT: [[TMP3:%.*]] = call i32 @llvm.amdgcn.frexp.exp.i32.f32(float [[DENOM]])
|
|
; IEEE-BADFREXP-NEXT: [[TMP4:%.*]] = sub i32 0, [[TMP3]]
|
|
; IEEE-BADFREXP-NEXT: [[TMP5:%.*]] = call arcp float @llvm.amdgcn.rcp.f32(float [[TMP2]])
|
|
; IEEE-BADFREXP-NEXT: [[TMP6:%.*]] = call arcp float @llvm.ldexp.f32.i32(float [[TMP5]], i32 [[TMP4]])
|
|
; IEEE-BADFREXP-NEXT: [[ARCP0:%.*]] = fmul arcp float [[X]], [[TMP6]]
|
|
; IEEE-BADFREXP-NEXT: [[TMP7:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[DENOM]])
|
|
; IEEE-BADFREXP-NEXT: [[TMP8:%.*]] = extractvalue { float, i32 } [[TMP7]], 0
|
|
; IEEE-BADFREXP-NEXT: [[TMP9:%.*]] = call i32 @llvm.amdgcn.frexp.exp.i32.f32(float [[DENOM]])
|
|
; IEEE-BADFREXP-NEXT: [[TMP10:%.*]] = sub i32 0, [[TMP9]]
|
|
; IEEE-BADFREXP-NEXT: [[TMP11:%.*]] = call arcp float @llvm.amdgcn.rcp.f32(float [[TMP8]])
|
|
; IEEE-BADFREXP-NEXT: [[TMP12:%.*]] = call arcp float @llvm.ldexp.f32.i32(float [[TMP11]], i32 [[TMP10]])
|
|
; IEEE-BADFREXP-NEXT: [[ARCP1:%.*]] = fmul arcp float [[Y]], [[TMP12]]
|
|
; IEEE-BADFREXP-NEXT: store volatile float [[ARCP0]], ptr addrspace(1) [[OUT]], align 4
|
|
; IEEE-BADFREXP-NEXT: store volatile float [[ARCP1]], ptr addrspace(1) [[OUT]], align 4
|
|
; IEEE-BADFREXP-NEXT: ret void
|
|
;
|
|
; DAZ-LABEL: define amdgpu_kernel void @multiple_arcp_fdiv_denom_25ulp
|
|
; DAZ-SAME: (ptr addrspace(1) [[OUT:%.*]], float [[X:%.*]], float [[Y:%.*]], float [[DENOM:%.*]]) #[[ATTR1]] {
|
|
; DAZ-NEXT: [[TMP1:%.*]] = call arcp float @llvm.amdgcn.rcp.f32(float [[DENOM]])
|
|
; DAZ-NEXT: [[ARCP0:%.*]] = fmul arcp float [[X]], [[TMP1]]
|
|
; DAZ-NEXT: [[TMP2:%.*]] = call arcp float @llvm.amdgcn.rcp.f32(float [[DENOM]])
|
|
; DAZ-NEXT: [[ARCP1:%.*]] = fmul arcp float [[Y]], [[TMP2]]
|
|
; DAZ-NEXT: store volatile float [[ARCP0]], ptr addrspace(1) [[OUT]], align 4
|
|
; DAZ-NEXT: store volatile float [[ARCP1]], ptr addrspace(1) [[OUT]], align 4
|
|
; DAZ-NEXT: ret void
|
|
;
|
|
%arcp0 = fdiv arcp float %x, %denom, !fpmath !0
|
|
%arcp1 = fdiv arcp float %y, %denom, !fpmath !0
|
|
store volatile float %arcp0, ptr addrspace(1) %out
|
|
store volatile float %arcp1, ptr addrspace(1) %out
|
|
ret void
|
|
}
|
|
|
|
define amdgpu_kernel void @multiple_arcp_fdiv_denom_25ulp_x3(ptr addrspace(1) %out, float %x, float %y, float %z, float %denom) {
|
|
; IEEE-GOODFREXP-LABEL: define amdgpu_kernel void @multiple_arcp_fdiv_denom_25ulp_x3
|
|
; IEEE-GOODFREXP-SAME: (ptr addrspace(1) [[OUT:%.*]], float [[X:%.*]], float [[Y:%.*]], float [[Z:%.*]], float [[DENOM:%.*]]) #[[ATTR1]] {
|
|
; IEEE-GOODFREXP-NEXT: [[TMP1:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[DENOM]])
|
|
; IEEE-GOODFREXP-NEXT: [[TMP2:%.*]] = extractvalue { float, i32 } [[TMP1]], 0
|
|
; IEEE-GOODFREXP-NEXT: [[TMP3:%.*]] = extractvalue { float, i32 } [[TMP1]], 1
|
|
; IEEE-GOODFREXP-NEXT: [[TMP4:%.*]] = sub i32 0, [[TMP3]]
|
|
; IEEE-GOODFREXP-NEXT: [[TMP5:%.*]] = call arcp float @llvm.amdgcn.rcp.f32(float [[TMP2]])
|
|
; IEEE-GOODFREXP-NEXT: [[TMP6:%.*]] = call arcp float @llvm.ldexp.f32.i32(float [[TMP5]], i32 [[TMP4]])
|
|
; IEEE-GOODFREXP-NEXT: [[ARCP0:%.*]] = fmul arcp float [[X]], [[TMP6]]
|
|
; IEEE-GOODFREXP-NEXT: [[TMP7:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[DENOM]])
|
|
; IEEE-GOODFREXP-NEXT: [[TMP8:%.*]] = extractvalue { float, i32 } [[TMP7]], 0
|
|
; IEEE-GOODFREXP-NEXT: [[TMP9:%.*]] = extractvalue { float, i32 } [[TMP7]], 1
|
|
; IEEE-GOODFREXP-NEXT: [[TMP10:%.*]] = sub i32 0, [[TMP9]]
|
|
; IEEE-GOODFREXP-NEXT: [[TMP11:%.*]] = call arcp float @llvm.amdgcn.rcp.f32(float [[TMP8]])
|
|
; IEEE-GOODFREXP-NEXT: [[TMP12:%.*]] = call arcp float @llvm.ldexp.f32.i32(float [[TMP11]], i32 [[TMP10]])
|
|
; IEEE-GOODFREXP-NEXT: [[ARCP1:%.*]] = fmul arcp float [[Y]], [[TMP12]]
|
|
; IEEE-GOODFREXP-NEXT: [[TMP13:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[DENOM]])
|
|
; IEEE-GOODFREXP-NEXT: [[TMP14:%.*]] = extractvalue { float, i32 } [[TMP13]], 0
|
|
; IEEE-GOODFREXP-NEXT: [[TMP15:%.*]] = extractvalue { float, i32 } [[TMP13]], 1
|
|
; IEEE-GOODFREXP-NEXT: [[TMP16:%.*]] = sub i32 0, [[TMP15]]
|
|
; IEEE-GOODFREXP-NEXT: [[TMP17:%.*]] = call arcp float @llvm.amdgcn.rcp.f32(float [[TMP14]])
|
|
; IEEE-GOODFREXP-NEXT: [[TMP18:%.*]] = call arcp float @llvm.ldexp.f32.i32(float [[TMP17]], i32 [[TMP16]])
|
|
; IEEE-GOODFREXP-NEXT: [[ARCP2:%.*]] = fmul arcp float [[Z]], [[TMP18]]
|
|
; IEEE-GOODFREXP-NEXT: store volatile float [[ARCP0]], ptr addrspace(1) [[OUT]], align 4
|
|
; IEEE-GOODFREXP-NEXT: store volatile float [[ARCP1]], ptr addrspace(1) [[OUT]], align 4
|
|
; IEEE-GOODFREXP-NEXT: store volatile float [[ARCP2]], ptr addrspace(1) [[OUT]], align 4
|
|
; IEEE-GOODFREXP-NEXT: ret void
|
|
;
|
|
; IEEE-BADFREXP-LABEL: define amdgpu_kernel void @multiple_arcp_fdiv_denom_25ulp_x3
|
|
; IEEE-BADFREXP-SAME: (ptr addrspace(1) [[OUT:%.*]], float [[X:%.*]], float [[Y:%.*]], float [[Z:%.*]], float [[DENOM:%.*]]) #[[ATTR1]] {
|
|
; IEEE-BADFREXP-NEXT: [[TMP1:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[DENOM]])
|
|
; IEEE-BADFREXP-NEXT: [[TMP2:%.*]] = extractvalue { float, i32 } [[TMP1]], 0
|
|
; IEEE-BADFREXP-NEXT: [[TMP3:%.*]] = call i32 @llvm.amdgcn.frexp.exp.i32.f32(float [[DENOM]])
|
|
; IEEE-BADFREXP-NEXT: [[TMP4:%.*]] = sub i32 0, [[TMP3]]
|
|
; IEEE-BADFREXP-NEXT: [[TMP5:%.*]] = call arcp float @llvm.amdgcn.rcp.f32(float [[TMP2]])
|
|
; IEEE-BADFREXP-NEXT: [[TMP6:%.*]] = call arcp float @llvm.ldexp.f32.i32(float [[TMP5]], i32 [[TMP4]])
|
|
; IEEE-BADFREXP-NEXT: [[ARCP0:%.*]] = fmul arcp float [[X]], [[TMP6]]
|
|
; IEEE-BADFREXP-NEXT: [[TMP7:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[DENOM]])
|
|
; IEEE-BADFREXP-NEXT: [[TMP8:%.*]] = extractvalue { float, i32 } [[TMP7]], 0
|
|
; IEEE-BADFREXP-NEXT: [[TMP9:%.*]] = call i32 @llvm.amdgcn.frexp.exp.i32.f32(float [[DENOM]])
|
|
; IEEE-BADFREXP-NEXT: [[TMP10:%.*]] = sub i32 0, [[TMP9]]
|
|
; IEEE-BADFREXP-NEXT: [[TMP11:%.*]] = call arcp float @llvm.amdgcn.rcp.f32(float [[TMP8]])
|
|
; IEEE-BADFREXP-NEXT: [[TMP12:%.*]] = call arcp float @llvm.ldexp.f32.i32(float [[TMP11]], i32 [[TMP10]])
|
|
; IEEE-BADFREXP-NEXT: [[ARCP1:%.*]] = fmul arcp float [[Y]], [[TMP12]]
|
|
; IEEE-BADFREXP-NEXT: [[TMP13:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[DENOM]])
|
|
; IEEE-BADFREXP-NEXT: [[TMP14:%.*]] = extractvalue { float, i32 } [[TMP13]], 0
|
|
; IEEE-BADFREXP-NEXT: [[TMP15:%.*]] = call i32 @llvm.amdgcn.frexp.exp.i32.f32(float [[DENOM]])
|
|
; IEEE-BADFREXP-NEXT: [[TMP16:%.*]] = sub i32 0, [[TMP15]]
|
|
; IEEE-BADFREXP-NEXT: [[TMP17:%.*]] = call arcp float @llvm.amdgcn.rcp.f32(float [[TMP14]])
|
|
; IEEE-BADFREXP-NEXT: [[TMP18:%.*]] = call arcp float @llvm.ldexp.f32.i32(float [[TMP17]], i32 [[TMP16]])
|
|
; IEEE-BADFREXP-NEXT: [[ARCP2:%.*]] = fmul arcp float [[Z]], [[TMP18]]
|
|
; IEEE-BADFREXP-NEXT: store volatile float [[ARCP0]], ptr addrspace(1) [[OUT]], align 4
|
|
; IEEE-BADFREXP-NEXT: store volatile float [[ARCP1]], ptr addrspace(1) [[OUT]], align 4
|
|
; IEEE-BADFREXP-NEXT: store volatile float [[ARCP2]], ptr addrspace(1) [[OUT]], align 4
|
|
; IEEE-BADFREXP-NEXT: ret void
|
|
;
|
|
; DAZ-LABEL: define amdgpu_kernel void @multiple_arcp_fdiv_denom_25ulp_x3
|
|
; DAZ-SAME: (ptr addrspace(1) [[OUT:%.*]], float [[X:%.*]], float [[Y:%.*]], float [[Z:%.*]], float [[DENOM:%.*]]) #[[ATTR1]] {
|
|
; DAZ-NEXT: [[TMP1:%.*]] = call arcp float @llvm.amdgcn.rcp.f32(float [[DENOM]])
|
|
; DAZ-NEXT: [[ARCP0:%.*]] = fmul arcp float [[X]], [[TMP1]]
|
|
; DAZ-NEXT: [[TMP2:%.*]] = call arcp float @llvm.amdgcn.rcp.f32(float [[DENOM]])
|
|
; DAZ-NEXT: [[ARCP1:%.*]] = fmul arcp float [[Y]], [[TMP2]]
|
|
; DAZ-NEXT: [[TMP3:%.*]] = call arcp float @llvm.amdgcn.rcp.f32(float [[DENOM]])
|
|
; DAZ-NEXT: [[ARCP2:%.*]] = fmul arcp float [[Z]], [[TMP3]]
|
|
; DAZ-NEXT: store volatile float [[ARCP0]], ptr addrspace(1) [[OUT]], align 4
|
|
; DAZ-NEXT: store volatile float [[ARCP1]], ptr addrspace(1) [[OUT]], align 4
|
|
; DAZ-NEXT: store volatile float [[ARCP2]], ptr addrspace(1) [[OUT]], align 4
|
|
; DAZ-NEXT: ret void
|
|
;
|
|
%arcp0 = fdiv arcp float %x, %denom, !fpmath !0
|
|
%arcp1 = fdiv arcp float %y, %denom, !fpmath !0
|
|
%arcp2 = fdiv arcp float %z, %denom, !fpmath !0
|
|
store volatile float %arcp0, ptr addrspace(1) %out
|
|
store volatile float %arcp1, ptr addrspace(1) %out
|
|
store volatile float %arcp2, ptr addrspace(1) %out
|
|
ret void
|
|
}
|
|
|
|
define amdgpu_kernel void @multiple_arcp_fdiv_denom_25ulp_nomd(ptr addrspace(1) %out, float %x, float %y, float %denom) {
|
|
; IEEE-GOODFREXP-LABEL: define amdgpu_kernel void @multiple_arcp_fdiv_denom_25ulp_nomd
|
|
; IEEE-GOODFREXP-SAME: (ptr addrspace(1) [[OUT:%.*]], float [[X:%.*]], float [[Y:%.*]], float [[DENOM:%.*]]) #[[ATTR1]] {
|
|
; IEEE-GOODFREXP-NEXT: [[TMP1:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[DENOM]])
|
|
; IEEE-GOODFREXP-NEXT: [[TMP2:%.*]] = extractvalue { float, i32 } [[TMP1]], 0
|
|
; IEEE-GOODFREXP-NEXT: [[TMP3:%.*]] = extractvalue { float, i32 } [[TMP1]], 1
|
|
; IEEE-GOODFREXP-NEXT: [[TMP4:%.*]] = sub i32 0, [[TMP3]]
|
|
; IEEE-GOODFREXP-NEXT: [[TMP5:%.*]] = call arcp float @llvm.amdgcn.rcp.f32(float [[TMP2]])
|
|
; IEEE-GOODFREXP-NEXT: [[TMP6:%.*]] = call arcp float @llvm.ldexp.f32.i32(float [[TMP5]], i32 [[TMP4]])
|
|
; IEEE-GOODFREXP-NEXT: [[ARCP0:%.*]] = fmul arcp float [[X]], [[TMP6]]
|
|
; IEEE-GOODFREXP-NEXT: [[ARCP1:%.*]] = fdiv arcp float [[Y]], [[DENOM]]
|
|
; IEEE-GOODFREXP-NEXT: store volatile float [[ARCP0]], ptr addrspace(1) [[OUT]], align 4
|
|
; IEEE-GOODFREXP-NEXT: store volatile float [[ARCP1]], ptr addrspace(1) [[OUT]], align 4
|
|
; IEEE-GOODFREXP-NEXT: ret void
|
|
;
|
|
; IEEE-BADFREXP-LABEL: define amdgpu_kernel void @multiple_arcp_fdiv_denom_25ulp_nomd
|
|
; IEEE-BADFREXP-SAME: (ptr addrspace(1) [[OUT:%.*]], float [[X:%.*]], float [[Y:%.*]], float [[DENOM:%.*]]) #[[ATTR1]] {
|
|
; IEEE-BADFREXP-NEXT: [[TMP1:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[DENOM]])
|
|
; IEEE-BADFREXP-NEXT: [[TMP2:%.*]] = extractvalue { float, i32 } [[TMP1]], 0
|
|
; IEEE-BADFREXP-NEXT: [[TMP3:%.*]] = call i32 @llvm.amdgcn.frexp.exp.i32.f32(float [[DENOM]])
|
|
; IEEE-BADFREXP-NEXT: [[TMP4:%.*]] = sub i32 0, [[TMP3]]
|
|
; IEEE-BADFREXP-NEXT: [[TMP5:%.*]] = call arcp float @llvm.amdgcn.rcp.f32(float [[TMP2]])
|
|
; IEEE-BADFREXP-NEXT: [[TMP6:%.*]] = call arcp float @llvm.ldexp.f32.i32(float [[TMP5]], i32 [[TMP4]])
|
|
; IEEE-BADFREXP-NEXT: [[ARCP0:%.*]] = fmul arcp float [[X]], [[TMP6]]
|
|
; IEEE-BADFREXP-NEXT: [[ARCP1:%.*]] = fdiv arcp float [[Y]], [[DENOM]]
|
|
; IEEE-BADFREXP-NEXT: store volatile float [[ARCP0]], ptr addrspace(1) [[OUT]], align 4
|
|
; IEEE-BADFREXP-NEXT: store volatile float [[ARCP1]], ptr addrspace(1) [[OUT]], align 4
|
|
; IEEE-BADFREXP-NEXT: ret void
|
|
;
|
|
; DAZ-LABEL: define amdgpu_kernel void @multiple_arcp_fdiv_denom_25ulp_nomd
|
|
; DAZ-SAME: (ptr addrspace(1) [[OUT:%.*]], float [[X:%.*]], float [[Y:%.*]], float [[DENOM:%.*]]) #[[ATTR1]] {
|
|
; DAZ-NEXT: [[TMP1:%.*]] = call arcp float @llvm.amdgcn.rcp.f32(float [[DENOM]])
|
|
; DAZ-NEXT: [[ARCP0:%.*]] = fmul arcp float [[X]], [[TMP1]]
|
|
; DAZ-NEXT: [[ARCP1:%.*]] = fdiv arcp float [[Y]], [[DENOM]]
|
|
; DAZ-NEXT: store volatile float [[ARCP0]], ptr addrspace(1) [[OUT]], align 4
|
|
; DAZ-NEXT: store volatile float [[ARCP1]], ptr addrspace(1) [[OUT]], align 4
|
|
; DAZ-NEXT: ret void
|
|
;
|
|
%arcp0 = fdiv arcp float %x, %denom, !fpmath !0
|
|
%arcp1 = fdiv arcp float %y, %denom
|
|
store volatile float %arcp0, ptr addrspace(1) %out
|
|
store volatile float %arcp1, ptr addrspace(1) %out
|
|
ret void
|
|
}
|
|
|
|
define amdgpu_kernel void @multiple_arcp_fdiv_denom_nomd_25ulp(ptr addrspace(1) %out, float %x, float %y, float %denom) {
|
|
; IEEE-GOODFREXP-LABEL: define amdgpu_kernel void @multiple_arcp_fdiv_denom_nomd_25ulp
|
|
; IEEE-GOODFREXP-SAME: (ptr addrspace(1) [[OUT:%.*]], float [[X:%.*]], float [[Y:%.*]], float [[DENOM:%.*]]) #[[ATTR1]] {
|
|
; IEEE-GOODFREXP-NEXT: [[ARCP0:%.*]] = fdiv arcp float [[X]], [[DENOM]]
|
|
; IEEE-GOODFREXP-NEXT: [[TMP1:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[DENOM]])
|
|
; IEEE-GOODFREXP-NEXT: [[TMP2:%.*]] = extractvalue { float, i32 } [[TMP1]], 0
|
|
; IEEE-GOODFREXP-NEXT: [[TMP3:%.*]] = extractvalue { float, i32 } [[TMP1]], 1
|
|
; IEEE-GOODFREXP-NEXT: [[TMP4:%.*]] = sub i32 0, [[TMP3]]
|
|
; IEEE-GOODFREXP-NEXT: [[TMP5:%.*]] = call arcp float @llvm.amdgcn.rcp.f32(float [[TMP2]])
|
|
; IEEE-GOODFREXP-NEXT: [[TMP6:%.*]] = call arcp float @llvm.ldexp.f32.i32(float [[TMP5]], i32 [[TMP4]])
|
|
; IEEE-GOODFREXP-NEXT: [[ARCP1:%.*]] = fmul arcp float [[Y]], [[TMP6]]
|
|
; IEEE-GOODFREXP-NEXT: store volatile float [[ARCP0]], ptr addrspace(1) [[OUT]], align 4
|
|
; IEEE-GOODFREXP-NEXT: store volatile float [[ARCP1]], ptr addrspace(1) [[OUT]], align 4
|
|
; IEEE-GOODFREXP-NEXT: ret void
|
|
;
|
|
; IEEE-BADFREXP-LABEL: define amdgpu_kernel void @multiple_arcp_fdiv_denom_nomd_25ulp
|
|
; IEEE-BADFREXP-SAME: (ptr addrspace(1) [[OUT:%.*]], float [[X:%.*]], float [[Y:%.*]], float [[DENOM:%.*]]) #[[ATTR1]] {
|
|
; IEEE-BADFREXP-NEXT: [[ARCP0:%.*]] = fdiv arcp float [[X]], [[DENOM]]
|
|
; IEEE-BADFREXP-NEXT: [[TMP1:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[DENOM]])
|
|
; IEEE-BADFREXP-NEXT: [[TMP2:%.*]] = extractvalue { float, i32 } [[TMP1]], 0
|
|
; IEEE-BADFREXP-NEXT: [[TMP3:%.*]] = call i32 @llvm.amdgcn.frexp.exp.i32.f32(float [[DENOM]])
|
|
; IEEE-BADFREXP-NEXT: [[TMP4:%.*]] = sub i32 0, [[TMP3]]
|
|
; IEEE-BADFREXP-NEXT: [[TMP5:%.*]] = call arcp float @llvm.amdgcn.rcp.f32(float [[TMP2]])
|
|
; IEEE-BADFREXP-NEXT: [[TMP6:%.*]] = call arcp float @llvm.ldexp.f32.i32(float [[TMP5]], i32 [[TMP4]])
|
|
; IEEE-BADFREXP-NEXT: [[ARCP1:%.*]] = fmul arcp float [[Y]], [[TMP6]]
|
|
; IEEE-BADFREXP-NEXT: store volatile float [[ARCP0]], ptr addrspace(1) [[OUT]], align 4
|
|
; IEEE-BADFREXP-NEXT: store volatile float [[ARCP1]], ptr addrspace(1) [[OUT]], align 4
|
|
; IEEE-BADFREXP-NEXT: ret void
|
|
;
|
|
; DAZ-LABEL: define amdgpu_kernel void @multiple_arcp_fdiv_denom_nomd_25ulp
|
|
; DAZ-SAME: (ptr addrspace(1) [[OUT:%.*]], float [[X:%.*]], float [[Y:%.*]], float [[DENOM:%.*]]) #[[ATTR1]] {
|
|
; DAZ-NEXT: [[ARCP0:%.*]] = fdiv arcp float [[X]], [[DENOM]]
|
|
; DAZ-NEXT: [[TMP1:%.*]] = call arcp float @llvm.amdgcn.rcp.f32(float [[DENOM]])
|
|
; DAZ-NEXT: [[ARCP1:%.*]] = fmul arcp float [[Y]], [[TMP1]]
|
|
; DAZ-NEXT: store volatile float [[ARCP0]], ptr addrspace(1) [[OUT]], align 4
|
|
; DAZ-NEXT: store volatile float [[ARCP1]], ptr addrspace(1) [[OUT]], align 4
|
|
; DAZ-NEXT: ret void
|
|
;
|
|
%arcp0 = fdiv arcp float %x, %denom
|
|
%arcp1 = fdiv arcp float %y, %denom, !fpmath !0
|
|
store volatile float %arcp0, ptr addrspace(1) %out
|
|
store volatile float %arcp1, ptr addrspace(1) %out
|
|
ret void
|
|
}
|
|
|
|
define amdgpu_kernel void @multiple_arcp_fdiv_denom_1ulp(ptr addrspace(1) %out, float %x, float %y, float %denom) {
|
|
; IEEE-GOODFREXP-LABEL: define amdgpu_kernel void @multiple_arcp_fdiv_denom_1ulp
|
|
; IEEE-GOODFREXP-SAME: (ptr addrspace(1) [[OUT:%.*]], float [[X:%.*]], float [[Y:%.*]], float [[DENOM:%.*]]) #[[ATTR1]] {
|
|
; IEEE-GOODFREXP-NEXT: [[TMP1:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[DENOM]])
|
|
; IEEE-GOODFREXP-NEXT: [[TMP2:%.*]] = extractvalue { float, i32 } [[TMP1]], 0
|
|
; IEEE-GOODFREXP-NEXT: [[TMP3:%.*]] = extractvalue { float, i32 } [[TMP1]], 1
|
|
; IEEE-GOODFREXP-NEXT: [[TMP4:%.*]] = sub i32 0, [[TMP3]]
|
|
; IEEE-GOODFREXP-NEXT: [[TMP5:%.*]] = call arcp float @llvm.amdgcn.rcp.f32(float [[TMP2]])
|
|
; IEEE-GOODFREXP-NEXT: [[TMP6:%.*]] = call arcp float @llvm.ldexp.f32.i32(float [[TMP5]], i32 [[TMP4]])
|
|
; IEEE-GOODFREXP-NEXT: [[ARCP0:%.*]] = fmul arcp float [[X]], [[TMP6]]
|
|
; IEEE-GOODFREXP-NEXT: [[TMP7:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[DENOM]])
|
|
; IEEE-GOODFREXP-NEXT: [[TMP8:%.*]] = extractvalue { float, i32 } [[TMP7]], 0
|
|
; IEEE-GOODFREXP-NEXT: [[TMP9:%.*]] = extractvalue { float, i32 } [[TMP7]], 1
|
|
; IEEE-GOODFREXP-NEXT: [[TMP10:%.*]] = sub i32 0, [[TMP9]]
|
|
; IEEE-GOODFREXP-NEXT: [[TMP11:%.*]] = call arcp float @llvm.amdgcn.rcp.f32(float [[TMP8]])
|
|
; IEEE-GOODFREXP-NEXT: [[TMP12:%.*]] = call arcp float @llvm.ldexp.f32.i32(float [[TMP11]], i32 [[TMP10]])
|
|
; IEEE-GOODFREXP-NEXT: [[ARCP1:%.*]] = fmul arcp float [[Y]], [[TMP12]]
|
|
; IEEE-GOODFREXP-NEXT: store volatile float [[ARCP0]], ptr addrspace(1) [[OUT]], align 4
|
|
; IEEE-GOODFREXP-NEXT: store volatile float [[ARCP1]], ptr addrspace(1) [[OUT]], align 4
|
|
; IEEE-GOODFREXP-NEXT: ret void
|
|
;
|
|
; IEEE-BADFREXP-LABEL: define amdgpu_kernel void @multiple_arcp_fdiv_denom_1ulp
|
|
; IEEE-BADFREXP-SAME: (ptr addrspace(1) [[OUT:%.*]], float [[X:%.*]], float [[Y:%.*]], float [[DENOM:%.*]]) #[[ATTR1]] {
|
|
; IEEE-BADFREXP-NEXT: [[TMP1:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[DENOM]])
|
|
; IEEE-BADFREXP-NEXT: [[TMP2:%.*]] = extractvalue { float, i32 } [[TMP1]], 0
|
|
; IEEE-BADFREXP-NEXT: [[TMP3:%.*]] = call i32 @llvm.amdgcn.frexp.exp.i32.f32(float [[DENOM]])
|
|
; IEEE-BADFREXP-NEXT: [[TMP4:%.*]] = sub i32 0, [[TMP3]]
|
|
; IEEE-BADFREXP-NEXT: [[TMP5:%.*]] = call arcp float @llvm.amdgcn.rcp.f32(float [[TMP2]])
|
|
; IEEE-BADFREXP-NEXT: [[TMP6:%.*]] = call arcp float @llvm.ldexp.f32.i32(float [[TMP5]], i32 [[TMP4]])
|
|
; IEEE-BADFREXP-NEXT: [[ARCP0:%.*]] = fmul arcp float [[X]], [[TMP6]]
|
|
; IEEE-BADFREXP-NEXT: [[TMP7:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[DENOM]])
|
|
; IEEE-BADFREXP-NEXT: [[TMP8:%.*]] = extractvalue { float, i32 } [[TMP7]], 0
|
|
; IEEE-BADFREXP-NEXT: [[TMP9:%.*]] = call i32 @llvm.amdgcn.frexp.exp.i32.f32(float [[DENOM]])
|
|
; IEEE-BADFREXP-NEXT: [[TMP10:%.*]] = sub i32 0, [[TMP9]]
|
|
; IEEE-BADFREXP-NEXT: [[TMP11:%.*]] = call arcp float @llvm.amdgcn.rcp.f32(float [[TMP8]])
|
|
; IEEE-BADFREXP-NEXT: [[TMP12:%.*]] = call arcp float @llvm.ldexp.f32.i32(float [[TMP11]], i32 [[TMP10]])
|
|
; IEEE-BADFREXP-NEXT: [[ARCP1:%.*]] = fmul arcp float [[Y]], [[TMP12]]
|
|
; IEEE-BADFREXP-NEXT: store volatile float [[ARCP0]], ptr addrspace(1) [[OUT]], align 4
|
|
; IEEE-BADFREXP-NEXT: store volatile float [[ARCP1]], ptr addrspace(1) [[OUT]], align 4
|
|
; IEEE-BADFREXP-NEXT: ret void
|
|
;
|
|
; DAZ-LABEL: define amdgpu_kernel void @multiple_arcp_fdiv_denom_1ulp
|
|
; DAZ-SAME: (ptr addrspace(1) [[OUT:%.*]], float [[X:%.*]], float [[Y:%.*]], float [[DENOM:%.*]]) #[[ATTR1]] {
|
|
; DAZ-NEXT: [[TMP1:%.*]] = call arcp float @llvm.amdgcn.rcp.f32(float [[DENOM]])
|
|
; DAZ-NEXT: [[ARCP0:%.*]] = fmul arcp float [[X]], [[TMP1]]
|
|
; DAZ-NEXT: [[TMP2:%.*]] = call arcp float @llvm.amdgcn.rcp.f32(float [[DENOM]])
|
|
; DAZ-NEXT: [[ARCP1:%.*]] = fmul arcp float [[Y]], [[TMP2]]
|
|
; DAZ-NEXT: store volatile float [[ARCP0]], ptr addrspace(1) [[OUT]], align 4
|
|
; DAZ-NEXT: store volatile float [[ARCP1]], ptr addrspace(1) [[OUT]], align 4
|
|
; DAZ-NEXT: ret void
|
|
;
|
|
%arcp0 = fdiv arcp float %x, %denom, !fpmath !2
|
|
%arcp1 = fdiv arcp float %y, %denom, !fpmath !2
|
|
store volatile float %arcp0, ptr addrspace(1) %out
|
|
store volatile float %arcp1, ptr addrspace(1) %out
|
|
ret void
|
|
}
|
|
|
|
define amdgpu_kernel void @multiple_arcp_fdiv_denom_1ulp_vector(ptr addrspace(1) %out, <2 x float> %x, <2 x float> %y, <2 x float> %denom) {
|
|
; IEEE-GOODFREXP-LABEL: define amdgpu_kernel void @multiple_arcp_fdiv_denom_1ulp_vector
|
|
; IEEE-GOODFREXP-SAME: (ptr addrspace(1) [[OUT:%.*]], <2 x float> [[X:%.*]], <2 x float> [[Y:%.*]], <2 x float> [[DENOM:%.*]]) #[[ATTR1]] {
|
|
; IEEE-GOODFREXP-NEXT: [[TMP1:%.*]] = extractelement <2 x float> [[X]], i64 0
|
|
; IEEE-GOODFREXP-NEXT: [[TMP2:%.*]] = extractelement <2 x float> [[X]], i64 1
|
|
; IEEE-GOODFREXP-NEXT: [[TMP3:%.*]] = extractelement <2 x float> [[DENOM]], i64 0
|
|
; IEEE-GOODFREXP-NEXT: [[TMP4:%.*]] = extractelement <2 x float> [[DENOM]], i64 1
|
|
; IEEE-GOODFREXP-NEXT: [[TMP5:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[TMP3]])
|
|
; IEEE-GOODFREXP-NEXT: [[TMP6:%.*]] = extractvalue { float, i32 } [[TMP5]], 0
|
|
; IEEE-GOODFREXP-NEXT: [[TMP7:%.*]] = extractvalue { float, i32 } [[TMP5]], 1
|
|
; IEEE-GOODFREXP-NEXT: [[TMP8:%.*]] = sub i32 0, [[TMP7]]
|
|
; IEEE-GOODFREXP-NEXT: [[TMP9:%.*]] = call arcp float @llvm.amdgcn.rcp.f32(float [[TMP6]])
|
|
; IEEE-GOODFREXP-NEXT: [[TMP10:%.*]] = call arcp float @llvm.ldexp.f32.i32(float [[TMP9]], i32 [[TMP8]])
|
|
; IEEE-GOODFREXP-NEXT: [[TMP11:%.*]] = fmul arcp float [[TMP1]], [[TMP10]]
|
|
; IEEE-GOODFREXP-NEXT: [[TMP12:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[TMP4]])
|
|
; IEEE-GOODFREXP-NEXT: [[TMP13:%.*]] = extractvalue { float, i32 } [[TMP12]], 0
|
|
; IEEE-GOODFREXP-NEXT: [[TMP14:%.*]] = extractvalue { float, i32 } [[TMP12]], 1
|
|
; IEEE-GOODFREXP-NEXT: [[TMP15:%.*]] = sub i32 0, [[TMP14]]
|
|
; IEEE-GOODFREXP-NEXT: [[TMP16:%.*]] = call arcp float @llvm.amdgcn.rcp.f32(float [[TMP13]])
|
|
; IEEE-GOODFREXP-NEXT: [[TMP17:%.*]] = call arcp float @llvm.ldexp.f32.i32(float [[TMP16]], i32 [[TMP15]])
|
|
; IEEE-GOODFREXP-NEXT: [[TMP18:%.*]] = fmul arcp float [[TMP2]], [[TMP17]]
|
|
; IEEE-GOODFREXP-NEXT: [[TMP19:%.*]] = insertelement <2 x float> poison, float [[TMP11]], i64 0
|
|
; IEEE-GOODFREXP-NEXT: [[ARCP0:%.*]] = insertelement <2 x float> [[TMP19]], float [[TMP18]], i64 1
|
|
; IEEE-GOODFREXP-NEXT: [[TMP20:%.*]] = extractelement <2 x float> [[Y]], i64 0
|
|
; IEEE-GOODFREXP-NEXT: [[TMP21:%.*]] = extractelement <2 x float> [[Y]], i64 1
|
|
; IEEE-GOODFREXP-NEXT: [[TMP22:%.*]] = extractelement <2 x float> [[DENOM]], i64 0
|
|
; IEEE-GOODFREXP-NEXT: [[TMP23:%.*]] = extractelement <2 x float> [[DENOM]], i64 1
|
|
; IEEE-GOODFREXP-NEXT: [[TMP24:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[TMP22]])
|
|
; IEEE-GOODFREXP-NEXT: [[TMP25:%.*]] = extractvalue { float, i32 } [[TMP24]], 0
|
|
; IEEE-GOODFREXP-NEXT: [[TMP26:%.*]] = extractvalue { float, i32 } [[TMP24]], 1
|
|
; IEEE-GOODFREXP-NEXT: [[TMP27:%.*]] = sub i32 0, [[TMP26]]
|
|
; IEEE-GOODFREXP-NEXT: [[TMP28:%.*]] = call arcp float @llvm.amdgcn.rcp.f32(float [[TMP25]])
|
|
; IEEE-GOODFREXP-NEXT: [[TMP29:%.*]] = call arcp float @llvm.ldexp.f32.i32(float [[TMP28]], i32 [[TMP27]])
|
|
; IEEE-GOODFREXP-NEXT: [[TMP30:%.*]] = fmul arcp float [[TMP20]], [[TMP29]]
|
|
; IEEE-GOODFREXP-NEXT: [[TMP31:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[TMP23]])
|
|
; IEEE-GOODFREXP-NEXT: [[TMP32:%.*]] = extractvalue { float, i32 } [[TMP31]], 0
|
|
; IEEE-GOODFREXP-NEXT: [[TMP33:%.*]] = extractvalue { float, i32 } [[TMP31]], 1
|
|
; IEEE-GOODFREXP-NEXT: [[TMP34:%.*]] = sub i32 0, [[TMP33]]
|
|
; IEEE-GOODFREXP-NEXT: [[TMP35:%.*]] = call arcp float @llvm.amdgcn.rcp.f32(float [[TMP32]])
|
|
; IEEE-GOODFREXP-NEXT: [[TMP36:%.*]] = call arcp float @llvm.ldexp.f32.i32(float [[TMP35]], i32 [[TMP34]])
|
|
; IEEE-GOODFREXP-NEXT: [[TMP37:%.*]] = fmul arcp float [[TMP21]], [[TMP36]]
|
|
; IEEE-GOODFREXP-NEXT: [[TMP38:%.*]] = insertelement <2 x float> poison, float [[TMP30]], i64 0
|
|
; IEEE-GOODFREXP-NEXT: [[ARCP1:%.*]] = insertelement <2 x float> [[TMP38]], float [[TMP37]], i64 1
|
|
; IEEE-GOODFREXP-NEXT: store volatile <2 x float> [[ARCP0]], ptr addrspace(1) [[OUT]], align 8
|
|
; IEEE-GOODFREXP-NEXT: store volatile <2 x float> [[ARCP1]], ptr addrspace(1) [[OUT]], align 8
|
|
; IEEE-GOODFREXP-NEXT: ret void
|
|
;
|
|
; IEEE-BADFREXP-LABEL: define amdgpu_kernel void @multiple_arcp_fdiv_denom_1ulp_vector
|
|
; IEEE-BADFREXP-SAME: (ptr addrspace(1) [[OUT:%.*]], <2 x float> [[X:%.*]], <2 x float> [[Y:%.*]], <2 x float> [[DENOM:%.*]]) #[[ATTR1]] {
|
|
; IEEE-BADFREXP-NEXT: [[TMP1:%.*]] = extractelement <2 x float> [[X]], i64 0
|
|
; IEEE-BADFREXP-NEXT: [[TMP2:%.*]] = extractelement <2 x float> [[X]], i64 1
|
|
; IEEE-BADFREXP-NEXT: [[TMP3:%.*]] = extractelement <2 x float> [[DENOM]], i64 0
|
|
; IEEE-BADFREXP-NEXT: [[TMP4:%.*]] = extractelement <2 x float> [[DENOM]], i64 1
|
|
; IEEE-BADFREXP-NEXT: [[TMP5:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[TMP3]])
|
|
; IEEE-BADFREXP-NEXT: [[TMP6:%.*]] = extractvalue { float, i32 } [[TMP5]], 0
|
|
; IEEE-BADFREXP-NEXT: [[TMP7:%.*]] = call i32 @llvm.amdgcn.frexp.exp.i32.f32(float [[TMP3]])
|
|
; IEEE-BADFREXP-NEXT: [[TMP8:%.*]] = sub i32 0, [[TMP7]]
|
|
; IEEE-BADFREXP-NEXT: [[TMP9:%.*]] = call arcp float @llvm.amdgcn.rcp.f32(float [[TMP6]])
|
|
; IEEE-BADFREXP-NEXT: [[TMP10:%.*]] = call arcp float @llvm.ldexp.f32.i32(float [[TMP9]], i32 [[TMP8]])
|
|
; IEEE-BADFREXP-NEXT: [[TMP11:%.*]] = fmul arcp float [[TMP1]], [[TMP10]]
|
|
; IEEE-BADFREXP-NEXT: [[TMP12:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[TMP4]])
|
|
; IEEE-BADFREXP-NEXT: [[TMP13:%.*]] = extractvalue { float, i32 } [[TMP12]], 0
|
|
; IEEE-BADFREXP-NEXT: [[TMP14:%.*]] = call i32 @llvm.amdgcn.frexp.exp.i32.f32(float [[TMP4]])
|
|
; IEEE-BADFREXP-NEXT: [[TMP15:%.*]] = sub i32 0, [[TMP14]]
|
|
; IEEE-BADFREXP-NEXT: [[TMP16:%.*]] = call arcp float @llvm.amdgcn.rcp.f32(float [[TMP13]])
|
|
; IEEE-BADFREXP-NEXT: [[TMP17:%.*]] = call arcp float @llvm.ldexp.f32.i32(float [[TMP16]], i32 [[TMP15]])
|
|
; IEEE-BADFREXP-NEXT: [[TMP18:%.*]] = fmul arcp float [[TMP2]], [[TMP17]]
|
|
; IEEE-BADFREXP-NEXT: [[TMP19:%.*]] = insertelement <2 x float> poison, float [[TMP11]], i64 0
|
|
; IEEE-BADFREXP-NEXT: [[ARCP0:%.*]] = insertelement <2 x float> [[TMP19]], float [[TMP18]], i64 1
|
|
; IEEE-BADFREXP-NEXT: [[TMP20:%.*]] = extractelement <2 x float> [[Y]], i64 0
|
|
; IEEE-BADFREXP-NEXT: [[TMP21:%.*]] = extractelement <2 x float> [[Y]], i64 1
|
|
; IEEE-BADFREXP-NEXT: [[TMP22:%.*]] = extractelement <2 x float> [[DENOM]], i64 0
|
|
; IEEE-BADFREXP-NEXT: [[TMP23:%.*]] = extractelement <2 x float> [[DENOM]], i64 1
|
|
; IEEE-BADFREXP-NEXT: [[TMP24:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[TMP22]])
|
|
; IEEE-BADFREXP-NEXT: [[TMP25:%.*]] = extractvalue { float, i32 } [[TMP24]], 0
|
|
; IEEE-BADFREXP-NEXT: [[TMP26:%.*]] = call i32 @llvm.amdgcn.frexp.exp.i32.f32(float [[TMP22]])
|
|
; IEEE-BADFREXP-NEXT: [[TMP27:%.*]] = sub i32 0, [[TMP26]]
|
|
; IEEE-BADFREXP-NEXT: [[TMP28:%.*]] = call arcp float @llvm.amdgcn.rcp.f32(float [[TMP25]])
|
|
; IEEE-BADFREXP-NEXT: [[TMP29:%.*]] = call arcp float @llvm.ldexp.f32.i32(float [[TMP28]], i32 [[TMP27]])
|
|
; IEEE-BADFREXP-NEXT: [[TMP30:%.*]] = fmul arcp float [[TMP20]], [[TMP29]]
|
|
; IEEE-BADFREXP-NEXT: [[TMP31:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[TMP23]])
|
|
; IEEE-BADFREXP-NEXT: [[TMP32:%.*]] = extractvalue { float, i32 } [[TMP31]], 0
|
|
; IEEE-BADFREXP-NEXT: [[TMP33:%.*]] = call i32 @llvm.amdgcn.frexp.exp.i32.f32(float [[TMP23]])
|
|
; IEEE-BADFREXP-NEXT: [[TMP34:%.*]] = sub i32 0, [[TMP33]]
|
|
; IEEE-BADFREXP-NEXT: [[TMP35:%.*]] = call arcp float @llvm.amdgcn.rcp.f32(float [[TMP32]])
|
|
; IEEE-BADFREXP-NEXT: [[TMP36:%.*]] = call arcp float @llvm.ldexp.f32.i32(float [[TMP35]], i32 [[TMP34]])
|
|
; IEEE-BADFREXP-NEXT: [[TMP37:%.*]] = fmul arcp float [[TMP21]], [[TMP36]]
|
|
; IEEE-BADFREXP-NEXT: [[TMP38:%.*]] = insertelement <2 x float> poison, float [[TMP30]], i64 0
|
|
; IEEE-BADFREXP-NEXT: [[ARCP1:%.*]] = insertelement <2 x float> [[TMP38]], float [[TMP37]], i64 1
|
|
; IEEE-BADFREXP-NEXT: store volatile <2 x float> [[ARCP0]], ptr addrspace(1) [[OUT]], align 8
|
|
; IEEE-BADFREXP-NEXT: store volatile <2 x float> [[ARCP1]], ptr addrspace(1) [[OUT]], align 8
|
|
; IEEE-BADFREXP-NEXT: ret void
|
|
;
|
|
; DAZ-LABEL: define amdgpu_kernel void @multiple_arcp_fdiv_denom_1ulp_vector
|
|
; DAZ-SAME: (ptr addrspace(1) [[OUT:%.*]], <2 x float> [[X:%.*]], <2 x float> [[Y:%.*]], <2 x float> [[DENOM:%.*]]) #[[ATTR1]] {
|
|
; DAZ-NEXT: [[TMP1:%.*]] = extractelement <2 x float> [[X]], i64 0
|
|
; DAZ-NEXT: [[TMP2:%.*]] = extractelement <2 x float> [[X]], i64 1
|
|
; DAZ-NEXT: [[TMP3:%.*]] = extractelement <2 x float> [[DENOM]], i64 0
|
|
; DAZ-NEXT: [[TMP4:%.*]] = extractelement <2 x float> [[DENOM]], i64 1
|
|
; DAZ-NEXT: [[TMP5:%.*]] = call arcp float @llvm.amdgcn.rcp.f32(float [[TMP3]])
|
|
; DAZ-NEXT: [[TMP6:%.*]] = fmul arcp float [[TMP1]], [[TMP5]]
|
|
; DAZ-NEXT: [[TMP7:%.*]] = call arcp float @llvm.amdgcn.rcp.f32(float [[TMP4]])
|
|
; DAZ-NEXT: [[TMP8:%.*]] = fmul arcp float [[TMP2]], [[TMP7]]
|
|
; DAZ-NEXT: [[TMP9:%.*]] = insertelement <2 x float> poison, float [[TMP6]], i64 0
|
|
; DAZ-NEXT: [[ARCP0:%.*]] = insertelement <2 x float> [[TMP9]], float [[TMP8]], i64 1
|
|
; DAZ-NEXT: [[TMP10:%.*]] = extractelement <2 x float> [[Y]], i64 0
|
|
; DAZ-NEXT: [[TMP11:%.*]] = extractelement <2 x float> [[Y]], i64 1
|
|
; DAZ-NEXT: [[TMP12:%.*]] = extractelement <2 x float> [[DENOM]], i64 0
|
|
; DAZ-NEXT: [[TMP13:%.*]] = extractelement <2 x float> [[DENOM]], i64 1
|
|
; DAZ-NEXT: [[TMP14:%.*]] = call arcp float @llvm.amdgcn.rcp.f32(float [[TMP12]])
|
|
; DAZ-NEXT: [[TMP15:%.*]] = fmul arcp float [[TMP10]], [[TMP14]]
|
|
; DAZ-NEXT: [[TMP16:%.*]] = call arcp float @llvm.amdgcn.rcp.f32(float [[TMP13]])
|
|
; DAZ-NEXT: [[TMP17:%.*]] = fmul arcp float [[TMP11]], [[TMP16]]
|
|
; DAZ-NEXT: [[TMP18:%.*]] = insertelement <2 x float> poison, float [[TMP15]], i64 0
|
|
; DAZ-NEXT: [[ARCP1:%.*]] = insertelement <2 x float> [[TMP18]], float [[TMP17]], i64 1
|
|
; DAZ-NEXT: store volatile <2 x float> [[ARCP0]], ptr addrspace(1) [[OUT]], align 8
|
|
; DAZ-NEXT: store volatile <2 x float> [[ARCP1]], ptr addrspace(1) [[OUT]], align 8
|
|
; DAZ-NEXT: ret void
|
|
;
|
|
%arcp0 = fdiv arcp <2 x float> %x, %denom, !fpmath !2
|
|
%arcp1 = fdiv arcp <2 x float> %y, %denom, !fpmath !2
|
|
store volatile <2 x float> %arcp0, ptr addrspace(1) %out
|
|
store volatile <2 x float> %arcp1, ptr addrspace(1) %out
|
|
ret void
|
|
}
|
|
|
|
define amdgpu_kernel void @multiple_arcp_fdiv_sqrt_denom_25ulp(ptr addrspace(1) %out, float %x, float %y, float %sqr.denom) {
|
|
; IEEE-GOODFREXP-LABEL: define amdgpu_kernel void @multiple_arcp_fdiv_sqrt_denom_25ulp
|
|
; IEEE-GOODFREXP-SAME: (ptr addrspace(1) [[OUT:%.*]], float [[X:%.*]], float [[Y:%.*]], float [[SQR_DENOM:%.*]]) #[[ATTR1]] {
|
|
; IEEE-GOODFREXP-NEXT: [[DENOM:%.*]] = call contract float @llvm.sqrt.f32(float [[SQR_DENOM]]), !fpmath !3
|
|
; IEEE-GOODFREXP-NEXT: [[TMP1:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[DENOM]])
|
|
; IEEE-GOODFREXP-NEXT: [[TMP2:%.*]] = extractvalue { float, i32 } [[TMP1]], 0
|
|
; IEEE-GOODFREXP-NEXT: [[TMP3:%.*]] = extractvalue { float, i32 } [[TMP1]], 1
|
|
; IEEE-GOODFREXP-NEXT: [[TMP4:%.*]] = sub i32 0, [[TMP3]]
|
|
; IEEE-GOODFREXP-NEXT: [[TMP5:%.*]] = call arcp contract float @llvm.amdgcn.rcp.f32(float [[TMP2]])
|
|
; IEEE-GOODFREXP-NEXT: [[TMP6:%.*]] = call arcp contract float @llvm.ldexp.f32.i32(float [[TMP5]], i32 [[TMP4]])
|
|
; IEEE-GOODFREXP-NEXT: [[ARCP0:%.*]] = fmul arcp contract float [[X]], [[TMP6]]
|
|
; IEEE-GOODFREXP-NEXT: [[TMP7:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[DENOM]])
|
|
; IEEE-GOODFREXP-NEXT: [[TMP8:%.*]] = extractvalue { float, i32 } [[TMP7]], 0
|
|
; IEEE-GOODFREXP-NEXT: [[TMP9:%.*]] = extractvalue { float, i32 } [[TMP7]], 1
|
|
; IEEE-GOODFREXP-NEXT: [[TMP10:%.*]] = sub i32 0, [[TMP9]]
|
|
; IEEE-GOODFREXP-NEXT: [[TMP11:%.*]] = call arcp contract float @llvm.amdgcn.rcp.f32(float [[TMP8]])
|
|
; IEEE-GOODFREXP-NEXT: [[TMP12:%.*]] = call arcp contract float @llvm.ldexp.f32.i32(float [[TMP11]], i32 [[TMP10]])
|
|
; IEEE-GOODFREXP-NEXT: [[ARCP1:%.*]] = fmul arcp contract float [[Y]], [[TMP12]]
|
|
; IEEE-GOODFREXP-NEXT: store volatile float [[ARCP0]], ptr addrspace(1) [[OUT]], align 4
|
|
; IEEE-GOODFREXP-NEXT: store volatile float [[ARCP1]], ptr addrspace(1) [[OUT]], align 4
|
|
; IEEE-GOODFREXP-NEXT: ret void
|
|
;
|
|
; IEEE-BADFREXP-LABEL: define amdgpu_kernel void @multiple_arcp_fdiv_sqrt_denom_25ulp
|
|
; IEEE-BADFREXP-SAME: (ptr addrspace(1) [[OUT:%.*]], float [[X:%.*]], float [[Y:%.*]], float [[SQR_DENOM:%.*]]) #[[ATTR1]] {
|
|
; IEEE-BADFREXP-NEXT: [[DENOM:%.*]] = call contract float @llvm.sqrt.f32(float [[SQR_DENOM]]), !fpmath !3
|
|
; IEEE-BADFREXP-NEXT: [[TMP1:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[DENOM]])
|
|
; IEEE-BADFREXP-NEXT: [[TMP2:%.*]] = extractvalue { float, i32 } [[TMP1]], 0
|
|
; IEEE-BADFREXP-NEXT: [[TMP3:%.*]] = call i32 @llvm.amdgcn.frexp.exp.i32.f32(float [[DENOM]])
|
|
; IEEE-BADFREXP-NEXT: [[TMP4:%.*]] = sub i32 0, [[TMP3]]
|
|
; IEEE-BADFREXP-NEXT: [[TMP5:%.*]] = call arcp contract float @llvm.amdgcn.rcp.f32(float [[TMP2]])
|
|
; IEEE-BADFREXP-NEXT: [[TMP6:%.*]] = call arcp contract float @llvm.ldexp.f32.i32(float [[TMP5]], i32 [[TMP4]])
|
|
; IEEE-BADFREXP-NEXT: [[ARCP0:%.*]] = fmul arcp contract float [[X]], [[TMP6]]
|
|
; IEEE-BADFREXP-NEXT: [[TMP7:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[DENOM]])
|
|
; IEEE-BADFREXP-NEXT: [[TMP8:%.*]] = extractvalue { float, i32 } [[TMP7]], 0
|
|
; IEEE-BADFREXP-NEXT: [[TMP9:%.*]] = call i32 @llvm.amdgcn.frexp.exp.i32.f32(float [[DENOM]])
|
|
; IEEE-BADFREXP-NEXT: [[TMP10:%.*]] = sub i32 0, [[TMP9]]
|
|
; IEEE-BADFREXP-NEXT: [[TMP11:%.*]] = call arcp contract float @llvm.amdgcn.rcp.f32(float [[TMP8]])
|
|
; IEEE-BADFREXP-NEXT: [[TMP12:%.*]] = call arcp contract float @llvm.ldexp.f32.i32(float [[TMP11]], i32 [[TMP10]])
|
|
; IEEE-BADFREXP-NEXT: [[ARCP1:%.*]] = fmul arcp contract float [[Y]], [[TMP12]]
|
|
; IEEE-BADFREXP-NEXT: store volatile float [[ARCP0]], ptr addrspace(1) [[OUT]], align 4
|
|
; IEEE-BADFREXP-NEXT: store volatile float [[ARCP1]], ptr addrspace(1) [[OUT]], align 4
|
|
; IEEE-BADFREXP-NEXT: ret void
|
|
;
|
|
; DAZ-LABEL: define amdgpu_kernel void @multiple_arcp_fdiv_sqrt_denom_25ulp
|
|
; DAZ-SAME: (ptr addrspace(1) [[OUT:%.*]], float [[X:%.*]], float [[Y:%.*]], float [[SQR_DENOM:%.*]]) #[[ATTR1]] {
|
|
; DAZ-NEXT: [[DENOM:%.*]] = call contract float @llvm.sqrt.f32(float [[SQR_DENOM]]), !fpmath !3
|
|
; DAZ-NEXT: [[TMP1:%.*]] = call arcp contract float @llvm.amdgcn.rcp.f32(float [[DENOM]])
|
|
; DAZ-NEXT: [[ARCP0:%.*]] = fmul arcp contract float [[X]], [[TMP1]]
|
|
; DAZ-NEXT: [[TMP2:%.*]] = call arcp contract float @llvm.amdgcn.rcp.f32(float [[DENOM]])
|
|
; DAZ-NEXT: [[ARCP1:%.*]] = fmul arcp contract float [[Y]], [[TMP2]]
|
|
; DAZ-NEXT: store volatile float [[ARCP0]], ptr addrspace(1) [[OUT]], align 4
|
|
; DAZ-NEXT: store volatile float [[ARCP1]], ptr addrspace(1) [[OUT]], align 4
|
|
; DAZ-NEXT: ret void
|
|
;
|
|
%denom = call contract float @llvm.sqrt.f32(float %sqr.denom), !fpmath !3
|
|
%arcp0 = fdiv contract arcp float %x, %denom, !fpmath !0
|
|
%arcp1 = fdiv contract arcp float %y, %denom, !fpmath !0
|
|
store volatile float %arcp0, ptr addrspace(1) %out
|
|
store volatile float %arcp1, ptr addrspace(1) %out
|
|
ret void
|
|
}
|
|
|
|
define amdgpu_kernel void @multiple_arcp_fdiv_sqrt_denom_vector_25ulp(ptr addrspace(1) %out, <2 x float> %x, <2 x float> %y, <2 x float> %sqr.denom) {
|
|
; IEEE-GOODFREXP-LABEL: define amdgpu_kernel void @multiple_arcp_fdiv_sqrt_denom_vector_25ulp
|
|
; IEEE-GOODFREXP-SAME: (ptr addrspace(1) [[OUT:%.*]], <2 x float> [[X:%.*]], <2 x float> [[Y:%.*]], <2 x float> [[SQR_DENOM:%.*]]) #[[ATTR1]] {
|
|
; IEEE-GOODFREXP-NEXT: [[DENOM:%.*]] = call contract <2 x float> @llvm.sqrt.v2f32(<2 x float> [[SQR_DENOM]]), !fpmath !3
|
|
; IEEE-GOODFREXP-NEXT: [[TMP1:%.*]] = extractelement <2 x float> [[X]], i64 0
|
|
; IEEE-GOODFREXP-NEXT: [[TMP2:%.*]] = extractelement <2 x float> [[X]], i64 1
|
|
; IEEE-GOODFREXP-NEXT: [[TMP3:%.*]] = extractelement <2 x float> [[DENOM]], i64 0
|
|
; IEEE-GOODFREXP-NEXT: [[TMP4:%.*]] = extractelement <2 x float> [[DENOM]], i64 1
|
|
; IEEE-GOODFREXP-NEXT: [[TMP5:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[TMP3]])
|
|
; IEEE-GOODFREXP-NEXT: [[TMP6:%.*]] = extractvalue { float, i32 } [[TMP5]], 0
|
|
; IEEE-GOODFREXP-NEXT: [[TMP7:%.*]] = extractvalue { float, i32 } [[TMP5]], 1
|
|
; IEEE-GOODFREXP-NEXT: [[TMP8:%.*]] = sub i32 0, [[TMP7]]
|
|
; IEEE-GOODFREXP-NEXT: [[TMP9:%.*]] = call arcp contract float @llvm.amdgcn.rcp.f32(float [[TMP6]])
|
|
; IEEE-GOODFREXP-NEXT: [[TMP10:%.*]] = call arcp contract float @llvm.ldexp.f32.i32(float [[TMP9]], i32 [[TMP8]])
|
|
; IEEE-GOODFREXP-NEXT: [[TMP11:%.*]] = fmul arcp contract float [[TMP1]], [[TMP10]]
|
|
; IEEE-GOODFREXP-NEXT: [[TMP12:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[TMP4]])
|
|
; IEEE-GOODFREXP-NEXT: [[TMP13:%.*]] = extractvalue { float, i32 } [[TMP12]], 0
|
|
; IEEE-GOODFREXP-NEXT: [[TMP14:%.*]] = extractvalue { float, i32 } [[TMP12]], 1
|
|
; IEEE-GOODFREXP-NEXT: [[TMP15:%.*]] = sub i32 0, [[TMP14]]
|
|
; IEEE-GOODFREXP-NEXT: [[TMP16:%.*]] = call arcp contract float @llvm.amdgcn.rcp.f32(float [[TMP13]])
|
|
; IEEE-GOODFREXP-NEXT: [[TMP17:%.*]] = call arcp contract float @llvm.ldexp.f32.i32(float [[TMP16]], i32 [[TMP15]])
|
|
; IEEE-GOODFREXP-NEXT: [[TMP18:%.*]] = fmul arcp contract float [[TMP2]], [[TMP17]]
|
|
; IEEE-GOODFREXP-NEXT: [[TMP19:%.*]] = insertelement <2 x float> poison, float [[TMP11]], i64 0
|
|
; IEEE-GOODFREXP-NEXT: [[ARCP0:%.*]] = insertelement <2 x float> [[TMP19]], float [[TMP18]], i64 1
|
|
; IEEE-GOODFREXP-NEXT: [[TMP20:%.*]] = extractelement <2 x float> [[Y]], i64 0
|
|
; IEEE-GOODFREXP-NEXT: [[TMP21:%.*]] = extractelement <2 x float> [[Y]], i64 1
|
|
; IEEE-GOODFREXP-NEXT: [[TMP22:%.*]] = extractelement <2 x float> [[DENOM]], i64 0
|
|
; IEEE-GOODFREXP-NEXT: [[TMP23:%.*]] = extractelement <2 x float> [[DENOM]], i64 1
|
|
; IEEE-GOODFREXP-NEXT: [[TMP24:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[TMP22]])
|
|
; IEEE-GOODFREXP-NEXT: [[TMP25:%.*]] = extractvalue { float, i32 } [[TMP24]], 0
|
|
; IEEE-GOODFREXP-NEXT: [[TMP26:%.*]] = extractvalue { float, i32 } [[TMP24]], 1
|
|
; IEEE-GOODFREXP-NEXT: [[TMP27:%.*]] = sub i32 0, [[TMP26]]
|
|
; IEEE-GOODFREXP-NEXT: [[TMP28:%.*]] = call arcp contract float @llvm.amdgcn.rcp.f32(float [[TMP25]])
|
|
; IEEE-GOODFREXP-NEXT: [[TMP29:%.*]] = call arcp contract float @llvm.ldexp.f32.i32(float [[TMP28]], i32 [[TMP27]])
|
|
; IEEE-GOODFREXP-NEXT: [[TMP30:%.*]] = fmul arcp contract float [[TMP20]], [[TMP29]]
|
|
; IEEE-GOODFREXP-NEXT: [[TMP31:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[TMP23]])
|
|
; IEEE-GOODFREXP-NEXT: [[TMP32:%.*]] = extractvalue { float, i32 } [[TMP31]], 0
|
|
; IEEE-GOODFREXP-NEXT: [[TMP33:%.*]] = extractvalue { float, i32 } [[TMP31]], 1
|
|
; IEEE-GOODFREXP-NEXT: [[TMP34:%.*]] = sub i32 0, [[TMP33]]
|
|
; IEEE-GOODFREXP-NEXT: [[TMP35:%.*]] = call arcp contract float @llvm.amdgcn.rcp.f32(float [[TMP32]])
|
|
; IEEE-GOODFREXP-NEXT: [[TMP36:%.*]] = call arcp contract float @llvm.ldexp.f32.i32(float [[TMP35]], i32 [[TMP34]])
|
|
; IEEE-GOODFREXP-NEXT: [[TMP37:%.*]] = fmul arcp contract float [[TMP21]], [[TMP36]]
|
|
; IEEE-GOODFREXP-NEXT: [[TMP38:%.*]] = insertelement <2 x float> poison, float [[TMP30]], i64 0
|
|
; IEEE-GOODFREXP-NEXT: [[ARCP1:%.*]] = insertelement <2 x float> [[TMP38]], float [[TMP37]], i64 1
|
|
; IEEE-GOODFREXP-NEXT: store volatile <2 x float> [[ARCP0]], ptr addrspace(1) [[OUT]], align 8
|
|
; IEEE-GOODFREXP-NEXT: store volatile <2 x float> [[ARCP1]], ptr addrspace(1) [[OUT]], align 8
|
|
; IEEE-GOODFREXP-NEXT: ret void
|
|
;
|
|
; IEEE-BADFREXP-LABEL: define amdgpu_kernel void @multiple_arcp_fdiv_sqrt_denom_vector_25ulp
|
|
; IEEE-BADFREXP-SAME: (ptr addrspace(1) [[OUT:%.*]], <2 x float> [[X:%.*]], <2 x float> [[Y:%.*]], <2 x float> [[SQR_DENOM:%.*]]) #[[ATTR1]] {
|
|
; IEEE-BADFREXP-NEXT: [[DENOM:%.*]] = call contract <2 x float> @llvm.sqrt.v2f32(<2 x float> [[SQR_DENOM]]), !fpmath !3
|
|
; IEEE-BADFREXP-NEXT: [[TMP1:%.*]] = extractelement <2 x float> [[X]], i64 0
|
|
; IEEE-BADFREXP-NEXT: [[TMP2:%.*]] = extractelement <2 x float> [[X]], i64 1
|
|
; IEEE-BADFREXP-NEXT: [[TMP3:%.*]] = extractelement <2 x float> [[DENOM]], i64 0
|
|
; IEEE-BADFREXP-NEXT: [[TMP4:%.*]] = extractelement <2 x float> [[DENOM]], i64 1
|
|
; IEEE-BADFREXP-NEXT: [[TMP5:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[TMP3]])
|
|
; IEEE-BADFREXP-NEXT: [[TMP6:%.*]] = extractvalue { float, i32 } [[TMP5]], 0
|
|
; IEEE-BADFREXP-NEXT: [[TMP7:%.*]] = call i32 @llvm.amdgcn.frexp.exp.i32.f32(float [[TMP3]])
|
|
; IEEE-BADFREXP-NEXT: [[TMP8:%.*]] = sub i32 0, [[TMP7]]
|
|
; IEEE-BADFREXP-NEXT: [[TMP9:%.*]] = call arcp contract float @llvm.amdgcn.rcp.f32(float [[TMP6]])
|
|
; IEEE-BADFREXP-NEXT: [[TMP10:%.*]] = call arcp contract float @llvm.ldexp.f32.i32(float [[TMP9]], i32 [[TMP8]])
|
|
; IEEE-BADFREXP-NEXT: [[TMP11:%.*]] = fmul arcp contract float [[TMP1]], [[TMP10]]
|
|
; IEEE-BADFREXP-NEXT: [[TMP12:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[TMP4]])
|
|
; IEEE-BADFREXP-NEXT: [[TMP13:%.*]] = extractvalue { float, i32 } [[TMP12]], 0
|
|
; IEEE-BADFREXP-NEXT: [[TMP14:%.*]] = call i32 @llvm.amdgcn.frexp.exp.i32.f32(float [[TMP4]])
|
|
; IEEE-BADFREXP-NEXT: [[TMP15:%.*]] = sub i32 0, [[TMP14]]
|
|
; IEEE-BADFREXP-NEXT: [[TMP16:%.*]] = call arcp contract float @llvm.amdgcn.rcp.f32(float [[TMP13]])
|
|
; IEEE-BADFREXP-NEXT: [[TMP17:%.*]] = call arcp contract float @llvm.ldexp.f32.i32(float [[TMP16]], i32 [[TMP15]])
|
|
; IEEE-BADFREXP-NEXT: [[TMP18:%.*]] = fmul arcp contract float [[TMP2]], [[TMP17]]
|
|
; IEEE-BADFREXP-NEXT: [[TMP19:%.*]] = insertelement <2 x float> poison, float [[TMP11]], i64 0
|
|
; IEEE-BADFREXP-NEXT: [[ARCP0:%.*]] = insertelement <2 x float> [[TMP19]], float [[TMP18]], i64 1
|
|
; IEEE-BADFREXP-NEXT: [[TMP20:%.*]] = extractelement <2 x float> [[Y]], i64 0
|
|
; IEEE-BADFREXP-NEXT: [[TMP21:%.*]] = extractelement <2 x float> [[Y]], i64 1
|
|
; IEEE-BADFREXP-NEXT: [[TMP22:%.*]] = extractelement <2 x float> [[DENOM]], i64 0
|
|
; IEEE-BADFREXP-NEXT: [[TMP23:%.*]] = extractelement <2 x float> [[DENOM]], i64 1
|
|
; IEEE-BADFREXP-NEXT: [[TMP24:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[TMP22]])
|
|
; IEEE-BADFREXP-NEXT: [[TMP25:%.*]] = extractvalue { float, i32 } [[TMP24]], 0
|
|
; IEEE-BADFREXP-NEXT: [[TMP26:%.*]] = call i32 @llvm.amdgcn.frexp.exp.i32.f32(float [[TMP22]])
|
|
; IEEE-BADFREXP-NEXT: [[TMP27:%.*]] = sub i32 0, [[TMP26]]
|
|
; IEEE-BADFREXP-NEXT: [[TMP28:%.*]] = call arcp contract float @llvm.amdgcn.rcp.f32(float [[TMP25]])
|
|
; IEEE-BADFREXP-NEXT: [[TMP29:%.*]] = call arcp contract float @llvm.ldexp.f32.i32(float [[TMP28]], i32 [[TMP27]])
|
|
; IEEE-BADFREXP-NEXT: [[TMP30:%.*]] = fmul arcp contract float [[TMP20]], [[TMP29]]
|
|
; IEEE-BADFREXP-NEXT: [[TMP31:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[TMP23]])
|
|
; IEEE-BADFREXP-NEXT: [[TMP32:%.*]] = extractvalue { float, i32 } [[TMP31]], 0
|
|
; IEEE-BADFREXP-NEXT: [[TMP33:%.*]] = call i32 @llvm.amdgcn.frexp.exp.i32.f32(float [[TMP23]])
|
|
; IEEE-BADFREXP-NEXT: [[TMP34:%.*]] = sub i32 0, [[TMP33]]
|
|
; IEEE-BADFREXP-NEXT: [[TMP35:%.*]] = call arcp contract float @llvm.amdgcn.rcp.f32(float [[TMP32]])
|
|
; IEEE-BADFREXP-NEXT: [[TMP36:%.*]] = call arcp contract float @llvm.ldexp.f32.i32(float [[TMP35]], i32 [[TMP34]])
|
|
; IEEE-BADFREXP-NEXT: [[TMP37:%.*]] = fmul arcp contract float [[TMP21]], [[TMP36]]
|
|
; IEEE-BADFREXP-NEXT: [[TMP38:%.*]] = insertelement <2 x float> poison, float [[TMP30]], i64 0
|
|
; IEEE-BADFREXP-NEXT: [[ARCP1:%.*]] = insertelement <2 x float> [[TMP38]], float [[TMP37]], i64 1
|
|
; IEEE-BADFREXP-NEXT: store volatile <2 x float> [[ARCP0]], ptr addrspace(1) [[OUT]], align 8
|
|
; IEEE-BADFREXP-NEXT: store volatile <2 x float> [[ARCP1]], ptr addrspace(1) [[OUT]], align 8
|
|
; IEEE-BADFREXP-NEXT: ret void
|
|
;
|
|
; DAZ-LABEL: define amdgpu_kernel void @multiple_arcp_fdiv_sqrt_denom_vector_25ulp
|
|
; DAZ-SAME: (ptr addrspace(1) [[OUT:%.*]], <2 x float> [[X:%.*]], <2 x float> [[Y:%.*]], <2 x float> [[SQR_DENOM:%.*]]) #[[ATTR1]] {
|
|
; DAZ-NEXT: [[DENOM:%.*]] = call contract <2 x float> @llvm.sqrt.v2f32(<2 x float> [[SQR_DENOM]]), !fpmath !3
|
|
; DAZ-NEXT: [[TMP1:%.*]] = extractelement <2 x float> [[X]], i64 0
|
|
; DAZ-NEXT: [[TMP2:%.*]] = extractelement <2 x float> [[X]], i64 1
|
|
; DAZ-NEXT: [[TMP3:%.*]] = extractelement <2 x float> [[DENOM]], i64 0
|
|
; DAZ-NEXT: [[TMP4:%.*]] = extractelement <2 x float> [[DENOM]], i64 1
|
|
; DAZ-NEXT: [[TMP5:%.*]] = call arcp contract float @llvm.amdgcn.rcp.f32(float [[TMP3]])
|
|
; DAZ-NEXT: [[TMP6:%.*]] = fmul arcp contract float [[TMP1]], [[TMP5]]
|
|
; DAZ-NEXT: [[TMP7:%.*]] = call arcp contract float @llvm.amdgcn.rcp.f32(float [[TMP4]])
|
|
; DAZ-NEXT: [[TMP8:%.*]] = fmul arcp contract float [[TMP2]], [[TMP7]]
|
|
; DAZ-NEXT: [[TMP9:%.*]] = insertelement <2 x float> poison, float [[TMP6]], i64 0
|
|
; DAZ-NEXT: [[ARCP0:%.*]] = insertelement <2 x float> [[TMP9]], float [[TMP8]], i64 1
|
|
; DAZ-NEXT: [[TMP10:%.*]] = extractelement <2 x float> [[Y]], i64 0
|
|
; DAZ-NEXT: [[TMP11:%.*]] = extractelement <2 x float> [[Y]], i64 1
|
|
; DAZ-NEXT: [[TMP12:%.*]] = extractelement <2 x float> [[DENOM]], i64 0
|
|
; DAZ-NEXT: [[TMP13:%.*]] = extractelement <2 x float> [[DENOM]], i64 1
|
|
; DAZ-NEXT: [[TMP14:%.*]] = call arcp contract float @llvm.amdgcn.rcp.f32(float [[TMP12]])
|
|
; DAZ-NEXT: [[TMP15:%.*]] = fmul arcp contract float [[TMP10]], [[TMP14]]
|
|
; DAZ-NEXT: [[TMP16:%.*]] = call arcp contract float @llvm.amdgcn.rcp.f32(float [[TMP13]])
|
|
; DAZ-NEXT: [[TMP17:%.*]] = fmul arcp contract float [[TMP11]], [[TMP16]]
|
|
; DAZ-NEXT: [[TMP18:%.*]] = insertelement <2 x float> poison, float [[TMP15]], i64 0
|
|
; DAZ-NEXT: [[ARCP1:%.*]] = insertelement <2 x float> [[TMP18]], float [[TMP17]], i64 1
|
|
; DAZ-NEXT: store volatile <2 x float> [[ARCP0]], ptr addrspace(1) [[OUT]], align 8
|
|
; DAZ-NEXT: store volatile <2 x float> [[ARCP1]], ptr addrspace(1) [[OUT]], align 8
|
|
; DAZ-NEXT: ret void
|
|
;
|
|
%denom = call contract <2 x float> @llvm.sqrt.v2f32(<2 x float> %sqr.denom), !fpmath !3
|
|
%arcp0 = fdiv contract arcp <2 x float> %x, %denom, !fpmath !0
|
|
%arcp1 = fdiv contract arcp <2 x float> %y, %denom, !fpmath !0
|
|
store volatile <2 x float> %arcp0, ptr addrspace(1) %out
|
|
store volatile <2 x float> %arcp1, ptr addrspace(1) %out
|
|
ret void
|
|
}
|
|
|
|
define amdgpu_kernel void @multiple_arcp_fdiv_sqrt_denom_25ulp_x3(ptr addrspace(1) %out, float %x, float %y, float %z, float %sqr.denom) {
|
|
; IEEE-GOODFREXP-LABEL: define amdgpu_kernel void @multiple_arcp_fdiv_sqrt_denom_25ulp_x3
|
|
; IEEE-GOODFREXP-SAME: (ptr addrspace(1) [[OUT:%.*]], float [[X:%.*]], float [[Y:%.*]], float [[Z:%.*]], float [[SQR_DENOM:%.*]]) #[[ATTR1]] {
|
|
; IEEE-GOODFREXP-NEXT: [[DENOM:%.*]] = call contract float @llvm.sqrt.f32(float [[SQR_DENOM]]), !fpmath !3
|
|
; IEEE-GOODFREXP-NEXT: [[TMP1:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[DENOM]])
|
|
; IEEE-GOODFREXP-NEXT: [[TMP2:%.*]] = extractvalue { float, i32 } [[TMP1]], 0
|
|
; IEEE-GOODFREXP-NEXT: [[TMP3:%.*]] = extractvalue { float, i32 } [[TMP1]], 1
|
|
; IEEE-GOODFREXP-NEXT: [[TMP4:%.*]] = sub i32 0, [[TMP3]]
|
|
; IEEE-GOODFREXP-NEXT: [[TMP5:%.*]] = call arcp contract float @llvm.amdgcn.rcp.f32(float [[TMP2]])
|
|
; IEEE-GOODFREXP-NEXT: [[TMP6:%.*]] = call arcp contract float @llvm.ldexp.f32.i32(float [[TMP5]], i32 [[TMP4]])
|
|
; IEEE-GOODFREXP-NEXT: [[ARCP0:%.*]] = fmul arcp contract float [[X]], [[TMP6]]
|
|
; IEEE-GOODFREXP-NEXT: [[TMP7:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[DENOM]])
|
|
; IEEE-GOODFREXP-NEXT: [[TMP8:%.*]] = extractvalue { float, i32 } [[TMP7]], 0
|
|
; IEEE-GOODFREXP-NEXT: [[TMP9:%.*]] = extractvalue { float, i32 } [[TMP7]], 1
|
|
; IEEE-GOODFREXP-NEXT: [[TMP10:%.*]] = sub i32 0, [[TMP9]]
|
|
; IEEE-GOODFREXP-NEXT: [[TMP11:%.*]] = call arcp contract float @llvm.amdgcn.rcp.f32(float [[TMP8]])
|
|
; IEEE-GOODFREXP-NEXT: [[TMP12:%.*]] = call arcp contract float @llvm.ldexp.f32.i32(float [[TMP11]], i32 [[TMP10]])
|
|
; IEEE-GOODFREXP-NEXT: [[ARCP1:%.*]] = fmul arcp contract float [[Y]], [[TMP12]]
|
|
; IEEE-GOODFREXP-NEXT: [[TMP13:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[DENOM]])
|
|
; IEEE-GOODFREXP-NEXT: [[TMP14:%.*]] = extractvalue { float, i32 } [[TMP13]], 0
|
|
; IEEE-GOODFREXP-NEXT: [[TMP15:%.*]] = extractvalue { float, i32 } [[TMP13]], 1
|
|
; IEEE-GOODFREXP-NEXT: [[TMP16:%.*]] = sub i32 0, [[TMP15]]
|
|
; IEEE-GOODFREXP-NEXT: [[TMP17:%.*]] = call arcp contract float @llvm.amdgcn.rcp.f32(float [[TMP14]])
|
|
; IEEE-GOODFREXP-NEXT: [[TMP18:%.*]] = call arcp contract float @llvm.ldexp.f32.i32(float [[TMP17]], i32 [[TMP16]])
|
|
; IEEE-GOODFREXP-NEXT: [[ARCP2:%.*]] = fmul arcp contract float [[Z]], [[TMP18]]
|
|
; IEEE-GOODFREXP-NEXT: store volatile float [[ARCP0]], ptr addrspace(1) [[OUT]], align 4
|
|
; IEEE-GOODFREXP-NEXT: store volatile float [[ARCP1]], ptr addrspace(1) [[OUT]], align 4
|
|
; IEEE-GOODFREXP-NEXT: store volatile float [[ARCP2]], ptr addrspace(1) [[OUT]], align 4
|
|
; IEEE-GOODFREXP-NEXT: ret void
|
|
;
|
|
; IEEE-BADFREXP-LABEL: define amdgpu_kernel void @multiple_arcp_fdiv_sqrt_denom_25ulp_x3
|
|
; IEEE-BADFREXP-SAME: (ptr addrspace(1) [[OUT:%.*]], float [[X:%.*]], float [[Y:%.*]], float [[Z:%.*]], float [[SQR_DENOM:%.*]]) #[[ATTR1]] {
|
|
; IEEE-BADFREXP-NEXT: [[DENOM:%.*]] = call contract float @llvm.sqrt.f32(float [[SQR_DENOM]]), !fpmath !3
|
|
; IEEE-BADFREXP-NEXT: [[TMP1:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[DENOM]])
|
|
; IEEE-BADFREXP-NEXT: [[TMP2:%.*]] = extractvalue { float, i32 } [[TMP1]], 0
|
|
; IEEE-BADFREXP-NEXT: [[TMP3:%.*]] = call i32 @llvm.amdgcn.frexp.exp.i32.f32(float [[DENOM]])
|
|
; IEEE-BADFREXP-NEXT: [[TMP4:%.*]] = sub i32 0, [[TMP3]]
|
|
; IEEE-BADFREXP-NEXT: [[TMP5:%.*]] = call arcp contract float @llvm.amdgcn.rcp.f32(float [[TMP2]])
|
|
; IEEE-BADFREXP-NEXT: [[TMP6:%.*]] = call arcp contract float @llvm.ldexp.f32.i32(float [[TMP5]], i32 [[TMP4]])
|
|
; IEEE-BADFREXP-NEXT: [[ARCP0:%.*]] = fmul arcp contract float [[X]], [[TMP6]]
|
|
; IEEE-BADFREXP-NEXT: [[TMP7:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[DENOM]])
|
|
; IEEE-BADFREXP-NEXT: [[TMP8:%.*]] = extractvalue { float, i32 } [[TMP7]], 0
|
|
; IEEE-BADFREXP-NEXT: [[TMP9:%.*]] = call i32 @llvm.amdgcn.frexp.exp.i32.f32(float [[DENOM]])
|
|
; IEEE-BADFREXP-NEXT: [[TMP10:%.*]] = sub i32 0, [[TMP9]]
|
|
; IEEE-BADFREXP-NEXT: [[TMP11:%.*]] = call arcp contract float @llvm.amdgcn.rcp.f32(float [[TMP8]])
|
|
; IEEE-BADFREXP-NEXT: [[TMP12:%.*]] = call arcp contract float @llvm.ldexp.f32.i32(float [[TMP11]], i32 [[TMP10]])
|
|
; IEEE-BADFREXP-NEXT: [[ARCP1:%.*]] = fmul arcp contract float [[Y]], [[TMP12]]
|
|
; IEEE-BADFREXP-NEXT: [[TMP13:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[DENOM]])
|
|
; IEEE-BADFREXP-NEXT: [[TMP14:%.*]] = extractvalue { float, i32 } [[TMP13]], 0
|
|
; IEEE-BADFREXP-NEXT: [[TMP15:%.*]] = call i32 @llvm.amdgcn.frexp.exp.i32.f32(float [[DENOM]])
|
|
; IEEE-BADFREXP-NEXT: [[TMP16:%.*]] = sub i32 0, [[TMP15]]
|
|
; IEEE-BADFREXP-NEXT: [[TMP17:%.*]] = call arcp contract float @llvm.amdgcn.rcp.f32(float [[TMP14]])
|
|
; IEEE-BADFREXP-NEXT: [[TMP18:%.*]] = call arcp contract float @llvm.ldexp.f32.i32(float [[TMP17]], i32 [[TMP16]])
|
|
; IEEE-BADFREXP-NEXT: [[ARCP2:%.*]] = fmul arcp contract float [[Z]], [[TMP18]]
|
|
; IEEE-BADFREXP-NEXT: store volatile float [[ARCP0]], ptr addrspace(1) [[OUT]], align 4
|
|
; IEEE-BADFREXP-NEXT: store volatile float [[ARCP1]], ptr addrspace(1) [[OUT]], align 4
|
|
; IEEE-BADFREXP-NEXT: store volatile float [[ARCP2]], ptr addrspace(1) [[OUT]], align 4
|
|
; IEEE-BADFREXP-NEXT: ret void
|
|
;
|
|
; DAZ-LABEL: define amdgpu_kernel void @multiple_arcp_fdiv_sqrt_denom_25ulp_x3
|
|
; DAZ-SAME: (ptr addrspace(1) [[OUT:%.*]], float [[X:%.*]], float [[Y:%.*]], float [[Z:%.*]], float [[SQR_DENOM:%.*]]) #[[ATTR1]] {
|
|
; DAZ-NEXT: [[DENOM:%.*]] = call contract float @llvm.sqrt.f32(float [[SQR_DENOM]]), !fpmath !3
|
|
; DAZ-NEXT: [[TMP1:%.*]] = call arcp contract float @llvm.amdgcn.rcp.f32(float [[DENOM]])
|
|
; DAZ-NEXT: [[ARCP0:%.*]] = fmul arcp contract float [[X]], [[TMP1]]
|
|
; DAZ-NEXT: [[TMP2:%.*]] = call arcp contract float @llvm.amdgcn.rcp.f32(float [[DENOM]])
|
|
; DAZ-NEXT: [[ARCP1:%.*]] = fmul arcp contract float [[Y]], [[TMP2]]
|
|
; DAZ-NEXT: [[TMP3:%.*]] = call arcp contract float @llvm.amdgcn.rcp.f32(float [[DENOM]])
|
|
; DAZ-NEXT: [[ARCP2:%.*]] = fmul arcp contract float [[Z]], [[TMP3]]
|
|
; DAZ-NEXT: store volatile float [[ARCP0]], ptr addrspace(1) [[OUT]], align 4
|
|
; DAZ-NEXT: store volatile float [[ARCP1]], ptr addrspace(1) [[OUT]], align 4
|
|
; DAZ-NEXT: store volatile float [[ARCP2]], ptr addrspace(1) [[OUT]], align 4
|
|
; DAZ-NEXT: ret void
|
|
;
|
|
%denom = call contract float @llvm.sqrt.f32(float %sqr.denom), !fpmath !3
|
|
%arcp0 = fdiv contract arcp float %x, %denom, !fpmath !0
|
|
%arcp1 = fdiv contract arcp float %y, %denom, !fpmath !0
|
|
%arcp2 = fdiv contract arcp float %z, %denom, !fpmath !0
|
|
store volatile float %arcp0, ptr addrspace(1) %out
|
|
store volatile float %arcp1, ptr addrspace(1) %out
|
|
store volatile float %arcp2, ptr addrspace(1) %out
|
|
ret void
|
|
}
|
|
|
|
define <4 x float> @rsq_f32_vector_mixed_constant_numerator(<4 x float> %arg) {
|
|
; IEEE-GOODFREXP-LABEL: define <4 x float> @rsq_f32_vector_mixed_constant_numerator
|
|
; IEEE-GOODFREXP-SAME: (<4 x float> [[ARG:%.*]]) #[[ATTR1]] {
|
|
; IEEE-GOODFREXP-NEXT: [[DENOM:%.*]] = call contract <4 x float> @llvm.sqrt.v4f32(<4 x float> [[ARG]]), !fpmath !2
|
|
; IEEE-GOODFREXP-NEXT: [[TMP1:%.*]] = extractelement <4 x float> [[DENOM]], i64 0
|
|
; IEEE-GOODFREXP-NEXT: [[TMP2:%.*]] = extractelement <4 x float> [[DENOM]], i64 1
|
|
; IEEE-GOODFREXP-NEXT: [[TMP3:%.*]] = extractelement <4 x float> [[DENOM]], i64 2
|
|
; IEEE-GOODFREXP-NEXT: [[TMP4:%.*]] = extractelement <4 x float> [[DENOM]], i64 3
|
|
; IEEE-GOODFREXP-NEXT: [[TMP5:%.*]] = extractelement <4 x float> [[ARG]], i64 0
|
|
; IEEE-GOODFREXP-NEXT: [[TMP6:%.*]] = extractelement <4 x float> [[ARG]], i64 1
|
|
; IEEE-GOODFREXP-NEXT: [[TMP7:%.*]] = extractelement <4 x float> [[ARG]], i64 2
|
|
; IEEE-GOODFREXP-NEXT: [[TMP8:%.*]] = extractelement <4 x float> [[ARG]], i64 3
|
|
; IEEE-GOODFREXP-NEXT: [[TMP9:%.*]] = fcmp contract olt float [[TMP5]], 0x3810000000000000
|
|
; IEEE-GOODFREXP-NEXT: [[TMP10:%.*]] = select contract i1 [[TMP9]], float 0x4170000000000000, float 1.000000e+00
|
|
; IEEE-GOODFREXP-NEXT: [[TMP11:%.*]] = fmul contract float [[TMP5]], [[TMP10]]
|
|
; IEEE-GOODFREXP-NEXT: [[TMP12:%.*]] = call contract float @llvm.amdgcn.rsq.f32(float [[TMP11]])
|
|
; IEEE-GOODFREXP-NEXT: [[TMP13:%.*]] = select contract i1 [[TMP9]], float 4.096000e+03, float 1.000000e+00
|
|
; IEEE-GOODFREXP-NEXT: [[TMP14:%.*]] = fmul contract float [[TMP12]], [[TMP13]]
|
|
; IEEE-GOODFREXP-NEXT: [[TMP15:%.*]] = fcmp contract olt float [[TMP6]], 0x3810000000000000
|
|
; IEEE-GOODFREXP-NEXT: [[TMP16:%.*]] = select contract i1 [[TMP15]], float 0x4170000000000000, float 1.000000e+00
|
|
; IEEE-GOODFREXP-NEXT: [[TMP17:%.*]] = fmul contract float [[TMP6]], [[TMP16]]
|
|
; IEEE-GOODFREXP-NEXT: [[TMP18:%.*]] = call contract float @llvm.amdgcn.rsq.f32(float [[TMP17]])
|
|
; IEEE-GOODFREXP-NEXT: [[TMP19:%.*]] = select contract i1 [[TMP15]], float -4.096000e+03, float -1.000000e+00
|
|
; IEEE-GOODFREXP-NEXT: [[TMP20:%.*]] = fmul contract float [[TMP18]], [[TMP19]]
|
|
; IEEE-GOODFREXP-NEXT: [[TMP21:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[TMP3]])
|
|
; IEEE-GOODFREXP-NEXT: [[TMP22:%.*]] = extractvalue { float, i32 } [[TMP21]], 0
|
|
; IEEE-GOODFREXP-NEXT: [[TMP23:%.*]] = extractvalue { float, i32 } [[TMP21]], 1
|
|
; IEEE-GOODFREXP-NEXT: [[TMP24:%.*]] = call contract float @llvm.amdgcn.rcp.f32(float [[TMP22]])
|
|
; IEEE-GOODFREXP-NEXT: [[TMP25:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float 4.000000e+00)
|
|
; IEEE-GOODFREXP-NEXT: [[TMP26:%.*]] = extractvalue { float, i32 } [[TMP25]], 0
|
|
; IEEE-GOODFREXP-NEXT: [[TMP27:%.*]] = extractvalue { float, i32 } [[TMP25]], 1
|
|
; IEEE-GOODFREXP-NEXT: [[TMP28:%.*]] = fmul contract float [[TMP26]], [[TMP24]]
|
|
; IEEE-GOODFREXP-NEXT: [[TMP29:%.*]] = sub i32 [[TMP27]], [[TMP23]]
|
|
; IEEE-GOODFREXP-NEXT: [[TMP30:%.*]] = call contract float @llvm.ldexp.f32.i32(float [[TMP28]], i32 [[TMP29]])
|
|
; IEEE-GOODFREXP-NEXT: [[TMP31:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[TMP4]])
|
|
; IEEE-GOODFREXP-NEXT: [[TMP32:%.*]] = extractvalue { float, i32 } [[TMP31]], 0
|
|
; IEEE-GOODFREXP-NEXT: [[TMP33:%.*]] = extractvalue { float, i32 } [[TMP31]], 1
|
|
; IEEE-GOODFREXP-NEXT: [[TMP34:%.*]] = call contract float @llvm.amdgcn.rcp.f32(float [[TMP32]])
|
|
; IEEE-GOODFREXP-NEXT: [[TMP35:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float undef)
|
|
; IEEE-GOODFREXP-NEXT: [[TMP36:%.*]] = extractvalue { float, i32 } [[TMP35]], 0
|
|
; IEEE-GOODFREXP-NEXT: [[TMP37:%.*]] = extractvalue { float, i32 } [[TMP35]], 1
|
|
; IEEE-GOODFREXP-NEXT: [[TMP38:%.*]] = fmul contract float [[TMP36]], [[TMP34]]
|
|
; IEEE-GOODFREXP-NEXT: [[TMP39:%.*]] = sub i32 [[TMP37]], [[TMP33]]
|
|
; IEEE-GOODFREXP-NEXT: [[TMP40:%.*]] = call contract float @llvm.ldexp.f32.i32(float [[TMP38]], i32 [[TMP39]])
|
|
; IEEE-GOODFREXP-NEXT: [[TMP41:%.*]] = insertelement <4 x float> poison, float [[TMP14]], i64 0
|
|
; IEEE-GOODFREXP-NEXT: [[TMP42:%.*]] = insertelement <4 x float> [[TMP41]], float [[TMP20]], i64 1
|
|
; IEEE-GOODFREXP-NEXT: [[TMP43:%.*]] = insertelement <4 x float> [[TMP42]], float [[TMP30]], i64 2
|
|
; IEEE-GOODFREXP-NEXT: [[PARTIAL_RSQ:%.*]] = insertelement <4 x float> [[TMP43]], float [[TMP40]], i64 3
|
|
; IEEE-GOODFREXP-NEXT: ret <4 x float> [[PARTIAL_RSQ]]
|
|
;
|
|
; IEEE-BADFREXP-LABEL: define <4 x float> @rsq_f32_vector_mixed_constant_numerator
|
|
; IEEE-BADFREXP-SAME: (<4 x float> [[ARG:%.*]]) #[[ATTR1]] {
|
|
; IEEE-BADFREXP-NEXT: [[DENOM:%.*]] = call contract <4 x float> @llvm.sqrt.v4f32(<4 x float> [[ARG]]), !fpmath !2
|
|
; IEEE-BADFREXP-NEXT: [[TMP1:%.*]] = extractelement <4 x float> [[DENOM]], i64 0
|
|
; IEEE-BADFREXP-NEXT: [[TMP2:%.*]] = extractelement <4 x float> [[DENOM]], i64 1
|
|
; IEEE-BADFREXP-NEXT: [[TMP3:%.*]] = extractelement <4 x float> [[DENOM]], i64 2
|
|
; IEEE-BADFREXP-NEXT: [[TMP4:%.*]] = extractelement <4 x float> [[DENOM]], i64 3
|
|
; IEEE-BADFREXP-NEXT: [[TMP5:%.*]] = extractelement <4 x float> [[ARG]], i64 0
|
|
; IEEE-BADFREXP-NEXT: [[TMP6:%.*]] = extractelement <4 x float> [[ARG]], i64 1
|
|
; IEEE-BADFREXP-NEXT: [[TMP7:%.*]] = extractelement <4 x float> [[ARG]], i64 2
|
|
; IEEE-BADFREXP-NEXT: [[TMP8:%.*]] = extractelement <4 x float> [[ARG]], i64 3
|
|
; IEEE-BADFREXP-NEXT: [[TMP9:%.*]] = fcmp contract olt float [[TMP5]], 0x3810000000000000
|
|
; IEEE-BADFREXP-NEXT: [[TMP10:%.*]] = select contract i1 [[TMP9]], float 0x4170000000000000, float 1.000000e+00
|
|
; IEEE-BADFREXP-NEXT: [[TMP11:%.*]] = fmul contract float [[TMP5]], [[TMP10]]
|
|
; IEEE-BADFREXP-NEXT: [[TMP12:%.*]] = call contract float @llvm.amdgcn.rsq.f32(float [[TMP11]])
|
|
; IEEE-BADFREXP-NEXT: [[TMP13:%.*]] = select contract i1 [[TMP9]], float 4.096000e+03, float 1.000000e+00
|
|
; IEEE-BADFREXP-NEXT: [[TMP14:%.*]] = fmul contract float [[TMP12]], [[TMP13]]
|
|
; IEEE-BADFREXP-NEXT: [[TMP15:%.*]] = fcmp contract olt float [[TMP6]], 0x3810000000000000
|
|
; IEEE-BADFREXP-NEXT: [[TMP16:%.*]] = select contract i1 [[TMP15]], float 0x4170000000000000, float 1.000000e+00
|
|
; IEEE-BADFREXP-NEXT: [[TMP17:%.*]] = fmul contract float [[TMP6]], [[TMP16]]
|
|
; IEEE-BADFREXP-NEXT: [[TMP18:%.*]] = call contract float @llvm.amdgcn.rsq.f32(float [[TMP17]])
|
|
; IEEE-BADFREXP-NEXT: [[TMP19:%.*]] = select contract i1 [[TMP15]], float -4.096000e+03, float -1.000000e+00
|
|
; IEEE-BADFREXP-NEXT: [[TMP20:%.*]] = fmul contract float [[TMP18]], [[TMP19]]
|
|
; IEEE-BADFREXP-NEXT: [[TMP21:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[TMP3]])
|
|
; IEEE-BADFREXP-NEXT: [[TMP22:%.*]] = extractvalue { float, i32 } [[TMP21]], 0
|
|
; IEEE-BADFREXP-NEXT: [[TMP23:%.*]] = call i32 @llvm.amdgcn.frexp.exp.i32.f32(float [[TMP3]])
|
|
; IEEE-BADFREXP-NEXT: [[TMP24:%.*]] = call contract float @llvm.amdgcn.rcp.f32(float [[TMP22]])
|
|
; IEEE-BADFREXP-NEXT: [[TMP25:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float 4.000000e+00)
|
|
; IEEE-BADFREXP-NEXT: [[TMP26:%.*]] = extractvalue { float, i32 } [[TMP25]], 0
|
|
; IEEE-BADFREXP-NEXT: [[TMP27:%.*]] = call i32 @llvm.amdgcn.frexp.exp.i32.f32(float 4.000000e+00)
|
|
; IEEE-BADFREXP-NEXT: [[TMP28:%.*]] = fmul contract float [[TMP26]], [[TMP24]]
|
|
; IEEE-BADFREXP-NEXT: [[TMP29:%.*]] = sub i32 [[TMP27]], [[TMP23]]
|
|
; IEEE-BADFREXP-NEXT: [[TMP30:%.*]] = call contract float @llvm.ldexp.f32.i32(float [[TMP28]], i32 [[TMP29]])
|
|
; IEEE-BADFREXP-NEXT: [[TMP31:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[TMP4]])
|
|
; IEEE-BADFREXP-NEXT: [[TMP32:%.*]] = extractvalue { float, i32 } [[TMP31]], 0
|
|
; IEEE-BADFREXP-NEXT: [[TMP33:%.*]] = call i32 @llvm.amdgcn.frexp.exp.i32.f32(float [[TMP4]])
|
|
; IEEE-BADFREXP-NEXT: [[TMP34:%.*]] = call contract float @llvm.amdgcn.rcp.f32(float [[TMP32]])
|
|
; IEEE-BADFREXP-NEXT: [[TMP35:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float undef)
|
|
; IEEE-BADFREXP-NEXT: [[TMP36:%.*]] = extractvalue { float, i32 } [[TMP35]], 0
|
|
; IEEE-BADFREXP-NEXT: [[TMP37:%.*]] = call i32 @llvm.amdgcn.frexp.exp.i32.f32(float undef)
|
|
; IEEE-BADFREXP-NEXT: [[TMP38:%.*]] = fmul contract float [[TMP36]], [[TMP34]]
|
|
; IEEE-BADFREXP-NEXT: [[TMP39:%.*]] = sub i32 [[TMP37]], [[TMP33]]
|
|
; IEEE-BADFREXP-NEXT: [[TMP40:%.*]] = call contract float @llvm.ldexp.f32.i32(float [[TMP38]], i32 [[TMP39]])
|
|
; IEEE-BADFREXP-NEXT: [[TMP41:%.*]] = insertelement <4 x float> poison, float [[TMP14]], i64 0
|
|
; IEEE-BADFREXP-NEXT: [[TMP42:%.*]] = insertelement <4 x float> [[TMP41]], float [[TMP20]], i64 1
|
|
; IEEE-BADFREXP-NEXT: [[TMP43:%.*]] = insertelement <4 x float> [[TMP42]], float [[TMP30]], i64 2
|
|
; IEEE-BADFREXP-NEXT: [[PARTIAL_RSQ:%.*]] = insertelement <4 x float> [[TMP43]], float [[TMP40]], i64 3
|
|
; IEEE-BADFREXP-NEXT: ret <4 x float> [[PARTIAL_RSQ]]
|
|
;
|
|
; DAZ-LABEL: define <4 x float> @rsq_f32_vector_mixed_constant_numerator
|
|
; DAZ-SAME: (<4 x float> [[ARG:%.*]]) #[[ATTR1]] {
|
|
; DAZ-NEXT: [[DENOM:%.*]] = call contract <4 x float> @llvm.sqrt.v4f32(<4 x float> [[ARG]]), !fpmath !2
|
|
; DAZ-NEXT: [[TMP1:%.*]] = extractelement <4 x float> [[DENOM]], i64 0
|
|
; DAZ-NEXT: [[TMP2:%.*]] = extractelement <4 x float> [[DENOM]], i64 1
|
|
; DAZ-NEXT: [[TMP3:%.*]] = extractelement <4 x float> [[DENOM]], i64 2
|
|
; DAZ-NEXT: [[TMP4:%.*]] = extractelement <4 x float> [[DENOM]], i64 3
|
|
; DAZ-NEXT: [[TMP5:%.*]] = extractelement <4 x float> [[ARG]], i64 0
|
|
; DAZ-NEXT: [[TMP6:%.*]] = extractelement <4 x float> [[ARG]], i64 1
|
|
; DAZ-NEXT: [[TMP7:%.*]] = extractelement <4 x float> [[ARG]], i64 2
|
|
; DAZ-NEXT: [[TMP8:%.*]] = extractelement <4 x float> [[ARG]], i64 3
|
|
; DAZ-NEXT: [[TMP9:%.*]] = call contract float @llvm.amdgcn.rsq.f32(float [[TMP5]])
|
|
; DAZ-NEXT: [[TMP10:%.*]] = call contract float @llvm.amdgcn.rsq.f32(float [[TMP6]])
|
|
; DAZ-NEXT: [[TMP11:%.*]] = fneg contract float [[TMP10]]
|
|
; DAZ-NEXT: [[TMP12:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[TMP3]])
|
|
; DAZ-NEXT: [[TMP13:%.*]] = extractvalue { float, i32 } [[TMP12]], 0
|
|
; DAZ-NEXT: [[TMP14:%.*]] = extractvalue { float, i32 } [[TMP12]], 1
|
|
; DAZ-NEXT: [[TMP15:%.*]] = call contract float @llvm.amdgcn.rcp.f32(float [[TMP13]])
|
|
; DAZ-NEXT: [[TMP16:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float 4.000000e+00)
|
|
; DAZ-NEXT: [[TMP17:%.*]] = extractvalue { float, i32 } [[TMP16]], 0
|
|
; DAZ-NEXT: [[TMP18:%.*]] = extractvalue { float, i32 } [[TMP16]], 1
|
|
; DAZ-NEXT: [[TMP19:%.*]] = fmul contract float [[TMP17]], [[TMP15]]
|
|
; DAZ-NEXT: [[TMP20:%.*]] = sub i32 [[TMP18]], [[TMP14]]
|
|
; DAZ-NEXT: [[TMP21:%.*]] = call contract float @llvm.ldexp.f32.i32(float [[TMP19]], i32 [[TMP20]])
|
|
; DAZ-NEXT: [[TMP22:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[TMP4]])
|
|
; DAZ-NEXT: [[TMP23:%.*]] = extractvalue { float, i32 } [[TMP22]], 0
|
|
; DAZ-NEXT: [[TMP24:%.*]] = extractvalue { float, i32 } [[TMP22]], 1
|
|
; DAZ-NEXT: [[TMP25:%.*]] = call contract float @llvm.amdgcn.rcp.f32(float [[TMP23]])
|
|
; DAZ-NEXT: [[TMP26:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float undef)
|
|
; DAZ-NEXT: [[TMP27:%.*]] = extractvalue { float, i32 } [[TMP26]], 0
|
|
; DAZ-NEXT: [[TMP28:%.*]] = extractvalue { float, i32 } [[TMP26]], 1
|
|
; DAZ-NEXT: [[TMP29:%.*]] = fmul contract float [[TMP27]], [[TMP25]]
|
|
; DAZ-NEXT: [[TMP30:%.*]] = sub i32 [[TMP28]], [[TMP24]]
|
|
; DAZ-NEXT: [[TMP31:%.*]] = call contract float @llvm.ldexp.f32.i32(float [[TMP29]], i32 [[TMP30]])
|
|
; DAZ-NEXT: [[TMP32:%.*]] = insertelement <4 x float> poison, float [[TMP9]], i64 0
|
|
; DAZ-NEXT: [[TMP33:%.*]] = insertelement <4 x float> [[TMP32]], float [[TMP11]], i64 1
|
|
; DAZ-NEXT: [[TMP34:%.*]] = insertelement <4 x float> [[TMP33]], float [[TMP21]], i64 2
|
|
; DAZ-NEXT: [[PARTIAL_RSQ:%.*]] = insertelement <4 x float> [[TMP34]], float [[TMP31]], i64 3
|
|
; DAZ-NEXT: ret <4 x float> [[PARTIAL_RSQ]]
|
|
;
|
|
%denom = call contract <4 x float> @llvm.sqrt.v4f32(<4 x float> %arg), !fpmath !2
|
|
%partial.rsq = fdiv contract <4 x float> <float 1.0, float -1.0, float 4.0, float undef>, %denom, !fpmath !2
|
|
ret <4 x float> %partial.rsq
|
|
}
|
|
|
|
define <4 x float> @rsq_f32_vector_mixed_constant_numerator_afn_sqrt(<4 x float> %arg) {
|
|
; IEEE-GOODFREXP-LABEL: define <4 x float> @rsq_f32_vector_mixed_constant_numerator_afn_sqrt
|
|
; IEEE-GOODFREXP-SAME: (<4 x float> [[ARG:%.*]]) #[[ATTR1]] {
|
|
; IEEE-GOODFREXP-NEXT: [[DENOM:%.*]] = call contract afn <4 x float> @llvm.sqrt.v4f32(<4 x float> [[ARG]])
|
|
; IEEE-GOODFREXP-NEXT: [[TMP1:%.*]] = extractelement <4 x float> [[DENOM]], i64 0
|
|
; IEEE-GOODFREXP-NEXT: [[TMP2:%.*]] = extractelement <4 x float> [[DENOM]], i64 1
|
|
; IEEE-GOODFREXP-NEXT: [[TMP3:%.*]] = extractelement <4 x float> [[DENOM]], i64 2
|
|
; IEEE-GOODFREXP-NEXT: [[TMP4:%.*]] = extractelement <4 x float> [[DENOM]], i64 3
|
|
; IEEE-GOODFREXP-NEXT: [[TMP5:%.*]] = extractelement <4 x float> [[ARG]], i64 0
|
|
; IEEE-GOODFREXP-NEXT: [[TMP6:%.*]] = extractelement <4 x float> [[ARG]], i64 1
|
|
; IEEE-GOODFREXP-NEXT: [[TMP7:%.*]] = extractelement <4 x float> [[ARG]], i64 2
|
|
; IEEE-GOODFREXP-NEXT: [[TMP8:%.*]] = extractelement <4 x float> [[ARG]], i64 3
|
|
; IEEE-GOODFREXP-NEXT: [[TMP9:%.*]] = call contract afn float @llvm.amdgcn.rsq.f32(float [[TMP5]])
|
|
; IEEE-GOODFREXP-NEXT: [[TMP10:%.*]] = call contract afn float @llvm.amdgcn.rsq.f32(float [[TMP6]])
|
|
; IEEE-GOODFREXP-NEXT: [[TMP11:%.*]] = fneg contract afn float [[TMP10]]
|
|
; IEEE-GOODFREXP-NEXT: [[TMP12:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[TMP3]])
|
|
; IEEE-GOODFREXP-NEXT: [[TMP13:%.*]] = extractvalue { float, i32 } [[TMP12]], 0
|
|
; IEEE-GOODFREXP-NEXT: [[TMP14:%.*]] = extractvalue { float, i32 } [[TMP12]], 1
|
|
; IEEE-GOODFREXP-NEXT: [[TMP15:%.*]] = call contract float @llvm.amdgcn.rcp.f32(float [[TMP13]])
|
|
; IEEE-GOODFREXP-NEXT: [[TMP16:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float 4.000000e+00)
|
|
; IEEE-GOODFREXP-NEXT: [[TMP17:%.*]] = extractvalue { float, i32 } [[TMP16]], 0
|
|
; IEEE-GOODFREXP-NEXT: [[TMP18:%.*]] = extractvalue { float, i32 } [[TMP16]], 1
|
|
; IEEE-GOODFREXP-NEXT: [[TMP19:%.*]] = fmul contract float [[TMP17]], [[TMP15]]
|
|
; IEEE-GOODFREXP-NEXT: [[TMP20:%.*]] = sub i32 [[TMP18]], [[TMP14]]
|
|
; IEEE-GOODFREXP-NEXT: [[TMP21:%.*]] = call contract float @llvm.ldexp.f32.i32(float [[TMP19]], i32 [[TMP20]])
|
|
; IEEE-GOODFREXP-NEXT: [[TMP22:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[TMP4]])
|
|
; IEEE-GOODFREXP-NEXT: [[TMP23:%.*]] = extractvalue { float, i32 } [[TMP22]], 0
|
|
; IEEE-GOODFREXP-NEXT: [[TMP24:%.*]] = extractvalue { float, i32 } [[TMP22]], 1
|
|
; IEEE-GOODFREXP-NEXT: [[TMP25:%.*]] = call contract float @llvm.amdgcn.rcp.f32(float [[TMP23]])
|
|
; IEEE-GOODFREXP-NEXT: [[TMP26:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float undef)
|
|
; IEEE-GOODFREXP-NEXT: [[TMP27:%.*]] = extractvalue { float, i32 } [[TMP26]], 0
|
|
; IEEE-GOODFREXP-NEXT: [[TMP28:%.*]] = extractvalue { float, i32 } [[TMP26]], 1
|
|
; IEEE-GOODFREXP-NEXT: [[TMP29:%.*]] = fmul contract float [[TMP27]], [[TMP25]]
|
|
; IEEE-GOODFREXP-NEXT: [[TMP30:%.*]] = sub i32 [[TMP28]], [[TMP24]]
|
|
; IEEE-GOODFREXP-NEXT: [[TMP31:%.*]] = call contract float @llvm.ldexp.f32.i32(float [[TMP29]], i32 [[TMP30]])
|
|
; IEEE-GOODFREXP-NEXT: [[TMP32:%.*]] = insertelement <4 x float> poison, float [[TMP9]], i64 0
|
|
; IEEE-GOODFREXP-NEXT: [[TMP33:%.*]] = insertelement <4 x float> [[TMP32]], float [[TMP11]], i64 1
|
|
; IEEE-GOODFREXP-NEXT: [[TMP34:%.*]] = insertelement <4 x float> [[TMP33]], float [[TMP21]], i64 2
|
|
; IEEE-GOODFREXP-NEXT: [[PARTIAL_RSQ:%.*]] = insertelement <4 x float> [[TMP34]], float [[TMP31]], i64 3
|
|
; IEEE-GOODFREXP-NEXT: ret <4 x float> [[PARTIAL_RSQ]]
|
|
;
|
|
; IEEE-BADFREXP-LABEL: define <4 x float> @rsq_f32_vector_mixed_constant_numerator_afn_sqrt
|
|
; IEEE-BADFREXP-SAME: (<4 x float> [[ARG:%.*]]) #[[ATTR1]] {
|
|
; IEEE-BADFREXP-NEXT: [[DENOM:%.*]] = call contract afn <4 x float> @llvm.sqrt.v4f32(<4 x float> [[ARG]])
|
|
; IEEE-BADFREXP-NEXT: [[TMP1:%.*]] = extractelement <4 x float> [[DENOM]], i64 0
|
|
; IEEE-BADFREXP-NEXT: [[TMP2:%.*]] = extractelement <4 x float> [[DENOM]], i64 1
|
|
; IEEE-BADFREXP-NEXT: [[TMP3:%.*]] = extractelement <4 x float> [[DENOM]], i64 2
|
|
; IEEE-BADFREXP-NEXT: [[TMP4:%.*]] = extractelement <4 x float> [[DENOM]], i64 3
|
|
; IEEE-BADFREXP-NEXT: [[TMP5:%.*]] = extractelement <4 x float> [[ARG]], i64 0
|
|
; IEEE-BADFREXP-NEXT: [[TMP6:%.*]] = extractelement <4 x float> [[ARG]], i64 1
|
|
; IEEE-BADFREXP-NEXT: [[TMP7:%.*]] = extractelement <4 x float> [[ARG]], i64 2
|
|
; IEEE-BADFREXP-NEXT: [[TMP8:%.*]] = extractelement <4 x float> [[ARG]], i64 3
|
|
; IEEE-BADFREXP-NEXT: [[TMP9:%.*]] = call contract afn float @llvm.amdgcn.rsq.f32(float [[TMP5]])
|
|
; IEEE-BADFREXP-NEXT: [[TMP10:%.*]] = call contract afn float @llvm.amdgcn.rsq.f32(float [[TMP6]])
|
|
; IEEE-BADFREXP-NEXT: [[TMP11:%.*]] = fneg contract afn float [[TMP10]]
|
|
; IEEE-BADFREXP-NEXT: [[TMP12:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[TMP3]])
|
|
; IEEE-BADFREXP-NEXT: [[TMP13:%.*]] = extractvalue { float, i32 } [[TMP12]], 0
|
|
; IEEE-BADFREXP-NEXT: [[TMP14:%.*]] = call i32 @llvm.amdgcn.frexp.exp.i32.f32(float [[TMP3]])
|
|
; IEEE-BADFREXP-NEXT: [[TMP15:%.*]] = call contract float @llvm.amdgcn.rcp.f32(float [[TMP13]])
|
|
; IEEE-BADFREXP-NEXT: [[TMP16:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float 4.000000e+00)
|
|
; IEEE-BADFREXP-NEXT: [[TMP17:%.*]] = extractvalue { float, i32 } [[TMP16]], 0
|
|
; IEEE-BADFREXP-NEXT: [[TMP18:%.*]] = call i32 @llvm.amdgcn.frexp.exp.i32.f32(float 4.000000e+00)
|
|
; IEEE-BADFREXP-NEXT: [[TMP19:%.*]] = fmul contract float [[TMP17]], [[TMP15]]
|
|
; IEEE-BADFREXP-NEXT: [[TMP20:%.*]] = sub i32 [[TMP18]], [[TMP14]]
|
|
; IEEE-BADFREXP-NEXT: [[TMP21:%.*]] = call contract float @llvm.ldexp.f32.i32(float [[TMP19]], i32 [[TMP20]])
|
|
; IEEE-BADFREXP-NEXT: [[TMP22:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[TMP4]])
|
|
; IEEE-BADFREXP-NEXT: [[TMP23:%.*]] = extractvalue { float, i32 } [[TMP22]], 0
|
|
; IEEE-BADFREXP-NEXT: [[TMP24:%.*]] = call i32 @llvm.amdgcn.frexp.exp.i32.f32(float [[TMP4]])
|
|
; IEEE-BADFREXP-NEXT: [[TMP25:%.*]] = call contract float @llvm.amdgcn.rcp.f32(float [[TMP23]])
|
|
; IEEE-BADFREXP-NEXT: [[TMP26:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float undef)
|
|
; IEEE-BADFREXP-NEXT: [[TMP27:%.*]] = extractvalue { float, i32 } [[TMP26]], 0
|
|
; IEEE-BADFREXP-NEXT: [[TMP28:%.*]] = call i32 @llvm.amdgcn.frexp.exp.i32.f32(float undef)
|
|
; IEEE-BADFREXP-NEXT: [[TMP29:%.*]] = fmul contract float [[TMP27]], [[TMP25]]
|
|
; IEEE-BADFREXP-NEXT: [[TMP30:%.*]] = sub i32 [[TMP28]], [[TMP24]]
|
|
; IEEE-BADFREXP-NEXT: [[TMP31:%.*]] = call contract float @llvm.ldexp.f32.i32(float [[TMP29]], i32 [[TMP30]])
|
|
; IEEE-BADFREXP-NEXT: [[TMP32:%.*]] = insertelement <4 x float> poison, float [[TMP9]], i64 0
|
|
; IEEE-BADFREXP-NEXT: [[TMP33:%.*]] = insertelement <4 x float> [[TMP32]], float [[TMP11]], i64 1
|
|
; IEEE-BADFREXP-NEXT: [[TMP34:%.*]] = insertelement <4 x float> [[TMP33]], float [[TMP21]], i64 2
|
|
; IEEE-BADFREXP-NEXT: [[PARTIAL_RSQ:%.*]] = insertelement <4 x float> [[TMP34]], float [[TMP31]], i64 3
|
|
; IEEE-BADFREXP-NEXT: ret <4 x float> [[PARTIAL_RSQ]]
|
|
;
|
|
; DAZ-LABEL: define <4 x float> @rsq_f32_vector_mixed_constant_numerator_afn_sqrt
|
|
; DAZ-SAME: (<4 x float> [[ARG:%.*]]) #[[ATTR1]] {
|
|
; DAZ-NEXT: [[DENOM:%.*]] = call contract afn <4 x float> @llvm.sqrt.v4f32(<4 x float> [[ARG]])
|
|
; DAZ-NEXT: [[TMP1:%.*]] = extractelement <4 x float> [[DENOM]], i64 0
|
|
; DAZ-NEXT: [[TMP2:%.*]] = extractelement <4 x float> [[DENOM]], i64 1
|
|
; DAZ-NEXT: [[TMP3:%.*]] = extractelement <4 x float> [[DENOM]], i64 2
|
|
; DAZ-NEXT: [[TMP4:%.*]] = extractelement <4 x float> [[DENOM]], i64 3
|
|
; DAZ-NEXT: [[TMP5:%.*]] = extractelement <4 x float> [[ARG]], i64 0
|
|
; DAZ-NEXT: [[TMP6:%.*]] = extractelement <4 x float> [[ARG]], i64 1
|
|
; DAZ-NEXT: [[TMP7:%.*]] = extractelement <4 x float> [[ARG]], i64 2
|
|
; DAZ-NEXT: [[TMP8:%.*]] = extractelement <4 x float> [[ARG]], i64 3
|
|
; DAZ-NEXT: [[TMP9:%.*]] = call contract afn float @llvm.amdgcn.rsq.f32(float [[TMP5]])
|
|
; DAZ-NEXT: [[TMP10:%.*]] = call contract afn float @llvm.amdgcn.rsq.f32(float [[TMP6]])
|
|
; DAZ-NEXT: [[TMP11:%.*]] = fneg contract afn float [[TMP10]]
|
|
; DAZ-NEXT: [[TMP12:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[TMP3]])
|
|
; DAZ-NEXT: [[TMP13:%.*]] = extractvalue { float, i32 } [[TMP12]], 0
|
|
; DAZ-NEXT: [[TMP14:%.*]] = extractvalue { float, i32 } [[TMP12]], 1
|
|
; DAZ-NEXT: [[TMP15:%.*]] = call contract float @llvm.amdgcn.rcp.f32(float [[TMP13]])
|
|
; DAZ-NEXT: [[TMP16:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float 4.000000e+00)
|
|
; DAZ-NEXT: [[TMP17:%.*]] = extractvalue { float, i32 } [[TMP16]], 0
|
|
; DAZ-NEXT: [[TMP18:%.*]] = extractvalue { float, i32 } [[TMP16]], 1
|
|
; DAZ-NEXT: [[TMP19:%.*]] = fmul contract float [[TMP17]], [[TMP15]]
|
|
; DAZ-NEXT: [[TMP20:%.*]] = sub i32 [[TMP18]], [[TMP14]]
|
|
; DAZ-NEXT: [[TMP21:%.*]] = call contract float @llvm.ldexp.f32.i32(float [[TMP19]], i32 [[TMP20]])
|
|
; DAZ-NEXT: [[TMP22:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[TMP4]])
|
|
; DAZ-NEXT: [[TMP23:%.*]] = extractvalue { float, i32 } [[TMP22]], 0
|
|
; DAZ-NEXT: [[TMP24:%.*]] = extractvalue { float, i32 } [[TMP22]], 1
|
|
; DAZ-NEXT: [[TMP25:%.*]] = call contract float @llvm.amdgcn.rcp.f32(float [[TMP23]])
|
|
; DAZ-NEXT: [[TMP26:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float undef)
|
|
; DAZ-NEXT: [[TMP27:%.*]] = extractvalue { float, i32 } [[TMP26]], 0
|
|
; DAZ-NEXT: [[TMP28:%.*]] = extractvalue { float, i32 } [[TMP26]], 1
|
|
; DAZ-NEXT: [[TMP29:%.*]] = fmul contract float [[TMP27]], [[TMP25]]
|
|
; DAZ-NEXT: [[TMP30:%.*]] = sub i32 [[TMP28]], [[TMP24]]
|
|
; DAZ-NEXT: [[TMP31:%.*]] = call contract float @llvm.ldexp.f32.i32(float [[TMP29]], i32 [[TMP30]])
|
|
; DAZ-NEXT: [[TMP32:%.*]] = insertelement <4 x float> poison, float [[TMP9]], i64 0
|
|
; DAZ-NEXT: [[TMP33:%.*]] = insertelement <4 x float> [[TMP32]], float [[TMP11]], i64 1
|
|
; DAZ-NEXT: [[TMP34:%.*]] = insertelement <4 x float> [[TMP33]], float [[TMP21]], i64 2
|
|
; DAZ-NEXT: [[PARTIAL_RSQ:%.*]] = insertelement <4 x float> [[TMP34]], float [[TMP31]], i64 3
|
|
; DAZ-NEXT: ret <4 x float> [[PARTIAL_RSQ]]
|
|
;
|
|
%denom = call contract afn <4 x float> @llvm.sqrt.v4f32(<4 x float> %arg)
|
|
%partial.rsq = fdiv contract <4 x float> <float 1.0, float -1.0, float 4.0, float undef>, %denom, !fpmath !2
|
|
ret <4 x float> %partial.rsq
|
|
}
|
|
|
|
define <4 x float> @rsq_f32_vector_mixed_constant_numerator_afn_div(<4 x float> %arg) {
|
|
; CHECK-LABEL: define <4 x float> @rsq_f32_vector_mixed_constant_numerator_afn_div
|
|
; CHECK-SAME: (<4 x float> [[ARG:%.*]]) #[[ATTR1]] {
|
|
; CHECK-NEXT: [[DENOM:%.*]] = call contract <4 x float> @llvm.sqrt.v4f32(<4 x float> [[ARG]]), !fpmath !2
|
|
; CHECK-NEXT: [[PARTIAL_RSQ:%.*]] = fdiv contract afn <4 x float> <float 1.000000e+00, float -1.000000e+00, float 4.000000e+00, float undef>, [[DENOM]]
|
|
; CHECK-NEXT: ret <4 x float> [[PARTIAL_RSQ]]
|
|
;
|
|
%denom = call contract <4 x float> @llvm.sqrt.v4f32(<4 x float> %arg), !fpmath !2
|
|
%partial.rsq = fdiv contract afn <4 x float> <float 1.0, float -1.0, float 4.0, float undef>, %denom
|
|
ret <4 x float> %partial.rsq
|
|
}
|
|
|
|
define <4 x float> @rsq_f32_vector_mixed_constant_numerator_correct_fdiv(<4 x float> %arg) {
|
|
; CHECK-LABEL: define <4 x float> @rsq_f32_vector_mixed_constant_numerator_correct_fdiv
|
|
; CHECK-SAME: (<4 x float> [[ARG:%.*]]) #[[ATTR1]] {
|
|
; CHECK-NEXT: [[DENOM:%.*]] = call contract <4 x float> @llvm.sqrt.v4f32(<4 x float> [[ARG]]), !fpmath !2
|
|
; CHECK-NEXT: [[PARTIAL_RSQ:%.*]] = fdiv contract <4 x float> <float 1.000000e+00, float -1.000000e+00, float 4.000000e+00, float undef>, [[DENOM]]
|
|
; CHECK-NEXT: ret <4 x float> [[PARTIAL_RSQ]]
|
|
;
|
|
%denom = call contract <4 x float> @llvm.sqrt.v4f32(<4 x float> %arg), !fpmath !2
|
|
%partial.rsq = fdiv contract <4 x float> <float 1.0, float -1.0, float 4.0, float undef>, %denom
|
|
ret <4 x float> %partial.rsq
|
|
}
|
|
|
|
define <4 x float> @rsq_f32_vector_mixed_constant_numerator_correct_sqrt(<4 x float> %arg) {
|
|
; IEEE-GOODFREXP-LABEL: define <4 x float> @rsq_f32_vector_mixed_constant_numerator_correct_sqrt
|
|
; IEEE-GOODFREXP-SAME: (<4 x float> [[ARG:%.*]]) #[[ATTR1]] {
|
|
; IEEE-GOODFREXP-NEXT: [[DENOM:%.*]] = call contract <4 x float> @llvm.sqrt.v4f32(<4 x float> [[ARG]])
|
|
; IEEE-GOODFREXP-NEXT: [[TMP1:%.*]] = extractelement <4 x float> [[DENOM]], i64 0
|
|
; IEEE-GOODFREXP-NEXT: [[TMP2:%.*]] = extractelement <4 x float> [[DENOM]], i64 1
|
|
; IEEE-GOODFREXP-NEXT: [[TMP3:%.*]] = extractelement <4 x float> [[DENOM]], i64 2
|
|
; IEEE-GOODFREXP-NEXT: [[TMP4:%.*]] = extractelement <4 x float> [[DENOM]], i64 3
|
|
; IEEE-GOODFREXP-NEXT: [[TMP5:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[TMP1]])
|
|
; IEEE-GOODFREXP-NEXT: [[TMP6:%.*]] = extractvalue { float, i32 } [[TMP5]], 0
|
|
; IEEE-GOODFREXP-NEXT: [[TMP7:%.*]] = extractvalue { float, i32 } [[TMP5]], 1
|
|
; IEEE-GOODFREXP-NEXT: [[TMP8:%.*]] = sub i32 0, [[TMP7]]
|
|
; IEEE-GOODFREXP-NEXT: [[TMP9:%.*]] = call contract float @llvm.amdgcn.rcp.f32(float [[TMP6]])
|
|
; IEEE-GOODFREXP-NEXT: [[TMP10:%.*]] = call contract float @llvm.ldexp.f32.i32(float [[TMP9]], i32 [[TMP8]])
|
|
; IEEE-GOODFREXP-NEXT: [[TMP11:%.*]] = fneg contract float [[TMP2]]
|
|
; IEEE-GOODFREXP-NEXT: [[TMP12:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[TMP11]])
|
|
; IEEE-GOODFREXP-NEXT: [[TMP13:%.*]] = extractvalue { float, i32 } [[TMP12]], 0
|
|
; IEEE-GOODFREXP-NEXT: [[TMP14:%.*]] = extractvalue { float, i32 } [[TMP12]], 1
|
|
; IEEE-GOODFREXP-NEXT: [[TMP15:%.*]] = sub i32 0, [[TMP14]]
|
|
; IEEE-GOODFREXP-NEXT: [[TMP16:%.*]] = call contract float @llvm.amdgcn.rcp.f32(float [[TMP13]])
|
|
; IEEE-GOODFREXP-NEXT: [[TMP17:%.*]] = call contract float @llvm.ldexp.f32.i32(float [[TMP16]], i32 [[TMP15]])
|
|
; IEEE-GOODFREXP-NEXT: [[TMP18:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[TMP3]])
|
|
; IEEE-GOODFREXP-NEXT: [[TMP19:%.*]] = extractvalue { float, i32 } [[TMP18]], 0
|
|
; IEEE-GOODFREXP-NEXT: [[TMP20:%.*]] = extractvalue { float, i32 } [[TMP18]], 1
|
|
; IEEE-GOODFREXP-NEXT: [[TMP21:%.*]] = call contract float @llvm.amdgcn.rcp.f32(float [[TMP19]])
|
|
; IEEE-GOODFREXP-NEXT: [[TMP22:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float 4.000000e+00)
|
|
; IEEE-GOODFREXP-NEXT: [[TMP23:%.*]] = extractvalue { float, i32 } [[TMP22]], 0
|
|
; IEEE-GOODFREXP-NEXT: [[TMP24:%.*]] = extractvalue { float, i32 } [[TMP22]], 1
|
|
; IEEE-GOODFREXP-NEXT: [[TMP25:%.*]] = fmul contract float [[TMP23]], [[TMP21]]
|
|
; IEEE-GOODFREXP-NEXT: [[TMP26:%.*]] = sub i32 [[TMP24]], [[TMP20]]
|
|
; IEEE-GOODFREXP-NEXT: [[TMP27:%.*]] = call contract float @llvm.ldexp.f32.i32(float [[TMP25]], i32 [[TMP26]])
|
|
; IEEE-GOODFREXP-NEXT: [[TMP28:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[TMP4]])
|
|
; IEEE-GOODFREXP-NEXT: [[TMP29:%.*]] = extractvalue { float, i32 } [[TMP28]], 0
|
|
; IEEE-GOODFREXP-NEXT: [[TMP30:%.*]] = extractvalue { float, i32 } [[TMP28]], 1
|
|
; IEEE-GOODFREXP-NEXT: [[TMP31:%.*]] = call contract float @llvm.amdgcn.rcp.f32(float [[TMP29]])
|
|
; IEEE-GOODFREXP-NEXT: [[TMP32:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float undef)
|
|
; IEEE-GOODFREXP-NEXT: [[TMP33:%.*]] = extractvalue { float, i32 } [[TMP32]], 0
|
|
; IEEE-GOODFREXP-NEXT: [[TMP34:%.*]] = extractvalue { float, i32 } [[TMP32]], 1
|
|
; IEEE-GOODFREXP-NEXT: [[TMP35:%.*]] = fmul contract float [[TMP33]], [[TMP31]]
|
|
; IEEE-GOODFREXP-NEXT: [[TMP36:%.*]] = sub i32 [[TMP34]], [[TMP30]]
|
|
; IEEE-GOODFREXP-NEXT: [[TMP37:%.*]] = call contract float @llvm.ldexp.f32.i32(float [[TMP35]], i32 [[TMP36]])
|
|
; IEEE-GOODFREXP-NEXT: [[TMP38:%.*]] = insertelement <4 x float> poison, float [[TMP10]], i64 0
|
|
; IEEE-GOODFREXP-NEXT: [[TMP39:%.*]] = insertelement <4 x float> [[TMP38]], float [[TMP17]], i64 1
|
|
; IEEE-GOODFREXP-NEXT: [[TMP40:%.*]] = insertelement <4 x float> [[TMP39]], float [[TMP27]], i64 2
|
|
; IEEE-GOODFREXP-NEXT: [[PARTIAL_RSQ:%.*]] = insertelement <4 x float> [[TMP40]], float [[TMP37]], i64 3
|
|
; IEEE-GOODFREXP-NEXT: ret <4 x float> [[PARTIAL_RSQ]]
|
|
;
|
|
; IEEE-BADFREXP-LABEL: define <4 x float> @rsq_f32_vector_mixed_constant_numerator_correct_sqrt
|
|
; IEEE-BADFREXP-SAME: (<4 x float> [[ARG:%.*]]) #[[ATTR1]] {
|
|
; IEEE-BADFREXP-NEXT: [[DENOM:%.*]] = call contract <4 x float> @llvm.sqrt.v4f32(<4 x float> [[ARG]])
|
|
; IEEE-BADFREXP-NEXT: [[TMP1:%.*]] = extractelement <4 x float> [[DENOM]], i64 0
|
|
; IEEE-BADFREXP-NEXT: [[TMP2:%.*]] = extractelement <4 x float> [[DENOM]], i64 1
|
|
; IEEE-BADFREXP-NEXT: [[TMP3:%.*]] = extractelement <4 x float> [[DENOM]], i64 2
|
|
; IEEE-BADFREXP-NEXT: [[TMP4:%.*]] = extractelement <4 x float> [[DENOM]], i64 3
|
|
; IEEE-BADFREXP-NEXT: [[TMP5:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[TMP1]])
|
|
; IEEE-BADFREXP-NEXT: [[TMP6:%.*]] = extractvalue { float, i32 } [[TMP5]], 0
|
|
; IEEE-BADFREXP-NEXT: [[TMP7:%.*]] = call i32 @llvm.amdgcn.frexp.exp.i32.f32(float [[TMP1]])
|
|
; IEEE-BADFREXP-NEXT: [[TMP8:%.*]] = sub i32 0, [[TMP7]]
|
|
; IEEE-BADFREXP-NEXT: [[TMP9:%.*]] = call contract float @llvm.amdgcn.rcp.f32(float [[TMP6]])
|
|
; IEEE-BADFREXP-NEXT: [[TMP10:%.*]] = call contract float @llvm.ldexp.f32.i32(float [[TMP9]], i32 [[TMP8]])
|
|
; IEEE-BADFREXP-NEXT: [[TMP11:%.*]] = fneg contract float [[TMP2]]
|
|
; IEEE-BADFREXP-NEXT: [[TMP12:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[TMP11]])
|
|
; IEEE-BADFREXP-NEXT: [[TMP13:%.*]] = extractvalue { float, i32 } [[TMP12]], 0
|
|
; IEEE-BADFREXP-NEXT: [[TMP14:%.*]] = call i32 @llvm.amdgcn.frexp.exp.i32.f32(float [[TMP11]])
|
|
; IEEE-BADFREXP-NEXT: [[TMP15:%.*]] = sub i32 0, [[TMP14]]
|
|
; IEEE-BADFREXP-NEXT: [[TMP16:%.*]] = call contract float @llvm.amdgcn.rcp.f32(float [[TMP13]])
|
|
; IEEE-BADFREXP-NEXT: [[TMP17:%.*]] = call contract float @llvm.ldexp.f32.i32(float [[TMP16]], i32 [[TMP15]])
|
|
; IEEE-BADFREXP-NEXT: [[TMP18:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[TMP3]])
|
|
; IEEE-BADFREXP-NEXT: [[TMP19:%.*]] = extractvalue { float, i32 } [[TMP18]], 0
|
|
; IEEE-BADFREXP-NEXT: [[TMP20:%.*]] = call i32 @llvm.amdgcn.frexp.exp.i32.f32(float [[TMP3]])
|
|
; IEEE-BADFREXP-NEXT: [[TMP21:%.*]] = call contract float @llvm.amdgcn.rcp.f32(float [[TMP19]])
|
|
; IEEE-BADFREXP-NEXT: [[TMP22:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float 4.000000e+00)
|
|
; IEEE-BADFREXP-NEXT: [[TMP23:%.*]] = extractvalue { float, i32 } [[TMP22]], 0
|
|
; IEEE-BADFREXP-NEXT: [[TMP24:%.*]] = call i32 @llvm.amdgcn.frexp.exp.i32.f32(float 4.000000e+00)
|
|
; IEEE-BADFREXP-NEXT: [[TMP25:%.*]] = fmul contract float [[TMP23]], [[TMP21]]
|
|
; IEEE-BADFREXP-NEXT: [[TMP26:%.*]] = sub i32 [[TMP24]], [[TMP20]]
|
|
; IEEE-BADFREXP-NEXT: [[TMP27:%.*]] = call contract float @llvm.ldexp.f32.i32(float [[TMP25]], i32 [[TMP26]])
|
|
; IEEE-BADFREXP-NEXT: [[TMP28:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[TMP4]])
|
|
; IEEE-BADFREXP-NEXT: [[TMP29:%.*]] = extractvalue { float, i32 } [[TMP28]], 0
|
|
; IEEE-BADFREXP-NEXT: [[TMP30:%.*]] = call i32 @llvm.amdgcn.frexp.exp.i32.f32(float [[TMP4]])
|
|
; IEEE-BADFREXP-NEXT: [[TMP31:%.*]] = call contract float @llvm.amdgcn.rcp.f32(float [[TMP29]])
|
|
; IEEE-BADFREXP-NEXT: [[TMP32:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float undef)
|
|
; IEEE-BADFREXP-NEXT: [[TMP33:%.*]] = extractvalue { float, i32 } [[TMP32]], 0
|
|
; IEEE-BADFREXP-NEXT: [[TMP34:%.*]] = call i32 @llvm.amdgcn.frexp.exp.i32.f32(float undef)
|
|
; IEEE-BADFREXP-NEXT: [[TMP35:%.*]] = fmul contract float [[TMP33]], [[TMP31]]
|
|
; IEEE-BADFREXP-NEXT: [[TMP36:%.*]] = sub i32 [[TMP34]], [[TMP30]]
|
|
; IEEE-BADFREXP-NEXT: [[TMP37:%.*]] = call contract float @llvm.ldexp.f32.i32(float [[TMP35]], i32 [[TMP36]])
|
|
; IEEE-BADFREXP-NEXT: [[TMP38:%.*]] = insertelement <4 x float> poison, float [[TMP10]], i64 0
|
|
; IEEE-BADFREXP-NEXT: [[TMP39:%.*]] = insertelement <4 x float> [[TMP38]], float [[TMP17]], i64 1
|
|
; IEEE-BADFREXP-NEXT: [[TMP40:%.*]] = insertelement <4 x float> [[TMP39]], float [[TMP27]], i64 2
|
|
; IEEE-BADFREXP-NEXT: [[PARTIAL_RSQ:%.*]] = insertelement <4 x float> [[TMP40]], float [[TMP37]], i64 3
|
|
; IEEE-BADFREXP-NEXT: ret <4 x float> [[PARTIAL_RSQ]]
|
|
;
|
|
; DAZ-LABEL: define <4 x float> @rsq_f32_vector_mixed_constant_numerator_correct_sqrt
|
|
; DAZ-SAME: (<4 x float> [[ARG:%.*]]) #[[ATTR1]] {
|
|
; DAZ-NEXT: [[DENOM:%.*]] = call contract <4 x float> @llvm.sqrt.v4f32(<4 x float> [[ARG]])
|
|
; DAZ-NEXT: [[TMP1:%.*]] = extractelement <4 x float> [[DENOM]], i64 0
|
|
; DAZ-NEXT: [[TMP2:%.*]] = extractelement <4 x float> [[DENOM]], i64 1
|
|
; DAZ-NEXT: [[TMP3:%.*]] = extractelement <4 x float> [[DENOM]], i64 2
|
|
; DAZ-NEXT: [[TMP4:%.*]] = extractelement <4 x float> [[DENOM]], i64 3
|
|
; DAZ-NEXT: [[TMP5:%.*]] = call contract float @llvm.amdgcn.rcp.f32(float [[TMP1]])
|
|
; DAZ-NEXT: [[TMP6:%.*]] = fneg contract float [[TMP2]]
|
|
; DAZ-NEXT: [[TMP7:%.*]] = call contract float @llvm.amdgcn.rcp.f32(float [[TMP6]])
|
|
; DAZ-NEXT: [[TMP8:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[TMP3]])
|
|
; DAZ-NEXT: [[TMP9:%.*]] = extractvalue { float, i32 } [[TMP8]], 0
|
|
; DAZ-NEXT: [[TMP10:%.*]] = extractvalue { float, i32 } [[TMP8]], 1
|
|
; DAZ-NEXT: [[TMP11:%.*]] = call contract float @llvm.amdgcn.rcp.f32(float [[TMP9]])
|
|
; DAZ-NEXT: [[TMP12:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float 4.000000e+00)
|
|
; DAZ-NEXT: [[TMP13:%.*]] = extractvalue { float, i32 } [[TMP12]], 0
|
|
; DAZ-NEXT: [[TMP14:%.*]] = extractvalue { float, i32 } [[TMP12]], 1
|
|
; DAZ-NEXT: [[TMP15:%.*]] = fmul contract float [[TMP13]], [[TMP11]]
|
|
; DAZ-NEXT: [[TMP16:%.*]] = sub i32 [[TMP14]], [[TMP10]]
|
|
; DAZ-NEXT: [[TMP17:%.*]] = call contract float @llvm.ldexp.f32.i32(float [[TMP15]], i32 [[TMP16]])
|
|
; DAZ-NEXT: [[TMP18:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[TMP4]])
|
|
; DAZ-NEXT: [[TMP19:%.*]] = extractvalue { float, i32 } [[TMP18]], 0
|
|
; DAZ-NEXT: [[TMP20:%.*]] = extractvalue { float, i32 } [[TMP18]], 1
|
|
; DAZ-NEXT: [[TMP21:%.*]] = call contract float @llvm.amdgcn.rcp.f32(float [[TMP19]])
|
|
; DAZ-NEXT: [[TMP22:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float undef)
|
|
; DAZ-NEXT: [[TMP23:%.*]] = extractvalue { float, i32 } [[TMP22]], 0
|
|
; DAZ-NEXT: [[TMP24:%.*]] = extractvalue { float, i32 } [[TMP22]], 1
|
|
; DAZ-NEXT: [[TMP25:%.*]] = fmul contract float [[TMP23]], [[TMP21]]
|
|
; DAZ-NEXT: [[TMP26:%.*]] = sub i32 [[TMP24]], [[TMP20]]
|
|
; DAZ-NEXT: [[TMP27:%.*]] = call contract float @llvm.ldexp.f32.i32(float [[TMP25]], i32 [[TMP26]])
|
|
; DAZ-NEXT: [[TMP28:%.*]] = insertelement <4 x float> poison, float [[TMP5]], i64 0
|
|
; DAZ-NEXT: [[TMP29:%.*]] = insertelement <4 x float> [[TMP28]], float [[TMP7]], i64 1
|
|
; DAZ-NEXT: [[TMP30:%.*]] = insertelement <4 x float> [[TMP29]], float [[TMP17]], i64 2
|
|
; DAZ-NEXT: [[PARTIAL_RSQ:%.*]] = insertelement <4 x float> [[TMP30]], float [[TMP27]], i64 3
|
|
; DAZ-NEXT: ret <4 x float> [[PARTIAL_RSQ]]
|
|
;
|
|
%denom = call contract <4 x float> @llvm.sqrt.v4f32(<4 x float> %arg)
|
|
%partial.rsq = fdiv contract <4 x float> <float 1.0, float -1.0, float 4.0, float undef>, %denom, !fpmath !2
|
|
ret <4 x float> %partial.rsq
|
|
}
|
|
|
|
define <4 x float> @rsq_f32_vector_mixed_constant_numerator_arcp(<4 x float> %arg) {
|
|
; IEEE-GOODFREXP-LABEL: define <4 x float> @rsq_f32_vector_mixed_constant_numerator_arcp
|
|
; IEEE-GOODFREXP-SAME: (<4 x float> [[ARG:%.*]]) #[[ATTR1]] {
|
|
; IEEE-GOODFREXP-NEXT: [[DENOM:%.*]] = call contract <4 x float> @llvm.sqrt.v4f32(<4 x float> [[ARG]]), !fpmath !2
|
|
; IEEE-GOODFREXP-NEXT: [[TMP1:%.*]] = extractelement <4 x float> [[DENOM]], i64 0
|
|
; IEEE-GOODFREXP-NEXT: [[TMP2:%.*]] = extractelement <4 x float> [[DENOM]], i64 1
|
|
; IEEE-GOODFREXP-NEXT: [[TMP3:%.*]] = extractelement <4 x float> [[DENOM]], i64 2
|
|
; IEEE-GOODFREXP-NEXT: [[TMP4:%.*]] = extractelement <4 x float> [[DENOM]], i64 3
|
|
; IEEE-GOODFREXP-NEXT: [[TMP5:%.*]] = extractelement <4 x float> [[ARG]], i64 0
|
|
; IEEE-GOODFREXP-NEXT: [[TMP6:%.*]] = extractelement <4 x float> [[ARG]], i64 1
|
|
; IEEE-GOODFREXP-NEXT: [[TMP7:%.*]] = extractelement <4 x float> [[ARG]], i64 2
|
|
; IEEE-GOODFREXP-NEXT: [[TMP8:%.*]] = extractelement <4 x float> [[ARG]], i64 3
|
|
; IEEE-GOODFREXP-NEXT: [[TMP9:%.*]] = fcmp arcp contract olt float [[TMP5]], 0x3810000000000000
|
|
; IEEE-GOODFREXP-NEXT: [[TMP10:%.*]] = select arcp contract i1 [[TMP9]], float 0x4170000000000000, float 1.000000e+00
|
|
; IEEE-GOODFREXP-NEXT: [[TMP11:%.*]] = fmul arcp contract float [[TMP5]], [[TMP10]]
|
|
; IEEE-GOODFREXP-NEXT: [[TMP12:%.*]] = call arcp contract float @llvm.amdgcn.rsq.f32(float [[TMP11]])
|
|
; IEEE-GOODFREXP-NEXT: [[TMP13:%.*]] = select arcp contract i1 [[TMP9]], float 4.096000e+03, float 1.000000e+00
|
|
; IEEE-GOODFREXP-NEXT: [[TMP14:%.*]] = fmul arcp contract float [[TMP12]], [[TMP13]]
|
|
; IEEE-GOODFREXP-NEXT: [[TMP15:%.*]] = fcmp arcp contract olt float [[TMP6]], 0x3810000000000000
|
|
; IEEE-GOODFREXP-NEXT: [[TMP16:%.*]] = select arcp contract i1 [[TMP15]], float 0x4170000000000000, float 1.000000e+00
|
|
; IEEE-GOODFREXP-NEXT: [[TMP17:%.*]] = fmul arcp contract float [[TMP6]], [[TMP16]]
|
|
; IEEE-GOODFREXP-NEXT: [[TMP18:%.*]] = call arcp contract float @llvm.amdgcn.rsq.f32(float [[TMP17]])
|
|
; IEEE-GOODFREXP-NEXT: [[TMP19:%.*]] = select arcp contract i1 [[TMP15]], float -4.096000e+03, float -1.000000e+00
|
|
; IEEE-GOODFREXP-NEXT: [[TMP20:%.*]] = fmul arcp contract float [[TMP18]], [[TMP19]]
|
|
; IEEE-GOODFREXP-NEXT: [[TMP21:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[TMP3]])
|
|
; IEEE-GOODFREXP-NEXT: [[TMP22:%.*]] = extractvalue { float, i32 } [[TMP21]], 0
|
|
; IEEE-GOODFREXP-NEXT: [[TMP23:%.*]] = extractvalue { float, i32 } [[TMP21]], 1
|
|
; IEEE-GOODFREXP-NEXT: [[TMP24:%.*]] = sub i32 0, [[TMP23]]
|
|
; IEEE-GOODFREXP-NEXT: [[TMP25:%.*]] = call arcp contract float @llvm.amdgcn.rcp.f32(float [[TMP22]])
|
|
; IEEE-GOODFREXP-NEXT: [[TMP26:%.*]] = call arcp contract float @llvm.ldexp.f32.i32(float [[TMP25]], i32 [[TMP24]])
|
|
; IEEE-GOODFREXP-NEXT: [[TMP27:%.*]] = fmul arcp contract float 4.000000e+00, [[TMP26]]
|
|
; IEEE-GOODFREXP-NEXT: [[TMP28:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[TMP4]])
|
|
; IEEE-GOODFREXP-NEXT: [[TMP29:%.*]] = extractvalue { float, i32 } [[TMP28]], 0
|
|
; IEEE-GOODFREXP-NEXT: [[TMP30:%.*]] = extractvalue { float, i32 } [[TMP28]], 1
|
|
; IEEE-GOODFREXP-NEXT: [[TMP31:%.*]] = sub i32 0, [[TMP30]]
|
|
; IEEE-GOODFREXP-NEXT: [[TMP32:%.*]] = call arcp contract float @llvm.amdgcn.rcp.f32(float [[TMP29]])
|
|
; IEEE-GOODFREXP-NEXT: [[TMP33:%.*]] = call arcp contract float @llvm.ldexp.f32.i32(float [[TMP32]], i32 [[TMP31]])
|
|
; IEEE-GOODFREXP-NEXT: [[TMP34:%.*]] = fmul arcp contract float undef, [[TMP33]]
|
|
; IEEE-GOODFREXP-NEXT: [[TMP35:%.*]] = insertelement <4 x float> poison, float [[TMP14]], i64 0
|
|
; IEEE-GOODFREXP-NEXT: [[TMP36:%.*]] = insertelement <4 x float> [[TMP35]], float [[TMP20]], i64 1
|
|
; IEEE-GOODFREXP-NEXT: [[TMP37:%.*]] = insertelement <4 x float> [[TMP36]], float [[TMP27]], i64 2
|
|
; IEEE-GOODFREXP-NEXT: [[PARTIAL_RSQ:%.*]] = insertelement <4 x float> [[TMP37]], float [[TMP34]], i64 3
|
|
; IEEE-GOODFREXP-NEXT: ret <4 x float> [[PARTIAL_RSQ]]
|
|
;
|
|
; IEEE-BADFREXP-LABEL: define <4 x float> @rsq_f32_vector_mixed_constant_numerator_arcp
|
|
; IEEE-BADFREXP-SAME: (<4 x float> [[ARG:%.*]]) #[[ATTR1]] {
|
|
; IEEE-BADFREXP-NEXT: [[DENOM:%.*]] = call contract <4 x float> @llvm.sqrt.v4f32(<4 x float> [[ARG]]), !fpmath !2
|
|
; IEEE-BADFREXP-NEXT: [[TMP1:%.*]] = extractelement <4 x float> [[DENOM]], i64 0
|
|
; IEEE-BADFREXP-NEXT: [[TMP2:%.*]] = extractelement <4 x float> [[DENOM]], i64 1
|
|
; IEEE-BADFREXP-NEXT: [[TMP3:%.*]] = extractelement <4 x float> [[DENOM]], i64 2
|
|
; IEEE-BADFREXP-NEXT: [[TMP4:%.*]] = extractelement <4 x float> [[DENOM]], i64 3
|
|
; IEEE-BADFREXP-NEXT: [[TMP5:%.*]] = extractelement <4 x float> [[ARG]], i64 0
|
|
; IEEE-BADFREXP-NEXT: [[TMP6:%.*]] = extractelement <4 x float> [[ARG]], i64 1
|
|
; IEEE-BADFREXP-NEXT: [[TMP7:%.*]] = extractelement <4 x float> [[ARG]], i64 2
|
|
; IEEE-BADFREXP-NEXT: [[TMP8:%.*]] = extractelement <4 x float> [[ARG]], i64 3
|
|
; IEEE-BADFREXP-NEXT: [[TMP9:%.*]] = fcmp arcp contract olt float [[TMP5]], 0x3810000000000000
|
|
; IEEE-BADFREXP-NEXT: [[TMP10:%.*]] = select arcp contract i1 [[TMP9]], float 0x4170000000000000, float 1.000000e+00
|
|
; IEEE-BADFREXP-NEXT: [[TMP11:%.*]] = fmul arcp contract float [[TMP5]], [[TMP10]]
|
|
; IEEE-BADFREXP-NEXT: [[TMP12:%.*]] = call arcp contract float @llvm.amdgcn.rsq.f32(float [[TMP11]])
|
|
; IEEE-BADFREXP-NEXT: [[TMP13:%.*]] = select arcp contract i1 [[TMP9]], float 4.096000e+03, float 1.000000e+00
|
|
; IEEE-BADFREXP-NEXT: [[TMP14:%.*]] = fmul arcp contract float [[TMP12]], [[TMP13]]
|
|
; IEEE-BADFREXP-NEXT: [[TMP15:%.*]] = fcmp arcp contract olt float [[TMP6]], 0x3810000000000000
|
|
; IEEE-BADFREXP-NEXT: [[TMP16:%.*]] = select arcp contract i1 [[TMP15]], float 0x4170000000000000, float 1.000000e+00
|
|
; IEEE-BADFREXP-NEXT: [[TMP17:%.*]] = fmul arcp contract float [[TMP6]], [[TMP16]]
|
|
; IEEE-BADFREXP-NEXT: [[TMP18:%.*]] = call arcp contract float @llvm.amdgcn.rsq.f32(float [[TMP17]])
|
|
; IEEE-BADFREXP-NEXT: [[TMP19:%.*]] = select arcp contract i1 [[TMP15]], float -4.096000e+03, float -1.000000e+00
|
|
; IEEE-BADFREXP-NEXT: [[TMP20:%.*]] = fmul arcp contract float [[TMP18]], [[TMP19]]
|
|
; IEEE-BADFREXP-NEXT: [[TMP21:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[TMP3]])
|
|
; IEEE-BADFREXP-NEXT: [[TMP22:%.*]] = extractvalue { float, i32 } [[TMP21]], 0
|
|
; IEEE-BADFREXP-NEXT: [[TMP23:%.*]] = call i32 @llvm.amdgcn.frexp.exp.i32.f32(float [[TMP3]])
|
|
; IEEE-BADFREXP-NEXT: [[TMP24:%.*]] = sub i32 0, [[TMP23]]
|
|
; IEEE-BADFREXP-NEXT: [[TMP25:%.*]] = call arcp contract float @llvm.amdgcn.rcp.f32(float [[TMP22]])
|
|
; IEEE-BADFREXP-NEXT: [[TMP26:%.*]] = call arcp contract float @llvm.ldexp.f32.i32(float [[TMP25]], i32 [[TMP24]])
|
|
; IEEE-BADFREXP-NEXT: [[TMP27:%.*]] = fmul arcp contract float 4.000000e+00, [[TMP26]]
|
|
; IEEE-BADFREXP-NEXT: [[TMP28:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[TMP4]])
|
|
; IEEE-BADFREXP-NEXT: [[TMP29:%.*]] = extractvalue { float, i32 } [[TMP28]], 0
|
|
; IEEE-BADFREXP-NEXT: [[TMP30:%.*]] = call i32 @llvm.amdgcn.frexp.exp.i32.f32(float [[TMP4]])
|
|
; IEEE-BADFREXP-NEXT: [[TMP31:%.*]] = sub i32 0, [[TMP30]]
|
|
; IEEE-BADFREXP-NEXT: [[TMP32:%.*]] = call arcp contract float @llvm.amdgcn.rcp.f32(float [[TMP29]])
|
|
; IEEE-BADFREXP-NEXT: [[TMP33:%.*]] = call arcp contract float @llvm.ldexp.f32.i32(float [[TMP32]], i32 [[TMP31]])
|
|
; IEEE-BADFREXP-NEXT: [[TMP34:%.*]] = fmul arcp contract float undef, [[TMP33]]
|
|
; IEEE-BADFREXP-NEXT: [[TMP35:%.*]] = insertelement <4 x float> poison, float [[TMP14]], i64 0
|
|
; IEEE-BADFREXP-NEXT: [[TMP36:%.*]] = insertelement <4 x float> [[TMP35]], float [[TMP20]], i64 1
|
|
; IEEE-BADFREXP-NEXT: [[TMP37:%.*]] = insertelement <4 x float> [[TMP36]], float [[TMP27]], i64 2
|
|
; IEEE-BADFREXP-NEXT: [[PARTIAL_RSQ:%.*]] = insertelement <4 x float> [[TMP37]], float [[TMP34]], i64 3
|
|
; IEEE-BADFREXP-NEXT: ret <4 x float> [[PARTIAL_RSQ]]
|
|
;
|
|
; DAZ-LABEL: define <4 x float> @rsq_f32_vector_mixed_constant_numerator_arcp
|
|
; DAZ-SAME: (<4 x float> [[ARG:%.*]]) #[[ATTR1]] {
|
|
; DAZ-NEXT: [[DENOM:%.*]] = call contract <4 x float> @llvm.sqrt.v4f32(<4 x float> [[ARG]]), !fpmath !2
|
|
; DAZ-NEXT: [[TMP1:%.*]] = extractelement <4 x float> [[DENOM]], i64 0
|
|
; DAZ-NEXT: [[TMP2:%.*]] = extractelement <4 x float> [[DENOM]], i64 1
|
|
; DAZ-NEXT: [[TMP3:%.*]] = extractelement <4 x float> [[DENOM]], i64 2
|
|
; DAZ-NEXT: [[TMP4:%.*]] = extractelement <4 x float> [[DENOM]], i64 3
|
|
; DAZ-NEXT: [[TMP5:%.*]] = extractelement <4 x float> [[ARG]], i64 0
|
|
; DAZ-NEXT: [[TMP6:%.*]] = extractelement <4 x float> [[ARG]], i64 1
|
|
; DAZ-NEXT: [[TMP7:%.*]] = extractelement <4 x float> [[ARG]], i64 2
|
|
; DAZ-NEXT: [[TMP8:%.*]] = extractelement <4 x float> [[ARG]], i64 3
|
|
; DAZ-NEXT: [[TMP9:%.*]] = call arcp contract float @llvm.amdgcn.rsq.f32(float [[TMP5]])
|
|
; DAZ-NEXT: [[TMP10:%.*]] = call arcp contract float @llvm.amdgcn.rsq.f32(float [[TMP6]])
|
|
; DAZ-NEXT: [[TMP11:%.*]] = fneg arcp contract float [[TMP10]]
|
|
; DAZ-NEXT: [[TMP12:%.*]] = call arcp contract float @llvm.amdgcn.rcp.f32(float [[TMP3]])
|
|
; DAZ-NEXT: [[TMP13:%.*]] = fmul arcp contract float 4.000000e+00, [[TMP12]]
|
|
; DAZ-NEXT: [[TMP14:%.*]] = call arcp contract float @llvm.amdgcn.rcp.f32(float [[TMP4]])
|
|
; DAZ-NEXT: [[TMP15:%.*]] = fmul arcp contract float undef, [[TMP14]]
|
|
; DAZ-NEXT: [[TMP16:%.*]] = insertelement <4 x float> poison, float [[TMP9]], i64 0
|
|
; DAZ-NEXT: [[TMP17:%.*]] = insertelement <4 x float> [[TMP16]], float [[TMP11]], i64 1
|
|
; DAZ-NEXT: [[TMP18:%.*]] = insertelement <4 x float> [[TMP17]], float [[TMP13]], i64 2
|
|
; DAZ-NEXT: [[PARTIAL_RSQ:%.*]] = insertelement <4 x float> [[TMP18]], float [[TMP15]], i64 3
|
|
; DAZ-NEXT: ret <4 x float> [[PARTIAL_RSQ]]
|
|
;
|
|
%denom = call contract <4 x float> @llvm.sqrt.v4f32(<4 x float> %arg), !fpmath !2
|
|
%partial.rsq = fdiv contract arcp <4 x float> <float 1.0, float -1.0, float 4.0, float undef>, %denom, !fpmath !2
|
|
ret <4 x float> %partial.rsq
|
|
}
|
|
|
|
define <4 x float> @rsq_f32_vector_mixed_constant_numerator_arcp_correct(<4 x float> %arg) {
|
|
; CHECK-LABEL: define <4 x float> @rsq_f32_vector_mixed_constant_numerator_arcp_correct
|
|
; CHECK-SAME: (<4 x float> [[ARG:%.*]]) #[[ATTR1]] {
|
|
; CHECK-NEXT: [[DENOM:%.*]] = call contract <4 x float> @llvm.sqrt.v4f32(<4 x float> [[ARG]]), !fpmath !2
|
|
; CHECK-NEXT: [[PARTIAL_RSQ:%.*]] = fdiv arcp contract <4 x float> <float 1.000000e+00, float -1.000000e+00, float 4.000000e+00, float undef>, [[DENOM]]
|
|
; CHECK-NEXT: ret <4 x float> [[PARTIAL_RSQ]]
|
|
;
|
|
%denom = call contract <4 x float> @llvm.sqrt.v4f32(<4 x float> %arg), !fpmath !2
|
|
%partial.rsq = fdiv contract arcp <4 x float> <float 1.0, float -1.0, float 4.0, float undef>, %denom
|
|
ret <4 x float> %partial.rsq
|
|
}
|
|
|
|
define <4 x float> @rcp_f32_vector_mixed_constant_numerator_arcp(<4 x float> %arg) {
|
|
; IEEE-GOODFREXP-LABEL: define <4 x float> @rcp_f32_vector_mixed_constant_numerator_arcp
|
|
; IEEE-GOODFREXP-SAME: (<4 x float> [[ARG:%.*]]) #[[ATTR1]] {
|
|
; IEEE-GOODFREXP-NEXT: [[TMP1:%.*]] = extractelement <4 x float> [[ARG]], i64 0
|
|
; IEEE-GOODFREXP-NEXT: [[TMP2:%.*]] = extractelement <4 x float> [[ARG]], i64 1
|
|
; IEEE-GOODFREXP-NEXT: [[TMP3:%.*]] = extractelement <4 x float> [[ARG]], i64 2
|
|
; IEEE-GOODFREXP-NEXT: [[TMP4:%.*]] = extractelement <4 x float> [[ARG]], i64 3
|
|
; IEEE-GOODFREXP-NEXT: [[TMP5:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[TMP1]])
|
|
; IEEE-GOODFREXP-NEXT: [[TMP6:%.*]] = extractvalue { float, i32 } [[TMP5]], 0
|
|
; IEEE-GOODFREXP-NEXT: [[TMP7:%.*]] = extractvalue { float, i32 } [[TMP5]], 1
|
|
; IEEE-GOODFREXP-NEXT: [[TMP8:%.*]] = sub i32 0, [[TMP7]]
|
|
; IEEE-GOODFREXP-NEXT: [[TMP9:%.*]] = call arcp float @llvm.amdgcn.rcp.f32(float [[TMP6]])
|
|
; IEEE-GOODFREXP-NEXT: [[TMP10:%.*]] = call arcp float @llvm.ldexp.f32.i32(float [[TMP9]], i32 [[TMP8]])
|
|
; IEEE-GOODFREXP-NEXT: [[TMP11:%.*]] = fneg arcp float [[TMP2]]
|
|
; IEEE-GOODFREXP-NEXT: [[TMP12:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[TMP11]])
|
|
; IEEE-GOODFREXP-NEXT: [[TMP13:%.*]] = extractvalue { float, i32 } [[TMP12]], 0
|
|
; IEEE-GOODFREXP-NEXT: [[TMP14:%.*]] = extractvalue { float, i32 } [[TMP12]], 1
|
|
; IEEE-GOODFREXP-NEXT: [[TMP15:%.*]] = sub i32 0, [[TMP14]]
|
|
; IEEE-GOODFREXP-NEXT: [[TMP16:%.*]] = call arcp float @llvm.amdgcn.rcp.f32(float [[TMP13]])
|
|
; IEEE-GOODFREXP-NEXT: [[TMP17:%.*]] = call arcp float @llvm.ldexp.f32.i32(float [[TMP16]], i32 [[TMP15]])
|
|
; IEEE-GOODFREXP-NEXT: [[TMP18:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[TMP3]])
|
|
; IEEE-GOODFREXP-NEXT: [[TMP19:%.*]] = extractvalue { float, i32 } [[TMP18]], 0
|
|
; IEEE-GOODFREXP-NEXT: [[TMP20:%.*]] = extractvalue { float, i32 } [[TMP18]], 1
|
|
; IEEE-GOODFREXP-NEXT: [[TMP21:%.*]] = sub i32 0, [[TMP20]]
|
|
; IEEE-GOODFREXP-NEXT: [[TMP22:%.*]] = call arcp float @llvm.amdgcn.rcp.f32(float [[TMP19]])
|
|
; IEEE-GOODFREXP-NEXT: [[TMP23:%.*]] = call arcp float @llvm.ldexp.f32.i32(float [[TMP22]], i32 [[TMP21]])
|
|
; IEEE-GOODFREXP-NEXT: [[TMP24:%.*]] = fmul arcp float 4.000000e+00, [[TMP23]]
|
|
; IEEE-GOODFREXP-NEXT: [[TMP25:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[TMP4]])
|
|
; IEEE-GOODFREXP-NEXT: [[TMP26:%.*]] = extractvalue { float, i32 } [[TMP25]], 0
|
|
; IEEE-GOODFREXP-NEXT: [[TMP27:%.*]] = extractvalue { float, i32 } [[TMP25]], 1
|
|
; IEEE-GOODFREXP-NEXT: [[TMP28:%.*]] = sub i32 0, [[TMP27]]
|
|
; IEEE-GOODFREXP-NEXT: [[TMP29:%.*]] = call arcp float @llvm.amdgcn.rcp.f32(float [[TMP26]])
|
|
; IEEE-GOODFREXP-NEXT: [[TMP30:%.*]] = call arcp float @llvm.ldexp.f32.i32(float [[TMP29]], i32 [[TMP28]])
|
|
; IEEE-GOODFREXP-NEXT: [[TMP31:%.*]] = fmul arcp float undef, [[TMP30]]
|
|
; IEEE-GOODFREXP-NEXT: [[TMP32:%.*]] = insertelement <4 x float> poison, float [[TMP10]], i64 0
|
|
; IEEE-GOODFREXP-NEXT: [[TMP33:%.*]] = insertelement <4 x float> [[TMP32]], float [[TMP17]], i64 1
|
|
; IEEE-GOODFREXP-NEXT: [[TMP34:%.*]] = insertelement <4 x float> [[TMP33]], float [[TMP24]], i64 2
|
|
; IEEE-GOODFREXP-NEXT: [[PARTIAL_RCP:%.*]] = insertelement <4 x float> [[TMP34]], float [[TMP31]], i64 3
|
|
; IEEE-GOODFREXP-NEXT: ret <4 x float> [[PARTIAL_RCP]]
|
|
;
|
|
; IEEE-BADFREXP-LABEL: define <4 x float> @rcp_f32_vector_mixed_constant_numerator_arcp
|
|
; IEEE-BADFREXP-SAME: (<4 x float> [[ARG:%.*]]) #[[ATTR1]] {
|
|
; IEEE-BADFREXP-NEXT: [[TMP1:%.*]] = extractelement <4 x float> [[ARG]], i64 0
|
|
; IEEE-BADFREXP-NEXT: [[TMP2:%.*]] = extractelement <4 x float> [[ARG]], i64 1
|
|
; IEEE-BADFREXP-NEXT: [[TMP3:%.*]] = extractelement <4 x float> [[ARG]], i64 2
|
|
; IEEE-BADFREXP-NEXT: [[TMP4:%.*]] = extractelement <4 x float> [[ARG]], i64 3
|
|
; IEEE-BADFREXP-NEXT: [[TMP5:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[TMP1]])
|
|
; IEEE-BADFREXP-NEXT: [[TMP6:%.*]] = extractvalue { float, i32 } [[TMP5]], 0
|
|
; IEEE-BADFREXP-NEXT: [[TMP7:%.*]] = call i32 @llvm.amdgcn.frexp.exp.i32.f32(float [[TMP1]])
|
|
; IEEE-BADFREXP-NEXT: [[TMP8:%.*]] = sub i32 0, [[TMP7]]
|
|
; IEEE-BADFREXP-NEXT: [[TMP9:%.*]] = call arcp float @llvm.amdgcn.rcp.f32(float [[TMP6]])
|
|
; IEEE-BADFREXP-NEXT: [[TMP10:%.*]] = call arcp float @llvm.ldexp.f32.i32(float [[TMP9]], i32 [[TMP8]])
|
|
; IEEE-BADFREXP-NEXT: [[TMP11:%.*]] = fneg arcp float [[TMP2]]
|
|
; IEEE-BADFREXP-NEXT: [[TMP12:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[TMP11]])
|
|
; IEEE-BADFREXP-NEXT: [[TMP13:%.*]] = extractvalue { float, i32 } [[TMP12]], 0
|
|
; IEEE-BADFREXP-NEXT: [[TMP14:%.*]] = call i32 @llvm.amdgcn.frexp.exp.i32.f32(float [[TMP11]])
|
|
; IEEE-BADFREXP-NEXT: [[TMP15:%.*]] = sub i32 0, [[TMP14]]
|
|
; IEEE-BADFREXP-NEXT: [[TMP16:%.*]] = call arcp float @llvm.amdgcn.rcp.f32(float [[TMP13]])
|
|
; IEEE-BADFREXP-NEXT: [[TMP17:%.*]] = call arcp float @llvm.ldexp.f32.i32(float [[TMP16]], i32 [[TMP15]])
|
|
; IEEE-BADFREXP-NEXT: [[TMP18:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[TMP3]])
|
|
; IEEE-BADFREXP-NEXT: [[TMP19:%.*]] = extractvalue { float, i32 } [[TMP18]], 0
|
|
; IEEE-BADFREXP-NEXT: [[TMP20:%.*]] = call i32 @llvm.amdgcn.frexp.exp.i32.f32(float [[TMP3]])
|
|
; IEEE-BADFREXP-NEXT: [[TMP21:%.*]] = sub i32 0, [[TMP20]]
|
|
; IEEE-BADFREXP-NEXT: [[TMP22:%.*]] = call arcp float @llvm.amdgcn.rcp.f32(float [[TMP19]])
|
|
; IEEE-BADFREXP-NEXT: [[TMP23:%.*]] = call arcp float @llvm.ldexp.f32.i32(float [[TMP22]], i32 [[TMP21]])
|
|
; IEEE-BADFREXP-NEXT: [[TMP24:%.*]] = fmul arcp float 4.000000e+00, [[TMP23]]
|
|
; IEEE-BADFREXP-NEXT: [[TMP25:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[TMP4]])
|
|
; IEEE-BADFREXP-NEXT: [[TMP26:%.*]] = extractvalue { float, i32 } [[TMP25]], 0
|
|
; IEEE-BADFREXP-NEXT: [[TMP27:%.*]] = call i32 @llvm.amdgcn.frexp.exp.i32.f32(float [[TMP4]])
|
|
; IEEE-BADFREXP-NEXT: [[TMP28:%.*]] = sub i32 0, [[TMP27]]
|
|
; IEEE-BADFREXP-NEXT: [[TMP29:%.*]] = call arcp float @llvm.amdgcn.rcp.f32(float [[TMP26]])
|
|
; IEEE-BADFREXP-NEXT: [[TMP30:%.*]] = call arcp float @llvm.ldexp.f32.i32(float [[TMP29]], i32 [[TMP28]])
|
|
; IEEE-BADFREXP-NEXT: [[TMP31:%.*]] = fmul arcp float undef, [[TMP30]]
|
|
; IEEE-BADFREXP-NEXT: [[TMP32:%.*]] = insertelement <4 x float> poison, float [[TMP10]], i64 0
|
|
; IEEE-BADFREXP-NEXT: [[TMP33:%.*]] = insertelement <4 x float> [[TMP32]], float [[TMP17]], i64 1
|
|
; IEEE-BADFREXP-NEXT: [[TMP34:%.*]] = insertelement <4 x float> [[TMP33]], float [[TMP24]], i64 2
|
|
; IEEE-BADFREXP-NEXT: [[PARTIAL_RCP:%.*]] = insertelement <4 x float> [[TMP34]], float [[TMP31]], i64 3
|
|
; IEEE-BADFREXP-NEXT: ret <4 x float> [[PARTIAL_RCP]]
|
|
;
|
|
; DAZ-LABEL: define <4 x float> @rcp_f32_vector_mixed_constant_numerator_arcp
|
|
; DAZ-SAME: (<4 x float> [[ARG:%.*]]) #[[ATTR1]] {
|
|
; DAZ-NEXT: [[TMP1:%.*]] = extractelement <4 x float> [[ARG]], i64 0
|
|
; DAZ-NEXT: [[TMP2:%.*]] = extractelement <4 x float> [[ARG]], i64 1
|
|
; DAZ-NEXT: [[TMP3:%.*]] = extractelement <4 x float> [[ARG]], i64 2
|
|
; DAZ-NEXT: [[TMP4:%.*]] = extractelement <4 x float> [[ARG]], i64 3
|
|
; DAZ-NEXT: [[TMP5:%.*]] = call arcp float @llvm.amdgcn.rcp.f32(float [[TMP1]])
|
|
; DAZ-NEXT: [[TMP6:%.*]] = fneg arcp float [[TMP2]]
|
|
; DAZ-NEXT: [[TMP7:%.*]] = call arcp float @llvm.amdgcn.rcp.f32(float [[TMP6]])
|
|
; DAZ-NEXT: [[TMP8:%.*]] = call arcp float @llvm.amdgcn.rcp.f32(float [[TMP3]])
|
|
; DAZ-NEXT: [[TMP9:%.*]] = fmul arcp float 4.000000e+00, [[TMP8]]
|
|
; DAZ-NEXT: [[TMP10:%.*]] = call arcp float @llvm.amdgcn.rcp.f32(float [[TMP4]])
|
|
; DAZ-NEXT: [[TMP11:%.*]] = fmul arcp float undef, [[TMP10]]
|
|
; DAZ-NEXT: [[TMP12:%.*]] = insertelement <4 x float> poison, float [[TMP5]], i64 0
|
|
; DAZ-NEXT: [[TMP13:%.*]] = insertelement <4 x float> [[TMP12]], float [[TMP7]], i64 1
|
|
; DAZ-NEXT: [[TMP14:%.*]] = insertelement <4 x float> [[TMP13]], float [[TMP9]], i64 2
|
|
; DAZ-NEXT: [[PARTIAL_RCP:%.*]] = insertelement <4 x float> [[TMP14]], float [[TMP11]], i64 3
|
|
; DAZ-NEXT: ret <4 x float> [[PARTIAL_RCP]]
|
|
;
|
|
%partial.rcp = fdiv arcp <4 x float> <float 1.0, float -1.0, float 4.0, float undef>, %arg, !fpmath !2
|
|
ret <4 x float> %partial.rcp
|
|
}
|
|
|
|
define <4 x float> @rcp_f32_vector_mixed_constant_numerator_arcp_correct(<4 x float> %arg) {
|
|
; CHECK-LABEL: define <4 x float> @rcp_f32_vector_mixed_constant_numerator_arcp_correct
|
|
; CHECK-SAME: (<4 x float> [[ARG:%.*]]) #[[ATTR1]] {
|
|
; CHECK-NEXT: [[PARTIAL_RCP:%.*]] = fdiv arcp <4 x float> <float 1.000000e+00, float -1.000000e+00, float 4.000000e+00, float undef>, [[ARG]]
|
|
; CHECK-NEXT: ret <4 x float> [[PARTIAL_RCP]]
|
|
;
|
|
%partial.rcp = fdiv arcp <4 x float> <float 1.0, float -1.0, float 4.0, float undef>, %arg
|
|
ret <4 x float> %partial.rcp
|
|
}
|
|
|
|
; Make sure we don't crash if a vector square root has a constant vecctor input
|
|
define <4 x float> @rsq_f32_vector_const_denom(ptr addrspace(1) %out, <2 x float> %x) {
|
|
; IEEE-GOODFREXP-LABEL: define <4 x float> @rsq_f32_vector_const_denom
|
|
; IEEE-GOODFREXP-SAME: (ptr addrspace(1) [[OUT:%.*]], <2 x float> [[X:%.*]]) #[[ATTR1]] {
|
|
; IEEE-GOODFREXP-NEXT: [[SQRT:%.*]] = call contract <4 x float> @llvm.sqrt.v4f32(<4 x float> <float 4.000000e+00, float 2.000000e+00, float 8.000000e+00, float undef>), !fpmath !2
|
|
; IEEE-GOODFREXP-NEXT: [[TMP1:%.*]] = extractelement <4 x float> [[SQRT]], i64 0
|
|
; IEEE-GOODFREXP-NEXT: [[TMP2:%.*]] = extractelement <4 x float> [[SQRT]], i64 1
|
|
; IEEE-GOODFREXP-NEXT: [[TMP3:%.*]] = extractelement <4 x float> [[SQRT]], i64 2
|
|
; IEEE-GOODFREXP-NEXT: [[TMP4:%.*]] = extractelement <4 x float> [[SQRT]], i64 3
|
|
; IEEE-GOODFREXP-NEXT: [[TMP5:%.*]] = call contract float @llvm.amdgcn.rsq.f32(float 4.000000e+00)
|
|
; IEEE-GOODFREXP-NEXT: [[TMP6:%.*]] = call contract float @llvm.amdgcn.rsq.f32(float 2.000000e+00)
|
|
; IEEE-GOODFREXP-NEXT: [[TMP7:%.*]] = fneg contract float [[TMP6]]
|
|
; IEEE-GOODFREXP-NEXT: [[TMP8:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[TMP3]])
|
|
; IEEE-GOODFREXP-NEXT: [[TMP9:%.*]] = extractvalue { float, i32 } [[TMP8]], 0
|
|
; IEEE-GOODFREXP-NEXT: [[TMP10:%.*]] = extractvalue { float, i32 } [[TMP8]], 1
|
|
; IEEE-GOODFREXP-NEXT: [[TMP11:%.*]] = call contract float @llvm.amdgcn.rcp.f32(float [[TMP9]])
|
|
; IEEE-GOODFREXP-NEXT: [[TMP12:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float undef)
|
|
; IEEE-GOODFREXP-NEXT: [[TMP13:%.*]] = extractvalue { float, i32 } [[TMP12]], 0
|
|
; IEEE-GOODFREXP-NEXT: [[TMP14:%.*]] = extractvalue { float, i32 } [[TMP12]], 1
|
|
; IEEE-GOODFREXP-NEXT: [[TMP15:%.*]] = fmul contract float [[TMP13]], [[TMP11]]
|
|
; IEEE-GOODFREXP-NEXT: [[TMP16:%.*]] = sub i32 [[TMP14]], [[TMP10]]
|
|
; IEEE-GOODFREXP-NEXT: [[TMP17:%.*]] = call contract float @llvm.ldexp.f32.i32(float [[TMP15]], i32 [[TMP16]])
|
|
; IEEE-GOODFREXP-NEXT: [[TMP18:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[TMP4]])
|
|
; IEEE-GOODFREXP-NEXT: [[TMP19:%.*]] = extractvalue { float, i32 } [[TMP18]], 0
|
|
; IEEE-GOODFREXP-NEXT: [[TMP20:%.*]] = extractvalue { float, i32 } [[TMP18]], 1
|
|
; IEEE-GOODFREXP-NEXT: [[TMP21:%.*]] = call contract float @llvm.amdgcn.rcp.f32(float [[TMP19]])
|
|
; IEEE-GOODFREXP-NEXT: [[TMP22:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float 2.000000e+00)
|
|
; IEEE-GOODFREXP-NEXT: [[TMP23:%.*]] = extractvalue { float, i32 } [[TMP22]], 0
|
|
; IEEE-GOODFREXP-NEXT: [[TMP24:%.*]] = extractvalue { float, i32 } [[TMP22]], 1
|
|
; IEEE-GOODFREXP-NEXT: [[TMP25:%.*]] = fmul contract float [[TMP23]], [[TMP21]]
|
|
; IEEE-GOODFREXP-NEXT: [[TMP26:%.*]] = sub i32 [[TMP24]], [[TMP20]]
|
|
; IEEE-GOODFREXP-NEXT: [[TMP27:%.*]] = call contract float @llvm.ldexp.f32.i32(float [[TMP25]], i32 [[TMP26]])
|
|
; IEEE-GOODFREXP-NEXT: [[TMP28:%.*]] = insertelement <4 x float> poison, float [[TMP5]], i64 0
|
|
; IEEE-GOODFREXP-NEXT: [[TMP29:%.*]] = insertelement <4 x float> [[TMP28]], float [[TMP7]], i64 1
|
|
; IEEE-GOODFREXP-NEXT: [[TMP30:%.*]] = insertelement <4 x float> [[TMP29]], float [[TMP17]], i64 2
|
|
; IEEE-GOODFREXP-NEXT: [[PARTIAL_RSQ:%.*]] = insertelement <4 x float> [[TMP30]], float [[TMP27]], i64 3
|
|
; IEEE-GOODFREXP-NEXT: ret <4 x float> [[PARTIAL_RSQ]]
|
|
;
|
|
; IEEE-BADFREXP-LABEL: define <4 x float> @rsq_f32_vector_const_denom
|
|
; IEEE-BADFREXP-SAME: (ptr addrspace(1) [[OUT:%.*]], <2 x float> [[X:%.*]]) #[[ATTR1]] {
|
|
; IEEE-BADFREXP-NEXT: [[SQRT:%.*]] = call contract <4 x float> @llvm.sqrt.v4f32(<4 x float> <float 4.000000e+00, float 2.000000e+00, float 8.000000e+00, float undef>), !fpmath !2
|
|
; IEEE-BADFREXP-NEXT: [[TMP1:%.*]] = extractelement <4 x float> [[SQRT]], i64 0
|
|
; IEEE-BADFREXP-NEXT: [[TMP2:%.*]] = extractelement <4 x float> [[SQRT]], i64 1
|
|
; IEEE-BADFREXP-NEXT: [[TMP3:%.*]] = extractelement <4 x float> [[SQRT]], i64 2
|
|
; IEEE-BADFREXP-NEXT: [[TMP4:%.*]] = extractelement <4 x float> [[SQRT]], i64 3
|
|
; IEEE-BADFREXP-NEXT: [[TMP5:%.*]] = call contract float @llvm.amdgcn.rsq.f32(float 4.000000e+00)
|
|
; IEEE-BADFREXP-NEXT: [[TMP6:%.*]] = call contract float @llvm.amdgcn.rsq.f32(float 2.000000e+00)
|
|
; IEEE-BADFREXP-NEXT: [[TMP7:%.*]] = fneg contract float [[TMP6]]
|
|
; IEEE-BADFREXP-NEXT: [[TMP8:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[TMP3]])
|
|
; IEEE-BADFREXP-NEXT: [[TMP9:%.*]] = extractvalue { float, i32 } [[TMP8]], 0
|
|
; IEEE-BADFREXP-NEXT: [[TMP10:%.*]] = call i32 @llvm.amdgcn.frexp.exp.i32.f32(float [[TMP3]])
|
|
; IEEE-BADFREXP-NEXT: [[TMP11:%.*]] = call contract float @llvm.amdgcn.rcp.f32(float [[TMP9]])
|
|
; IEEE-BADFREXP-NEXT: [[TMP12:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float undef)
|
|
; IEEE-BADFREXP-NEXT: [[TMP13:%.*]] = extractvalue { float, i32 } [[TMP12]], 0
|
|
; IEEE-BADFREXP-NEXT: [[TMP14:%.*]] = call i32 @llvm.amdgcn.frexp.exp.i32.f32(float undef)
|
|
; IEEE-BADFREXP-NEXT: [[TMP15:%.*]] = fmul contract float [[TMP13]], [[TMP11]]
|
|
; IEEE-BADFREXP-NEXT: [[TMP16:%.*]] = sub i32 [[TMP14]], [[TMP10]]
|
|
; IEEE-BADFREXP-NEXT: [[TMP17:%.*]] = call contract float @llvm.ldexp.f32.i32(float [[TMP15]], i32 [[TMP16]])
|
|
; IEEE-BADFREXP-NEXT: [[TMP18:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[TMP4]])
|
|
; IEEE-BADFREXP-NEXT: [[TMP19:%.*]] = extractvalue { float, i32 } [[TMP18]], 0
|
|
; IEEE-BADFREXP-NEXT: [[TMP20:%.*]] = call i32 @llvm.amdgcn.frexp.exp.i32.f32(float [[TMP4]])
|
|
; IEEE-BADFREXP-NEXT: [[TMP21:%.*]] = call contract float @llvm.amdgcn.rcp.f32(float [[TMP19]])
|
|
; IEEE-BADFREXP-NEXT: [[TMP22:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float 2.000000e+00)
|
|
; IEEE-BADFREXP-NEXT: [[TMP23:%.*]] = extractvalue { float, i32 } [[TMP22]], 0
|
|
; IEEE-BADFREXP-NEXT: [[TMP24:%.*]] = call i32 @llvm.amdgcn.frexp.exp.i32.f32(float 2.000000e+00)
|
|
; IEEE-BADFREXP-NEXT: [[TMP25:%.*]] = fmul contract float [[TMP23]], [[TMP21]]
|
|
; IEEE-BADFREXP-NEXT: [[TMP26:%.*]] = sub i32 [[TMP24]], [[TMP20]]
|
|
; IEEE-BADFREXP-NEXT: [[TMP27:%.*]] = call contract float @llvm.ldexp.f32.i32(float [[TMP25]], i32 [[TMP26]])
|
|
; IEEE-BADFREXP-NEXT: [[TMP28:%.*]] = insertelement <4 x float> poison, float [[TMP5]], i64 0
|
|
; IEEE-BADFREXP-NEXT: [[TMP29:%.*]] = insertelement <4 x float> [[TMP28]], float [[TMP7]], i64 1
|
|
; IEEE-BADFREXP-NEXT: [[TMP30:%.*]] = insertelement <4 x float> [[TMP29]], float [[TMP17]], i64 2
|
|
; IEEE-BADFREXP-NEXT: [[PARTIAL_RSQ:%.*]] = insertelement <4 x float> [[TMP30]], float [[TMP27]], i64 3
|
|
; IEEE-BADFREXP-NEXT: ret <4 x float> [[PARTIAL_RSQ]]
|
|
;
|
|
; DAZ-LABEL: define <4 x float> @rsq_f32_vector_const_denom
|
|
; DAZ-SAME: (ptr addrspace(1) [[OUT:%.*]], <2 x float> [[X:%.*]]) #[[ATTR1]] {
|
|
; DAZ-NEXT: [[SQRT:%.*]] = call contract <4 x float> @llvm.sqrt.v4f32(<4 x float> <float 4.000000e+00, float 2.000000e+00, float 8.000000e+00, float undef>), !fpmath !2
|
|
; DAZ-NEXT: [[TMP1:%.*]] = extractelement <4 x float> [[SQRT]], i64 0
|
|
; DAZ-NEXT: [[TMP2:%.*]] = extractelement <4 x float> [[SQRT]], i64 1
|
|
; DAZ-NEXT: [[TMP3:%.*]] = extractelement <4 x float> [[SQRT]], i64 2
|
|
; DAZ-NEXT: [[TMP4:%.*]] = extractelement <4 x float> [[SQRT]], i64 3
|
|
; DAZ-NEXT: [[TMP5:%.*]] = call contract float @llvm.amdgcn.rsq.f32(float 4.000000e+00)
|
|
; DAZ-NEXT: [[TMP6:%.*]] = call contract float @llvm.amdgcn.rsq.f32(float 2.000000e+00)
|
|
; DAZ-NEXT: [[TMP7:%.*]] = fneg contract float [[TMP6]]
|
|
; DAZ-NEXT: [[TMP8:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[TMP3]])
|
|
; DAZ-NEXT: [[TMP9:%.*]] = extractvalue { float, i32 } [[TMP8]], 0
|
|
; DAZ-NEXT: [[TMP10:%.*]] = extractvalue { float, i32 } [[TMP8]], 1
|
|
; DAZ-NEXT: [[TMP11:%.*]] = call contract float @llvm.amdgcn.rcp.f32(float [[TMP9]])
|
|
; DAZ-NEXT: [[TMP12:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float undef)
|
|
; DAZ-NEXT: [[TMP13:%.*]] = extractvalue { float, i32 } [[TMP12]], 0
|
|
; DAZ-NEXT: [[TMP14:%.*]] = extractvalue { float, i32 } [[TMP12]], 1
|
|
; DAZ-NEXT: [[TMP15:%.*]] = fmul contract float [[TMP13]], [[TMP11]]
|
|
; DAZ-NEXT: [[TMP16:%.*]] = sub i32 [[TMP14]], [[TMP10]]
|
|
; DAZ-NEXT: [[TMP17:%.*]] = call contract float @llvm.ldexp.f32.i32(float [[TMP15]], i32 [[TMP16]])
|
|
; DAZ-NEXT: [[TMP18:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[TMP4]])
|
|
; DAZ-NEXT: [[TMP19:%.*]] = extractvalue { float, i32 } [[TMP18]], 0
|
|
; DAZ-NEXT: [[TMP20:%.*]] = extractvalue { float, i32 } [[TMP18]], 1
|
|
; DAZ-NEXT: [[TMP21:%.*]] = call contract float @llvm.amdgcn.rcp.f32(float [[TMP19]])
|
|
; DAZ-NEXT: [[TMP22:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float 2.000000e+00)
|
|
; DAZ-NEXT: [[TMP23:%.*]] = extractvalue { float, i32 } [[TMP22]], 0
|
|
; DAZ-NEXT: [[TMP24:%.*]] = extractvalue { float, i32 } [[TMP22]], 1
|
|
; DAZ-NEXT: [[TMP25:%.*]] = fmul contract float [[TMP23]], [[TMP21]]
|
|
; DAZ-NEXT: [[TMP26:%.*]] = sub i32 [[TMP24]], [[TMP20]]
|
|
; DAZ-NEXT: [[TMP27:%.*]] = call contract float @llvm.ldexp.f32.i32(float [[TMP25]], i32 [[TMP26]])
|
|
; DAZ-NEXT: [[TMP28:%.*]] = insertelement <4 x float> poison, float [[TMP5]], i64 0
|
|
; DAZ-NEXT: [[TMP29:%.*]] = insertelement <4 x float> [[TMP28]], float [[TMP7]], i64 1
|
|
; DAZ-NEXT: [[TMP30:%.*]] = insertelement <4 x float> [[TMP29]], float [[TMP17]], i64 2
|
|
; DAZ-NEXT: [[PARTIAL_RSQ:%.*]] = insertelement <4 x float> [[TMP30]], float [[TMP27]], i64 3
|
|
; DAZ-NEXT: ret <4 x float> [[PARTIAL_RSQ]]
|
|
;
|
|
%sqrt = call contract <4 x float> @llvm.sqrt.v4f32(<4 x float> <float 4.0, float 2.0, float 8.0, float undef>), !fpmath !2
|
|
%partial.rsq = fdiv contract <4 x float> <float 1.0, float -1.0, float undef, float 2.0>, %sqrt, !fpmath !2
|
|
ret <4 x float> %partial.rsq
|
|
}
|
|
|
|
define <4 x float> @fdiv_constant_f32_vector(ptr addrspace(1) %out, <2 x float> %x) {
|
|
; IEEE-GOODFREXP-LABEL: define <4 x float> @fdiv_constant_f32_vector
|
|
; IEEE-GOODFREXP-SAME: (ptr addrspace(1) [[OUT:%.*]], <2 x float> [[X:%.*]]) #[[ATTR1]] {
|
|
; IEEE-GOODFREXP-NEXT: [[TMP1:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float 5.000000e-01)
|
|
; IEEE-GOODFREXP-NEXT: [[TMP2:%.*]] = extractvalue { float, i32 } [[TMP1]], 0
|
|
; IEEE-GOODFREXP-NEXT: [[TMP3:%.*]] = extractvalue { float, i32 } [[TMP1]], 1
|
|
; IEEE-GOODFREXP-NEXT: [[TMP4:%.*]] = sub i32 0, [[TMP3]]
|
|
; IEEE-GOODFREXP-NEXT: [[TMP5:%.*]] = call float @llvm.amdgcn.rcp.f32(float [[TMP2]])
|
|
; IEEE-GOODFREXP-NEXT: [[TMP6:%.*]] = call float @llvm.ldexp.f32.i32(float [[TMP5]], i32 [[TMP4]])
|
|
; IEEE-GOODFREXP-NEXT: [[TMP7:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float -2.000000e+00)
|
|
; IEEE-GOODFREXP-NEXT: [[TMP8:%.*]] = extractvalue { float, i32 } [[TMP7]], 0
|
|
; IEEE-GOODFREXP-NEXT: [[TMP9:%.*]] = extractvalue { float, i32 } [[TMP7]], 1
|
|
; IEEE-GOODFREXP-NEXT: [[TMP10:%.*]] = sub i32 0, [[TMP9]]
|
|
; IEEE-GOODFREXP-NEXT: [[TMP11:%.*]] = call float @llvm.amdgcn.rcp.f32(float [[TMP8]])
|
|
; IEEE-GOODFREXP-NEXT: [[TMP12:%.*]] = call float @llvm.ldexp.f32.i32(float [[TMP11]], i32 [[TMP10]])
|
|
; IEEE-GOODFREXP-NEXT: [[TMP13:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float 3.200000e+01)
|
|
; IEEE-GOODFREXP-NEXT: [[TMP14:%.*]] = extractvalue { float, i32 } [[TMP13]], 0
|
|
; IEEE-GOODFREXP-NEXT: [[TMP15:%.*]] = extractvalue { float, i32 } [[TMP13]], 1
|
|
; IEEE-GOODFREXP-NEXT: [[TMP16:%.*]] = call float @llvm.amdgcn.rcp.f32(float [[TMP14]])
|
|
; IEEE-GOODFREXP-NEXT: [[TMP17:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float undef)
|
|
; IEEE-GOODFREXP-NEXT: [[TMP18:%.*]] = extractvalue { float, i32 } [[TMP17]], 0
|
|
; IEEE-GOODFREXP-NEXT: [[TMP19:%.*]] = extractvalue { float, i32 } [[TMP17]], 1
|
|
; IEEE-GOODFREXP-NEXT: [[TMP20:%.*]] = fmul float [[TMP18]], [[TMP16]]
|
|
; IEEE-GOODFREXP-NEXT: [[TMP21:%.*]] = sub i32 [[TMP19]], [[TMP15]]
|
|
; IEEE-GOODFREXP-NEXT: [[TMP22:%.*]] = call float @llvm.ldexp.f32.i32(float [[TMP20]], i32 [[TMP21]])
|
|
; IEEE-GOODFREXP-NEXT: [[TMP23:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float 1.000000e+01)
|
|
; IEEE-GOODFREXP-NEXT: [[TMP24:%.*]] = extractvalue { float, i32 } [[TMP23]], 0
|
|
; IEEE-GOODFREXP-NEXT: [[TMP25:%.*]] = extractvalue { float, i32 } [[TMP23]], 1
|
|
; IEEE-GOODFREXP-NEXT: [[TMP26:%.*]] = call float @llvm.amdgcn.rcp.f32(float [[TMP24]])
|
|
; IEEE-GOODFREXP-NEXT: [[TMP27:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float 2.000000e+00)
|
|
; IEEE-GOODFREXP-NEXT: [[TMP28:%.*]] = extractvalue { float, i32 } [[TMP27]], 0
|
|
; IEEE-GOODFREXP-NEXT: [[TMP29:%.*]] = extractvalue { float, i32 } [[TMP27]], 1
|
|
; IEEE-GOODFREXP-NEXT: [[TMP30:%.*]] = fmul float [[TMP28]], [[TMP26]]
|
|
; IEEE-GOODFREXP-NEXT: [[TMP31:%.*]] = sub i32 [[TMP29]], [[TMP25]]
|
|
; IEEE-GOODFREXP-NEXT: [[TMP32:%.*]] = call float @llvm.ldexp.f32.i32(float [[TMP30]], i32 [[TMP31]])
|
|
; IEEE-GOODFREXP-NEXT: [[TMP33:%.*]] = insertelement <4 x float> poison, float [[TMP6]], i64 0
|
|
; IEEE-GOODFREXP-NEXT: [[TMP34:%.*]] = insertelement <4 x float> [[TMP33]], float [[TMP12]], i64 1
|
|
; IEEE-GOODFREXP-NEXT: [[TMP35:%.*]] = insertelement <4 x float> [[TMP34]], float [[TMP22]], i64 2
|
|
; IEEE-GOODFREXP-NEXT: [[CONST_PARTIAL_RCP:%.*]] = insertelement <4 x float> [[TMP35]], float [[TMP32]], i64 3
|
|
; IEEE-GOODFREXP-NEXT: ret <4 x float> [[CONST_PARTIAL_RCP]]
|
|
;
|
|
; IEEE-BADFREXP-LABEL: define <4 x float> @fdiv_constant_f32_vector
|
|
; IEEE-BADFREXP-SAME: (ptr addrspace(1) [[OUT:%.*]], <2 x float> [[X:%.*]]) #[[ATTR1]] {
|
|
; IEEE-BADFREXP-NEXT: [[TMP1:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float 5.000000e-01)
|
|
; IEEE-BADFREXP-NEXT: [[TMP2:%.*]] = extractvalue { float, i32 } [[TMP1]], 0
|
|
; IEEE-BADFREXP-NEXT: [[TMP3:%.*]] = call i32 @llvm.amdgcn.frexp.exp.i32.f32(float 5.000000e-01)
|
|
; IEEE-BADFREXP-NEXT: [[TMP4:%.*]] = sub i32 0, [[TMP3]]
|
|
; IEEE-BADFREXP-NEXT: [[TMP5:%.*]] = call float @llvm.amdgcn.rcp.f32(float [[TMP2]])
|
|
; IEEE-BADFREXP-NEXT: [[TMP6:%.*]] = call float @llvm.ldexp.f32.i32(float [[TMP5]], i32 [[TMP4]])
|
|
; IEEE-BADFREXP-NEXT: [[TMP7:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float -2.000000e+00)
|
|
; IEEE-BADFREXP-NEXT: [[TMP8:%.*]] = extractvalue { float, i32 } [[TMP7]], 0
|
|
; IEEE-BADFREXP-NEXT: [[TMP9:%.*]] = call i32 @llvm.amdgcn.frexp.exp.i32.f32(float -2.000000e+00)
|
|
; IEEE-BADFREXP-NEXT: [[TMP10:%.*]] = sub i32 0, [[TMP9]]
|
|
; IEEE-BADFREXP-NEXT: [[TMP11:%.*]] = call float @llvm.amdgcn.rcp.f32(float [[TMP8]])
|
|
; IEEE-BADFREXP-NEXT: [[TMP12:%.*]] = call float @llvm.ldexp.f32.i32(float [[TMP11]], i32 [[TMP10]])
|
|
; IEEE-BADFREXP-NEXT: [[TMP13:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float 3.200000e+01)
|
|
; IEEE-BADFREXP-NEXT: [[TMP14:%.*]] = extractvalue { float, i32 } [[TMP13]], 0
|
|
; IEEE-BADFREXP-NEXT: [[TMP15:%.*]] = call i32 @llvm.amdgcn.frexp.exp.i32.f32(float 3.200000e+01)
|
|
; IEEE-BADFREXP-NEXT: [[TMP16:%.*]] = call float @llvm.amdgcn.rcp.f32(float [[TMP14]])
|
|
; IEEE-BADFREXP-NEXT: [[TMP17:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float undef)
|
|
; IEEE-BADFREXP-NEXT: [[TMP18:%.*]] = extractvalue { float, i32 } [[TMP17]], 0
|
|
; IEEE-BADFREXP-NEXT: [[TMP19:%.*]] = call i32 @llvm.amdgcn.frexp.exp.i32.f32(float undef)
|
|
; IEEE-BADFREXP-NEXT: [[TMP20:%.*]] = fmul float [[TMP18]], [[TMP16]]
|
|
; IEEE-BADFREXP-NEXT: [[TMP21:%.*]] = sub i32 [[TMP19]], [[TMP15]]
|
|
; IEEE-BADFREXP-NEXT: [[TMP22:%.*]] = call float @llvm.ldexp.f32.i32(float [[TMP20]], i32 [[TMP21]])
|
|
; IEEE-BADFREXP-NEXT: [[TMP23:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float 1.000000e+01)
|
|
; IEEE-BADFREXP-NEXT: [[TMP24:%.*]] = extractvalue { float, i32 } [[TMP23]], 0
|
|
; IEEE-BADFREXP-NEXT: [[TMP25:%.*]] = call i32 @llvm.amdgcn.frexp.exp.i32.f32(float 1.000000e+01)
|
|
; IEEE-BADFREXP-NEXT: [[TMP26:%.*]] = call float @llvm.amdgcn.rcp.f32(float [[TMP24]])
|
|
; IEEE-BADFREXP-NEXT: [[TMP27:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float 2.000000e+00)
|
|
; IEEE-BADFREXP-NEXT: [[TMP28:%.*]] = extractvalue { float, i32 } [[TMP27]], 0
|
|
; IEEE-BADFREXP-NEXT: [[TMP29:%.*]] = call i32 @llvm.amdgcn.frexp.exp.i32.f32(float 2.000000e+00)
|
|
; IEEE-BADFREXP-NEXT: [[TMP30:%.*]] = fmul float [[TMP28]], [[TMP26]]
|
|
; IEEE-BADFREXP-NEXT: [[TMP31:%.*]] = sub i32 [[TMP29]], [[TMP25]]
|
|
; IEEE-BADFREXP-NEXT: [[TMP32:%.*]] = call float @llvm.ldexp.f32.i32(float [[TMP30]], i32 [[TMP31]])
|
|
; IEEE-BADFREXP-NEXT: [[TMP33:%.*]] = insertelement <4 x float> poison, float [[TMP6]], i64 0
|
|
; IEEE-BADFREXP-NEXT: [[TMP34:%.*]] = insertelement <4 x float> [[TMP33]], float [[TMP12]], i64 1
|
|
; IEEE-BADFREXP-NEXT: [[TMP35:%.*]] = insertelement <4 x float> [[TMP34]], float [[TMP22]], i64 2
|
|
; IEEE-BADFREXP-NEXT: [[CONST_PARTIAL_RCP:%.*]] = insertelement <4 x float> [[TMP35]], float [[TMP32]], i64 3
|
|
; IEEE-BADFREXP-NEXT: ret <4 x float> [[CONST_PARTIAL_RCP]]
|
|
;
|
|
; DAZ-LABEL: define <4 x float> @fdiv_constant_f32_vector
|
|
; DAZ-SAME: (ptr addrspace(1) [[OUT:%.*]], <2 x float> [[X:%.*]]) #[[ATTR1]] {
|
|
; DAZ-NEXT: [[TMP1:%.*]] = call float @llvm.amdgcn.rcp.f32(float 5.000000e-01)
|
|
; DAZ-NEXT: [[TMP2:%.*]] = call float @llvm.amdgcn.rcp.f32(float -2.000000e+00)
|
|
; DAZ-NEXT: [[TMP3:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float 3.200000e+01)
|
|
; DAZ-NEXT: [[TMP4:%.*]] = extractvalue { float, i32 } [[TMP3]], 0
|
|
; DAZ-NEXT: [[TMP5:%.*]] = extractvalue { float, i32 } [[TMP3]], 1
|
|
; DAZ-NEXT: [[TMP6:%.*]] = call float @llvm.amdgcn.rcp.f32(float [[TMP4]])
|
|
; DAZ-NEXT: [[TMP7:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float undef)
|
|
; DAZ-NEXT: [[TMP8:%.*]] = extractvalue { float, i32 } [[TMP7]], 0
|
|
; DAZ-NEXT: [[TMP9:%.*]] = extractvalue { float, i32 } [[TMP7]], 1
|
|
; DAZ-NEXT: [[TMP10:%.*]] = fmul float [[TMP8]], [[TMP6]]
|
|
; DAZ-NEXT: [[TMP11:%.*]] = sub i32 [[TMP9]], [[TMP5]]
|
|
; DAZ-NEXT: [[TMP12:%.*]] = call float @llvm.ldexp.f32.i32(float [[TMP10]], i32 [[TMP11]])
|
|
; DAZ-NEXT: [[TMP13:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float 1.000000e+01)
|
|
; DAZ-NEXT: [[TMP14:%.*]] = extractvalue { float, i32 } [[TMP13]], 0
|
|
; DAZ-NEXT: [[TMP15:%.*]] = extractvalue { float, i32 } [[TMP13]], 1
|
|
; DAZ-NEXT: [[TMP16:%.*]] = call float @llvm.amdgcn.rcp.f32(float [[TMP14]])
|
|
; DAZ-NEXT: [[TMP17:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float 2.000000e+00)
|
|
; DAZ-NEXT: [[TMP18:%.*]] = extractvalue { float, i32 } [[TMP17]], 0
|
|
; DAZ-NEXT: [[TMP19:%.*]] = extractvalue { float, i32 } [[TMP17]], 1
|
|
; DAZ-NEXT: [[TMP20:%.*]] = fmul float [[TMP18]], [[TMP16]]
|
|
; DAZ-NEXT: [[TMP21:%.*]] = sub i32 [[TMP19]], [[TMP15]]
|
|
; DAZ-NEXT: [[TMP22:%.*]] = call float @llvm.ldexp.f32.i32(float [[TMP20]], i32 [[TMP21]])
|
|
; DAZ-NEXT: [[TMP23:%.*]] = insertelement <4 x float> poison, float [[TMP1]], i64 0
|
|
; DAZ-NEXT: [[TMP24:%.*]] = insertelement <4 x float> [[TMP23]], float [[TMP2]], i64 1
|
|
; DAZ-NEXT: [[TMP25:%.*]] = insertelement <4 x float> [[TMP24]], float [[TMP12]], i64 2
|
|
; DAZ-NEXT: [[CONST_PARTIAL_RCP:%.*]] = insertelement <4 x float> [[TMP25]], float [[TMP22]], i64 3
|
|
; DAZ-NEXT: ret <4 x float> [[CONST_PARTIAL_RCP]]
|
|
;
|
|
%const.partial.rcp = fdiv <4 x float> <float 1.0, float -1.0, float undef, float 2.0>, <float 0.5, float 2.0, float 32.0, float 10.0>, !fpmath !2
|
|
ret <4 x float> %const.partial.rcp
|
|
}
|
|
|
|
define amdgpu_kernel void @fdiv_fpmath_f32_nosub_lhs(ptr addrspace(1) %out, float nofpclass(sub) %a, float %b) {
|
|
; IEEE-GOODFREXP-LABEL: define amdgpu_kernel void @fdiv_fpmath_f32_nosub_lhs
|
|
; IEEE-GOODFREXP-SAME: (ptr addrspace(1) [[OUT:%.*]], float nofpclass(sub) [[A:%.*]], float [[B:%.*]]) #[[ATTR1]] {
|
|
; IEEE-GOODFREXP-NEXT: [[NO_MD:%.*]] = fdiv float [[A]], [[B]]
|
|
; IEEE-GOODFREXP-NEXT: store volatile float [[NO_MD]], ptr addrspace(1) [[OUT]], align 4
|
|
; IEEE-GOODFREXP-NEXT: [[MD_HALF_ULP:%.*]] = fdiv float [[A]], [[B]], !fpmath !1
|
|
; IEEE-GOODFREXP-NEXT: store volatile float [[MD_HALF_ULP]], ptr addrspace(1) [[OUT]], align 4
|
|
; IEEE-GOODFREXP-NEXT: [[TMP1:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[B]])
|
|
; IEEE-GOODFREXP-NEXT: [[TMP2:%.*]] = extractvalue { float, i32 } [[TMP1]], 0
|
|
; IEEE-GOODFREXP-NEXT: [[TMP3:%.*]] = extractvalue { float, i32 } [[TMP1]], 1
|
|
; IEEE-GOODFREXP-NEXT: [[TMP4:%.*]] = call float @llvm.amdgcn.rcp.f32(float [[TMP2]])
|
|
; IEEE-GOODFREXP-NEXT: [[TMP5:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[A]])
|
|
; IEEE-GOODFREXP-NEXT: [[TMP6:%.*]] = extractvalue { float, i32 } [[TMP5]], 0
|
|
; IEEE-GOODFREXP-NEXT: [[TMP7:%.*]] = extractvalue { float, i32 } [[TMP5]], 1
|
|
; IEEE-GOODFREXP-NEXT: [[TMP8:%.*]] = fmul float [[TMP6]], [[TMP4]]
|
|
; IEEE-GOODFREXP-NEXT: [[TMP9:%.*]] = sub i32 [[TMP7]], [[TMP3]]
|
|
; IEEE-GOODFREXP-NEXT: [[MD_1ULP:%.*]] = call float @llvm.ldexp.f32.i32(float [[TMP8]], i32 [[TMP9]])
|
|
; IEEE-GOODFREXP-NEXT: store volatile float [[MD_1ULP]], ptr addrspace(1) [[OUT]], align 4
|
|
; IEEE-GOODFREXP-NEXT: [[TMP10:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[B]])
|
|
; IEEE-GOODFREXP-NEXT: [[TMP11:%.*]] = extractvalue { float, i32 } [[TMP10]], 0
|
|
; IEEE-GOODFREXP-NEXT: [[TMP12:%.*]] = extractvalue { float, i32 } [[TMP10]], 1
|
|
; IEEE-GOODFREXP-NEXT: [[TMP13:%.*]] = call float @llvm.amdgcn.rcp.f32(float [[TMP11]])
|
|
; IEEE-GOODFREXP-NEXT: [[TMP14:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[A]])
|
|
; IEEE-GOODFREXP-NEXT: [[TMP15:%.*]] = extractvalue { float, i32 } [[TMP14]], 0
|
|
; IEEE-GOODFREXP-NEXT: [[TMP16:%.*]] = extractvalue { float, i32 } [[TMP14]], 1
|
|
; IEEE-GOODFREXP-NEXT: [[TMP17:%.*]] = fmul float [[TMP15]], [[TMP13]]
|
|
; IEEE-GOODFREXP-NEXT: [[TMP18:%.*]] = sub i32 [[TMP16]], [[TMP12]]
|
|
; IEEE-GOODFREXP-NEXT: [[MD_25ULP:%.*]] = call float @llvm.ldexp.f32.i32(float [[TMP17]], i32 [[TMP18]])
|
|
; IEEE-GOODFREXP-NEXT: store volatile float [[MD_25ULP]], ptr addrspace(1) [[OUT]], align 4
|
|
; IEEE-GOODFREXP-NEXT: [[TMP19:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[B]])
|
|
; IEEE-GOODFREXP-NEXT: [[TMP20:%.*]] = extractvalue { float, i32 } [[TMP19]], 0
|
|
; IEEE-GOODFREXP-NEXT: [[TMP21:%.*]] = extractvalue { float, i32 } [[TMP19]], 1
|
|
; IEEE-GOODFREXP-NEXT: [[TMP22:%.*]] = call float @llvm.amdgcn.rcp.f32(float [[TMP20]])
|
|
; IEEE-GOODFREXP-NEXT: [[TMP23:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[A]])
|
|
; IEEE-GOODFREXP-NEXT: [[TMP24:%.*]] = extractvalue { float, i32 } [[TMP23]], 0
|
|
; IEEE-GOODFREXP-NEXT: [[TMP25:%.*]] = extractvalue { float, i32 } [[TMP23]], 1
|
|
; IEEE-GOODFREXP-NEXT: [[TMP26:%.*]] = fmul float [[TMP24]], [[TMP22]]
|
|
; IEEE-GOODFREXP-NEXT: [[TMP27:%.*]] = sub i32 [[TMP25]], [[TMP21]]
|
|
; IEEE-GOODFREXP-NEXT: [[MD_3ULP:%.*]] = call float @llvm.ldexp.f32.i32(float [[TMP26]], i32 [[TMP27]])
|
|
; IEEE-GOODFREXP-NEXT: store volatile float [[MD_3ULP]], ptr addrspace(1) [[OUT]], align 4
|
|
; IEEE-GOODFREXP-NEXT: [[FAST_MD_25ULP:%.*]] = fdiv fast float [[A]], [[B]], !fpmath !0
|
|
; IEEE-GOODFREXP-NEXT: store volatile float [[FAST_MD_25ULP]], ptr addrspace(1) [[OUT]], align 4
|
|
; IEEE-GOODFREXP-NEXT: [[AFN_MD_25ULP:%.*]] = fdiv afn float [[A]], [[B]], !fpmath !0
|
|
; IEEE-GOODFREXP-NEXT: store volatile float [[AFN_MD_25ULP]], ptr addrspace(1) [[OUT]], align 4
|
|
; IEEE-GOODFREXP-NEXT: [[NO_MD_ARCP:%.*]] = fdiv arcp float [[A]], [[B]]
|
|
; IEEE-GOODFREXP-NEXT: store volatile float [[NO_MD_ARCP]], ptr addrspace(1) [[OUT]], align 4
|
|
; IEEE-GOODFREXP-NEXT: [[TMP28:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[B]])
|
|
; IEEE-GOODFREXP-NEXT: [[TMP29:%.*]] = extractvalue { float, i32 } [[TMP28]], 0
|
|
; IEEE-GOODFREXP-NEXT: [[TMP30:%.*]] = extractvalue { float, i32 } [[TMP28]], 1
|
|
; IEEE-GOODFREXP-NEXT: [[TMP31:%.*]] = sub i32 0, [[TMP30]]
|
|
; IEEE-GOODFREXP-NEXT: [[TMP32:%.*]] = call arcp float @llvm.amdgcn.rcp.f32(float [[TMP29]])
|
|
; IEEE-GOODFREXP-NEXT: [[TMP33:%.*]] = call arcp float @llvm.ldexp.f32.i32(float [[TMP32]], i32 [[TMP31]])
|
|
; IEEE-GOODFREXP-NEXT: [[ARCP_MD_25ULP:%.*]] = fmul arcp float [[A]], [[TMP33]]
|
|
; IEEE-GOODFREXP-NEXT: store volatile float [[ARCP_MD_25ULP]], ptr addrspace(1) [[OUT]], align 4
|
|
; IEEE-GOODFREXP-NEXT: [[TMP34:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[B]])
|
|
; IEEE-GOODFREXP-NEXT: [[TMP35:%.*]] = extractvalue { float, i32 } [[TMP34]], 0
|
|
; IEEE-GOODFREXP-NEXT: [[TMP36:%.*]] = extractvalue { float, i32 } [[TMP34]], 1
|
|
; IEEE-GOODFREXP-NEXT: [[TMP37:%.*]] = sub i32 0, [[TMP36]]
|
|
; IEEE-GOODFREXP-NEXT: [[TMP38:%.*]] = call arcp float @llvm.amdgcn.rcp.f32(float [[TMP35]])
|
|
; IEEE-GOODFREXP-NEXT: [[TMP39:%.*]] = call arcp float @llvm.ldexp.f32.i32(float [[TMP38]], i32 [[TMP37]])
|
|
; IEEE-GOODFREXP-NEXT: [[ARCP_MD_1ULP:%.*]] = fmul arcp float [[A]], [[TMP39]]
|
|
; IEEE-GOODFREXP-NEXT: store volatile float [[ARCP_MD_1ULP]], ptr addrspace(1) [[OUT]], align 4
|
|
; IEEE-GOODFREXP-NEXT: ret void
|
|
;
|
|
; IEEE-BADFREXP-LABEL: define amdgpu_kernel void @fdiv_fpmath_f32_nosub_lhs
|
|
; IEEE-BADFREXP-SAME: (ptr addrspace(1) [[OUT:%.*]], float nofpclass(sub) [[A:%.*]], float [[B:%.*]]) #[[ATTR1]] {
|
|
; IEEE-BADFREXP-NEXT: [[NO_MD:%.*]] = fdiv float [[A]], [[B]]
|
|
; IEEE-BADFREXP-NEXT: store volatile float [[NO_MD]], ptr addrspace(1) [[OUT]], align 4
|
|
; IEEE-BADFREXP-NEXT: [[MD_HALF_ULP:%.*]] = fdiv float [[A]], [[B]], !fpmath !1
|
|
; IEEE-BADFREXP-NEXT: store volatile float [[MD_HALF_ULP]], ptr addrspace(1) [[OUT]], align 4
|
|
; IEEE-BADFREXP-NEXT: [[TMP1:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[B]])
|
|
; IEEE-BADFREXP-NEXT: [[TMP2:%.*]] = extractvalue { float, i32 } [[TMP1]], 0
|
|
; IEEE-BADFREXP-NEXT: [[TMP3:%.*]] = call i32 @llvm.amdgcn.frexp.exp.i32.f32(float [[B]])
|
|
; IEEE-BADFREXP-NEXT: [[TMP4:%.*]] = call float @llvm.amdgcn.rcp.f32(float [[TMP2]])
|
|
; IEEE-BADFREXP-NEXT: [[TMP5:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[A]])
|
|
; IEEE-BADFREXP-NEXT: [[TMP6:%.*]] = extractvalue { float, i32 } [[TMP5]], 0
|
|
; IEEE-BADFREXP-NEXT: [[TMP7:%.*]] = call i32 @llvm.amdgcn.frexp.exp.i32.f32(float [[A]])
|
|
; IEEE-BADFREXP-NEXT: [[TMP8:%.*]] = fmul float [[TMP6]], [[TMP4]]
|
|
; IEEE-BADFREXP-NEXT: [[TMP9:%.*]] = sub i32 [[TMP7]], [[TMP3]]
|
|
; IEEE-BADFREXP-NEXT: [[MD_1ULP:%.*]] = call float @llvm.ldexp.f32.i32(float [[TMP8]], i32 [[TMP9]])
|
|
; IEEE-BADFREXP-NEXT: store volatile float [[MD_1ULP]], ptr addrspace(1) [[OUT]], align 4
|
|
; IEEE-BADFREXP-NEXT: [[TMP10:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[B]])
|
|
; IEEE-BADFREXP-NEXT: [[TMP11:%.*]] = extractvalue { float, i32 } [[TMP10]], 0
|
|
; IEEE-BADFREXP-NEXT: [[TMP12:%.*]] = call i32 @llvm.amdgcn.frexp.exp.i32.f32(float [[B]])
|
|
; IEEE-BADFREXP-NEXT: [[TMP13:%.*]] = call float @llvm.amdgcn.rcp.f32(float [[TMP11]])
|
|
; IEEE-BADFREXP-NEXT: [[TMP14:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[A]])
|
|
; IEEE-BADFREXP-NEXT: [[TMP15:%.*]] = extractvalue { float, i32 } [[TMP14]], 0
|
|
; IEEE-BADFREXP-NEXT: [[TMP16:%.*]] = call i32 @llvm.amdgcn.frexp.exp.i32.f32(float [[A]])
|
|
; IEEE-BADFREXP-NEXT: [[TMP17:%.*]] = fmul float [[TMP15]], [[TMP13]]
|
|
; IEEE-BADFREXP-NEXT: [[TMP18:%.*]] = sub i32 [[TMP16]], [[TMP12]]
|
|
; IEEE-BADFREXP-NEXT: [[MD_25ULP:%.*]] = call float @llvm.ldexp.f32.i32(float [[TMP17]], i32 [[TMP18]])
|
|
; IEEE-BADFREXP-NEXT: store volatile float [[MD_25ULP]], ptr addrspace(1) [[OUT]], align 4
|
|
; IEEE-BADFREXP-NEXT: [[TMP19:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[B]])
|
|
; IEEE-BADFREXP-NEXT: [[TMP20:%.*]] = extractvalue { float, i32 } [[TMP19]], 0
|
|
; IEEE-BADFREXP-NEXT: [[TMP21:%.*]] = call i32 @llvm.amdgcn.frexp.exp.i32.f32(float [[B]])
|
|
; IEEE-BADFREXP-NEXT: [[TMP22:%.*]] = call float @llvm.amdgcn.rcp.f32(float [[TMP20]])
|
|
; IEEE-BADFREXP-NEXT: [[TMP23:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[A]])
|
|
; IEEE-BADFREXP-NEXT: [[TMP24:%.*]] = extractvalue { float, i32 } [[TMP23]], 0
|
|
; IEEE-BADFREXP-NEXT: [[TMP25:%.*]] = call i32 @llvm.amdgcn.frexp.exp.i32.f32(float [[A]])
|
|
; IEEE-BADFREXP-NEXT: [[TMP26:%.*]] = fmul float [[TMP24]], [[TMP22]]
|
|
; IEEE-BADFREXP-NEXT: [[TMP27:%.*]] = sub i32 [[TMP25]], [[TMP21]]
|
|
; IEEE-BADFREXP-NEXT: [[MD_3ULP:%.*]] = call float @llvm.ldexp.f32.i32(float [[TMP26]], i32 [[TMP27]])
|
|
; IEEE-BADFREXP-NEXT: store volatile float [[MD_3ULP]], ptr addrspace(1) [[OUT]], align 4
|
|
; IEEE-BADFREXP-NEXT: [[FAST_MD_25ULP:%.*]] = fdiv fast float [[A]], [[B]], !fpmath !0
|
|
; IEEE-BADFREXP-NEXT: store volatile float [[FAST_MD_25ULP]], ptr addrspace(1) [[OUT]], align 4
|
|
; IEEE-BADFREXP-NEXT: [[AFN_MD_25ULP:%.*]] = fdiv afn float [[A]], [[B]], !fpmath !0
|
|
; IEEE-BADFREXP-NEXT: store volatile float [[AFN_MD_25ULP]], ptr addrspace(1) [[OUT]], align 4
|
|
; IEEE-BADFREXP-NEXT: [[NO_MD_ARCP:%.*]] = fdiv arcp float [[A]], [[B]]
|
|
; IEEE-BADFREXP-NEXT: store volatile float [[NO_MD_ARCP]], ptr addrspace(1) [[OUT]], align 4
|
|
; IEEE-BADFREXP-NEXT: [[TMP28:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[B]])
|
|
; IEEE-BADFREXP-NEXT: [[TMP29:%.*]] = extractvalue { float, i32 } [[TMP28]], 0
|
|
; IEEE-BADFREXP-NEXT: [[TMP30:%.*]] = call i32 @llvm.amdgcn.frexp.exp.i32.f32(float [[B]])
|
|
; IEEE-BADFREXP-NEXT: [[TMP31:%.*]] = sub i32 0, [[TMP30]]
|
|
; IEEE-BADFREXP-NEXT: [[TMP32:%.*]] = call arcp float @llvm.amdgcn.rcp.f32(float [[TMP29]])
|
|
; IEEE-BADFREXP-NEXT: [[TMP33:%.*]] = call arcp float @llvm.ldexp.f32.i32(float [[TMP32]], i32 [[TMP31]])
|
|
; IEEE-BADFREXP-NEXT: [[ARCP_MD_25ULP:%.*]] = fmul arcp float [[A]], [[TMP33]]
|
|
; IEEE-BADFREXP-NEXT: store volatile float [[ARCP_MD_25ULP]], ptr addrspace(1) [[OUT]], align 4
|
|
; IEEE-BADFREXP-NEXT: [[TMP34:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[B]])
|
|
; IEEE-BADFREXP-NEXT: [[TMP35:%.*]] = extractvalue { float, i32 } [[TMP34]], 0
|
|
; IEEE-BADFREXP-NEXT: [[TMP36:%.*]] = call i32 @llvm.amdgcn.frexp.exp.i32.f32(float [[B]])
|
|
; IEEE-BADFREXP-NEXT: [[TMP37:%.*]] = sub i32 0, [[TMP36]]
|
|
; IEEE-BADFREXP-NEXT: [[TMP38:%.*]] = call arcp float @llvm.amdgcn.rcp.f32(float [[TMP35]])
|
|
; IEEE-BADFREXP-NEXT: [[TMP39:%.*]] = call arcp float @llvm.ldexp.f32.i32(float [[TMP38]], i32 [[TMP37]])
|
|
; IEEE-BADFREXP-NEXT: [[ARCP_MD_1ULP:%.*]] = fmul arcp float [[A]], [[TMP39]]
|
|
; IEEE-BADFREXP-NEXT: store volatile float [[ARCP_MD_1ULP]], ptr addrspace(1) [[OUT]], align 4
|
|
; IEEE-BADFREXP-NEXT: ret void
|
|
;
|
|
; DAZ-LABEL: define amdgpu_kernel void @fdiv_fpmath_f32_nosub_lhs
|
|
; DAZ-SAME: (ptr addrspace(1) [[OUT:%.*]], float nofpclass(sub) [[A:%.*]], float [[B:%.*]]) #[[ATTR1]] {
|
|
; DAZ-NEXT: [[NO_MD:%.*]] = fdiv float [[A]], [[B]]
|
|
; DAZ-NEXT: store volatile float [[NO_MD]], ptr addrspace(1) [[OUT]], align 4
|
|
; DAZ-NEXT: [[MD_HALF_ULP:%.*]] = fdiv float [[A]], [[B]], !fpmath !1
|
|
; DAZ-NEXT: store volatile float [[MD_HALF_ULP]], ptr addrspace(1) [[OUT]], align 4
|
|
; DAZ-NEXT: [[TMP1:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[B]])
|
|
; DAZ-NEXT: [[TMP2:%.*]] = extractvalue { float, i32 } [[TMP1]], 0
|
|
; DAZ-NEXT: [[TMP3:%.*]] = extractvalue { float, i32 } [[TMP1]], 1
|
|
; DAZ-NEXT: [[TMP4:%.*]] = call float @llvm.amdgcn.rcp.f32(float [[TMP2]])
|
|
; DAZ-NEXT: [[TMP5:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[A]])
|
|
; DAZ-NEXT: [[TMP6:%.*]] = extractvalue { float, i32 } [[TMP5]], 0
|
|
; DAZ-NEXT: [[TMP7:%.*]] = extractvalue { float, i32 } [[TMP5]], 1
|
|
; DAZ-NEXT: [[TMP8:%.*]] = fmul float [[TMP6]], [[TMP4]]
|
|
; DAZ-NEXT: [[TMP9:%.*]] = sub i32 [[TMP7]], [[TMP3]]
|
|
; DAZ-NEXT: [[MD_1ULP:%.*]] = call float @llvm.ldexp.f32.i32(float [[TMP8]], i32 [[TMP9]])
|
|
; DAZ-NEXT: store volatile float [[MD_1ULP]], ptr addrspace(1) [[OUT]], align 4
|
|
; DAZ-NEXT: [[MD_25ULP:%.*]] = call float @llvm.amdgcn.fdiv.fast(float [[A]], float [[B]])
|
|
; DAZ-NEXT: store volatile float [[MD_25ULP]], ptr addrspace(1) [[OUT]], align 4
|
|
; DAZ-NEXT: [[MD_3ULP:%.*]] = call float @llvm.amdgcn.fdiv.fast(float [[A]], float [[B]])
|
|
; DAZ-NEXT: store volatile float [[MD_3ULP]], ptr addrspace(1) [[OUT]], align 4
|
|
; DAZ-NEXT: [[FAST_MD_25ULP:%.*]] = fdiv fast float [[A]], [[B]], !fpmath !0
|
|
; DAZ-NEXT: store volatile float [[FAST_MD_25ULP]], ptr addrspace(1) [[OUT]], align 4
|
|
; DAZ-NEXT: [[AFN_MD_25ULP:%.*]] = fdiv afn float [[A]], [[B]], !fpmath !0
|
|
; DAZ-NEXT: store volatile float [[AFN_MD_25ULP]], ptr addrspace(1) [[OUT]], align 4
|
|
; DAZ-NEXT: [[NO_MD_ARCP:%.*]] = fdiv arcp float [[A]], [[B]]
|
|
; DAZ-NEXT: store volatile float [[NO_MD_ARCP]], ptr addrspace(1) [[OUT]], align 4
|
|
; DAZ-NEXT: [[TMP10:%.*]] = call arcp float @llvm.amdgcn.rcp.f32(float [[B]])
|
|
; DAZ-NEXT: [[ARCP_MD_25ULP:%.*]] = fmul arcp float [[A]], [[TMP10]]
|
|
; DAZ-NEXT: store volatile float [[ARCP_MD_25ULP]], ptr addrspace(1) [[OUT]], align 4
|
|
; DAZ-NEXT: [[TMP11:%.*]] = call arcp float @llvm.amdgcn.rcp.f32(float [[B]])
|
|
; DAZ-NEXT: [[ARCP_MD_1ULP:%.*]] = fmul arcp float [[A]], [[TMP11]]
|
|
; DAZ-NEXT: store volatile float [[ARCP_MD_1ULP]], ptr addrspace(1) [[OUT]], align 4
|
|
; DAZ-NEXT: ret void
|
|
;
|
|
%no.md = fdiv float %a, %b
|
|
store volatile float %no.md, ptr addrspace(1) %out, align 4
|
|
%md.half.ulp = fdiv float %a, %b, !fpmath !1
|
|
store volatile float %md.half.ulp, ptr addrspace(1) %out, align 4
|
|
%md.1ulp = fdiv float %a, %b, !fpmath !2
|
|
store volatile float %md.1ulp, ptr addrspace(1) %out, align 4
|
|
%md.25ulp = fdiv float %a, %b, !fpmath !0
|
|
store volatile float %md.25ulp, ptr addrspace(1) %out, align 4
|
|
%md.3ulp = fdiv float %a, %b, !fpmath !3
|
|
store volatile float %md.3ulp, ptr addrspace(1) %out, align 4
|
|
%fast.md.25ulp = fdiv fast float %a, %b, !fpmath !0
|
|
store volatile float %fast.md.25ulp, ptr addrspace(1) %out, align 4
|
|
%afn.md.25ulp = fdiv afn float %a, %b, !fpmath !0
|
|
store volatile float %afn.md.25ulp, ptr addrspace(1) %out, align 4
|
|
%no.md.arcp = fdiv arcp float %a, %b
|
|
store volatile float %no.md.arcp, ptr addrspace(1) %out, align 4
|
|
%arcp.md.25ulp = fdiv arcp float %a, %b, !fpmath !0
|
|
store volatile float %arcp.md.25ulp, ptr addrspace(1) %out, align 4
|
|
%arcp.md.1ulp = fdiv arcp float %a, %b, !fpmath !2
|
|
store volatile float %arcp.md.1ulp, ptr addrspace(1) %out, align 4
|
|
ret void
|
|
}
|
|
|
|
define amdgpu_kernel void @fdiv_fpmath_f32_nosub_rhs(ptr addrspace(1) %out, float %a, float nofpclass(sub) %b) {
|
|
; IEEE-GOODFREXP-LABEL: define amdgpu_kernel void @fdiv_fpmath_f32_nosub_rhs
|
|
; IEEE-GOODFREXP-SAME: (ptr addrspace(1) [[OUT:%.*]], float [[A:%.*]], float nofpclass(sub) [[B:%.*]]) #[[ATTR1]] {
|
|
; IEEE-GOODFREXP-NEXT: [[NO_MD:%.*]] = fdiv float [[A]], [[B]]
|
|
; IEEE-GOODFREXP-NEXT: store volatile float [[NO_MD]], ptr addrspace(1) [[OUT]], align 4
|
|
; IEEE-GOODFREXP-NEXT: [[MD_HALF_ULP:%.*]] = fdiv float [[A]], [[B]], !fpmath !1
|
|
; IEEE-GOODFREXP-NEXT: store volatile float [[MD_HALF_ULP]], ptr addrspace(1) [[OUT]], align 4
|
|
; IEEE-GOODFREXP-NEXT: [[TMP1:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[B]])
|
|
; IEEE-GOODFREXP-NEXT: [[TMP2:%.*]] = extractvalue { float, i32 } [[TMP1]], 0
|
|
; IEEE-GOODFREXP-NEXT: [[TMP3:%.*]] = extractvalue { float, i32 } [[TMP1]], 1
|
|
; IEEE-GOODFREXP-NEXT: [[TMP4:%.*]] = call float @llvm.amdgcn.rcp.f32(float [[TMP2]])
|
|
; IEEE-GOODFREXP-NEXT: [[TMP5:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[A]])
|
|
; IEEE-GOODFREXP-NEXT: [[TMP6:%.*]] = extractvalue { float, i32 } [[TMP5]], 0
|
|
; IEEE-GOODFREXP-NEXT: [[TMP7:%.*]] = extractvalue { float, i32 } [[TMP5]], 1
|
|
; IEEE-GOODFREXP-NEXT: [[TMP8:%.*]] = fmul float [[TMP6]], [[TMP4]]
|
|
; IEEE-GOODFREXP-NEXT: [[TMP9:%.*]] = sub i32 [[TMP7]], [[TMP3]]
|
|
; IEEE-GOODFREXP-NEXT: [[MD_1ULP:%.*]] = call float @llvm.ldexp.f32.i32(float [[TMP8]], i32 [[TMP9]])
|
|
; IEEE-GOODFREXP-NEXT: store volatile float [[MD_1ULP]], ptr addrspace(1) [[OUT]], align 4
|
|
; IEEE-GOODFREXP-NEXT: [[TMP10:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[B]])
|
|
; IEEE-GOODFREXP-NEXT: [[TMP11:%.*]] = extractvalue { float, i32 } [[TMP10]], 0
|
|
; IEEE-GOODFREXP-NEXT: [[TMP12:%.*]] = extractvalue { float, i32 } [[TMP10]], 1
|
|
; IEEE-GOODFREXP-NEXT: [[TMP13:%.*]] = call float @llvm.amdgcn.rcp.f32(float [[TMP11]])
|
|
; IEEE-GOODFREXP-NEXT: [[TMP14:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[A]])
|
|
; IEEE-GOODFREXP-NEXT: [[TMP15:%.*]] = extractvalue { float, i32 } [[TMP14]], 0
|
|
; IEEE-GOODFREXP-NEXT: [[TMP16:%.*]] = extractvalue { float, i32 } [[TMP14]], 1
|
|
; IEEE-GOODFREXP-NEXT: [[TMP17:%.*]] = fmul float [[TMP15]], [[TMP13]]
|
|
; IEEE-GOODFREXP-NEXT: [[TMP18:%.*]] = sub i32 [[TMP16]], [[TMP12]]
|
|
; IEEE-GOODFREXP-NEXT: [[MD_25ULP:%.*]] = call float @llvm.ldexp.f32.i32(float [[TMP17]], i32 [[TMP18]])
|
|
; IEEE-GOODFREXP-NEXT: store volatile float [[MD_25ULP]], ptr addrspace(1) [[OUT]], align 4
|
|
; IEEE-GOODFREXP-NEXT: [[TMP19:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[B]])
|
|
; IEEE-GOODFREXP-NEXT: [[TMP20:%.*]] = extractvalue { float, i32 } [[TMP19]], 0
|
|
; IEEE-GOODFREXP-NEXT: [[TMP21:%.*]] = extractvalue { float, i32 } [[TMP19]], 1
|
|
; IEEE-GOODFREXP-NEXT: [[TMP22:%.*]] = call float @llvm.amdgcn.rcp.f32(float [[TMP20]])
|
|
; IEEE-GOODFREXP-NEXT: [[TMP23:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[A]])
|
|
; IEEE-GOODFREXP-NEXT: [[TMP24:%.*]] = extractvalue { float, i32 } [[TMP23]], 0
|
|
; IEEE-GOODFREXP-NEXT: [[TMP25:%.*]] = extractvalue { float, i32 } [[TMP23]], 1
|
|
; IEEE-GOODFREXP-NEXT: [[TMP26:%.*]] = fmul float [[TMP24]], [[TMP22]]
|
|
; IEEE-GOODFREXP-NEXT: [[TMP27:%.*]] = sub i32 [[TMP25]], [[TMP21]]
|
|
; IEEE-GOODFREXP-NEXT: [[MD_3ULP:%.*]] = call float @llvm.ldexp.f32.i32(float [[TMP26]], i32 [[TMP27]])
|
|
; IEEE-GOODFREXP-NEXT: store volatile float [[MD_3ULP]], ptr addrspace(1) [[OUT]], align 4
|
|
; IEEE-GOODFREXP-NEXT: [[FAST_MD_25ULP:%.*]] = fdiv fast float [[A]], [[B]], !fpmath !0
|
|
; IEEE-GOODFREXP-NEXT: store volatile float [[FAST_MD_25ULP]], ptr addrspace(1) [[OUT]], align 4
|
|
; IEEE-GOODFREXP-NEXT: [[AFN_MD_25ULP:%.*]] = fdiv afn float [[A]], [[B]], !fpmath !0
|
|
; IEEE-GOODFREXP-NEXT: store volatile float [[AFN_MD_25ULP]], ptr addrspace(1) [[OUT]], align 4
|
|
; IEEE-GOODFREXP-NEXT: [[NO_MD_ARCP:%.*]] = fdiv arcp float [[A]], [[B]]
|
|
; IEEE-GOODFREXP-NEXT: store volatile float [[NO_MD_ARCP]], ptr addrspace(1) [[OUT]], align 4
|
|
; IEEE-GOODFREXP-NEXT: [[TMP28:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[B]])
|
|
; IEEE-GOODFREXP-NEXT: [[TMP29:%.*]] = extractvalue { float, i32 } [[TMP28]], 0
|
|
; IEEE-GOODFREXP-NEXT: [[TMP30:%.*]] = extractvalue { float, i32 } [[TMP28]], 1
|
|
; IEEE-GOODFREXP-NEXT: [[TMP31:%.*]] = sub i32 0, [[TMP30]]
|
|
; IEEE-GOODFREXP-NEXT: [[TMP32:%.*]] = call arcp float @llvm.amdgcn.rcp.f32(float [[TMP29]])
|
|
; IEEE-GOODFREXP-NEXT: [[TMP33:%.*]] = call arcp float @llvm.ldexp.f32.i32(float [[TMP32]], i32 [[TMP31]])
|
|
; IEEE-GOODFREXP-NEXT: [[ARCP_MD_25ULP:%.*]] = fmul arcp float [[A]], [[TMP33]]
|
|
; IEEE-GOODFREXP-NEXT: store volatile float [[ARCP_MD_25ULP]], ptr addrspace(1) [[OUT]], align 4
|
|
; IEEE-GOODFREXP-NEXT: [[TMP34:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[B]])
|
|
; IEEE-GOODFREXP-NEXT: [[TMP35:%.*]] = extractvalue { float, i32 } [[TMP34]], 0
|
|
; IEEE-GOODFREXP-NEXT: [[TMP36:%.*]] = extractvalue { float, i32 } [[TMP34]], 1
|
|
; IEEE-GOODFREXP-NEXT: [[TMP37:%.*]] = sub i32 0, [[TMP36]]
|
|
; IEEE-GOODFREXP-NEXT: [[TMP38:%.*]] = call arcp float @llvm.amdgcn.rcp.f32(float [[TMP35]])
|
|
; IEEE-GOODFREXP-NEXT: [[TMP39:%.*]] = call arcp float @llvm.ldexp.f32.i32(float [[TMP38]], i32 [[TMP37]])
|
|
; IEEE-GOODFREXP-NEXT: [[ARCP_MD_1ULP:%.*]] = fmul arcp float [[A]], [[TMP39]]
|
|
; IEEE-GOODFREXP-NEXT: store volatile float [[ARCP_MD_1ULP]], ptr addrspace(1) [[OUT]], align 4
|
|
; IEEE-GOODFREXP-NEXT: ret void
|
|
;
|
|
; IEEE-BADFREXP-LABEL: define amdgpu_kernel void @fdiv_fpmath_f32_nosub_rhs
|
|
; IEEE-BADFREXP-SAME: (ptr addrspace(1) [[OUT:%.*]], float [[A:%.*]], float nofpclass(sub) [[B:%.*]]) #[[ATTR1]] {
|
|
; IEEE-BADFREXP-NEXT: [[NO_MD:%.*]] = fdiv float [[A]], [[B]]
|
|
; IEEE-BADFREXP-NEXT: store volatile float [[NO_MD]], ptr addrspace(1) [[OUT]], align 4
|
|
; IEEE-BADFREXP-NEXT: [[MD_HALF_ULP:%.*]] = fdiv float [[A]], [[B]], !fpmath !1
|
|
; IEEE-BADFREXP-NEXT: store volatile float [[MD_HALF_ULP]], ptr addrspace(1) [[OUT]], align 4
|
|
; IEEE-BADFREXP-NEXT: [[TMP1:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[B]])
|
|
; IEEE-BADFREXP-NEXT: [[TMP2:%.*]] = extractvalue { float, i32 } [[TMP1]], 0
|
|
; IEEE-BADFREXP-NEXT: [[TMP3:%.*]] = call i32 @llvm.amdgcn.frexp.exp.i32.f32(float [[B]])
|
|
; IEEE-BADFREXP-NEXT: [[TMP4:%.*]] = call float @llvm.amdgcn.rcp.f32(float [[TMP2]])
|
|
; IEEE-BADFREXP-NEXT: [[TMP5:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[A]])
|
|
; IEEE-BADFREXP-NEXT: [[TMP6:%.*]] = extractvalue { float, i32 } [[TMP5]], 0
|
|
; IEEE-BADFREXP-NEXT: [[TMP7:%.*]] = call i32 @llvm.amdgcn.frexp.exp.i32.f32(float [[A]])
|
|
; IEEE-BADFREXP-NEXT: [[TMP8:%.*]] = fmul float [[TMP6]], [[TMP4]]
|
|
; IEEE-BADFREXP-NEXT: [[TMP9:%.*]] = sub i32 [[TMP7]], [[TMP3]]
|
|
; IEEE-BADFREXP-NEXT: [[MD_1ULP:%.*]] = call float @llvm.ldexp.f32.i32(float [[TMP8]], i32 [[TMP9]])
|
|
; IEEE-BADFREXP-NEXT: store volatile float [[MD_1ULP]], ptr addrspace(1) [[OUT]], align 4
|
|
; IEEE-BADFREXP-NEXT: [[TMP10:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[B]])
|
|
; IEEE-BADFREXP-NEXT: [[TMP11:%.*]] = extractvalue { float, i32 } [[TMP10]], 0
|
|
; IEEE-BADFREXP-NEXT: [[TMP12:%.*]] = call i32 @llvm.amdgcn.frexp.exp.i32.f32(float [[B]])
|
|
; IEEE-BADFREXP-NEXT: [[TMP13:%.*]] = call float @llvm.amdgcn.rcp.f32(float [[TMP11]])
|
|
; IEEE-BADFREXP-NEXT: [[TMP14:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[A]])
|
|
; IEEE-BADFREXP-NEXT: [[TMP15:%.*]] = extractvalue { float, i32 } [[TMP14]], 0
|
|
; IEEE-BADFREXP-NEXT: [[TMP16:%.*]] = call i32 @llvm.amdgcn.frexp.exp.i32.f32(float [[A]])
|
|
; IEEE-BADFREXP-NEXT: [[TMP17:%.*]] = fmul float [[TMP15]], [[TMP13]]
|
|
; IEEE-BADFREXP-NEXT: [[TMP18:%.*]] = sub i32 [[TMP16]], [[TMP12]]
|
|
; IEEE-BADFREXP-NEXT: [[MD_25ULP:%.*]] = call float @llvm.ldexp.f32.i32(float [[TMP17]], i32 [[TMP18]])
|
|
; IEEE-BADFREXP-NEXT: store volatile float [[MD_25ULP]], ptr addrspace(1) [[OUT]], align 4
|
|
; IEEE-BADFREXP-NEXT: [[TMP19:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[B]])
|
|
; IEEE-BADFREXP-NEXT: [[TMP20:%.*]] = extractvalue { float, i32 } [[TMP19]], 0
|
|
; IEEE-BADFREXP-NEXT: [[TMP21:%.*]] = call i32 @llvm.amdgcn.frexp.exp.i32.f32(float [[B]])
|
|
; IEEE-BADFREXP-NEXT: [[TMP22:%.*]] = call float @llvm.amdgcn.rcp.f32(float [[TMP20]])
|
|
; IEEE-BADFREXP-NEXT: [[TMP23:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[A]])
|
|
; IEEE-BADFREXP-NEXT: [[TMP24:%.*]] = extractvalue { float, i32 } [[TMP23]], 0
|
|
; IEEE-BADFREXP-NEXT: [[TMP25:%.*]] = call i32 @llvm.amdgcn.frexp.exp.i32.f32(float [[A]])
|
|
; IEEE-BADFREXP-NEXT: [[TMP26:%.*]] = fmul float [[TMP24]], [[TMP22]]
|
|
; IEEE-BADFREXP-NEXT: [[TMP27:%.*]] = sub i32 [[TMP25]], [[TMP21]]
|
|
; IEEE-BADFREXP-NEXT: [[MD_3ULP:%.*]] = call float @llvm.ldexp.f32.i32(float [[TMP26]], i32 [[TMP27]])
|
|
; IEEE-BADFREXP-NEXT: store volatile float [[MD_3ULP]], ptr addrspace(1) [[OUT]], align 4
|
|
; IEEE-BADFREXP-NEXT: [[FAST_MD_25ULP:%.*]] = fdiv fast float [[A]], [[B]], !fpmath !0
|
|
; IEEE-BADFREXP-NEXT: store volatile float [[FAST_MD_25ULP]], ptr addrspace(1) [[OUT]], align 4
|
|
; IEEE-BADFREXP-NEXT: [[AFN_MD_25ULP:%.*]] = fdiv afn float [[A]], [[B]], !fpmath !0
|
|
; IEEE-BADFREXP-NEXT: store volatile float [[AFN_MD_25ULP]], ptr addrspace(1) [[OUT]], align 4
|
|
; IEEE-BADFREXP-NEXT: [[NO_MD_ARCP:%.*]] = fdiv arcp float [[A]], [[B]]
|
|
; IEEE-BADFREXP-NEXT: store volatile float [[NO_MD_ARCP]], ptr addrspace(1) [[OUT]], align 4
|
|
; IEEE-BADFREXP-NEXT: [[TMP28:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[B]])
|
|
; IEEE-BADFREXP-NEXT: [[TMP29:%.*]] = extractvalue { float, i32 } [[TMP28]], 0
|
|
; IEEE-BADFREXP-NEXT: [[TMP30:%.*]] = call i32 @llvm.amdgcn.frexp.exp.i32.f32(float [[B]])
|
|
; IEEE-BADFREXP-NEXT: [[TMP31:%.*]] = sub i32 0, [[TMP30]]
|
|
; IEEE-BADFREXP-NEXT: [[TMP32:%.*]] = call arcp float @llvm.amdgcn.rcp.f32(float [[TMP29]])
|
|
; IEEE-BADFREXP-NEXT: [[TMP33:%.*]] = call arcp float @llvm.ldexp.f32.i32(float [[TMP32]], i32 [[TMP31]])
|
|
; IEEE-BADFREXP-NEXT: [[ARCP_MD_25ULP:%.*]] = fmul arcp float [[A]], [[TMP33]]
|
|
; IEEE-BADFREXP-NEXT: store volatile float [[ARCP_MD_25ULP]], ptr addrspace(1) [[OUT]], align 4
|
|
; IEEE-BADFREXP-NEXT: [[TMP34:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[B]])
|
|
; IEEE-BADFREXP-NEXT: [[TMP35:%.*]] = extractvalue { float, i32 } [[TMP34]], 0
|
|
; IEEE-BADFREXP-NEXT: [[TMP36:%.*]] = call i32 @llvm.amdgcn.frexp.exp.i32.f32(float [[B]])
|
|
; IEEE-BADFREXP-NEXT: [[TMP37:%.*]] = sub i32 0, [[TMP36]]
|
|
; IEEE-BADFREXP-NEXT: [[TMP38:%.*]] = call arcp float @llvm.amdgcn.rcp.f32(float [[TMP35]])
|
|
; IEEE-BADFREXP-NEXT: [[TMP39:%.*]] = call arcp float @llvm.ldexp.f32.i32(float [[TMP38]], i32 [[TMP37]])
|
|
; IEEE-BADFREXP-NEXT: [[ARCP_MD_1ULP:%.*]] = fmul arcp float [[A]], [[TMP39]]
|
|
; IEEE-BADFREXP-NEXT: store volatile float [[ARCP_MD_1ULP]], ptr addrspace(1) [[OUT]], align 4
|
|
; IEEE-BADFREXP-NEXT: ret void
|
|
;
|
|
; DAZ-LABEL: define amdgpu_kernel void @fdiv_fpmath_f32_nosub_rhs
|
|
; DAZ-SAME: (ptr addrspace(1) [[OUT:%.*]], float [[A:%.*]], float nofpclass(sub) [[B:%.*]]) #[[ATTR1]] {
|
|
; DAZ-NEXT: [[NO_MD:%.*]] = fdiv float [[A]], [[B]]
|
|
; DAZ-NEXT: store volatile float [[NO_MD]], ptr addrspace(1) [[OUT]], align 4
|
|
; DAZ-NEXT: [[MD_HALF_ULP:%.*]] = fdiv float [[A]], [[B]], !fpmath !1
|
|
; DAZ-NEXT: store volatile float [[MD_HALF_ULP]], ptr addrspace(1) [[OUT]], align 4
|
|
; DAZ-NEXT: [[TMP1:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[B]])
|
|
; DAZ-NEXT: [[TMP2:%.*]] = extractvalue { float, i32 } [[TMP1]], 0
|
|
; DAZ-NEXT: [[TMP3:%.*]] = extractvalue { float, i32 } [[TMP1]], 1
|
|
; DAZ-NEXT: [[TMP4:%.*]] = call float @llvm.amdgcn.rcp.f32(float [[TMP2]])
|
|
; DAZ-NEXT: [[TMP5:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[A]])
|
|
; DAZ-NEXT: [[TMP6:%.*]] = extractvalue { float, i32 } [[TMP5]], 0
|
|
; DAZ-NEXT: [[TMP7:%.*]] = extractvalue { float, i32 } [[TMP5]], 1
|
|
; DAZ-NEXT: [[TMP8:%.*]] = fmul float [[TMP6]], [[TMP4]]
|
|
; DAZ-NEXT: [[TMP9:%.*]] = sub i32 [[TMP7]], [[TMP3]]
|
|
; DAZ-NEXT: [[MD_1ULP:%.*]] = call float @llvm.ldexp.f32.i32(float [[TMP8]], i32 [[TMP9]])
|
|
; DAZ-NEXT: store volatile float [[MD_1ULP]], ptr addrspace(1) [[OUT]], align 4
|
|
; DAZ-NEXT: [[MD_25ULP:%.*]] = call float @llvm.amdgcn.fdiv.fast(float [[A]], float [[B]])
|
|
; DAZ-NEXT: store volatile float [[MD_25ULP]], ptr addrspace(1) [[OUT]], align 4
|
|
; DAZ-NEXT: [[MD_3ULP:%.*]] = call float @llvm.amdgcn.fdiv.fast(float [[A]], float [[B]])
|
|
; DAZ-NEXT: store volatile float [[MD_3ULP]], ptr addrspace(1) [[OUT]], align 4
|
|
; DAZ-NEXT: [[FAST_MD_25ULP:%.*]] = fdiv fast float [[A]], [[B]], !fpmath !0
|
|
; DAZ-NEXT: store volatile float [[FAST_MD_25ULP]], ptr addrspace(1) [[OUT]], align 4
|
|
; DAZ-NEXT: [[AFN_MD_25ULP:%.*]] = fdiv afn float [[A]], [[B]], !fpmath !0
|
|
; DAZ-NEXT: store volatile float [[AFN_MD_25ULP]], ptr addrspace(1) [[OUT]], align 4
|
|
; DAZ-NEXT: [[NO_MD_ARCP:%.*]] = fdiv arcp float [[A]], [[B]]
|
|
; DAZ-NEXT: store volatile float [[NO_MD_ARCP]], ptr addrspace(1) [[OUT]], align 4
|
|
; DAZ-NEXT: [[TMP10:%.*]] = call arcp float @llvm.amdgcn.rcp.f32(float [[B]])
|
|
; DAZ-NEXT: [[ARCP_MD_25ULP:%.*]] = fmul arcp float [[A]], [[TMP10]]
|
|
; DAZ-NEXT: store volatile float [[ARCP_MD_25ULP]], ptr addrspace(1) [[OUT]], align 4
|
|
; DAZ-NEXT: [[TMP11:%.*]] = call arcp float @llvm.amdgcn.rcp.f32(float [[B]])
|
|
; DAZ-NEXT: [[ARCP_MD_1ULP:%.*]] = fmul arcp float [[A]], [[TMP11]]
|
|
; DAZ-NEXT: store volatile float [[ARCP_MD_1ULP]], ptr addrspace(1) [[OUT]], align 4
|
|
; DAZ-NEXT: ret void
|
|
;
|
|
%no.md = fdiv float %a, %b
|
|
store volatile float %no.md, ptr addrspace(1) %out, align 4
|
|
%md.half.ulp = fdiv float %a, %b, !fpmath !1
|
|
store volatile float %md.half.ulp, ptr addrspace(1) %out, align 4
|
|
%md.1ulp = fdiv float %a, %b, !fpmath !2
|
|
store volatile float %md.1ulp, ptr addrspace(1) %out, align 4
|
|
%md.25ulp = fdiv float %a, %b, !fpmath !0
|
|
store volatile float %md.25ulp, ptr addrspace(1) %out, align 4
|
|
%md.3ulp = fdiv float %a, %b, !fpmath !3
|
|
store volatile float %md.3ulp, ptr addrspace(1) %out, align 4
|
|
%fast.md.25ulp = fdiv fast float %a, %b, !fpmath !0
|
|
store volatile float %fast.md.25ulp, ptr addrspace(1) %out, align 4
|
|
%afn.md.25ulp = fdiv afn float %a, %b, !fpmath !0
|
|
store volatile float %afn.md.25ulp, ptr addrspace(1) %out, align 4
|
|
%no.md.arcp = fdiv arcp float %a, %b
|
|
store volatile float %no.md.arcp, ptr addrspace(1) %out, align 4
|
|
%arcp.md.25ulp = fdiv arcp float %a, %b, !fpmath !0
|
|
store volatile float %arcp.md.25ulp, ptr addrspace(1) %out, align 4
|
|
%arcp.md.1ulp = fdiv arcp float %a, %b, !fpmath !2
|
|
store volatile float %arcp.md.1ulp, ptr addrspace(1) %out, align 4
|
|
ret void
|
|
}
|
|
|
|
declare float @llvm.sqrt.f32(float)
|
|
declare float @llvm.fabs.f32(float)
|
|
declare <2 x float> @llvm.sqrt.v2f32(<2 x float>)
|
|
declare <4 x float> @llvm.sqrt.v4f32(<4 x float>)
|
|
declare void @llvm.assume(i1 noundef)
|
|
|
|
attributes #0 = { optnone noinline }
|
|
|
|
!0 = !{float 2.500000e+00}
|
|
!1 = !{float 5.000000e-01}
|
|
!2 = !{float 1.000000e+00}
|
|
!3 = !{float 3.000000e+00}
|