Files
clang-p2996/llvm/test/CodeGen/AMDGPU/strict_fma.f32.ll
Kevin P. Neal 76c22b18ea [FPEnv][AMDGPU] Correct strictfp tests.
Correct AMDGPU strictfp tests to follow the rules documented in the LangRef:
https://llvm.org/docs/LangRef.html#constrained-floating-point-intrinsics

Mostly these tests just needed the strictfp attribute on function
definitions.  I've also removed the strictfp attribute from uses
of the constrained intrinsics because it comes by default since
D154991, but I only did this in tests I was changing anyway.

I also removed attributes added to declare lines of intrinsics. The
attributes of intrinsics cannot be changed in a test so I eliminated
attempts to do so.

Test changes verified with D146845.
2023-07-25 13:24:46 -04:00

162 lines
7.3 KiB
LLVM

; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 < %s | FileCheck -check-prefix=GCN %s
; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 < %s | FileCheck -check-prefix=GFX10 %s
; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1100 -amdgpu-enable-delay-alu=0 < %s | FileCheck -check-prefix=GFX10 %s
define float @v_constained_fma_f32_fpexcept_strict(float %x, float %y, float %z) #0 {
; GCN-LABEL: v_constained_fma_f32_fpexcept_strict:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT: v_fma_f32 v0, v0, v1, v2
; GCN-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_constained_fma_f32_fpexcept_strict:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: v_fma_f32 v0, v0, v1, v2
; GFX10-NEXT: s_setpc_b64 s[30:31]
%val = call float @llvm.experimental.constrained.fma.f32(float %x, float %y, float %z, metadata !"round.tonearest", metadata !"fpexcept.strict")
ret float %val
}
define <2 x float> @v_constained_fma_v2f32_fpexcept_strict(<2 x float> %x, <2 x float> %y, <2 x float> %z) #0 {
; GCN-LABEL: v_constained_fma_v2f32_fpexcept_strict:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT: v_fma_f32 v0, v0, v2, v4
; GCN-NEXT: v_fma_f32 v1, v1, v3, v5
; GCN-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_constained_fma_v2f32_fpexcept_strict:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: v_fma_f32 v0, v0, v2, v4
; GFX10-NEXT: v_fma_f32 v1, v1, v3, v5
; GFX10-NEXT: s_setpc_b64 s[30:31]
%val = call <2 x float> @llvm.experimental.constrained.fma.v2f32(<2 x float> %x, <2 x float> %y, <2 x float> %z, metadata !"round.tonearest", metadata !"fpexcept.strict")
ret <2 x float> %val
}
define <3 x float> @v_constained_fma_v3f32_fpexcept_strict(<3 x float> %x, <3 x float> %y, <3 x float> %z) #0 {
; GCN-LABEL: v_constained_fma_v3f32_fpexcept_strict:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT: v_fma_f32 v0, v0, v3, v6
; GCN-NEXT: v_fma_f32 v1, v1, v4, v7
; GCN-NEXT: v_fma_f32 v2, v2, v5, v8
; GCN-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_constained_fma_v3f32_fpexcept_strict:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: v_fma_f32 v0, v0, v3, v6
; GFX10-NEXT: v_fma_f32 v1, v1, v4, v7
; GFX10-NEXT: v_fma_f32 v2, v2, v5, v8
; GFX10-NEXT: s_setpc_b64 s[30:31]
%val = call <3 x float> @llvm.experimental.constrained.fma.v3f32(<3 x float> %x, <3 x float> %y, <3 x float> %z, metadata !"round.tonearest", metadata !"fpexcept.strict")
ret <3 x float> %val
}
define <4 x float> @v_constained_fma_v4f32_fpexcept_strict(<4 x float> %x, <4 x float> %y, <4 x float> %z) #0 {
; GCN-LABEL: v_constained_fma_v4f32_fpexcept_strict:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT: v_fma_f32 v0, v0, v4, v8
; GCN-NEXT: v_fma_f32 v1, v1, v5, v9
; GCN-NEXT: v_fma_f32 v2, v2, v6, v10
; GCN-NEXT: v_fma_f32 v3, v3, v7, v11
; GCN-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_constained_fma_v4f32_fpexcept_strict:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: v_fma_f32 v0, v0, v4, v8
; GFX10-NEXT: v_fma_f32 v1, v1, v5, v9
; GFX10-NEXT: v_fma_f32 v2, v2, v6, v10
; GFX10-NEXT: v_fma_f32 v3, v3, v7, v11
; GFX10-NEXT: s_setpc_b64 s[30:31]
%val = call <4 x float> @llvm.experimental.constrained.fma.v4f32(<4 x float> %x, <4 x float> %y, <4 x float> %z, metadata !"round.tonearest", metadata !"fpexcept.strict")
ret <4 x float> %val
}
define float @v_constained_fma_f32_fpexcept_strict_fneg(float %x, float %y, float %z) #0 {
; GCN-LABEL: v_constained_fma_f32_fpexcept_strict_fneg:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT: v_fma_f32 v0, v0, v1, -v2
; GCN-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_constained_fma_f32_fpexcept_strict_fneg:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: v_fma_f32 v0, v0, v1, -v2
; GFX10-NEXT: s_setpc_b64 s[30:31]
%neg.z = fneg float %z
%val = call float @llvm.experimental.constrained.fma.f32(float %x, float %y, float %neg.z, metadata !"round.tonearest", metadata !"fpexcept.strict")
ret float %val
}
define float @v_constained_fma_f32_fpexcept_strict_fneg_fneg(float %x, float %y, float %z) #0 {
; GCN-LABEL: v_constained_fma_f32_fpexcept_strict_fneg_fneg:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT: v_fma_f32 v0, -v0, -v1, v2
; GCN-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_constained_fma_f32_fpexcept_strict_fneg_fneg:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: v_fma_f32 v0, -v0, -v1, v2
; GFX10-NEXT: s_setpc_b64 s[30:31]
%neg.x = fneg float %x
%neg.y = fneg float %y
%val = call float @llvm.experimental.constrained.fma.f32(float %neg.x, float %neg.y, float %z, metadata !"round.tonearest", metadata !"fpexcept.strict")
ret float %val
}
define float @v_constained_fma_f32_fpexcept_strict_fabs_fabs(float %x, float %y, float %z) #0 {
; GCN-LABEL: v_constained_fma_f32_fpexcept_strict_fabs_fabs:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT: v_fma_f32 v0, |v0|, |v1|, v2
; GCN-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_constained_fma_f32_fpexcept_strict_fabs_fabs:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: v_fma_f32 v0, |v0|, |v1|, v2
; GFX10-NEXT: s_setpc_b64 s[30:31]
%neg.x = call float @llvm.fabs.f32(float %x) #0
%neg.y = call float @llvm.fabs.f32(float %y) #0
%val = call float @llvm.experimental.constrained.fma.f32(float %neg.x, float %neg.y, float %z, metadata !"round.tonearest", metadata !"fpexcept.strict")
ret float %val
}
define <2 x float> @v_constained_fma_v2f32_fpexcept_strict_fneg_fneg(<2 x float> %x, <2 x float> %y, <2 x float> %z) #0 {
; GCN-LABEL: v_constained_fma_v2f32_fpexcept_strict_fneg_fneg:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT: v_fma_f32 v0, -v0, -v2, v4
; GCN-NEXT: v_fma_f32 v1, -v1, -v3, v5
; GCN-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_constained_fma_v2f32_fpexcept_strict_fneg_fneg:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: v_fma_f32 v0, -v0, -v2, v4
; GFX10-NEXT: v_fma_f32 v1, -v1, -v3, v5
; GFX10-NEXT: s_setpc_b64 s[30:31]
%neg.x = fneg <2 x float> %x
%neg.y = fneg <2 x float> %y
%val = call <2 x float> @llvm.experimental.constrained.fma.v2f32(<2 x float> %neg.x, <2 x float> %neg.y, <2 x float> %z, metadata !"round.tonearest", metadata !"fpexcept.strict")
ret <2 x float> %val
}
declare float @llvm.fabs.f32(float)
declare float @llvm.experimental.constrained.fma.f32(float, float, float, metadata, metadata)
declare <2 x float> @llvm.experimental.constrained.fma.v2f32(<2 x float>, <2 x float>, <2 x float>, metadata, metadata)
declare <3 x float> @llvm.experimental.constrained.fma.v3f32(<3 x float>, <3 x float>, <3 x float>, metadata, metadata)
declare <4 x float> @llvm.experimental.constrained.fma.v4f32(<4 x float>, <4 x float>, <4 x float>, metadata, metadata)
attributes #0 = { strictfp }