SIInsertWaitcnts inserts waitcnt instructions to resolve data dependencies. The GFX10+ vscnt (VMEM store count) counter is never used in this way. It is only used to resolve memory dependencies, and that is handled by SIMemoryLegalizer. Hence there is no need to conservatively wait for vscnt to be 0 on function entry and before returns. Differential Revision: https://reviews.llvm.org/D153537
212 lines
7.7 KiB
LLVM
212 lines
7.7 KiB
LLVM
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -global-isel -mtriple=amdgcn-amd-mesa3d -mcpu=gfx802 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX89,GFX8 %s
; RUN: llc -global-isel -mtriple=amdgcn-amd-mesa3d -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX89,GFX9 %s
; RUN: llc -global-isel -mtriple=amdgcn-amd-mesa3d -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck -check-prefix=GFX10 %s
|
|
|
|
; Pattern: umin(umax(%a, 12), 17) -- value first in both calls.
; Under both the GFX89 and GFX10 prefixes the checks expect the pair to be
; selected as a single v_med3_u32 v0, v0, 12, 17 instead of separate max and
; min instructions.
define i32 @test_min_max_ValK0_K1_u32(i32 %a) {
; GFX89-LABEL: test_min_max_ValK0_K1_u32:
; GFX89: ; %bb.0:
; GFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX89-NEXT: v_med3_u32 v0, v0, 12, 17
; GFX89-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: test_min_max_ValK0_K1_u32:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: v_med3_u32 v0, v0, 12, 17
; GFX10-NEXT: s_setpc_b64 s[30:31]
  %umax = call i32 @llvm.umax.i32(i32 %a, i32 12)
  %umed = call i32 @llvm.umin.i32(i32 %umax, i32 17)
  ret i32 %umed
}
|
|
|
|
; Pattern: umin(umax(12, %a), 17) -- constant first in the inner umax.
; NOTE(review): despite the "_i32" suffix and "ValK0" in the name, the body
; uses the unsigned umax/umin intrinsics with the constant as the first umax
; operand; the name is kept as-is because it is the label the checks match.
; Both prefixes expect a single v_med3_u32 v0, v0, 12, 17.
define i32 @min_max_ValK0_K1_i32(i32 %a) {
; GFX89-LABEL: min_max_ValK0_K1_i32:
; GFX89: ; %bb.0:
; GFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX89-NEXT: v_med3_u32 v0, v0, 12, 17
; GFX89-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: min_max_ValK0_K1_i32:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: v_med3_u32 v0, v0, 12, 17
; GFX10-NEXT: s_setpc_b64 s[30:31]
  %umax = call i32 @llvm.umax.i32(i32 12, i32 %a)
  %umed = call i32 @llvm.umin.i32(i32 %umax, i32 17)
  ret i32 %umed
}
|
|
|
|
; Pattern: umin(17, umax(%a, 12)) -- constant first in the outer umin, value
; first in the inner umax.
; Both prefixes expect a single v_med3_u32 v0, v0, 12, 17.
define i32 @test_min_K1max_ValK0__u32(i32 %a) {
; GFX89-LABEL: test_min_K1max_ValK0__u32:
; GFX89: ; %bb.0:
; GFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX89-NEXT: v_med3_u32 v0, v0, 12, 17
; GFX89-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: test_min_K1max_ValK0__u32:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: v_med3_u32 v0, v0, 12, 17
; GFX10-NEXT: s_setpc_b64 s[30:31]
  %umax = call i32 @llvm.umax.i32(i32 %a, i32 12)
  %umed = call i32 @llvm.umin.i32(i32 17, i32 %umax)
  ret i32 %umed
}
|
|
|
|
; Pattern: umin(17, umax(12, %a)) -- constant first in both calls.
; Both prefixes expect a single v_med3_u32 v0, v0, 12, 17.
define i32 @test_min_K1max_K0Val__u32(i32 %a) {
; GFX89-LABEL: test_min_K1max_K0Val__u32:
; GFX89: ; %bb.0:
; GFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX89-NEXT: v_med3_u32 v0, v0, 12, 17
; GFX89-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: test_min_K1max_K0Val__u32:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: v_med3_u32 v0, v0, 12, 17
; GFX10-NEXT: s_setpc_b64 s[30:31]
  %umax = call i32 @llvm.umax.i32(i32 12, i32 %a)
  %umed = call i32 @llvm.umin.i32(i32 17, i32 %umax)
  ret i32 %umed
}
|
|
|
|
; Pattern: umax(umin(%a, 17), 12) -- min applied first, value first in both
; calls.
; Both prefixes expect a single v_med3_u32 v0, v0, 12, 17.
define i32 @test_max_min_ValK1_K0_u32(i32 %a) {
; GFX89-LABEL: test_max_min_ValK1_K0_u32:
; GFX89: ; %bb.0:
; GFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX89-NEXT: v_med3_u32 v0, v0, 12, 17
; GFX89-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: test_max_min_ValK1_K0_u32:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: v_med3_u32 v0, v0, 12, 17
; GFX10-NEXT: s_setpc_b64 s[30:31]
  %umin = call i32 @llvm.umin.i32(i32 %a, i32 17)
  %umed = call i32 @llvm.umax.i32(i32 %umin, i32 12)
  ret i32 %umed
}
|
|
|
|
; Pattern: umax(umin(17, %a), 12) -- min applied first, constant first in the
; inner umin.
; Both prefixes expect a single v_med3_u32 v0, v0, 12, 17.
define i32 @test_max_min_K1Val_K0_u32(i32 %a) {
; GFX89-LABEL: test_max_min_K1Val_K0_u32:
; GFX89: ; %bb.0:
; GFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX89-NEXT: v_med3_u32 v0, v0, 12, 17
; GFX89-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: test_max_min_K1Val_K0_u32:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: v_med3_u32 v0, v0, 12, 17
; GFX10-NEXT: s_setpc_b64 s[30:31]
  %umin = call i32 @llvm.umin.i32(i32 17, i32 %a)
  %umed = call i32 @llvm.umax.i32(i32 %umin, i32 12)
  ret i32 %umed
}
|
|
|
|
; Pattern: umax(12, umin(%a, 17)) -- min applied first, constant first in the
; outer umax.
; Both prefixes expect a single v_med3_u32 v0, v0, 12, 17.
define i32 @test_max_K0min_ValK1__u32(i32 %a) {
; GFX89-LABEL: test_max_K0min_ValK1__u32:
; GFX89: ; %bb.0:
; GFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX89-NEXT: v_med3_u32 v0, v0, 12, 17
; GFX89-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: test_max_K0min_ValK1__u32:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: v_med3_u32 v0, v0, 12, 17
; GFX10-NEXT: s_setpc_b64 s[30:31]
  %umin = call i32 @llvm.umin.i32(i32 %a, i32 17)
  %umed = call i32 @llvm.umax.i32(i32 12, i32 %umin)
  ret i32 %umed
}
|
|
|
|
; Pattern: umax(12, umin(17, %a)) -- min applied first, constant first in
; both calls.
; Both prefixes expect a single v_med3_u32 v0, v0, 12, 17.
define i32 @test_max_K0min_K1Val__u32(i32 %a) {
; GFX89-LABEL: test_max_K0min_K1Val__u32:
; GFX89: ; %bb.0:
; GFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX89-NEXT: v_med3_u32 v0, v0, 12, 17
; GFX89-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: test_max_K0min_K1Val__u32:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: v_med3_u32 v0, v0, 12, 17
; GFX10-NEXT: s_setpc_b64 s[30:31]
  %umin = call i32 @llvm.umin.i32(i32 17, i32 %a)
  %umed = call i32 @llvm.umax.i32(i32 12, i32 %umin)
  ret i32 %umed
}
|
|
|
|
; Vector variant: umax(<12, 12>, umin(<17, 17>, %a)) on <2 x i16>.
; No med3 is expected here. The GFX8 checks expect the two 16-bit halves to
; be clamped separately (v_min_u16/v_max_u16 for one half, the SDWA forms
; reading WORD_1 / writing WORD_1 for the other) and recombined with
; v_or_b32. The GFX9 and GFX10 checks expect the packed pair
; v_pk_min_u16 followed by v_pk_max_u16.
define <2 x i16> @test_max_K0min_K1Val__v2u16(<2 x i16> %a) {
; GFX8-LABEL: test_max_K0min_K1Val__v2u16:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: v_mov_b32_e32 v2, 17
; GFX8-NEXT: v_min_u16_e32 v1, 17, v0
; GFX8-NEXT: v_min_u16_sdwa v0, v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
; GFX8-NEXT: v_mov_b32_e32 v2, 12
; GFX8-NEXT: v_max_u16_e32 v1, 12, v1
; GFX8-NEXT: v_max_u16_sdwa v0, v2, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
; GFX8-NEXT: v_or_b32_e32 v0, v1, v0
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: test_max_K0min_K1Val__v2u16:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_pk_min_u16 v0, 17, v0 op_sel_hi:[0,1]
; GFX9-NEXT: v_pk_max_u16 v0, 12, v0 op_sel_hi:[0,1]
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: test_max_K0min_K1Val__v2u16:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: v_pk_min_u16 v0, 17, v0 op_sel_hi:[0,1]
; GFX10-NEXT: v_pk_max_u16 v0, 12, v0 op_sel_hi:[0,1]
; GFX10-NEXT: s_setpc_b64 s[30:31]
  %umin = call <2 x i16> @llvm.umin.v2i16(<2 x i16> <i16 17, i16 17>, <2 x i16> %a)
  %umed = call <2 x i16> @llvm.umax.v2i16(<2 x i16> <i16 12, i16 12>, <2 x i16> %umin)
  ret <2 x i16> %umed
}
|
|
|
|
; Scalar variant: an amdgpu_ps function whose argument is passed `inreg`,
; computing umin(umax(%a, 12), 17).
; Both prefixes expect the operations to stay as the scalar pair
; s_max_u32 / s_min_u32 on SGPRs (input in s2, result in s0); no med3
; instruction appears in the expected output.
define amdgpu_ps i32 @test_uniform_min_max(i32 inreg %a) {
; GFX89-LABEL: test_uniform_min_max:
; GFX89: ; %bb.0:
; GFX89-NEXT: s_max_u32 s0, s2, 12
; GFX89-NEXT: s_min_u32 s0, s0, 17
; GFX89-NEXT: ; return to shader part epilog
;
; GFX10-LABEL: test_uniform_min_max:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_max_u32 s0, s2, 12
; GFX10-NEXT: s_min_u32 s0, s0, 17
; GFX10-NEXT: ; return to shader part epilog
  %umax = call i32 @llvm.umax.i32(i32 %a, i32 12)
  %umed = call i32 @llvm.umin.i32(i32 %umax, i32 17)
  ret i32 %umed
}
|
|
|
|
; Pattern: umin(umax(%a, 12), 65) -- the upper bound 65 (0x41) is, per the
; test name, not encodable as an inline constant.
; The GFX89 checks expect 0x41 to be moved into v1 first and then passed as a
; register operand to v_med3_u32; the GFX10 checks expect 0x41 to appear
; directly as a literal operand of v_med3_u32.
define i32 @test_non_inline_constant_u32(i32 %a) {
; GFX89-LABEL: test_non_inline_constant_u32:
; GFX89: ; %bb.0:
; GFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX89-NEXT: v_mov_b32_e32 v1, 0x41
; GFX89-NEXT: v_med3_u32 v0, v0, 12, v1
; GFX89-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: test_non_inline_constant_u32:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: v_med3_u32 v0, v0, 12, 0x41
; GFX10-NEXT: s_setpc_b64 s[30:31]
  %umax = call i32 @llvm.umax.i32(i32 %a, i32 12)
  %umed = call i32 @llvm.umin.i32(i32 %umax, i32 65)
  ret i32 %umed
}
|
|
|
|
; Declarations of the unsigned min/max intrinsics called by the tests in this
; file: the scalar i32 forms and the <2 x i16> vector forms.
declare i32 @llvm.umin.i32(i32, i32)
declare i32 @llvm.umax.i32(i32, i32)
declare <2 x i16> @llvm.umin.v2i16(<2 x i16>, <2 x i16>)
declare <2 x i16> @llvm.umax.v2i16(<2 x i16>, <2 x i16>)
|