Files
clang-p2996/llvm/test/CodeGen/AMDGPU/GlobalISel/umed3.ll
Petar Avramovic 4a9bc59867 AMDGPU/GlobalISel: Add integer med3 combines
Add signed and unsigned integer versions of the med3 combine.
The source pattern is min(max(Val, K0), K1) or max(min(Val, K1), K0),
where K0 and K1 are constants and K0 <= K1. The destination is the med3
instruction whose signedness matches that of the min/max in the source.

Differential Revision: https://reviews.llvm.org/D90050
2021-04-27 11:52:23 +02:00

128 lines
4.6 KiB
LLVM

; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -global-isel -mtriple=amdgcn-amd-mesa3d -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck -check-prefix=GFX10 %s
; Pattern umin(umax(Val, K0), K1) with K0 = 12 and K1 = 17: the expected
; output is a single v_med3_u32 on the input register.
define i32 @test_min_max_ValK0_K1_u32(i32 %a) {
; GFX10-LABEL: test_min_max_ValK0_K1_u32:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-NEXT:    v_med3_u32 v0, v0, 12, 17
; GFX10-NEXT:    s_setpc_b64 s[30:31]
  %lower_bounded = call i32 @llvm.umax.i32(i32 %a, i32 12)
  %clamped = call i32 @llvm.umin.i32(i32 %lower_bounded, i32 17)
  ret i32 %clamped
}
; Pattern umin(umax(K0, Val), K1) with K0 = 12 and K1 = 17: same as the
; Val-first form, but the constant is the first operand of the inner umax.
; The expected output is still a single v_med3_u32.
; NOTE(review): the function name says "ValK0" and "_i32" and lacks the
; "test_" prefix, yet the body uses the unsigned intrinsics with K0 as the
; first operand. Sibling naming suggests test_min_max_K0Val_K1_u32; the
; symbol is left unchanged here.
define i32 @min_max_ValK0_K1_i32(i32 %a) {
; GFX10-LABEL: min_max_ValK0_K1_i32:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-NEXT:    v_med3_u32 v0, v0, 12, 17
; GFX10-NEXT:    s_setpc_b64 s[30:31]
  %lower_bounded = call i32 @llvm.umax.i32(i32 12, i32 %a)
  %clamped = call i32 @llvm.umin.i32(i32 %lower_bounded, i32 17)
  ret i32 %clamped
}
; Pattern umin(K1, umax(Val, K0)) with K0 = 12 and K1 = 17: the outer umin
; takes its constant as the first operand. The expected output is a single
; v_med3_u32.
define i32 @test_min_K1max_ValK0__u32(i32 %a) {
; GFX10-LABEL: test_min_K1max_ValK0__u32:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-NEXT:    v_med3_u32 v0, v0, 12, 17
; GFX10-NEXT:    s_setpc_b64 s[30:31]
  %lower_bounded = call i32 @llvm.umax.i32(i32 %a, i32 12)
  %clamped = call i32 @llvm.umin.i32(i32 17, i32 %lower_bounded)
  ret i32 %clamped
}
; Pattern umin(K1, umax(K0, Val)) with K0 = 12 and K1 = 17: both the inner
; umax and the outer umin take their constant as the first operand. The
; expected output is a single v_med3_u32.
define i32 @test_min_K1max_K0Val__u32(i32 %a) {
; GFX10-LABEL: test_min_K1max_K0Val__u32:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-NEXT:    v_med3_u32 v0, v0, 12, 17
; GFX10-NEXT:    s_setpc_b64 s[30:31]
  %lower_bounded = call i32 @llvm.umax.i32(i32 12, i32 %a)
  %clamped = call i32 @llvm.umin.i32(i32 17, i32 %lower_bounded)
  ret i32 %clamped
}
; Pattern umax(umin(Val, K1), K0) with K0 = 12 and K1 = 17: the upper bound
; is applied first, then the lower bound. The expected output is a single
; v_med3_u32 with the constants in (12, 17) order.
define i32 @test_max_min_ValK1_K0_u32(i32 %a) {
; GFX10-LABEL: test_max_min_ValK1_K0_u32:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-NEXT:    v_med3_u32 v0, v0, 12, 17
; GFX10-NEXT:    s_setpc_b64 s[30:31]
  %upper_bounded = call i32 @llvm.umin.i32(i32 %a, i32 17)
  %clamped = call i32 @llvm.umax.i32(i32 %upper_bounded, i32 12)
  ret i32 %clamped
}
; Pattern umax(umin(K1, Val), K0) with K0 = 12 and K1 = 17: the inner umin
; takes its constant as the first operand. The expected output is a single
; v_med3_u32.
define i32 @test_max_min_K1Val_K0_u32(i32 %a) {
; GFX10-LABEL: test_max_min_K1Val_K0_u32:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-NEXT:    v_med3_u32 v0, v0, 12, 17
; GFX10-NEXT:    s_setpc_b64 s[30:31]
  %upper_bounded = call i32 @llvm.umin.i32(i32 17, i32 %a)
  %clamped = call i32 @llvm.umax.i32(i32 %upper_bounded, i32 12)
  ret i32 %clamped
}
; Pattern umax(K0, umin(Val, K1)) with K0 = 12 and K1 = 17: the outer umax
; takes its constant as the first operand. The expected output is a single
; v_med3_u32.
define i32 @test_max_K0min_ValK1__u32(i32 %a) {
; GFX10-LABEL: test_max_K0min_ValK1__u32:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-NEXT:    v_med3_u32 v0, v0, 12, 17
; GFX10-NEXT:    s_setpc_b64 s[30:31]
  %upper_bounded = call i32 @llvm.umin.i32(i32 %a, i32 17)
  %clamped = call i32 @llvm.umax.i32(i32 12, i32 %upper_bounded)
  ret i32 %clamped
}
; Pattern umax(K0, umin(K1, Val)) with K0 = 12 and K1 = 17: both the inner
; umin and the outer umax take their constant as the first operand. The
; expected output is a single v_med3_u32.
define i32 @test_max_K0min_K1Val__u32(i32 %a) {
; GFX10-LABEL: test_max_K0min_K1Val__u32:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-NEXT:    v_med3_u32 v0, v0, 12, 17
; GFX10-NEXT:    s_setpc_b64 s[30:31]
  %upper_bounded = call i32 @llvm.umin.i32(i32 17, i32 %a)
  %clamped = call i32 @llvm.umax.i32(i32 12, i32 %upper_bounded)
  ret i32 %clamped
}
; Same umax(K0, umin(K1, Val)) shape as the scalar tests, but on <2 x i16>
; with splat constants 17 and 12. The expected output keeps the packed
; v_pk_min_u16 / v_pk_max_u16 pair; no med3 instruction appears.
; NOTE(review): presumably the med3 combine is not applied to packed 16-bit
; vectors -- confirm against the combiner implementation.
define <2 x i16> @test_max_K0min_K1Val__v2u16(<2 x i16> %a) {
; GFX10-LABEL: test_max_K0min_K1Val__v2u16:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-NEXT:    v_pk_min_u16 v0, 17, v0 op_sel_hi:[0,1]
; GFX10-NEXT:    v_pk_max_u16 v0, 12, v0 op_sel_hi:[0,1]
; GFX10-NEXT:    s_setpc_b64 s[30:31]
  %upper_bounded = call <2 x i16> @llvm.umin.v2i16(<2 x i16> <i16 17, i16 17>, <2 x i16> %a)
  %clamped = call <2 x i16> @llvm.umax.v2i16(<2 x i16> <i16 12, i16 12>, <2 x i16> %upper_bounded)
  ret <2 x i16> %clamped
}
; umin(umax(Val, 12), 17) in an amdgpu_ps function whose argument is passed
; inreg. The expected output is the scalar s_max_u32 / s_min_u32 pair; no
; med3 instruction appears.
; NOTE(review): presumably the combine is skipped because the value is
; uniform (held in SGPRs), as the test name suggests -- confirm against the
; combiner implementation.
define amdgpu_ps i32 @test_uniform_min_max(i32 inreg %a) {
; GFX10-LABEL: test_uniform_min_max:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_max_u32 s0, s2, 12
; GFX10-NEXT:    s_min_u32 s0, s0, 17
; GFX10-NEXT:    ; return to shader part epilog
  %lower_bounded = call i32 @llvm.umax.i32(i32 %a, i32 12)
  %clamped = call i32 @llvm.umin.i32(i32 %lower_bounded, i32 17)
  ret i32 %clamped
}
; Declarations of the unsigned min/max intrinsics called by the tests in
; this file: the scalar i32 forms and the <2 x i16> vector forms.
declare i32 @llvm.umin.i32(i32, i32)
declare i32 @llvm.umax.i32(i32, i32)
declare <2 x i16> @llvm.umin.v2i16(<2 x i16>, <2 x i16>)
declare <2 x i16> @llvm.umax.v2i16(<2 x i16>, <2 x i16>)