Files
clang-p2996/llvm/test/CodeGen/AMDGPU/div-rem-by-constant-64.ll
Simon Pilgrim 9fb4bc5bf4 [DAG] SimplifyMultipleUseDemandedBits - ignore SRL node if we're just demanding known sign bits (#114389)
Check to see if we are only demanding (shifted) signbits from a SRL node that are also signbits in the source node.

We can't demand any upper zero bits that the SRL will shift in (up to max shift amount), and the lower demanded bits bound must already be all signbits.
2024-10-31 16:40:29 +00:00

1411 lines
58 KiB
LLVM

; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -o - %s | FileCheck -check-prefixes=GFX9 %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx942 -o - %s | FileCheck -check-prefixes=GFX942 %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1030 -o - %s | FileCheck -check-prefixes=GFX1030 %s
; Sample test to check how we deal with division/modulo by 64-bit constants.
; Signed 64-bit remainder by the constant 3.
; The expected code on every target is straight-line: it multiplies the input by
; the 32-bit pieces 0x55555556 / 0x55555555, adds the sign bit of the result
; (shift right by 31), multiplies that quotient by 3 and subtracts it from the input.
define noundef i64 @srem64_3(i64 noundef %i) {
; GFX9-LABEL: srem64_3:
; GFX9: ; %bb.0: ; %entry
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: s_mov_b32 s6, 0x55555556
; GFX9-NEXT: v_mul_hi_u32 v2, v0, s6
; GFX9-NEXT: v_mov_b32_e32 v3, 0
; GFX9-NEXT: s_mov_b32 s7, 0x55555555
; GFX9-NEXT: v_mad_u64_u32 v[4:5], s[4:5], v1, s6, v[2:3]
; GFX9-NEXT: v_mov_b32_e32 v6, v5
; GFX9-NEXT: v_mov_b32_e32 v5, v3
; GFX9-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v0, s7, v[4:5]
; GFX9-NEXT: v_ashrrev_i32_e32 v4, 31, v1
; GFX9-NEXT: v_mul_lo_u32 v5, v4, s7
; GFX9-NEXT: v_mov_b32_e32 v2, v3
; GFX9-NEXT: v_add_co_u32_e32 v2, vcc, v6, v2
; GFX9-NEXT: v_addc_co_u32_e64 v3, s[4:5], 0, 0, vcc
; GFX9-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v1, s7, v[2:3]
; GFX9-NEXT: v_mul_lo_u32 v6, v4, s6
; GFX9-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v4, s6, v[2:3]
; GFX9-NEXT: v_add3_u32 v3, v6, v3, v5
; GFX9-NEXT: v_lshrrev_b32_e32 v4, 31, v3
; GFX9-NEXT: v_add_co_u32_e32 v2, vcc, v2, v4
; GFX9-NEXT: v_addc_co_u32_e32 v4, vcc, 0, v3, vcc
; GFX9-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v2, 3, 0
; GFX9-NEXT: v_mad_u64_u32 v[3:4], s[4:5], v4, 3, v[3:4]
; GFX9-NEXT: v_sub_co_u32_e32 v0, vcc, v0, v2
; GFX9-NEXT: v_subb_co_u32_e32 v1, vcc, v1, v3, vcc
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: srem64_3:
; GFX942: ; %bb.0: ; %entry
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: s_mov_b32 s2, 0x55555556
; GFX942-NEXT: v_mul_hi_u32 v2, v0, s2
; GFX942-NEXT: v_mov_b32_e32 v3, 0
; GFX942-NEXT: v_mad_u64_u32 v[4:5], s[0:1], v1, s2, v[2:3]
; GFX942-NEXT: v_mov_b32_e32 v2, v5
; GFX942-NEXT: v_mov_b32_e32 v5, v3
; GFX942-NEXT: s_mov_b32 s3, 0x55555555
; GFX942-NEXT: v_mad_u64_u32 v[4:5], s[0:1], v0, s3, v[4:5]
; GFX942-NEXT: v_mov_b32_e32 v4, v5
; GFX942-NEXT: v_mov_b32_e32 v5, v3
; GFX942-NEXT: v_lshl_add_u64 v[4:5], v[2:3], 0, v[4:5]
; GFX942-NEXT: v_mad_u64_u32 v[4:5], s[0:1], v1, s3, v[4:5]
; GFX942-NEXT: v_ashrrev_i32_e32 v2, 31, v1
; GFX942-NEXT: v_mad_u64_u32 v[4:5], s[0:1], v2, s2, v[4:5]
; GFX942-NEXT: v_mul_lo_u32 v6, v2, s3
; GFX942-NEXT: v_mul_lo_u32 v2, v2, s2
; GFX942-NEXT: v_add3_u32 v5, v2, v5, v6
; GFX942-NEXT: v_lshrrev_b32_e32 v2, 31, v5
; GFX942-NEXT: v_lshl_add_u64 v[2:3], v[4:5], 0, v[2:3]
; GFX942-NEXT: v_mad_u64_u32 v[4:5], s[0:1], v2, 3, 0
; GFX942-NEXT: v_mov_b32_e32 v2, v5
; GFX942-NEXT: v_mad_u64_u32 v[2:3], s[0:1], v3, 3, v[2:3]
; GFX942-NEXT: v_mov_b32_e32 v3, v2
; GFX942-NEXT: v_sub_co_u32_e32 v0, vcc, v0, v4
; GFX942-NEXT: s_nop 1
; GFX942-NEXT: v_subb_co_u32_e32 v1, vcc, v1, v3, vcc
; GFX942-NEXT: s_setpc_b64 s[30:31]
;
; GFX1030-LABEL: srem64_3:
; GFX1030: ; %bb.0: ; %entry
; GFX1030-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1030-NEXT: v_mul_hi_u32 v2, 0x55555556, v0
; GFX1030-NEXT: v_mov_b32_e32 v3, 0
; GFX1030-NEXT: v_mad_u64_u32 v[4:5], null, 0x55555556, v1, v[2:3]
; GFX1030-NEXT: v_mov_b32_e32 v6, v5
; GFX1030-NEXT: v_mov_b32_e32 v5, v3
; GFX1030-NEXT: v_mad_u64_u32 v[2:3], null, 0x55555555, v0, v[4:5]
; GFX1030-NEXT: v_ashrrev_i32_e32 v4, 31, v1
; GFX1030-NEXT: v_mul_lo_u32 v5, 0x55555555, v4
; GFX1030-NEXT: v_mov_b32_e32 v2, v3
; GFX1030-NEXT: v_add_co_u32 v2, s4, v6, v2
; GFX1030-NEXT: v_add_co_ci_u32_e64 v3, null, 0, 0, s4
; GFX1030-NEXT: v_mad_u64_u32 v[2:3], null, 0x55555555, v1, v[2:3]
; GFX1030-NEXT: v_mad_u64_u32 v[2:3], null, 0x55555556, v4, v[2:3]
; GFX1030-NEXT: v_mul_lo_u32 v4, 0x55555556, v4
; GFX1030-NEXT: v_add3_u32 v3, v4, v3, v5
; GFX1030-NEXT: v_lshrrev_b32_e32 v4, 31, v3
; GFX1030-NEXT: v_add_co_u32 v2, vcc_lo, v2, v4
; GFX1030-NEXT: v_add_co_ci_u32_e32 v4, vcc_lo, 0, v3, vcc_lo
; GFX1030-NEXT: v_mad_u64_u32 v[2:3], null, v2, 3, 0
; GFX1030-NEXT: v_mad_u64_u32 v[3:4], null, v4, 3, v[3:4]
; GFX1030-NEXT: v_sub_co_u32 v0, vcc_lo, v0, v2
; GFX1030-NEXT: v_sub_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo
; GFX1030-NEXT: s_setpc_b64 s[30:31]
entry:
%rem = srem i64 %i, 3
ret i64 %rem
}
; Signed 64-bit remainder; the function name advertises a divisor of 6.
; NOTE(review): the IR below actually computes `srem i64 %i, 3`, and the expected
; code multiplies the quotient by 3, exactly as in srem64_3. This looks like a
; copy-paste slip, so remainder-by-6 is presumably not covered by this function.
; Changing the constant to 6 requires regenerating the assertions with
; utils/update_llc_test_checks.py; do not edit them by hand.
define noundef i64 @srem64_6(i64 noundef %i) {
; GFX9-LABEL: srem64_6:
; GFX9: ; %bb.0: ; %entry
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: s_mov_b32 s6, 0x55555556
; GFX9-NEXT: v_mul_hi_u32 v2, v0, s6
; GFX9-NEXT: v_mov_b32_e32 v3, 0
; GFX9-NEXT: s_mov_b32 s7, 0x55555555
; GFX9-NEXT: v_mad_u64_u32 v[4:5], s[4:5], v1, s6, v[2:3]
; GFX9-NEXT: v_mov_b32_e32 v6, v5
; GFX9-NEXT: v_mov_b32_e32 v5, v3
; GFX9-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v0, s7, v[4:5]
; GFX9-NEXT: v_ashrrev_i32_e32 v4, 31, v1
; GFX9-NEXT: v_mul_lo_u32 v5, v4, s7
; GFX9-NEXT: v_mov_b32_e32 v2, v3
; GFX9-NEXT: v_add_co_u32_e32 v2, vcc, v6, v2
; GFX9-NEXT: v_addc_co_u32_e64 v3, s[4:5], 0, 0, vcc
; GFX9-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v1, s7, v[2:3]
; GFX9-NEXT: v_mul_lo_u32 v6, v4, s6
; GFX9-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v4, s6, v[2:3]
; GFX9-NEXT: v_add3_u32 v3, v6, v3, v5
; GFX9-NEXT: v_lshrrev_b32_e32 v4, 31, v3
; GFX9-NEXT: v_add_co_u32_e32 v2, vcc, v2, v4
; GFX9-NEXT: v_addc_co_u32_e32 v4, vcc, 0, v3, vcc
; GFX9-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v2, 3, 0
; GFX9-NEXT: v_mad_u64_u32 v[3:4], s[4:5], v4, 3, v[3:4]
; GFX9-NEXT: v_sub_co_u32_e32 v0, vcc, v0, v2
; GFX9-NEXT: v_subb_co_u32_e32 v1, vcc, v1, v3, vcc
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: srem64_6:
; GFX942: ; %bb.0: ; %entry
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: s_mov_b32 s2, 0x55555556
; GFX942-NEXT: v_mul_hi_u32 v2, v0, s2
; GFX942-NEXT: v_mov_b32_e32 v3, 0
; GFX942-NEXT: v_mad_u64_u32 v[4:5], s[0:1], v1, s2, v[2:3]
; GFX942-NEXT: v_mov_b32_e32 v2, v5
; GFX942-NEXT: v_mov_b32_e32 v5, v3
; GFX942-NEXT: s_mov_b32 s3, 0x55555555
; GFX942-NEXT: v_mad_u64_u32 v[4:5], s[0:1], v0, s3, v[4:5]
; GFX942-NEXT: v_mov_b32_e32 v4, v5
; GFX942-NEXT: v_mov_b32_e32 v5, v3
; GFX942-NEXT: v_lshl_add_u64 v[4:5], v[2:3], 0, v[4:5]
; GFX942-NEXT: v_mad_u64_u32 v[4:5], s[0:1], v1, s3, v[4:5]
; GFX942-NEXT: v_ashrrev_i32_e32 v2, 31, v1
; GFX942-NEXT: v_mad_u64_u32 v[4:5], s[0:1], v2, s2, v[4:5]
; GFX942-NEXT: v_mul_lo_u32 v6, v2, s3
; GFX942-NEXT: v_mul_lo_u32 v2, v2, s2
; GFX942-NEXT: v_add3_u32 v5, v2, v5, v6
; GFX942-NEXT: v_lshrrev_b32_e32 v2, 31, v5
; GFX942-NEXT: v_lshl_add_u64 v[2:3], v[4:5], 0, v[2:3]
; GFX942-NEXT: v_mad_u64_u32 v[4:5], s[0:1], v2, 3, 0
; GFX942-NEXT: v_mov_b32_e32 v2, v5
; GFX942-NEXT: v_mad_u64_u32 v[2:3], s[0:1], v3, 3, v[2:3]
; GFX942-NEXT: v_mov_b32_e32 v3, v2
; GFX942-NEXT: v_sub_co_u32_e32 v0, vcc, v0, v4
; GFX942-NEXT: s_nop 1
; GFX942-NEXT: v_subb_co_u32_e32 v1, vcc, v1, v3, vcc
; GFX942-NEXT: s_setpc_b64 s[30:31]
;
; GFX1030-LABEL: srem64_6:
; GFX1030: ; %bb.0: ; %entry
; GFX1030-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1030-NEXT: v_mul_hi_u32 v2, 0x55555556, v0
; GFX1030-NEXT: v_mov_b32_e32 v3, 0
; GFX1030-NEXT: v_mad_u64_u32 v[4:5], null, 0x55555556, v1, v[2:3]
; GFX1030-NEXT: v_mov_b32_e32 v6, v5
; GFX1030-NEXT: v_mov_b32_e32 v5, v3
; GFX1030-NEXT: v_mad_u64_u32 v[2:3], null, 0x55555555, v0, v[4:5]
; GFX1030-NEXT: v_ashrrev_i32_e32 v4, 31, v1
; GFX1030-NEXT: v_mul_lo_u32 v5, 0x55555555, v4
; GFX1030-NEXT: v_mov_b32_e32 v2, v3
; GFX1030-NEXT: v_add_co_u32 v2, s4, v6, v2
; GFX1030-NEXT: v_add_co_ci_u32_e64 v3, null, 0, 0, s4
; GFX1030-NEXT: v_mad_u64_u32 v[2:3], null, 0x55555555, v1, v[2:3]
; GFX1030-NEXT: v_mad_u64_u32 v[2:3], null, 0x55555556, v4, v[2:3]
; GFX1030-NEXT: v_mul_lo_u32 v4, 0x55555556, v4
; GFX1030-NEXT: v_add3_u32 v3, v4, v3, v5
; GFX1030-NEXT: v_lshrrev_b32_e32 v4, 31, v3
; GFX1030-NEXT: v_add_co_u32 v2, vcc_lo, v2, v4
; GFX1030-NEXT: v_add_co_ci_u32_e32 v4, vcc_lo, 0, v3, vcc_lo
; GFX1030-NEXT: v_mad_u64_u32 v[2:3], null, v2, 3, 0
; GFX1030-NEXT: v_mad_u64_u32 v[3:4], null, v4, 3, v[3:4]
; GFX1030-NEXT: v_sub_co_u32 v0, vcc_lo, v0, v2
; GFX1030-NEXT: v_sub_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo
; GFX1030-NEXT: s_setpc_b64 s[30:31]
entry:
%rem = srem i64 %i, 3
ret i64 %rem
}
; Unsigned 64-bit remainder by the constant 3.
; The expected code multiplies the input by the 32-bit pieces
; 0xaaaaaaab / 0xaaaaaaaa, shifts the 64-bit result right by 1 to form the
; quotient, multiplies the quotient by 3 and subtracts it from the input.
define noundef i64 @urem64_3(i64 noundef %i) {
; GFX9-LABEL: urem64_3:
; GFX9: ; %bb.0: ; %entry
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: s_mov_b32 s4, 0xaaaaaaab
; GFX9-NEXT: v_mul_hi_u32 v2, v0, s4
; GFX9-NEXT: v_mov_b32_e32 v3, 0
; GFX9-NEXT: s_mov_b32 s6, 0xaaaaaaaa
; GFX9-NEXT: v_mad_u64_u32 v[4:5], s[4:5], v1, s4, v[2:3]
; GFX9-NEXT: v_mov_b32_e32 v6, v5
; GFX9-NEXT: v_mov_b32_e32 v5, v3
; GFX9-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v0, s6, v[4:5]
; GFX9-NEXT: v_mov_b32_e32 v2, v3
; GFX9-NEXT: v_add_co_u32_e32 v2, vcc, v6, v2
; GFX9-NEXT: v_addc_co_u32_e64 v3, s[4:5], 0, 0, vcc
; GFX9-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v1, s6, v[2:3]
; GFX9-NEXT: v_alignbit_b32 v2, v3, v2, 1
; GFX9-NEXT: v_mad_u64_u32 v[4:5], s[4:5], v2, 3, 0
; GFX9-NEXT: v_lshrrev_b32_e32 v3, 1, v3
; GFX9-NEXT: v_mov_b32_e32 v2, v5
; GFX9-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v3, 3, v[2:3]
; GFX9-NEXT: v_sub_co_u32_e32 v0, vcc, v0, v4
; GFX9-NEXT: v_subb_co_u32_e32 v1, vcc, v1, v2, vcc
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: urem64_3:
; GFX942: ; %bb.0: ; %entry
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: s_mov_b32 s0, 0xaaaaaaab
; GFX942-NEXT: v_mul_hi_u32 v2, v0, s0
; GFX942-NEXT: v_mov_b32_e32 v3, 0
; GFX942-NEXT: v_mad_u64_u32 v[4:5], s[0:1], v1, s0, v[2:3]
; GFX942-NEXT: v_mov_b32_e32 v2, v5
; GFX942-NEXT: v_mov_b32_e32 v5, v3
; GFX942-NEXT: s_mov_b32 s2, 0xaaaaaaaa
; GFX942-NEXT: v_mad_u64_u32 v[4:5], s[0:1], v0, s2, v[4:5]
; GFX942-NEXT: v_mov_b32_e32 v4, v5
; GFX942-NEXT: v_mov_b32_e32 v5, v3
; GFX942-NEXT: v_lshl_add_u64 v[2:3], v[2:3], 0, v[4:5]
; GFX942-NEXT: v_mad_u64_u32 v[2:3], s[0:1], v1, s2, v[2:3]
; GFX942-NEXT: v_alignbit_b32 v2, v3, v2, 1
; GFX942-NEXT: v_mad_u64_u32 v[4:5], s[0:1], v2, 3, 0
; GFX942-NEXT: v_mov_b32_e32 v2, v5
; GFX942-NEXT: v_lshrrev_b32_e32 v3, 1, v3
; GFX942-NEXT: v_mad_u64_u32 v[2:3], s[0:1], v3, 3, v[2:3]
; GFX942-NEXT: v_mov_b32_e32 v3, v2
; GFX942-NEXT: v_sub_co_u32_e32 v0, vcc, v0, v4
; GFX942-NEXT: s_nop 1
; GFX942-NEXT: v_subb_co_u32_e32 v1, vcc, v1, v3, vcc
; GFX942-NEXT: s_setpc_b64 s[30:31]
;
; GFX1030-LABEL: urem64_3:
; GFX1030: ; %bb.0: ; %entry
; GFX1030-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1030-NEXT: v_mul_hi_u32 v2, 0xaaaaaaab, v0
; GFX1030-NEXT: v_mov_b32_e32 v3, 0
; GFX1030-NEXT: v_mad_u64_u32 v[4:5], null, 0xaaaaaaab, v1, v[2:3]
; GFX1030-NEXT: v_mov_b32_e32 v6, v5
; GFX1030-NEXT: v_mov_b32_e32 v5, v3
; GFX1030-NEXT: v_mad_u64_u32 v[2:3], null, 0xaaaaaaaa, v0, v[4:5]
; GFX1030-NEXT: v_mov_b32_e32 v2, v3
; GFX1030-NEXT: v_add_co_u32 v2, s4, v6, v2
; GFX1030-NEXT: v_add_co_ci_u32_e64 v3, null, 0, 0, s4
; GFX1030-NEXT: v_mad_u64_u32 v[2:3], null, 0xaaaaaaaa, v1, v[2:3]
; GFX1030-NEXT: v_alignbit_b32 v2, v3, v2, 1
; GFX1030-NEXT: v_lshrrev_b32_e32 v3, 1, v3
; GFX1030-NEXT: v_mad_u64_u32 v[4:5], null, v2, 3, 0
; GFX1030-NEXT: v_mov_b32_e32 v2, v5
; GFX1030-NEXT: v_sub_co_u32 v0, vcc_lo, v0, v4
; GFX1030-NEXT: v_mad_u64_u32 v[2:3], null, v3, 3, v[2:3]
; GFX1030-NEXT: v_sub_co_ci_u32_e32 v1, vcc_lo, v1, v2, vcc_lo
; GFX1030-NEXT: s_setpc_b64 s[30:31]
entry:
%rem = urem i64 %i, 3
ret i64 %rem
}
; Unsigned 64-bit remainder by the constant 6.
; The expected code uses the same 0xaaaaaaab / 0xaaaaaaaa multiplier pieces as
; urem64_3, but shifts the 64-bit result right by 2 to form the quotient, then
; multiplies the quotient by 6 and subtracts it from the input.
define noundef i64 @urem64_6(i64 noundef %i) {
; GFX9-LABEL: urem64_6:
; GFX9: ; %bb.0: ; %entry
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: s_mov_b32 s4, 0xaaaaaaab
; GFX9-NEXT: v_mul_hi_u32 v2, v0, s4
; GFX9-NEXT: v_mov_b32_e32 v3, 0
; GFX9-NEXT: s_mov_b32 s6, 0xaaaaaaaa
; GFX9-NEXT: v_mad_u64_u32 v[4:5], s[4:5], v1, s4, v[2:3]
; GFX9-NEXT: v_mov_b32_e32 v6, v5
; GFX9-NEXT: v_mov_b32_e32 v5, v3
; GFX9-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v0, s6, v[4:5]
; GFX9-NEXT: v_mov_b32_e32 v2, v3
; GFX9-NEXT: v_add_co_u32_e32 v2, vcc, v6, v2
; GFX9-NEXT: v_addc_co_u32_e64 v3, s[4:5], 0, 0, vcc
; GFX9-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v1, s6, v[2:3]
; GFX9-NEXT: v_alignbit_b32 v2, v3, v2, 2
; GFX9-NEXT: v_mad_u64_u32 v[4:5], s[4:5], v2, 6, 0
; GFX9-NEXT: v_lshrrev_b32_e32 v3, 2, v3
; GFX9-NEXT: v_mov_b32_e32 v2, v5
; GFX9-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v3, 6, v[2:3]
; GFX9-NEXT: v_sub_co_u32_e32 v0, vcc, v0, v4
; GFX9-NEXT: v_subb_co_u32_e32 v1, vcc, v1, v2, vcc
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: urem64_6:
; GFX942: ; %bb.0: ; %entry
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: s_mov_b32 s0, 0xaaaaaaab
; GFX942-NEXT: v_mul_hi_u32 v2, v0, s0
; GFX942-NEXT: v_mov_b32_e32 v3, 0
; GFX942-NEXT: v_mad_u64_u32 v[4:5], s[0:1], v1, s0, v[2:3]
; GFX942-NEXT: v_mov_b32_e32 v2, v5
; GFX942-NEXT: v_mov_b32_e32 v5, v3
; GFX942-NEXT: s_mov_b32 s2, 0xaaaaaaaa
; GFX942-NEXT: v_mad_u64_u32 v[4:5], s[0:1], v0, s2, v[4:5]
; GFX942-NEXT: v_mov_b32_e32 v4, v5
; GFX942-NEXT: v_mov_b32_e32 v5, v3
; GFX942-NEXT: v_lshl_add_u64 v[2:3], v[2:3], 0, v[4:5]
; GFX942-NEXT: v_mad_u64_u32 v[2:3], s[0:1], v1, s2, v[2:3]
; GFX942-NEXT: v_alignbit_b32 v2, v3, v2, 2
; GFX942-NEXT: v_mad_u64_u32 v[4:5], s[0:1], v2, 6, 0
; GFX942-NEXT: v_mov_b32_e32 v2, v5
; GFX942-NEXT: v_lshrrev_b32_e32 v3, 2, v3
; GFX942-NEXT: v_mad_u64_u32 v[2:3], s[0:1], v3, 6, v[2:3]
; GFX942-NEXT: v_mov_b32_e32 v3, v2
; GFX942-NEXT: v_sub_co_u32_e32 v0, vcc, v0, v4
; GFX942-NEXT: s_nop 1
; GFX942-NEXT: v_subb_co_u32_e32 v1, vcc, v1, v3, vcc
; GFX942-NEXT: s_setpc_b64 s[30:31]
;
; GFX1030-LABEL: urem64_6:
; GFX1030: ; %bb.0: ; %entry
; GFX1030-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1030-NEXT: v_mul_hi_u32 v2, 0xaaaaaaab, v0
; GFX1030-NEXT: v_mov_b32_e32 v3, 0
; GFX1030-NEXT: v_mad_u64_u32 v[4:5], null, 0xaaaaaaab, v1, v[2:3]
; GFX1030-NEXT: v_mov_b32_e32 v6, v5
; GFX1030-NEXT: v_mov_b32_e32 v5, v3
; GFX1030-NEXT: v_mad_u64_u32 v[2:3], null, 0xaaaaaaaa, v0, v[4:5]
; GFX1030-NEXT: v_mov_b32_e32 v2, v3
; GFX1030-NEXT: v_add_co_u32 v2, s4, v6, v2
; GFX1030-NEXT: v_add_co_ci_u32_e64 v3, null, 0, 0, s4
; GFX1030-NEXT: v_mad_u64_u32 v[2:3], null, 0xaaaaaaaa, v1, v[2:3]
; GFX1030-NEXT: v_alignbit_b32 v2, v3, v2, 2
; GFX1030-NEXT: v_lshrrev_b32_e32 v3, 2, v3
; GFX1030-NEXT: v_mad_u64_u32 v[4:5], null, v2, 6, 0
; GFX1030-NEXT: v_mov_b32_e32 v2, v5
; GFX1030-NEXT: v_sub_co_u32 v0, vcc_lo, v0, v4
; GFX1030-NEXT: v_mad_u64_u32 v[2:3], null, v3, 6, v[2:3]
; GFX1030-NEXT: v_sub_co_ci_u32_e32 v1, vcc_lo, v1, v2, vcc_lo
; GFX1030-NEXT: s_setpc_b64 s[30:31]
entry:
%rem = urem i64 %i, 6
ret i64 %rem
}
; Signed 64-bit division by the constant 3.
; The expected code multiplies the input by the 32-bit pieces
; 0x55555556 / 0x55555555 and then adds the sign bit of the result
; (high word shifted right by 31) to produce the returned quotient.
define noundef i64 @sdiv64_3(i64 noundef %i) {
; GFX9-LABEL: sdiv64_3:
; GFX9: ; %bb.0: ; %entry
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: s_mov_b32 s6, 0x55555556
; GFX9-NEXT: v_mul_hi_u32 v2, v0, s6
; GFX9-NEXT: v_mov_b32_e32 v3, 0
; GFX9-NEXT: s_mov_b32 s7, 0x55555555
; GFX9-NEXT: v_mad_u64_u32 v[4:5], s[4:5], v1, s6, v[2:3]
; GFX9-NEXT: v_mov_b32_e32 v6, v5
; GFX9-NEXT: v_mov_b32_e32 v5, v3
; GFX9-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v0, s7, v[4:5]
; GFX9-NEXT: v_mov_b32_e32 v0, v3
; GFX9-NEXT: v_add_co_u32_e32 v2, vcc, v6, v0
; GFX9-NEXT: v_addc_co_u32_e64 v3, s[4:5], 0, 0, vcc
; GFX9-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v1, s7, v[2:3]
; GFX9-NEXT: v_ashrrev_i32_e32 v0, 31, v1
; GFX9-NEXT: v_mul_lo_u32 v4, v0, s7
; GFX9-NEXT: v_mul_lo_u32 v5, v0, s6
; GFX9-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v0, s6, v[2:3]
; GFX9-NEXT: v_add3_u32 v1, v5, v1, v4
; GFX9-NEXT: v_lshrrev_b32_e32 v2, 31, v1
; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: sdiv64_3:
; GFX942: ; %bb.0: ; %entry
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: s_mov_b32 s2, 0x55555556
; GFX942-NEXT: v_mul_hi_u32 v2, v0, s2
; GFX942-NEXT: v_mov_b32_e32 v3, 0
; GFX942-NEXT: v_mad_u64_u32 v[4:5], s[0:1], v1, s2, v[2:3]
; GFX942-NEXT: v_mov_b32_e32 v2, v5
; GFX942-NEXT: v_mov_b32_e32 v5, v3
; GFX942-NEXT: s_mov_b32 s3, 0x55555555
; GFX942-NEXT: v_mad_u64_u32 v[4:5], s[0:1], v0, s3, v[4:5]
; GFX942-NEXT: v_mov_b32_e32 v4, v5
; GFX942-NEXT: v_mov_b32_e32 v5, v3
; GFX942-NEXT: v_lshl_add_u64 v[4:5], v[2:3], 0, v[4:5]
; GFX942-NEXT: v_mad_u64_u32 v[4:5], s[0:1], v1, s3, v[4:5]
; GFX942-NEXT: v_ashrrev_i32_e32 v2, 31, v1
; GFX942-NEXT: v_mad_u64_u32 v[0:1], s[0:1], v2, s2, v[4:5]
; GFX942-NEXT: v_mul_lo_u32 v4, v2, s3
; GFX942-NEXT: v_mul_lo_u32 v2, v2, s2
; GFX942-NEXT: v_add3_u32 v1, v2, v1, v4
; GFX942-NEXT: v_lshrrev_b32_e32 v2, 31, v1
; GFX942-NEXT: v_lshl_add_u64 v[0:1], v[0:1], 0, v[2:3]
; GFX942-NEXT: s_setpc_b64 s[30:31]
;
; GFX1030-LABEL: sdiv64_3:
; GFX1030: ; %bb.0: ; %entry
; GFX1030-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1030-NEXT: v_mul_hi_u32 v2, 0x55555556, v0
; GFX1030-NEXT: v_mov_b32_e32 v3, 0
; GFX1030-NEXT: v_mad_u64_u32 v[4:5], null, 0x55555556, v1, v[2:3]
; GFX1030-NEXT: v_mov_b32_e32 v6, v5
; GFX1030-NEXT: v_mov_b32_e32 v5, v3
; GFX1030-NEXT: v_mad_u64_u32 v[2:3], null, 0x55555555, v0, v[4:5]
; GFX1030-NEXT: v_mov_b32_e32 v0, v3
; GFX1030-NEXT: v_add_co_u32 v2, s4, v6, v0
; GFX1030-NEXT: v_add_co_ci_u32_e64 v3, null, 0, 0, s4
; GFX1030-NEXT: v_ashrrev_i32_e32 v0, 31, v1
; GFX1030-NEXT: v_mad_u64_u32 v[2:3], null, 0x55555555, v1, v[2:3]
; GFX1030-NEXT: v_mul_lo_u32 v4, 0x55555555, v0
; GFX1030-NEXT: v_mul_lo_u32 v5, 0x55555556, v0
; GFX1030-NEXT: v_mad_u64_u32 v[0:1], null, 0x55555556, v0, v[2:3]
; GFX1030-NEXT: v_add3_u32 v1, v5, v1, v4
; GFX1030-NEXT: v_lshrrev_b32_e32 v2, 31, v1
; GFX1030-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
; GFX1030-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
; GFX1030-NEXT: s_setpc_b64 s[30:31]
entry:
%div = sdiv i64 %i, 3
ret i64 %div
}
; Signed 64-bit division; the function name advertises a divisor of 6.
; NOTE(review): the IR below actually computes `sdiv i64 %i, 3`, and the expected
; code is the same sequence as in sdiv64_3. This looks like a copy-paste slip, so
; signed division by 6 is presumably not covered by this function.
; Changing the constant to 6 requires regenerating the assertions with
; utils/update_llc_test_checks.py; do not edit them by hand.
define noundef i64 @sdiv64_6(i64 noundef %i) {
; GFX9-LABEL: sdiv64_6:
; GFX9: ; %bb.0: ; %entry
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: s_mov_b32 s6, 0x55555556
; GFX9-NEXT: v_mul_hi_u32 v2, v0, s6
; GFX9-NEXT: v_mov_b32_e32 v3, 0
; GFX9-NEXT: s_mov_b32 s7, 0x55555555
; GFX9-NEXT: v_mad_u64_u32 v[4:5], s[4:5], v1, s6, v[2:3]
; GFX9-NEXT: v_mov_b32_e32 v6, v5
; GFX9-NEXT: v_mov_b32_e32 v5, v3
; GFX9-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v0, s7, v[4:5]
; GFX9-NEXT: v_mov_b32_e32 v0, v3
; GFX9-NEXT: v_add_co_u32_e32 v2, vcc, v6, v0
; GFX9-NEXT: v_addc_co_u32_e64 v3, s[4:5], 0, 0, vcc
; GFX9-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v1, s7, v[2:3]
; GFX9-NEXT: v_ashrrev_i32_e32 v0, 31, v1
; GFX9-NEXT: v_mul_lo_u32 v4, v0, s7
; GFX9-NEXT: v_mul_lo_u32 v5, v0, s6
; GFX9-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v0, s6, v[2:3]
; GFX9-NEXT: v_add3_u32 v1, v5, v1, v4
; GFX9-NEXT: v_lshrrev_b32_e32 v2, 31, v1
; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: sdiv64_6:
; GFX942: ; %bb.0: ; %entry
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: s_mov_b32 s2, 0x55555556
; GFX942-NEXT: v_mul_hi_u32 v2, v0, s2
; GFX942-NEXT: v_mov_b32_e32 v3, 0
; GFX942-NEXT: v_mad_u64_u32 v[4:5], s[0:1], v1, s2, v[2:3]
; GFX942-NEXT: v_mov_b32_e32 v2, v5
; GFX942-NEXT: v_mov_b32_e32 v5, v3
; GFX942-NEXT: s_mov_b32 s3, 0x55555555
; GFX942-NEXT: v_mad_u64_u32 v[4:5], s[0:1], v0, s3, v[4:5]
; GFX942-NEXT: v_mov_b32_e32 v4, v5
; GFX942-NEXT: v_mov_b32_e32 v5, v3
; GFX942-NEXT: v_lshl_add_u64 v[4:5], v[2:3], 0, v[4:5]
; GFX942-NEXT: v_mad_u64_u32 v[4:5], s[0:1], v1, s3, v[4:5]
; GFX942-NEXT: v_ashrrev_i32_e32 v2, 31, v1
; GFX942-NEXT: v_mad_u64_u32 v[0:1], s[0:1], v2, s2, v[4:5]
; GFX942-NEXT: v_mul_lo_u32 v4, v2, s3
; GFX942-NEXT: v_mul_lo_u32 v2, v2, s2
; GFX942-NEXT: v_add3_u32 v1, v2, v1, v4
; GFX942-NEXT: v_lshrrev_b32_e32 v2, 31, v1
; GFX942-NEXT: v_lshl_add_u64 v[0:1], v[0:1], 0, v[2:3]
; GFX942-NEXT: s_setpc_b64 s[30:31]
;
; GFX1030-LABEL: sdiv64_6:
; GFX1030: ; %bb.0: ; %entry
; GFX1030-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1030-NEXT: v_mul_hi_u32 v2, 0x55555556, v0
; GFX1030-NEXT: v_mov_b32_e32 v3, 0
; GFX1030-NEXT: v_mad_u64_u32 v[4:5], null, 0x55555556, v1, v[2:3]
; GFX1030-NEXT: v_mov_b32_e32 v6, v5
; GFX1030-NEXT: v_mov_b32_e32 v5, v3
; GFX1030-NEXT: v_mad_u64_u32 v[2:3], null, 0x55555555, v0, v[4:5]
; GFX1030-NEXT: v_mov_b32_e32 v0, v3
; GFX1030-NEXT: v_add_co_u32 v2, s4, v6, v0
; GFX1030-NEXT: v_add_co_ci_u32_e64 v3, null, 0, 0, s4
; GFX1030-NEXT: v_ashrrev_i32_e32 v0, 31, v1
; GFX1030-NEXT: v_mad_u64_u32 v[2:3], null, 0x55555555, v1, v[2:3]
; GFX1030-NEXT: v_mul_lo_u32 v4, 0x55555555, v0
; GFX1030-NEXT: v_mul_lo_u32 v5, 0x55555556, v0
; GFX1030-NEXT: v_mad_u64_u32 v[0:1], null, 0x55555556, v0, v[2:3]
; GFX1030-NEXT: v_add3_u32 v1, v5, v1, v4
; GFX1030-NEXT: v_lshrrev_b32_e32 v2, 31, v1
; GFX1030-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
; GFX1030-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
; GFX1030-NEXT: s_setpc_b64 s[30:31]
entry:
%div = sdiv i64 %i, 3
ret i64 %div
}
; Unsigned 64-bit division by the constant 3.
; The expected code multiplies the input by the 32-bit pieces
; 0xaaaaaaab / 0xaaaaaaaa and shifts the 64-bit result right by 1
; (v_alignbit_b32 for the low word, v_lshrrev_b32 for the high word).
define noundef i64 @udiv64_3(i64 noundef %i) {
; GFX9-LABEL: udiv64_3:
; GFX9: ; %bb.0: ; %entry
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: s_mov_b32 s4, 0xaaaaaaab
; GFX9-NEXT: v_mul_hi_u32 v2, v0, s4
; GFX9-NEXT: v_mov_b32_e32 v3, 0
; GFX9-NEXT: s_mov_b32 s6, 0xaaaaaaaa
; GFX9-NEXT: v_mad_u64_u32 v[4:5], s[4:5], v1, s4, v[2:3]
; GFX9-NEXT: v_mov_b32_e32 v6, v5
; GFX9-NEXT: v_mov_b32_e32 v5, v3
; GFX9-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v0, s6, v[4:5]
; GFX9-NEXT: v_mov_b32_e32 v0, v3
; GFX9-NEXT: v_add_co_u32_e32 v2, vcc, v6, v0
; GFX9-NEXT: v_addc_co_u32_e64 v3, s[4:5], 0, 0, vcc
; GFX9-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v1, s6, v[2:3]
; GFX9-NEXT: v_alignbit_b32 v0, v1, v0, 1
; GFX9-NEXT: v_lshrrev_b32_e32 v1, 1, v1
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: udiv64_3:
; GFX942: ; %bb.0: ; %entry
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: s_mov_b32 s0, 0xaaaaaaab
; GFX942-NEXT: v_mul_hi_u32 v2, v0, s0
; GFX942-NEXT: v_mov_b32_e32 v3, 0
; GFX942-NEXT: v_mad_u64_u32 v[4:5], s[0:1], v1, s0, v[2:3]
; GFX942-NEXT: v_mov_b32_e32 v2, v5
; GFX942-NEXT: v_mov_b32_e32 v5, v3
; GFX942-NEXT: s_mov_b32 s2, 0xaaaaaaaa
; GFX942-NEXT: v_mad_u64_u32 v[4:5], s[0:1], v0, s2, v[4:5]
; GFX942-NEXT: v_mov_b32_e32 v4, v5
; GFX942-NEXT: v_mov_b32_e32 v5, v3
; GFX942-NEXT: v_lshl_add_u64 v[2:3], v[2:3], 0, v[4:5]
; GFX942-NEXT: v_mad_u64_u32 v[0:1], s[0:1], v1, s2, v[2:3]
; GFX942-NEXT: v_alignbit_b32 v0, v1, v0, 1
; GFX942-NEXT: v_lshrrev_b32_e32 v1, 1, v1
; GFX942-NEXT: s_setpc_b64 s[30:31]
;
; GFX1030-LABEL: udiv64_3:
; GFX1030: ; %bb.0: ; %entry
; GFX1030-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1030-NEXT: v_mul_hi_u32 v2, 0xaaaaaaab, v0
; GFX1030-NEXT: v_mov_b32_e32 v3, 0
; GFX1030-NEXT: v_mad_u64_u32 v[4:5], null, 0xaaaaaaab, v1, v[2:3]
; GFX1030-NEXT: v_mov_b32_e32 v6, v5
; GFX1030-NEXT: v_mov_b32_e32 v5, v3
; GFX1030-NEXT: v_mad_u64_u32 v[2:3], null, 0xaaaaaaaa, v0, v[4:5]
; GFX1030-NEXT: v_mov_b32_e32 v0, v3
; GFX1030-NEXT: v_add_co_u32 v2, s4, v6, v0
; GFX1030-NEXT: v_add_co_ci_u32_e64 v3, null, 0, 0, s4
; GFX1030-NEXT: v_mad_u64_u32 v[0:1], null, 0xaaaaaaaa, v1, v[2:3]
; GFX1030-NEXT: v_alignbit_b32 v0, v1, v0, 1
; GFX1030-NEXT: v_lshrrev_b32_e32 v1, 1, v1
; GFX1030-NEXT: s_setpc_b64 s[30:31]
entry:
%div = udiv i64 %i, 3
ret i64 %div
}
; Unsigned 64-bit division by the constant 6.
; The expected code uses the same 0xaaaaaaab / 0xaaaaaaaa multiplier pieces as
; udiv64_3; the only difference is that the 64-bit result is shifted right
; by 2 instead of 1.
define noundef i64 @udiv64_6(i64 noundef %i) {
; GFX9-LABEL: udiv64_6:
; GFX9: ; %bb.0: ; %entry
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: s_mov_b32 s4, 0xaaaaaaab
; GFX9-NEXT: v_mul_hi_u32 v2, v0, s4
; GFX9-NEXT: v_mov_b32_e32 v3, 0
; GFX9-NEXT: s_mov_b32 s6, 0xaaaaaaaa
; GFX9-NEXT: v_mad_u64_u32 v[4:5], s[4:5], v1, s4, v[2:3]
; GFX9-NEXT: v_mov_b32_e32 v6, v5
; GFX9-NEXT: v_mov_b32_e32 v5, v3
; GFX9-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v0, s6, v[4:5]
; GFX9-NEXT: v_mov_b32_e32 v0, v3
; GFX9-NEXT: v_add_co_u32_e32 v2, vcc, v6, v0
; GFX9-NEXT: v_addc_co_u32_e64 v3, s[4:5], 0, 0, vcc
; GFX9-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v1, s6, v[2:3]
; GFX9-NEXT: v_alignbit_b32 v0, v1, v0, 2
; GFX9-NEXT: v_lshrrev_b32_e32 v1, 2, v1
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: udiv64_6:
; GFX942: ; %bb.0: ; %entry
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: s_mov_b32 s0, 0xaaaaaaab
; GFX942-NEXT: v_mul_hi_u32 v2, v0, s0
; GFX942-NEXT: v_mov_b32_e32 v3, 0
; GFX942-NEXT: v_mad_u64_u32 v[4:5], s[0:1], v1, s0, v[2:3]
; GFX942-NEXT: v_mov_b32_e32 v2, v5
; GFX942-NEXT: v_mov_b32_e32 v5, v3
; GFX942-NEXT: s_mov_b32 s2, 0xaaaaaaaa
; GFX942-NEXT: v_mad_u64_u32 v[4:5], s[0:1], v0, s2, v[4:5]
; GFX942-NEXT: v_mov_b32_e32 v4, v5
; GFX942-NEXT: v_mov_b32_e32 v5, v3
; GFX942-NEXT: v_lshl_add_u64 v[2:3], v[2:3], 0, v[4:5]
; GFX942-NEXT: v_mad_u64_u32 v[0:1], s[0:1], v1, s2, v[2:3]
; GFX942-NEXT: v_alignbit_b32 v0, v1, v0, 2
; GFX942-NEXT: v_lshrrev_b32_e32 v1, 2, v1
; GFX942-NEXT: s_setpc_b64 s[30:31]
;
; GFX1030-LABEL: udiv64_6:
; GFX1030: ; %bb.0: ; %entry
; GFX1030-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1030-NEXT: v_mul_hi_u32 v2, 0xaaaaaaab, v0
; GFX1030-NEXT: v_mov_b32_e32 v3, 0
; GFX1030-NEXT: v_mad_u64_u32 v[4:5], null, 0xaaaaaaab, v1, v[2:3]
; GFX1030-NEXT: v_mov_b32_e32 v6, v5
; GFX1030-NEXT: v_mov_b32_e32 v5, v3
; GFX1030-NEXT: v_mad_u64_u32 v[2:3], null, 0xaaaaaaaa, v0, v[4:5]
; GFX1030-NEXT: v_mov_b32_e32 v0, v3
; GFX1030-NEXT: v_add_co_u32 v2, s4, v6, v0
; GFX1030-NEXT: v_add_co_ci_u32_e64 v3, null, 0, 0, s4
; GFX1030-NEXT: v_mad_u64_u32 v[0:1], null, 0xaaaaaaaa, v1, v[2:3]
; GFX1030-NEXT: v_alignbit_b32 v0, v1, v0, 2
; GFX1030-NEXT: v_lshrrev_b32_e32 v1, 2, v1
; GFX1030-NEXT: s_setpc_b64 s[30:31]
entry:
%div = udiv i64 %i, 6
ret i64 %div
}
; Signed 64-bit remainder by the power-of-two constant 2.
; No multiply is expected: the checks add the input's sign bit (high word
; shifted right by 31) to the input, clear bit 0 of the low word of that sum
; (and with -2), and subtract the result from the input.
define noundef i64 @srem64_2(i64 noundef %i) {
; GFX9-LABEL: srem64_2:
; GFX9: ; %bb.0: ; %entry
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_lshrrev_b32_e32 v2, 31, v1
; GFX9-NEXT: v_add_co_u32_e32 v2, vcc, v0, v2
; GFX9-NEXT: v_addc_co_u32_e32 v3, vcc, 0, v1, vcc
; GFX9-NEXT: v_and_b32_e32 v2, -2, v2
; GFX9-NEXT: v_sub_co_u32_e32 v0, vcc, v0, v2
; GFX9-NEXT: v_subb_co_u32_e32 v1, vcc, v1, v3, vcc
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: srem64_2:
; GFX942: ; %bb.0: ; %entry
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: v_lshrrev_b32_e32 v2, 31, v1
; GFX942-NEXT: v_mov_b32_e32 v3, 0
; GFX942-NEXT: v_lshl_add_u64 v[2:3], v[0:1], 0, v[2:3]
; GFX942-NEXT: v_and_b32_e32 v2, -2, v2
; GFX942-NEXT: v_sub_co_u32_e32 v0, vcc, v0, v2
; GFX942-NEXT: s_nop 1
; GFX942-NEXT: v_subb_co_u32_e32 v1, vcc, v1, v3, vcc
; GFX942-NEXT: s_setpc_b64 s[30:31]
;
; GFX1030-LABEL: srem64_2:
; GFX1030: ; %bb.0: ; %entry
; GFX1030-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1030-NEXT: v_lshrrev_b32_e32 v2, 31, v1
; GFX1030-NEXT: v_add_co_u32 v2, vcc_lo, v0, v2
; GFX1030-NEXT: v_add_co_ci_u32_e32 v3, vcc_lo, 0, v1, vcc_lo
; GFX1030-NEXT: v_and_b32_e32 v2, -2, v2
; GFX1030-NEXT: v_sub_co_u32 v0, vcc_lo, v0, v2
; GFX1030-NEXT: v_sub_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo
; GFX1030-NEXT: s_setpc_b64 s[30:31]
entry:
%rem = srem i64 %i, 2
ret i64 %rem
}
; Signed 64-bit division by the power-of-two constant 2.
; The checks add the input's sign bit (high word shifted right by 31) to the
; input and then apply a 64-bit arithmetic shift right by 1.
define noundef i64 @sdiv64_2(i64 noundef %i) {
; GFX9-LABEL: sdiv64_2:
; GFX9: ; %bb.0: ; %entry
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_lshrrev_b32_e32 v2, 31, v1
; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
; GFX9-NEXT: v_ashrrev_i64 v[0:1], 1, v[0:1]
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: sdiv64_2:
; GFX942: ; %bb.0: ; %entry
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: v_lshrrev_b32_e32 v2, 31, v1
; GFX942-NEXT: v_mov_b32_e32 v3, 0
; GFX942-NEXT: v_lshl_add_u64 v[0:1], v[0:1], 0, v[2:3]
; GFX942-NEXT: v_ashrrev_i64 v[0:1], 1, v[0:1]
; GFX942-NEXT: s_setpc_b64 s[30:31]
;
; GFX1030-LABEL: sdiv64_2:
; GFX1030: ; %bb.0: ; %entry
; GFX1030-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1030-NEXT: v_lshrrev_b32_e32 v2, 31, v1
; GFX1030-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
; GFX1030-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
; GFX1030-NEXT: v_ashrrev_i64 v[0:1], 1, v[0:1]
; GFX1030-NEXT: s_setpc_b64 s[30:31]
entry:
%div = sdiv i64 %i, 2
ret i64 %div
}
; Unsigned 64-bit remainder by the power-of-two constant 2.
; The checks expect a single mask of the low word with 1 and a zeroed high word.
define noundef i64 @urem64_2(i64 noundef %i) {
; GFX9-LABEL: urem64_2:
; GFX9: ; %bb.0: ; %entry
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_and_b32_e32 v0, 1, v0
; GFX9-NEXT: v_mov_b32_e32 v1, 0
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: urem64_2:
; GFX942: ; %bb.0: ; %entry
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: v_and_b32_e32 v0, 1, v0
; GFX942-NEXT: v_mov_b32_e32 v1, 0
; GFX942-NEXT: s_setpc_b64 s[30:31]
;
; GFX1030-LABEL: urem64_2:
; GFX1030: ; %bb.0: ; %entry
; GFX1030-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1030-NEXT: v_and_b32_e32 v0, 1, v0
; GFX1030-NEXT: v_mov_b32_e32 v1, 0
; GFX1030-NEXT: s_setpc_b64 s[30:31]
entry:
%rem = urem i64 %i, 2
ret i64 %rem
}
; Unsigned 64-bit division by the power-of-two constant 2.
; The checks expect a 64-bit logical shift right by 1, split across the two
; 32-bit halves: v_alignbit_b32 for the low word and v_lshrrev_b32 for the high word.
define noundef i64 @udiv64_2(i64 noundef %i) {
; GFX9-LABEL: udiv64_2:
; GFX9: ; %bb.0: ; %entry
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_alignbit_b32 v0, v1, v0, 1
; GFX9-NEXT: v_lshrrev_b32_e32 v1, 1, v1
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: udiv64_2:
; GFX942: ; %bb.0: ; %entry
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: v_alignbit_b32 v0, v1, v0, 1
; GFX942-NEXT: v_lshrrev_b32_e32 v1, 1, v1
; GFX942-NEXT: s_setpc_b64 s[30:31]
;
; GFX1030-LABEL: udiv64_2:
; GFX1030: ; %bb.0: ; %entry
; GFX1030-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1030-NEXT: v_alignbit_b32 v0, v1, v0, 1
; GFX1030-NEXT: v_lshrrev_b32_e32 v1, 1, v1
; GFX1030-NEXT: s_setpc_b64 s[30:31]
entry:
%div = udiv i64 %i, 2
ret i64 %div
}
; Signed 64-bit remainder by the power-of-two constant 64.
; The checks build a bias from the sign of the high word (arithmetic shift
; right by 31, then logical shift right by 26), add it to the input, mask the
; low word of the sum with 0xffffffc0, and subtract the result from the input.
define noundef i64 @srem64_64(i64 noundef %i) {
; GFX9-LABEL: srem64_64:
; GFX9: ; %bb.0: ; %entry
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_ashrrev_i32_e32 v2, 31, v1
; GFX9-NEXT: v_lshrrev_b32_e32 v2, 26, v2
; GFX9-NEXT: v_add_co_u32_e32 v2, vcc, v0, v2
; GFX9-NEXT: v_addc_co_u32_e32 v3, vcc, 0, v1, vcc
; GFX9-NEXT: v_and_b32_e32 v2, 0xffffffc0, v2
; GFX9-NEXT: v_sub_co_u32_e32 v0, vcc, v0, v2
; GFX9-NEXT: v_subb_co_u32_e32 v1, vcc, v1, v3, vcc
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: srem64_64:
; GFX942: ; %bb.0: ; %entry
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: v_ashrrev_i32_e32 v2, 31, v1
; GFX942-NEXT: v_lshrrev_b32_e32 v2, 26, v2
; GFX942-NEXT: v_mov_b32_e32 v3, 0
; GFX942-NEXT: v_lshl_add_u64 v[2:3], v[0:1], 0, v[2:3]
; GFX942-NEXT: v_and_b32_e32 v2, 0xffffffc0, v2
; GFX942-NEXT: v_sub_co_u32_e32 v0, vcc, v0, v2
; GFX942-NEXT: s_nop 1
; GFX942-NEXT: v_subb_co_u32_e32 v1, vcc, v1, v3, vcc
; GFX942-NEXT: s_setpc_b64 s[30:31]
;
; GFX1030-LABEL: srem64_64:
; GFX1030: ; %bb.0: ; %entry
; GFX1030-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1030-NEXT: v_ashrrev_i32_e32 v2, 31, v1
; GFX1030-NEXT: v_lshrrev_b32_e32 v2, 26, v2
; GFX1030-NEXT: v_add_co_u32 v2, vcc_lo, v0, v2
; GFX1030-NEXT: v_add_co_ci_u32_e32 v3, vcc_lo, 0, v1, vcc_lo
; GFX1030-NEXT: v_and_b32_e32 v2, 0xffffffc0, v2
; GFX1030-NEXT: v_sub_co_u32 v0, vcc_lo, v0, v2
; GFX1030-NEXT: v_sub_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo
; GFX1030-NEXT: s_setpc_b64 s[30:31]
entry:
%rem = srem i64 %i, 64
ret i64 %rem
}
; Signed 64-bit division by the power-of-two constant 64.
; The checks build a bias from the sign of the high word (arithmetic shift
; right by 31, then logical shift right by 26), add it to the input, and apply
; a 64-bit arithmetic shift right by 6.
define noundef i64 @sdiv64_64(i64 noundef %i) {
; GFX9-LABEL: sdiv64_64:
; GFX9: ; %bb.0: ; %entry
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_ashrrev_i32_e32 v2, 31, v1
; GFX9-NEXT: v_lshrrev_b32_e32 v2, 26, v2
; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
; GFX9-NEXT: v_ashrrev_i64 v[0:1], 6, v[0:1]
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: sdiv64_64:
; GFX942: ; %bb.0: ; %entry
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: v_ashrrev_i32_e32 v2, 31, v1
; GFX942-NEXT: v_lshrrev_b32_e32 v2, 26, v2
; GFX942-NEXT: v_mov_b32_e32 v3, 0
; GFX942-NEXT: v_lshl_add_u64 v[0:1], v[0:1], 0, v[2:3]
; GFX942-NEXT: v_ashrrev_i64 v[0:1], 6, v[0:1]
; GFX942-NEXT: s_setpc_b64 s[30:31]
;
; GFX1030-LABEL: sdiv64_64:
; GFX1030: ; %bb.0: ; %entry
; GFX1030-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1030-NEXT: v_ashrrev_i32_e32 v2, 31, v1
; GFX1030-NEXT: v_lshrrev_b32_e32 v2, 26, v2
; GFX1030-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
; GFX1030-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
; GFX1030-NEXT: v_ashrrev_i64 v[0:1], 6, v[0:1]
; GFX1030-NEXT: s_setpc_b64 s[30:31]
entry:
%div = sdiv i64 %i, 64
ret i64 %div
}
; Unsigned 64-bit remainder by the power-of-two constant 64.
; The expected code is identical on all three targets: the low dword is
; masked with 63 and the high dword of the result is set to 0.
define noundef i64 @urem64_64(i64 noundef %i) {
; GFX9-LABEL: urem64_64:
; GFX9: ; %bb.0: ; %entry
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_and_b32_e32 v0, 63, v0
; GFX9-NEXT: v_mov_b32_e32 v1, 0
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: urem64_64:
; GFX942: ; %bb.0: ; %entry
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: v_and_b32_e32 v0, 63, v0
; GFX942-NEXT: v_mov_b32_e32 v1, 0
; GFX942-NEXT: s_setpc_b64 s[30:31]
;
; GFX1030-LABEL: urem64_64:
; GFX1030: ; %bb.0: ; %entry
; GFX1030-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1030-NEXT: v_and_b32_e32 v0, 63, v0
; GFX1030-NEXT: v_mov_b32_e32 v1, 0
; GFX1030-NEXT: s_setpc_b64 s[30:31]
entry:
%rem = urem i64 %i, 64
ret i64 %rem
}
; Unsigned 64-bit division by the power-of-two constant 64.
; The expected code is identical on all three targets: a 64-bit logical shift
; right by 6, split into v_alignbit_b32 for the low dword and v_lshrrev_b32
; for the high dword.
define noundef i64 @udiv64_64(i64 noundef %i) {
; GFX9-LABEL: udiv64_64:
; GFX9: ; %bb.0: ; %entry
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_alignbit_b32 v0, v1, v0, 6
; GFX9-NEXT: v_lshrrev_b32_e32 v1, 6, v1
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: udiv64_64:
; GFX942: ; %bb.0: ; %entry
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: v_alignbit_b32 v0, v1, v0, 6
; GFX942-NEXT: v_lshrrev_b32_e32 v1, 6, v1
; GFX942-NEXT: s_setpc_b64 s[30:31]
;
; GFX1030-LABEL: udiv64_64:
; GFX1030: ; %bb.0: ; %entry
; GFX1030-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1030-NEXT: v_alignbit_b32 v0, v1, v0, 6
; GFX1030-NEXT: v_lshrrev_b32_e32 v1, 6, v1
; GFX1030-NEXT: s_setpc_b64 s[30:31]
entry:
%div = udiv i64 %i, 64
ret i64 %div
}
; Signed 64-bit remainder by -2147483648, i.e. -(2^31).
; The expected code has the same shape as the remainder by a positive power of
; two: a bias of 0 or 0x7fffffff (sign of the high dword, logically shifted
; right by 1) is added to the dividend, the low 31 bits of the sum are cleared
; with the 0x80000000 mask, and the result is subtracted from the original
; dividend. No multiply appears in the checked output.
define noundef i64 @srem64_i32min(i64 noundef %i) {
; GFX9-LABEL: srem64_i32min:
; GFX9: ; %bb.0: ; %entry
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_ashrrev_i32_e32 v2, 31, v1
; GFX9-NEXT: v_lshrrev_b32_e32 v2, 1, v2
; GFX9-NEXT: v_add_co_u32_e32 v2, vcc, v0, v2
; GFX9-NEXT: v_addc_co_u32_e32 v3, vcc, 0, v1, vcc
; GFX9-NEXT: v_and_b32_e32 v2, 0x80000000, v2
; GFX9-NEXT: v_sub_co_u32_e32 v0, vcc, v0, v2
; GFX9-NEXT: v_subb_co_u32_e32 v1, vcc, v1, v3, vcc
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: srem64_i32min:
; GFX942: ; %bb.0: ; %entry
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: v_ashrrev_i32_e32 v2, 31, v1
; GFX942-NEXT: v_lshrrev_b32_e32 v2, 1, v2
; GFX942-NEXT: v_mov_b32_e32 v3, 0
; GFX942-NEXT: v_lshl_add_u64 v[2:3], v[0:1], 0, v[2:3]
; GFX942-NEXT: v_and_b32_e32 v2, 0x80000000, v2
; GFX942-NEXT: v_sub_co_u32_e32 v0, vcc, v0, v2
; GFX942-NEXT: s_nop 1
; GFX942-NEXT: v_subb_co_u32_e32 v1, vcc, v1, v3, vcc
; GFX942-NEXT: s_setpc_b64 s[30:31]
;
; GFX1030-LABEL: srem64_i32min:
; GFX1030: ; %bb.0: ; %entry
; GFX1030-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1030-NEXT: v_ashrrev_i32_e32 v2, 31, v1
; GFX1030-NEXT: v_lshrrev_b32_e32 v2, 1, v2
; GFX1030-NEXT: v_add_co_u32 v2, vcc_lo, v0, v2
; GFX1030-NEXT: v_add_co_ci_u32_e32 v3, vcc_lo, 0, v1, vcc_lo
; GFX1030-NEXT: v_and_b32_e32 v2, 0x80000000, v2
; GFX1030-NEXT: v_sub_co_u32 v0, vcc_lo, v0, v2
; GFX1030-NEXT: v_sub_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo
; GFX1030-NEXT: s_setpc_b64 s[30:31]
entry:
%rem = srem i64 %i, -2147483648
ret i64 %rem
}
; Signed 64-bit division by -2147483648, i.e. -(2^31).
; The expected code adds a bias of 0 or 0x7fffffff (from the sign of the high
; dword) to the dividend, applies a 64-bit arithmetic shift right by 31, and
; then negates the 64-bit result by subtracting it from zero with borrow.
; No multiply appears in the checked output.
define noundef i64 @sdiv64_i32min(i64 noundef %i) {
; GFX9-LABEL: sdiv64_i32min:
; GFX9: ; %bb.0: ; %entry
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_ashrrev_i32_e32 v2, 31, v1
; GFX9-NEXT: v_lshrrev_b32_e32 v2, 1, v2
; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
; GFX9-NEXT: v_ashrrev_i64 v[0:1], 31, v[0:1]
; GFX9-NEXT: v_sub_co_u32_e32 v0, vcc, 0, v0
; GFX9-NEXT: v_subb_co_u32_e32 v1, vcc, 0, v1, vcc
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: sdiv64_i32min:
; GFX942: ; %bb.0: ; %entry
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: v_ashrrev_i32_e32 v2, 31, v1
; GFX942-NEXT: v_lshrrev_b32_e32 v2, 1, v2
; GFX942-NEXT: v_mov_b32_e32 v3, 0
; GFX942-NEXT: v_lshl_add_u64 v[0:1], v[0:1], 0, v[2:3]
; GFX942-NEXT: v_ashrrev_i64 v[0:1], 31, v[0:1]
; GFX942-NEXT: v_sub_co_u32_e32 v0, vcc, 0, v0
; GFX942-NEXT: s_nop 1
; GFX942-NEXT: v_subb_co_u32_e32 v1, vcc, 0, v1, vcc
; GFX942-NEXT: s_setpc_b64 s[30:31]
;
; GFX1030-LABEL: sdiv64_i32min:
; GFX1030: ; %bb.0: ; %entry
; GFX1030-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1030-NEXT: v_ashrrev_i32_e32 v2, 31, v1
; GFX1030-NEXT: v_lshrrev_b32_e32 v2, 1, v2
; GFX1030-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
; GFX1030-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
; GFX1030-NEXT: v_ashrrev_i64 v[0:1], 31, v[0:1]
; GFX1030-NEXT: v_sub_co_u32 v0, vcc_lo, 0, v0
; GFX1030-NEXT: v_sub_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
; GFX1030-NEXT: s_setpc_b64 s[30:31]
entry:
%div = sdiv i64 %i, -2147483648
ret i64 %div
}
; Unsigned 64-bit remainder by the i64 constant written as -2147483648.
; Interpreted as unsigned, that bit pattern is 0xFFFFFFFF80000000
; (2^64 - 2^31), which is not a power of two, so unlike urem64_64 this is not
; expected to reduce to a single mask.
; The expected code on every target contains a v_mad_u64_u32 whose multiplier
; is the literal 1, followed by v_add_lshl_u32, a 0x80000000 mask and a 64-bit
; add back into the dividend.
; NOTE(review): the multiply by 1 looks like a missed simplification; these
; checks presumably record current codegen rather than ideal codegen -- confirm
; before treating a change here as a regression.
define noundef i64 @urem64_i32min(i64 noundef %i) {
; GFX9-LABEL: urem64_i32min:
; GFX9: ; %bb.0: ; %entry
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_alignbit_b32 v4, v1, v0, 31
; GFX9-NEXT: v_lshrrev_b32_e32 v2, 31, v1
; GFX9-NEXT: v_mov_b32_e32 v3, 0
; GFX9-NEXT: v_mad_u64_u32 v[3:4], s[4:5], v4, 1, v[2:3]
; GFX9-NEXT: v_add_lshl_u32 v2, v2, v4, 30
; GFX9-NEXT: v_and_b32_e32 v2, 0x80000000, v2
; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: urem64_i32min:
; GFX942: ; %bb.0: ; %entry
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: v_alignbit_b32 v4, v1, v0, 31
; GFX942-NEXT: v_lshrrev_b32_e32 v2, 31, v1
; GFX942-NEXT: v_mov_b32_e32 v3, 0
; GFX942-NEXT: v_mad_u64_u32 v[4:5], s[0:1], v4, 1, v[2:3]
; GFX942-NEXT: v_add_lshl_u32 v2, v2, v5, 30
; GFX942-NEXT: v_and_b32_e32 v2, 0x80000000, v2
; GFX942-NEXT: v_lshl_add_u64 v[0:1], v[0:1], 0, v[2:3]
; GFX942-NEXT: s_setpc_b64 s[30:31]
;
; GFX1030-LABEL: urem64_i32min:
; GFX1030: ; %bb.0: ; %entry
; GFX1030-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1030-NEXT: v_mov_b32_e32 v3, 0
; GFX1030-NEXT: v_alignbit_b32 v4, v1, v0, 31
; GFX1030-NEXT: v_lshrrev_b32_e32 v2, 31, v1
; GFX1030-NEXT: v_mad_u64_u32 v[3:4], null, v4, 1, v[2:3]
; GFX1030-NEXT: v_add_lshl_u32 v2, v2, v4, 30
; GFX1030-NEXT: v_and_b32_e32 v2, 0x80000000, v2
; GFX1030-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
; GFX1030-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
; GFX1030-NEXT: s_setpc_b64 s[30:31]
entry:
%rem = urem i64 %i, -2147483648
ret i64 %rem
}
; Unsigned 64-bit division by the i64 constant written as -2147483648.
; Interpreted as unsigned, that bit pattern is 0xFFFFFFFF80000000
; (2^64 - 2^31), which is not a power of two, so unlike udiv64_64 this is not
; expected to reduce to a plain shift.
; The expected code on every target contains a v_mad_u64_u32 whose multiplier
; is the literal 1, a 64-bit add, a final v_alignbit_b32 by 1 for the low
; dword, and an explicit 0 written to the high dword of the result.
; NOTE(review): the multiply by 1 looks like a missed simplification; these
; checks presumably record current codegen rather than ideal codegen -- confirm
; before treating a change here as a regression.
define noundef i64 @udiv64_i32min(i64 noundef %i) {
; GFX9-LABEL: udiv64_i32min:
; GFX9: ; %bb.0: ; %entry
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_alignbit_b32 v2, v1, v0, 31
; GFX9-NEXT: v_lshrrev_b32_e32 v0, 31, v1
; GFX9-NEXT: v_mov_b32_e32 v1, 0
; GFX9-NEXT: v_mad_u64_u32 v[1:2], s[4:5], v2, 1, v[0:1]
; GFX9-NEXT: v_mov_b32_e32 v1, v2
; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, v0, v1
; GFX9-NEXT: v_addc_co_u32_e64 v1, s[4:5], 0, 0, vcc
; GFX9-NEXT: v_alignbit_b32 v0, v1, v0, 1
; GFX9-NEXT: v_mov_b32_e32 v1, 0
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: udiv64_i32min:
; GFX942: ; %bb.0: ; %entry
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: v_alignbit_b32 v2, v1, v0, 31
; GFX942-NEXT: v_lshrrev_b32_e32 v0, 31, v1
; GFX942-NEXT: v_mov_b32_e32 v1, 0
; GFX942-NEXT: v_mad_u64_u32 v[2:3], s[0:1], v2, 1, v[0:1]
; GFX942-NEXT: v_mov_b32_e32 v2, v3
; GFX942-NEXT: v_mov_b32_e32 v3, v1
; GFX942-NEXT: v_lshl_add_u64 v[0:1], v[0:1], 0, v[2:3]
; GFX942-NEXT: v_alignbit_b32 v0, v1, v0, 1
; GFX942-NEXT: v_mov_b32_e32 v1, 0
; GFX942-NEXT: s_setpc_b64 s[30:31]
;
; GFX1030-LABEL: udiv64_i32min:
; GFX1030: ; %bb.0: ; %entry
; GFX1030-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1030-NEXT: v_mov_b32_e32 v2, 0
; GFX1030-NEXT: v_alignbit_b32 v0, v1, v0, 31
; GFX1030-NEXT: v_lshrrev_b32_e32 v1, 31, v1
; GFX1030-NEXT: v_mad_u64_u32 v[2:3], null, v0, 1, v[1:2]
; GFX1030-NEXT: v_mov_b32_e32 v0, v3
; GFX1030-NEXT: v_add_co_u32 v0, s4, v1, v0
; GFX1030-NEXT: v_add_co_ci_u32_e64 v1, null, 0, 0, s4
; GFX1030-NEXT: v_alignbit_b32 v0, v1, v0, 1
; GFX1030-NEXT: v_mov_b32_e32 v1, 0
; GFX1030-NEXT: s_setpc_b64 s[30:31]
entry:
%div = udiv i64 %i, -2147483648
ret i64 %div
}
; Signed 64-bit remainder by 2147483647 (2^31 - 1), which is not a power of
; two.
; The expected code has two phases. First, a quotient is formed from a chain
; of 32x32 multiplies (v_mul_hi_u32 / v_mad_u64_u32) against the dword
; constants 3 and 0x80000001, with sign-dependent correction terms, a 64-bit
; arithmetic shift right by 30, and the addition of the sign bit of the
; intermediate. Second, that quotient is multiplied by 0x7fffffff and the
; product is subtracted from the original dividend.
; gfx900 and gfx942 load the 0x7fffffff multiplier into an SGPR with
; s_brev_b32 of -2; gfx1030 uses the literal directly in v_mad_u64_u32.
define noundef i64 @srem64_i32max(i64 noundef %i) {
; GFX9-LABEL: srem64_i32max:
; GFX9: ; %bb.0: ; %entry
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_mul_hi_u32 v2, v0, 3
; GFX9-NEXT: v_mov_b32_e32 v3, 0
; GFX9-NEXT: s_mov_b32 s6, 0x80000001
; GFX9-NEXT: v_ashrrev_i32_e32 v6, 31, v1
; GFX9-NEXT: v_mad_u64_u32 v[4:5], s[4:5], v1, 3, v[2:3]
; GFX9-NEXT: v_lshl_add_u32 v8, v6, 31, v6
; GFX9-NEXT: v_mad_u64_u32 v[6:7], s[4:5], v6, 3, 0
; GFX9-NEXT: v_mov_b32_e32 v9, v5
; GFX9-NEXT: v_mov_b32_e32 v5, v3
; GFX9-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v0, s6, v[4:5]
; GFX9-NEXT: v_add3_u32 v7, v7, v8, v6
; GFX9-NEXT: v_mad_u64_u32 v[4:5], s[4:5], v0, -1, v[6:7]
; GFX9-NEXT: v_mov_b32_e32 v2, v3
; GFX9-NEXT: v_add_co_u32_e32 v2, vcc, v9, v2
; GFX9-NEXT: v_addc_co_u32_e64 v3, s[4:5], 0, 0, vcc
; GFX9-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v1, s6, v[2:3]
; GFX9-NEXT: v_sub_u32_e32 v5, v5, v1
; GFX9-NEXT: v_sub_u32_e32 v5, v5, v0
; GFX9-NEXT: v_add_co_u32_e32 v2, vcc, v2, v4
; GFX9-NEXT: v_addc_co_u32_e32 v3, vcc, v3, v5, vcc
; GFX9-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v0, 1, v[2:3]
; GFX9-NEXT: s_brev_b32 s6, -2
; GFX9-NEXT: v_add_u32_e32 v3, v1, v3
; GFX9-NEXT: v_ashrrev_i64 v[4:5], 30, v[2:3]
; GFX9-NEXT: v_lshrrev_b32_e32 v2, 31, v3
; GFX9-NEXT: v_add_co_u32_e32 v2, vcc, v4, v2
; GFX9-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v2, s6, 0
; GFX9-NEXT: v_addc_co_u32_e32 v4, vcc, 0, v5, vcc
; GFX9-NEXT: v_mad_u64_u32 v[3:4], s[4:5], v4, s6, v[3:4]
; GFX9-NEXT: v_sub_co_u32_e32 v0, vcc, v0, v2
; GFX9-NEXT: v_subb_co_u32_e32 v1, vcc, v1, v3, vcc
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: srem64_i32max:
; GFX942: ; %bb.0: ; %entry
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: v_ashrrev_i32_e32 v2, 31, v1
; GFX942-NEXT: v_lshl_add_u32 v4, v2, 31, v2
; GFX942-NEXT: v_mad_u64_u32 v[2:3], s[0:1], v2, 3, 0
; GFX942-NEXT: v_add3_u32 v3, v3, v4, v2
; GFX942-NEXT: v_mul_hi_u32 v4, v0, 3
; GFX942-NEXT: v_mov_b32_e32 v5, 0
; GFX942-NEXT: v_mad_u64_u32 v[6:7], s[0:1], v1, 3, v[4:5]
; GFX942-NEXT: v_mov_b32_e32 v4, v7
; GFX942-NEXT: v_mov_b32_e32 v7, v5
; GFX942-NEXT: s_mov_b32 s2, 0x80000001
; GFX942-NEXT: v_mad_u64_u32 v[6:7], s[0:1], v0, s2, v[6:7]
; GFX942-NEXT: v_mad_u64_u32 v[2:3], s[0:1], v0, -1, v[2:3]
; GFX942-NEXT: v_mov_b32_e32 v6, v7
; GFX942-NEXT: v_mov_b32_e32 v7, v5
; GFX942-NEXT: v_sub_u32_e32 v3, v3, v1
; GFX942-NEXT: v_lshl_add_u64 v[6:7], v[4:5], 0, v[6:7]
; GFX942-NEXT: v_sub_u32_e32 v3, v3, v0
; GFX942-NEXT: v_mad_u64_u32 v[6:7], s[0:1], v1, s2, v[6:7]
; GFX942-NEXT: v_lshl_add_u64 v[2:3], v[6:7], 0, v[2:3]
; GFX942-NEXT: v_mad_u64_u32 v[2:3], s[0:1], v0, 1, v[2:3]
; GFX942-NEXT: v_add_u32_e32 v3, v1, v3
; GFX942-NEXT: v_ashrrev_i64 v[6:7], 30, v[2:3]
; GFX942-NEXT: v_lshrrev_b32_e32 v4, 31, v3
; GFX942-NEXT: v_lshl_add_u64 v[2:3], v[6:7], 0, v[4:5]
; GFX942-NEXT: s_brev_b32 s2, -2
; GFX942-NEXT: v_mad_u64_u32 v[4:5], s[0:1], v2, s2, 0
; GFX942-NEXT: v_mov_b32_e32 v2, v5
; GFX942-NEXT: v_mad_u64_u32 v[2:3], s[0:1], v3, s2, v[2:3]
; GFX942-NEXT: v_mov_b32_e32 v3, v2
; GFX942-NEXT: v_sub_co_u32_e32 v0, vcc, v0, v4
; GFX942-NEXT: s_nop 1
; GFX942-NEXT: v_subb_co_u32_e32 v1, vcc, v1, v3, vcc
; GFX942-NEXT: s_setpc_b64 s[30:31]
;
; GFX1030-LABEL: srem64_i32max:
; GFX1030: ; %bb.0: ; %entry
; GFX1030-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1030-NEXT: v_mul_hi_u32 v2, v0, 3
; GFX1030-NEXT: v_mov_b32_e32 v3, 0
; GFX1030-NEXT: v_ashrrev_i32_e32 v6, 31, v1
; GFX1030-NEXT: v_mad_u64_u32 v[4:5], null, v1, 3, v[2:3]
; GFX1030-NEXT: v_mov_b32_e32 v7, v5
; GFX1030-NEXT: v_mov_b32_e32 v5, v3
; GFX1030-NEXT: v_mad_u64_u32 v[2:3], null, v6, 3, 0
; GFX1030-NEXT: v_lshl_add_u32 v6, v6, 31, v6
; GFX1030-NEXT: v_mad_u64_u32 v[4:5], null, 0x80000001, v0, v[4:5]
; GFX1030-NEXT: v_add3_u32 v3, v3, v6, v2
; GFX1030-NEXT: v_mov_b32_e32 v4, v5
; GFX1030-NEXT: v_mad_u64_u32 v[2:3], null, v0, -1, v[2:3]
; GFX1030-NEXT: v_add_co_u32 v4, s4, v7, v4
; GFX1030-NEXT: v_add_co_ci_u32_e64 v5, null, 0, 0, s4
; GFX1030-NEXT: v_sub_nc_u32_e32 v6, v3, v1
; GFX1030-NEXT: v_mad_u64_u32 v[3:4], null, 0x80000001, v1, v[4:5]
; GFX1030-NEXT: v_sub_nc_u32_e32 v5, v6, v0
; GFX1030-NEXT: v_add_co_u32 v2, vcc_lo, v3, v2
; GFX1030-NEXT: v_add_co_ci_u32_e32 v3, vcc_lo, v4, v5, vcc_lo
; GFX1030-NEXT: v_mad_u64_u32 v[2:3], null, v0, 1, v[2:3]
; GFX1030-NEXT: v_add_nc_u32_e32 v3, v1, v3
; GFX1030-NEXT: v_ashrrev_i64 v[4:5], 30, v[2:3]
; GFX1030-NEXT: v_lshrrev_b32_e32 v2, 31, v3
; GFX1030-NEXT: v_add_co_u32 v2, vcc_lo, v4, v2
; GFX1030-NEXT: v_add_co_ci_u32_e32 v4, vcc_lo, 0, v5, vcc_lo
; GFX1030-NEXT: v_mad_u64_u32 v[2:3], null, 0x7fffffff, v2, 0
; GFX1030-NEXT: v_mad_u64_u32 v[3:4], null, 0x7fffffff, v4, v[3:4]
; GFX1030-NEXT: v_sub_co_u32 v0, vcc_lo, v0, v2
; GFX1030-NEXT: v_sub_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo
; GFX1030-NEXT: s_setpc_b64 s[30:31]
entry:
%rem = srem i64 %i, 2147483647
ret i64 %rem
}
; Signed 64-bit division by 2147483647 (2^31 - 1), which is not a power of
; two.
; The expected code forms the quotient from a chain of 32x32 multiplies
; (v_mul_hi_u32 / v_mad_u64_u32) against the dword constants 3 and 0x80000001,
; with sign-dependent correction terms, then applies a 64-bit arithmetic shift
; right by 30 and adds the sign bit of the intermediate (logical shift right
; by 31 of its high dword). The multiply sequence matches the one checked for
; srem64_i32max; only the final multiply-and-subtract step is absent.
define noundef i64 @sdiv64_i32max(i64 noundef %i) {
; GFX9-LABEL: sdiv64_i32max:
; GFX9: ; %bb.0: ; %entry
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_mul_hi_u32 v2, v0, 3
; GFX9-NEXT: v_mov_b32_e32 v3, 0
; GFX9-NEXT: s_mov_b32 s6, 0x80000001
; GFX9-NEXT: v_ashrrev_i32_e32 v6, 31, v1
; GFX9-NEXT: v_mad_u64_u32 v[4:5], s[4:5], v1, 3, v[2:3]
; GFX9-NEXT: v_lshl_add_u32 v8, v6, 31, v6
; GFX9-NEXT: v_mad_u64_u32 v[6:7], s[4:5], v6, 3, 0
; GFX9-NEXT: v_mov_b32_e32 v9, v5
; GFX9-NEXT: v_mov_b32_e32 v5, v3
; GFX9-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v0, s6, v[4:5]
; GFX9-NEXT: v_add3_u32 v7, v7, v8, v6
; GFX9-NEXT: v_mad_u64_u32 v[4:5], s[4:5], v0, -1, v[6:7]
; GFX9-NEXT: v_mov_b32_e32 v2, v3
; GFX9-NEXT: v_add_co_u32_e32 v2, vcc, v9, v2
; GFX9-NEXT: v_addc_co_u32_e64 v3, s[4:5], 0, 0, vcc
; GFX9-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v1, s6, v[2:3]
; GFX9-NEXT: v_sub_u32_e32 v5, v5, v1
; GFX9-NEXT: v_sub_u32_e32 v5, v5, v0
; GFX9-NEXT: v_add_co_u32_e32 v2, vcc, v2, v4
; GFX9-NEXT: v_addc_co_u32_e32 v3, vcc, v3, v5, vcc
; GFX9-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v0, 1, v[2:3]
; GFX9-NEXT: v_add_u32_e32 v3, v1, v3
; GFX9-NEXT: v_ashrrev_i64 v[0:1], 30, v[2:3]
; GFX9-NEXT: v_lshrrev_b32_e32 v2, 31, v3
; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: sdiv64_i32max:
; GFX942: ; %bb.0: ; %entry
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: v_ashrrev_i32_e32 v2, 31, v1
; GFX942-NEXT: v_lshl_add_u32 v4, v2, 31, v2
; GFX942-NEXT: v_mad_u64_u32 v[2:3], s[0:1], v2, 3, 0
; GFX942-NEXT: v_add3_u32 v3, v3, v4, v2
; GFX942-NEXT: v_mul_hi_u32 v4, v0, 3
; GFX942-NEXT: v_mov_b32_e32 v5, 0
; GFX942-NEXT: v_mad_u64_u32 v[6:7], s[0:1], v1, 3, v[4:5]
; GFX942-NEXT: v_mov_b32_e32 v4, v7
; GFX942-NEXT: v_mov_b32_e32 v7, v5
; GFX942-NEXT: s_mov_b32 s2, 0x80000001
; GFX942-NEXT: v_mad_u64_u32 v[6:7], s[0:1], v0, s2, v[6:7]
; GFX942-NEXT: v_mad_u64_u32 v[2:3], s[0:1], v0, -1, v[2:3]
; GFX942-NEXT: v_mov_b32_e32 v6, v7
; GFX942-NEXT: v_mov_b32_e32 v7, v5
; GFX942-NEXT: v_sub_u32_e32 v3, v3, v1
; GFX942-NEXT: v_lshl_add_u64 v[6:7], v[4:5], 0, v[6:7]
; GFX942-NEXT: v_sub_u32_e32 v3, v3, v0
; GFX942-NEXT: v_mad_u64_u32 v[6:7], s[0:1], v1, s2, v[6:7]
; GFX942-NEXT: v_lshl_add_u64 v[2:3], v[6:7], 0, v[2:3]
; GFX942-NEXT: v_mad_u64_u32 v[2:3], s[0:1], v0, 1, v[2:3]
; GFX942-NEXT: v_add_u32_e32 v3, v1, v3
; GFX942-NEXT: v_ashrrev_i64 v[0:1], 30, v[2:3]
; GFX942-NEXT: v_lshrrev_b32_e32 v4, 31, v3
; GFX942-NEXT: v_lshl_add_u64 v[0:1], v[0:1], 0, v[4:5]
; GFX942-NEXT: s_setpc_b64 s[30:31]
;
; GFX1030-LABEL: sdiv64_i32max:
; GFX1030: ; %bb.0: ; %entry
; GFX1030-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1030-NEXT: v_mul_hi_u32 v2, v0, 3
; GFX1030-NEXT: v_mov_b32_e32 v3, 0
; GFX1030-NEXT: v_ashrrev_i32_e32 v6, 31, v1
; GFX1030-NEXT: v_mad_u64_u32 v[4:5], null, v1, 3, v[2:3]
; GFX1030-NEXT: v_mov_b32_e32 v7, v5
; GFX1030-NEXT: v_mov_b32_e32 v5, v3
; GFX1030-NEXT: v_mad_u64_u32 v[2:3], null, v6, 3, 0
; GFX1030-NEXT: v_lshl_add_u32 v6, v6, 31, v6
; GFX1030-NEXT: v_mad_u64_u32 v[4:5], null, 0x80000001, v0, v[4:5]
; GFX1030-NEXT: v_add3_u32 v3, v3, v6, v2
; GFX1030-NEXT: v_mov_b32_e32 v4, v5
; GFX1030-NEXT: v_mad_u64_u32 v[2:3], null, v0, -1, v[2:3]
; GFX1030-NEXT: v_add_co_u32 v4, s4, v7, v4
; GFX1030-NEXT: v_add_co_ci_u32_e64 v5, null, 0, 0, s4
; GFX1030-NEXT: v_sub_nc_u32_e32 v6, v3, v1
; GFX1030-NEXT: v_mad_u64_u32 v[3:4], null, 0x80000001, v1, v[4:5]
; GFX1030-NEXT: v_sub_nc_u32_e32 v5, v6, v0
; GFX1030-NEXT: v_add_co_u32 v2, vcc_lo, v3, v2
; GFX1030-NEXT: v_add_co_ci_u32_e32 v3, vcc_lo, v4, v5, vcc_lo
; GFX1030-NEXT: v_mad_u64_u32 v[2:3], null, v0, 1, v[2:3]
; GFX1030-NEXT: v_add_nc_u32_e32 v3, v1, v3
; GFX1030-NEXT: v_ashrrev_i64 v[0:1], 30, v[2:3]
; GFX1030-NEXT: v_lshrrev_b32_e32 v2, 31, v3
; GFX1030-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
; GFX1030-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
; GFX1030-NEXT: s_setpc_b64 s[30:31]
entry:
%div = sdiv i64 %i, 2147483647
ret i64 %div
}
; Unsigned 64-bit remainder by 2147483647 (2^31 - 1), which is not a power of
; two.
; The expected code has two phases. First, a quotient is formed from 32x32
; multiplies (v_mul_hi_u32 / v_mad_u64_u32) against the dword constants 5 and
; 2, followed by a subtract from the dividend, a 64-bit logical shift right by
; 1, an add back, and a further shift right by 30 (v_alignbit_b32 for the low
; dword, v_lshrrev_b32 for the high dword). Second, that quotient is
; multiplied by 0x7fffffff and the product is subtracted from the original
; dividend.
; gfx900 and gfx942 load the 0x7fffffff multiplier into an SGPR with
; s_brev_b32 of -2; gfx1030 uses the literal directly in v_mad_u64_u32.
define noundef i64 @urem64_i32max(i64 noundef %i) {
; GFX9-LABEL: urem64_i32max:
; GFX9: ; %bb.0: ; %entry
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_mul_hi_u32 v2, v0, 5
; GFX9-NEXT: v_mov_b32_e32 v3, 0
; GFX9-NEXT: s_brev_b32 s6, -2
; GFX9-NEXT: v_mad_u64_u32 v[4:5], s[4:5], v1, 5, v[2:3]
; GFX9-NEXT: v_mov_b32_e32 v6, v5
; GFX9-NEXT: v_mov_b32_e32 v5, v3
; GFX9-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v0, 2, v[4:5]
; GFX9-NEXT: v_mov_b32_e32 v2, v3
; GFX9-NEXT: v_add_co_u32_e32 v2, vcc, v6, v2
; GFX9-NEXT: v_addc_co_u32_e64 v3, s[4:5], 0, 0, vcc
; GFX9-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v1, 2, v[2:3]
; GFX9-NEXT: v_sub_co_u32_e32 v4, vcc, v0, v2
; GFX9-NEXT: v_subb_co_u32_e32 v5, vcc, v1, v3, vcc
; GFX9-NEXT: v_lshrrev_b64 v[4:5], 1, v[4:5]
; GFX9-NEXT: v_add_co_u32_e32 v2, vcc, v4, v2
; GFX9-NEXT: v_addc_co_u32_e32 v4, vcc, v5, v3, vcc
; GFX9-NEXT: v_alignbit_b32 v2, v4, v2, 30
; GFX9-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v2, s6, 0
; GFX9-NEXT: v_lshrrev_b32_e32 v4, 30, v4
; GFX9-NEXT: v_mad_u64_u32 v[3:4], s[4:5], v4, s6, v[3:4]
; GFX9-NEXT: v_sub_co_u32_e32 v0, vcc, v0, v2
; GFX9-NEXT: v_subb_co_u32_e32 v1, vcc, v1, v3, vcc
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: urem64_i32max:
; GFX942: ; %bb.0: ; %entry
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: v_mul_hi_u32 v2, v0, 5
; GFX942-NEXT: v_mov_b32_e32 v3, 0
; GFX942-NEXT: v_mad_u64_u32 v[4:5], s[0:1], v1, 5, v[2:3]
; GFX942-NEXT: v_mov_b32_e32 v2, v5
; GFX942-NEXT: v_mov_b32_e32 v5, v3
; GFX942-NEXT: v_mad_u64_u32 v[4:5], s[0:1], v0, 2, v[4:5]
; GFX942-NEXT: v_mov_b32_e32 v4, v5
; GFX942-NEXT: v_mov_b32_e32 v5, v3
; GFX942-NEXT: v_lshl_add_u64 v[2:3], v[2:3], 0, v[4:5]
; GFX942-NEXT: v_mad_u64_u32 v[2:3], s[0:1], v1, 2, v[2:3]
; GFX942-NEXT: v_sub_co_u32_e32 v4, vcc, v0, v2
; GFX942-NEXT: s_brev_b32 s2, -2
; GFX942-NEXT: s_nop 0
; GFX942-NEXT: v_subb_co_u32_e32 v5, vcc, v1, v3, vcc
; GFX942-NEXT: v_lshrrev_b64 v[4:5], 1, v[4:5]
; GFX942-NEXT: v_lshl_add_u64 v[2:3], v[4:5], 0, v[2:3]
; GFX942-NEXT: v_alignbit_b32 v2, v3, v2, 30
; GFX942-NEXT: v_mad_u64_u32 v[4:5], s[0:1], v2, s2, 0
; GFX942-NEXT: v_mov_b32_e32 v2, v5
; GFX942-NEXT: v_lshrrev_b32_e32 v3, 30, v3
; GFX942-NEXT: v_mad_u64_u32 v[2:3], s[0:1], v3, s2, v[2:3]
; GFX942-NEXT: v_mov_b32_e32 v3, v2
; GFX942-NEXT: v_sub_co_u32_e32 v0, vcc, v0, v4
; GFX942-NEXT: s_nop 1
; GFX942-NEXT: v_subb_co_u32_e32 v1, vcc, v1, v3, vcc
; GFX942-NEXT: s_setpc_b64 s[30:31]
;
; GFX1030-LABEL: urem64_i32max:
; GFX1030: ; %bb.0: ; %entry
; GFX1030-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1030-NEXT: v_mul_hi_u32 v2, v0, 5
; GFX1030-NEXT: v_mov_b32_e32 v3, 0
; GFX1030-NEXT: v_mad_u64_u32 v[4:5], null, v1, 5, v[2:3]
; GFX1030-NEXT: v_mov_b32_e32 v6, v5
; GFX1030-NEXT: v_mov_b32_e32 v5, v3
; GFX1030-NEXT: v_mad_u64_u32 v[2:3], null, v0, 2, v[4:5]
; GFX1030-NEXT: v_mov_b32_e32 v2, v3
; GFX1030-NEXT: v_add_co_u32 v2, s4, v6, v2
; GFX1030-NEXT: v_add_co_ci_u32_e64 v3, null, 0, 0, s4
; GFX1030-NEXT: v_mad_u64_u32 v[2:3], null, v1, 2, v[2:3]
; GFX1030-NEXT: v_sub_co_u32 v4, vcc_lo, v0, v2
; GFX1030-NEXT: v_sub_co_ci_u32_e32 v5, vcc_lo, v1, v3, vcc_lo
; GFX1030-NEXT: v_lshrrev_b64 v[4:5], 1, v[4:5]
; GFX1030-NEXT: v_add_co_u32 v2, vcc_lo, v4, v2
; GFX1030-NEXT: v_add_co_ci_u32_e32 v4, vcc_lo, v5, v3, vcc_lo
; GFX1030-NEXT: v_alignbit_b32 v2, v4, v2, 30
; GFX1030-NEXT: v_lshrrev_b32_e32 v4, 30, v4
; GFX1030-NEXT: v_mad_u64_u32 v[2:3], null, 0x7fffffff, v2, 0
; GFX1030-NEXT: v_mad_u64_u32 v[3:4], null, 0x7fffffff, v4, v[3:4]
; GFX1030-NEXT: v_sub_co_u32 v0, vcc_lo, v0, v2
; GFX1030-NEXT: v_sub_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo
; GFX1030-NEXT: s_setpc_b64 s[30:31]
entry:
%rem = urem i64 %i, 2147483647
ret i64 %rem
}
; Unsigned 64-bit division by 2147483647 (2^31 - 1), which is not a power of
; two.
; The expected code forms the quotient from 32x32 multiplies (v_mul_hi_u32 /
; v_mad_u64_u32) against the dword constants 5 and 2, followed by a subtract
; from the dividend, a 64-bit logical shift right by 1, an add back, and a
; final shift right by 30 (v_alignbit_b32 for the low dword, v_lshrrev_b32 for
; the high dword). The multiply sequence matches the one checked for
; urem64_i32max; only the final multiply-and-subtract step is absent.
define noundef i64 @udiv64_i32max(i64 noundef %i) {
; GFX9-LABEL: udiv64_i32max:
; GFX9: ; %bb.0: ; %entry
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_mul_hi_u32 v2, v0, 5
; GFX9-NEXT: v_mov_b32_e32 v3, 0
; GFX9-NEXT: v_mad_u64_u32 v[4:5], s[4:5], v1, 5, v[2:3]
; GFX9-NEXT: v_mov_b32_e32 v6, v5
; GFX9-NEXT: v_mov_b32_e32 v5, v3
; GFX9-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v0, 2, v[4:5]
; GFX9-NEXT: v_mov_b32_e32 v2, v3
; GFX9-NEXT: v_add_co_u32_e32 v2, vcc, v6, v2
; GFX9-NEXT: v_addc_co_u32_e64 v3, s[4:5], 0, 0, vcc
; GFX9-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v1, 2, v[2:3]
; GFX9-NEXT: v_sub_co_u32_e32 v0, vcc, v0, v2
; GFX9-NEXT: v_subb_co_u32_e32 v1, vcc, v1, v3, vcc
; GFX9-NEXT: v_lshrrev_b64 v[0:1], 1, v[0:1]
; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, v1, v3, vcc
; GFX9-NEXT: v_alignbit_b32 v0, v1, v0, 30
; GFX9-NEXT: v_lshrrev_b32_e32 v1, 30, v1
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: udiv64_i32max:
; GFX942: ; %bb.0: ; %entry
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: v_mul_hi_u32 v2, v0, 5
; GFX942-NEXT: v_mov_b32_e32 v3, 0
; GFX942-NEXT: v_mad_u64_u32 v[4:5], s[0:1], v1, 5, v[2:3]
; GFX942-NEXT: v_mov_b32_e32 v2, v5
; GFX942-NEXT: v_mov_b32_e32 v5, v3
; GFX942-NEXT: v_mad_u64_u32 v[4:5], s[0:1], v0, 2, v[4:5]
; GFX942-NEXT: v_mov_b32_e32 v4, v5
; GFX942-NEXT: v_mov_b32_e32 v5, v3
; GFX942-NEXT: v_lshl_add_u64 v[2:3], v[2:3], 0, v[4:5]
; GFX942-NEXT: v_mad_u64_u32 v[2:3], s[0:1], v1, 2, v[2:3]
; GFX942-NEXT: v_sub_co_u32_e32 v0, vcc, v0, v2
; GFX942-NEXT: s_nop 1
; GFX942-NEXT: v_subb_co_u32_e32 v1, vcc, v1, v3, vcc
; GFX942-NEXT: v_lshrrev_b64 v[0:1], 1, v[0:1]
; GFX942-NEXT: v_lshl_add_u64 v[0:1], v[0:1], 0, v[2:3]
; GFX942-NEXT: v_alignbit_b32 v0, v1, v0, 30
; GFX942-NEXT: v_lshrrev_b32_e32 v1, 30, v1
; GFX942-NEXT: s_setpc_b64 s[30:31]
;
; GFX1030-LABEL: udiv64_i32max:
; GFX1030: ; %bb.0: ; %entry
; GFX1030-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1030-NEXT: v_mul_hi_u32 v2, v0, 5
; GFX1030-NEXT: v_mov_b32_e32 v3, 0
; GFX1030-NEXT: v_mad_u64_u32 v[4:5], null, v1, 5, v[2:3]
; GFX1030-NEXT: v_mov_b32_e32 v6, v5
; GFX1030-NEXT: v_mov_b32_e32 v5, v3
; GFX1030-NEXT: v_mad_u64_u32 v[2:3], null, v0, 2, v[4:5]
; GFX1030-NEXT: v_mov_b32_e32 v2, v3
; GFX1030-NEXT: v_add_co_u32 v2, s4, v6, v2
; GFX1030-NEXT: v_add_co_ci_u32_e64 v3, null, 0, 0, s4
; GFX1030-NEXT: v_mad_u64_u32 v[2:3], null, v1, 2, v[2:3]
; GFX1030-NEXT: v_sub_co_u32 v0, vcc_lo, v0, v2
; GFX1030-NEXT: v_sub_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo
; GFX1030-NEXT: v_lshrrev_b64 v[0:1], 1, v[0:1]
; GFX1030-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
; GFX1030-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo
; GFX1030-NEXT: v_alignbit_b32 v0, v1, v0, 30
; GFX1030-NEXT: v_lshrrev_b32_e32 v1, 30, v1
; GFX1030-NEXT: s_setpc_b64 s[30:31]
entry:
%div = udiv i64 %i, 2147483647
ret i64 %div
}