Files
clang-p2996/llvm/test/CodeGen/AMDGPU/amdgpu-mul24-knownbits.ll
Craig Topper 454256ef4f [AMDGPU] Correct the known bits calculation for MUL_I24.
I'm not entirely sure, but based on how ComputeNumSignBits handles
ISD::MUL, I believe this code was miscounting the number of sign
bits.

As an example of an incorrect result, let's say that countMinSignBits
returned 1 for the left hand side and 24 for the right hand side.
LHSValBits would be 23 and RHSValBits would be 0, and the sum would
be 23. This would cause the code to set 9 high bits as zero/one. Now
suppose the real value of the left hand side is 0x800000 and that of the
right hand side is 0xffffff. The product is 0x00800000, which has 8 sign
bits, not 9.

The number of valid bits for the left and right operands is now
the number of non-sign bits + 1. If the sum of the valid bits of
the left and right sides exceeds 32, then the result may overflow and we
can't say anything about the sign of the result. If the sum is 32
or less then it won't overflow and we know the result has at least
1 sign bit.

For the previous example, the code will now calculate the left
side valid bits as 24 and the right side as 1. The sum will be 25
and the sign bits will be 32 - 25 + 1 which is 8, the correct value.

Differential Revision: https://reviews.llvm.org/D116469
2022-01-14 08:54:54 -08:00

53 lines
1.9 KiB
LLVM

; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck --check-prefix=GCN %s
; RUN: llc -mtriple amdgcn-amd-amdhsa -mcpu=gfx900 -early-live-intervals -verify-machineinstrs < %s | FileCheck --check-prefix=GCN %s
; Kernel test: the workitem id is masked to 0..3, multiplied by -5, masked
; with -32, sign-extended to i64 and used as a float element index for a
; store of 0.0 through %p.
; The expected assembly keeps the v_mul_i32_i24, the AND with 0xffffffe0 and
; the arithmetic shift right by 31 that produces the high half (v1) of the
; 64-bit index before it is scaled by 4 (shift left by 2).
; NOTE(review): presumably the intent is that the AND and the sign extension
; must not be folded away from wrong known bits of the 24-bit multiply --
; confirm against the commit that introduced this test.
; NOTE(review): attribute groups #4 and #28 are referenced here but are not
; defined in the visible part of this file.
define weak_odr amdgpu_kernel void @test_mul24_knownbits_kernel(float addrspace(1)* %p) #4 {
; GCN-LABEL: test_mul24_knownbits_kernel:
; GCN: ; %bb.0: ; %entry
; GCN-NEXT: v_and_b32_e32 v0, 3, v0
; GCN-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
; GCN-NEXT: v_mul_i32_i24_e32 v0, -5, v0
; GCN-NEXT: v_and_b32_e32 v0, 0xffffffe0, v0
; GCN-NEXT: v_ashrrev_i32_e32 v1, 31, v0
; GCN-NEXT: v_lshlrev_b64 v[0:1], 2, v[0:1]
; GCN-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NEXT: v_mov_b32_e32 v2, s1
; GCN-NEXT: v_add_co_u32_e32 v0, vcc, s0, v0
; GCN-NEXT: v_addc_co_u32_e32 v1, vcc, v2, v1, vcc
; GCN-NEXT: v_mov_b32_e32 v2, 0
; GCN-NEXT: global_store_dword v[0:1], v2, off
; GCN-NEXT: s_endpgm
entry:
; Workitem id in x, annotated with the !4 range metadata.
%0 = tail call i32 @llvm.amdgcn.workitem.id.x() #28, !range !4
; Keep only the low two bits, so %tid is one of 0, 1, 2, 3.
%tid = and i32 %0, 3
; The product is one of 0, -5, -10, -15.
%1 = mul nsw i32 %tid, -5
; Clearing the low five bits leaves either 0 or -32 (0xffffffe0).
%v1 = and i32 %1, -32
; The index may be negative, so the extension to 64 bits has to be signed.
%v2 = sext i32 %v1 to i64
%v3 = getelementptr inbounds float, float addrspace(1)* %p, i64 %v2
store float 0.000, float addrspace(1)* %v3, align 4
ret void
}
; Function test: both operands are OR-ed with -128, so bits 7 and above of
; each operand are ones and each operand lies in [-128, -1]. Their 32-bit
; product is then logically shifted right by 14.
; The product lies in [1, 16384], so the shifted result is 1 only when both
; operands are -128 and 0 otherwise; the shift cannot be replaced by a
; constant.
; The expected assembly materializes the OR mask as the 24-bit constant
; 0xffff80, multiplies with v_mul_i32_i24 and still performs the shift by 14.
define i32 @f(i32 %x, i32 %y) {
; GCN-LABEL: f:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT: s_mov_b32 s4, 0xffff80
; GCN-NEXT: v_or_b32_e32 v0, s4, v0
; GCN-NEXT: v_or_b32_e32 v1, s4, v1
; GCN-NEXT: v_mul_i32_i24_e32 v0, v0, v1
; GCN-NEXT: v_lshrrev_b32_e32 v0, 14, v0
; GCN-NEXT: s_setpc_b64 s[30:31]
; Force bits 7..31 of each operand to one.
%xx = or i32 %x, -128 ; 0xffffff80
%yy = or i32 %y, -128 ; 0xffffff80
; Plain i32 multiply in the IR; the expected assembly uses the 24-bit form.
%r = mul i32 %xx, %yy
; Logical shift: bit 14 of the product becomes bit 0 of the result.
%rr = lshr i32 %r, 14
ret i32 %rr
}
; Declaration of the workitem-id intrinsic called by the kernel test.
; NOTE(review): attribute group #20 is not defined in the visible part of this
; file; the "Function Attrs" line below is the only description of it here.
; Function Attrs: nounwind readnone speculatable
declare i32 @llvm.amdgcn.workitem.id.x() #20
; Operand of the !range annotation on the workitem.id.x call: lower bound 0,
; upper bound 1024 (LLVM range metadata excludes the upper bound).
!4 = !{i32 0, i32 1024}