Files
clang-p2996/llvm/test/CodeGen/RISCV/pr51206.ll
Craig Topper 6f7de819b9 [RISCV] Use MULHU for more division by constant cases.
D113805 improved handling of i32 divu/remu on RV64. The basic idea
from that can be extended to (mul (and X, C2), C1) where C2 is any
mask constant.

We can replace the and with an SLLI by shifting by the number of
leading zeros in C2 if we also shift C1 left by XLen - lzcnt(C1)
bits. This will give the full product XLen additional trailing zeros,
putting the result in the output of MULHU. If we can't use ANDI,
ZEXT.H, or ZEXT.W, this will avoid materializing C2 in a register.

The downside is it may take 1 additional instruction to create C1.
But since that's not on the critical path, it can hopefully be
interleaved with other operations.

The previous tablegen pattern is replaced by custom isel code.

Reviewed By: asb

Differential Revision: https://reviews.llvm.org/D115310
2021-12-09 09:10:14 -08:00

64 lines
2.1 KiB
LLVM

; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
;RUN: llc < %s -mtriple=riscv64-unknown-linux-gnu -mattr=+m | FileCheck %s
; This test used to cause an infinite loop.
; Two u8 inputs (@global, @global.2) and two i32 outputs (@global.1, @global.3)
; used by @wobble below. Loading through globals keeps the values opaque to the
; optimizer so the udiv-by-constant lowering under test actually fires.
@global = global i8 0, align 1
@global.1 = global i32 0, align 4
@global.2 = global i8 0, align 1
@global.3 = global i32 0, align 4
; Computes ((u8)@global + 1) * (u8)@global.2, stores the incremented value to
; @global.1, stores (u16)product / 5 (zero-extended) to @global.3, and calls
; @quux when the full i32 product is >= 5. Returns undef (callers ignore it).
; The i16 udiv by 5 exercises the (mul (and X, 0xffff), C1) -> SLLI + MULHU
; lowering: the expected output below shows slli-by-48 replacing the AND mask
; and the magic constant 52429 (0xCCCD) pre-shifted for the high-half multiply.
define signext i32 @wobble() nounwind {
; CHECK-LABEL: wobble:
; CHECK: # %bb.0: # %bb
; CHECK-NEXT: addi sp, sp, -16
; CHECK-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
; CHECK-NEXT: lui a0, %hi(global)
; CHECK-NEXT: lbu a0, %lo(global)(a0)
; CHECK-NEXT: lui a1, %hi(global.2)
; CHECK-NEXT: lbu a1, %lo(global.2)(a1)
; CHECK-NEXT: addi a0, a0, 1
; CHECK-NEXT: lui a2, %hi(global.1)
; CHECK-NEXT: sw a0, %lo(global.1)(a2)
; CHECK-NEXT: mul a0, a0, a1
; CHECK-NEXT: slli a1, a0, 48
; CHECK-NEXT: lui a2, 52429
; CHECK-NEXT: slli a2, a2, 4
; CHECK-NEXT: mulhu a1, a1, a2
; CHECK-NEXT: srli a1, a1, 18
; CHECK-NEXT: lui a2, %hi(global.3)
; CHECK-NEXT: li a3, 5
; CHECK-NEXT: sw a1, %lo(global.3)(a2)
; CHECK-NEXT: bltu a0, a3, .LBB0_2
; CHECK-NEXT: # %bb.1: # %bb10
; CHECK-NEXT: call quux@plt
; CHECK-NEXT: .LBB0_2: # %bb12
; CHECK-NEXT: li a0, 0
; CHECK-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
bb:
; Increment the first u8 input and publish the result as an i32.
%tmp = load i8, i8* @global, align 1
%tmp1 = zext i8 %tmp to i32
%tmp2 = add nuw nsw i32 %tmp1, 1
store i32 %tmp2, i32* @global.1, align 4
; Multiply by the second u8 input; nuw/nsw hold because both operands fit in
; 9 bits, so the i32 product cannot wrap.
%tmp3 = load i8, i8* @global.2, align 1
%tmp4 = zext i8 %tmp3 to i32
%tmp5 = mul nuw nsw i32 %tmp2, %tmp4
; Truncate to i16 and divide by 5: this is the (mul (and X, 0xffff), C1)
; pattern that historically caused the infinite loop (PR51206) and is now
; lowered via SLLI/MULHU/SRLI in the expected output above.
%tmp6 = trunc i32 %tmp5 to i16
%tmp7 = udiv i16 %tmp6, 5
%tmp8 = zext i16 %tmp7 to i32
store i32 %tmp8, i32* @global.3, align 4
; Branch on the untruncated product; call the external helper when >= 5.
%tmp9 = icmp ult i32 %tmp5, 5
br i1 %tmp9, label %bb12, label %bb10
bb10: ; preds = %bb
%tmp11 = tail call signext i32 bitcast (i32 (...)* @quux to i32 ()*)()
br label %bb12
bb12: ; preds = %bb10, %bb
ret i32 undef
}
; External varargs helper; only its call (via PLT) appears in the output.
declare signext i32 @quux(...)