LLVM normally prefers to minimize the number of bits set in an AND immediate, but that doesn't always match the available ARM instructions. In Thumb1 mode, prefer uxtb or uxth where possible; otherwise, prefer a two-instruction sequence movs+ands or movs+bics.

Some potential improvements are outlined in ARMTargetLowering::targetShrinkDemandedConstant, but it seems to work pretty well already.

The ARMISelDAGToDAG fix ensures we don't generate an invalid UBFX instruction due to a larger-than-expected mask. (It's orthogonal, in some sense, but as far as I can tell it's either impossible or nearly impossible to reproduce the bug without this change.)

According to my testing, this seems to consistently improve code size by a small amount by forming bic more often for ISD::AND with an immediate.

Differential Revision: https://reviews.llvm.org/D50030

llvm-svn: 339472
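To illustrate the Thumb1 movs+bics preference, here is a minimal sketch (a hypothetical function, not taken from this patch's tests): an AND whose inverted immediate fits in 8 bits can be selected as a movs of the inverted mask followed by bics, instead of loading the full 32-bit mask from a constant pool:

  define i32 @clear_bits(i32 %x) {
  entry:
    ; mask = 0xffffff07; ~mask = 0xf8 = 248 fits a Thumb1 movs
    ; immediate, so this can lower to "movs rN, #248; bics r0, rN".
    %masked = and i32 %x, -249
    ret i32 %masked
  }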
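For the ISel fix, a reduced sketch of the shape involved (hypothetical; not the actual reproducer): ARMISelDAGToDAG forms UBFX from a shift-and-mask pair, and if the AND mask that survives demanded-bits shrinking is wider than the bits remaining after the shift, the computed bitfield width can extend past bit 31, which is invalid:

  define i32 @extract_field(i32 %x) {
  entry:
    %shifted = lshr i32 %x, 24
    ; the mask covers 12 bits, but only 8 bits remain after the shift
    %field = and i32 %shifted, 4095
    ret i32 %field
  }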
; RUN: llc -O0 -mtriple thumbv7-windows-itanium -filetype asm -o - %s | FileCheck %s
; RUN: llc -O0 -mtriple thumbv7-windows-msvc -filetype asm -o - %s | FileCheck %s
; RUN: llc -O0 -mtriple thumbv7-windows-mingw32 -filetype asm -o - %s | FileCheck %s

declare arm_aapcs_vfpcc i32 @num_entries()

define arm_aapcs_vfpcc void @test___builtin_alloca() {
entry:
  %array = alloca i8*, align 4
  %call = call arm_aapcs_vfpcc i32 @num_entries()
  %mul = mul i32 4, %call
  %0 = alloca i8, i32 %mul
  store i8* %0, i8** %array, align 4
  ret void
}

; CHECK: bl num_entries
; Any register is actually valid here, but it turns out we use lr
; because we do not have the kill flag on R0.
; CHECK: mov.w [[R1:lr]], #7
; CHECK: add.w [[R0:r[0-9]+]], [[R1]], [[R0]], lsl #2
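; The alignment mask here needs only bit 2 cleared: bits 0-1 of the
; byte count are discarded by the lsrs #2 below, so the AND's demanded
; bits shrink to a single set bit and one bic immediate suffices.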
; CHECK: bic [[R0]], [[R0]], #4
; CHECK: lsrs r4, [[R0]], #2
; CHECK: bl __chkstk
; CHECK: sub.w sp, sp, r4