This is a fix for a regression discussed in:
https://github.com/llvm/llvm-project/issues/53829
We cleared more high multiplier bits with 995d400,
but that can lead to worse codegen because we would fail
to recognize the now disguised multiplication by neg-power-of-2
as a shift-left. The problem exists independently of the IR
change in the case that the multiply already had cleared high
bits. We also convert shl+sub into mul+add in instcombine's
negator.
This patch fills in the high-bits to see the shift transform
opportunity. An Alive2 attempt to show correctness:
https://alive2.llvm.org/ce/z/GgSKVX
The AArch64, RISCV, and MIPS diffs look like clear wins. The
x86 code requires an extra move register in the minimal examples,
but it's still an improvement to get rid of the multiply on all
CPUs that I am aware of (because multiply is never as fast as a
shift).
There's a potential follow-up noted by the TODO comment. We
should already convert that pattern into shl+add in IR, so
it's probably not common:
https://alive2.llvm.org/ce/z/7QY_Ga
Fixes #53829
Differential Revision: https://reviews.llvm.org/D120216
31 lines · 945 B · LLVM
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-- | FileCheck %s

; Scalar case: %x is multiplied by 131008 (0x1ffc0 = 2047 << 6), but the
; following shl by 47 means only the low 17 bits of (%m + %y) are demanded.
; Modulo 2^17, x*131008 == -(x << 6), so the multiply can be lowered as a
; shift-left + subtract — the CHECK lines pin that imul-free codegen.
define i64 @muladd_demand(i64 %x, i64 %y) {
; CHECK-LABEL: muladd_demand:
; CHECK: # %bb.0:
; CHECK-NEXT: movq %rsi, %rax
; CHECK-NEXT: shll $6, %edi
; CHECK-NEXT: subl %edi, %eax
; CHECK-NEXT: shlq $47, %rax
; CHECK-NEXT: retq
  %m = mul i64 %x, 131008 ; 0x0001ffc0
  %a = add i64 %m, %y
  %r = shl i64 %a, 47
  ret i64 %r
}
; Vector variant with the add operands commuted (%y + %m). Here an explicit
; `and` with 131071 (2^17 - 1) plays the role of the scalar test's shl: only
; the low 17 bits of the multiply are demanded, so the mul by 131008 again
; lowers to shift + subtract (psllq/psubq) instead of a vector multiply.
define <2 x i64> @muladd_demand_commute(<2 x i64> %x, <2 x i64> %y) {
; CHECK-LABEL: muladd_demand_commute:
; CHECK: # %bb.0:
; CHECK-NEXT: psllq $6, %xmm0
; CHECK-NEXT: psubq %xmm0, %xmm1
; CHECK-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
; CHECK-NEXT: movdqa %xmm1, %xmm0
; CHECK-NEXT: retq
  %m = mul <2 x i64> %x, <i64 131008, i64 131008>
  %a = add <2 x i64> %y, %m
  %r = and <2 x i64> %a, <i64 131071, i64 131071>
  ret <2 x i64> %r
}