Correct the legality of i32 mul_lohi on AArch64. Previously, AArch64 incorrectly reported i32 mul_lohi as Legal, which allowed BuildUDIV/SDIV to use it. A later DAGCombine would then replace the mul_lohi nodes with MULHS/MULHU because only the high half was used. That conversion does not check the legality of MULHS/MULHU, under the assumption that LegalizeDAG can turn them back into MUL_LOHI later. After the conversion to MULHS/MULHU, DAGCombine ran again, saw that these operations aren't supported but an i64 MUL is, and converted them to an i64 MUL plus a shift. Without that last combine, LegalizeDAG would convert them back to MUL_LOHI and isel would fail to find a pattern. This patch teaches BuildUDIV/SDIV to create the wide mul and shift directly so that we can report the correct operation legality on AArch64. It also enables div by constant folding for more cases on VE. I don't know if VE wants this div by constant optimization or not. If they don't want it, they can use the isIntDivCheap hook to disable it. Reviewed By: RKSimon Differential Revision: https://reviews.llvm.org/D150333
56 lines
1.9 KiB
LLVM
56 lines
1.9 KiB
LLVM
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
|
|
; RUN: llc < %s -mtriple=ve -mattr=+vpu | FileCheck %s
|
|
|
|
; This would assert because VE specified that all setcc
; nodes (even with vector operands) return a scalar value.
|
|
|
|
; Unsigned division of every lane of a <4 x i8> vector by the constant 255
; (the all-ones i8 bit pattern, i.e. "minus one" when read as signed).
;
; The expected output contains no divide instruction. Each lane is and-ed
; with (56)0 (presumably the low-8-bit mask 0xFF; confirm against the VE
; immediate syntax), multiplied by 16843010 (0x01010102) with the wide
; multiply muls.l, and shifted right by 32. For any x in [0, 255],
; (x * 16843010) >> 32 equals x / 255.
;
; The assertions below are autogenerated; regenerate them with
; utils/update_llc_test_checks.py instead of editing them by hand.
define <4 x i8> @udiv_by_minus_one(<4 x i8> %x) {
; CHECK-LABEL: udiv_by_minus_one:
; CHECK: # %bb.0:
; CHECK-NEXT: and %s0, %s0, (56)0
; CHECK-NEXT: lea %s4, 16843010
; CHECK-NEXT: muls.l %s0, %s0, %s4
; CHECK-NEXT: srl %s0, %s0, 32
; CHECK-NEXT: and %s1, %s1, (56)0
; CHECK-NEXT: muls.l %s1, %s1, %s4
; CHECK-NEXT: srl %s1, %s1, 32
; CHECK-NEXT: and %s2, %s2, (56)0
; CHECK-NEXT: muls.l %s2, %s2, %s4
; CHECK-NEXT: srl %s2, %s2, 32
; CHECK-NEXT: and %s3, %s3, (56)0
; CHECK-NEXT: muls.l %s3, %s3, %s4
; CHECK-NEXT: srl %s3, %s3, 32
; CHECK-NEXT: b.l.t (, %s10)
  %r = udiv <4 x i8> %x, <i8 255, i8 255, i8 255, i8 255>
  ret <4 x i8> %r
}
|
|
|
|
; Unsigned remainder of every lane of a <4 x i8> vector by the constant 255
; (the all-ones i8 bit pattern, i.e. "minus one" when read as signed).
;
; The expected output contains no divide instruction. Per lane, the quotient
; q is formed as (x * 16843010) >> 32 using muls.l and srl, which equals
; x / 255 for any x in [0, 255]. The remainder is then x - q * 255: q is
; multiplied by (56)0 with muls.w.sx (presumably the immediate 0xFF, which
; matches the divisor 255; confirm against the VE immediate syntax) and the
; product is subtracted from x with subs.w.sx.
;
; The assertions below are autogenerated; regenerate them with
; utils/update_llc_test_checks.py instead of editing them by hand.
define <4 x i8> @urem_by_minus_one(<4 x i8> %x) {
; CHECK-LABEL: urem_by_minus_one:
; CHECK: # %bb.0:
; CHECK-NEXT: and %s0, %s0, (56)0
; CHECK-NEXT: and %s1, %s1, (56)0
; CHECK-NEXT: and %s2, %s2, (56)0
; CHECK-NEXT: and %s3, %s3, (56)0
; CHECK-NEXT: lea %s4, 16843010
; CHECK-NEXT: muls.l %s5, %s3, %s4
; CHECK-NEXT: srl %s5, %s5, 32
; CHECK-NEXT: muls.w.sx %s5, %s5, (56)0
; CHECK-NEXT: subs.w.sx %s3, %s3, %s5
; CHECK-NEXT: muls.l %s5, %s2, %s4
; CHECK-NEXT: srl %s5, %s5, 32
; CHECK-NEXT: muls.w.sx %s5, %s5, (56)0
; CHECK-NEXT: subs.w.sx %s2, %s2, %s5
; CHECK-NEXT: muls.l %s5, %s1, %s4
; CHECK-NEXT: srl %s5, %s5, 32
; CHECK-NEXT: muls.w.sx %s5, %s5, (56)0
; CHECK-NEXT: subs.w.sx %s1, %s1, %s5
; CHECK-NEXT: muls.l %s4, %s0, %s4
; CHECK-NEXT: srl %s4, %s4, 32
; CHECK-NEXT: muls.w.sx %s4, %s4, (56)0
; CHECK-NEXT: subs.w.sx %s0, %s0, %s4
; CHECK-NEXT: b.l.t (, %s10)
  %r = urem <4 x i8> %x, <i8 255, i8 255, i8 255, i8 255>
  ret <4 x i8> %r
}
|