Files
clang-p2996/llvm/test/CodeGen/Thumb/umul_fix.ll
PiJoules a356e6ccad [SelectionDAG] Expand fixed point multiplication into libcall (#79352)
32-bit ARMv6 with thumb doesn't support MULHS/MUL_LOHI as legal/custom
nodes during expansion, which caused fixed point multiplication of
_Accum types to fail. Prior to this, fixed point multiplication only
happened to work on platforms that support these MULHS/MUL_LOHI nodes.

This patch attempts to check if the multiplication can be done via
libcalls, which are provided by the arm runtime. Such libcall attempts
are already made elsewhere, so this patch refactors that libcall logic
into its own functions, and the fixed point expansion now calls into and
reuses that logic.
2024-01-30 13:58:55 -08:00

376 lines
12 KiB
LLVM

; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=thumbv6m-none-unknown-eabi -mcpu=cortex-m0 | FileCheck %s --check-prefix=ARM
;; Declarations of the unsigned fixed point multiply intrinsics exercised
;; below, at i4 (sub-legal), i32 (legal) and i64 (needs expansion) widths.
declare i4 @llvm.umul.fix.i4 (i4, i4, i32)
declare i32 @llvm.umul.fix.i32 (i32, i32, i32)
declare i64 @llvm.umul.fix.i64 (i64, i64, i32)
;; i32 umul.fix with scale 2: expanded to a 64-bit widening multiply via the
;; __aeabi_lmul libcall, then the i32 result (product >> 2) is reassembled
;; from the two 32-bit product halves (lsrs #2 on the low word, lsls #30 on
;; the high word, then added).
define i32 @func(i32 %x, i32 %y) nounwind {
; ARM-LABEL: func:
; ARM: @ %bb.0:
; ARM-NEXT: .save {r7, lr}
; ARM-NEXT: push {r7, lr}
; ARM-NEXT: mov r2, r1
; ARM-NEXT: movs r1, #0
; ARM-NEXT: mov r3, r1
; ARM-NEXT: bl __aeabi_lmul
; ARM-NEXT: lsrs r0, r0, #2
; ARM-NEXT: lsls r1, r1, #30
; ARM-NEXT: adds r0, r1, r0
; ARM-NEXT: pop {r7, pc}
%tmp = call i32 @llvm.umul.fix.i32(i32 %x, i32 %y, i32 2)
ret i32 %tmp
}
;; i64 umul.fix with scale 2: the 64x64->128 widening multiply is built out
;; of several 32-bit __aeabi_lmul libcalls (one per partial product), and the
;; 128-bit product is then shifted right by the scale (lsrs #2 / lsls #30
;; pairs at the end) to form the i64 result.
define i64 @func2(i64 %x, i64 %y) nounwind {
; ARM-LABEL: func2:
; ARM: @ %bb.0:
; ARM-NEXT: .save {r4, r5, r6, r7, lr}
; ARM-NEXT: push {r4, r5, r6, r7, lr}
; ARM-NEXT: .pad #28
; ARM-NEXT: sub sp, #28
; ARM-NEXT: str r3, [sp, #8] @ 4-byte Spill
; ARM-NEXT: mov r4, r1
; ARM-NEXT: str r1, [sp, #4] @ 4-byte Spill
; ARM-NEXT: movs r6, #0
; ARM-NEXT: mov r5, r0
; ARM-NEXT: str r0, [sp, #12] @ 4-byte Spill
; ARM-NEXT: mov r1, r6
; ARM-NEXT: mov r7, r2
; ARM-NEXT: str r2, [sp, #20] @ 4-byte Spill
; ARM-NEXT: mov r3, r6
; ARM-NEXT: bl __aeabi_lmul
; ARM-NEXT: str r0, [sp, #24] @ 4-byte Spill
; ARM-NEXT: str r1, [sp, #16] @ 4-byte Spill
; ARM-NEXT: mov r0, r4
; ARM-NEXT: mov r1, r6
; ARM-NEXT: mov r2, r7
; ARM-NEXT: mov r3, r6
; ARM-NEXT: bl __aeabi_lmul
; ARM-NEXT: mov r4, r1
; ARM-NEXT: ldr r1, [sp, #16] @ 4-byte Reload
; ARM-NEXT: adds r0, r0, r1
; ARM-NEXT: str r0, [sp, #16] @ 4-byte Spill
; ARM-NEXT: adcs r4, r6
; ARM-NEXT: mov r0, r5
; ARM-NEXT: mov r1, r6
; ARM-NEXT: ldr r5, [sp, #8] @ 4-byte Reload
; ARM-NEXT: mov r2, r5
; ARM-NEXT: mov r3, r6
; ARM-NEXT: bl __aeabi_lmul
; ARM-NEXT: mov r7, r1
; ARM-NEXT: ldr r1, [sp, #16] @ 4-byte Reload
; ARM-NEXT: adds r0, r0, r1
; ARM-NEXT: str r0, [sp, #16] @ 4-byte Spill
; ARM-NEXT: adcs r7, r4
; ARM-NEXT: ldr r4, [sp, #4] @ 4-byte Reload
; ARM-NEXT: mov r0, r4
; ARM-NEXT: mov r1, r6
; ARM-NEXT: mov r2, r5
; ARM-NEXT: mov r3, r6
; ARM-NEXT: bl __aeabi_lmul
; ARM-NEXT: adds r7, r0, r7
; ARM-NEXT: ldr r0, [sp, #20] @ 4-byte Reload
; ARM-NEXT: mov r1, r5
; ARM-NEXT: mov r2, r6
; ARM-NEXT: mov r3, r6
; ARM-NEXT: bl __aeabi_lmul
; ARM-NEXT: mov r5, r0
; ARM-NEXT: ldr r0, [sp, #12] @ 4-byte Reload
; ARM-NEXT: mov r1, r4
; ARM-NEXT: mov r2, r6
; ARM-NEXT: mov r3, r6
; ARM-NEXT: bl __aeabi_lmul
; ARM-NEXT: adds r0, r0, r5
; ARM-NEXT: adds r0, r7, r0
; ARM-NEXT: lsls r0, r0, #30
; ARM-NEXT: ldr r2, [sp, #16] @ 4-byte Reload
; ARM-NEXT: lsrs r1, r2, #2
; ARM-NEXT: adds r1, r0, r1
; ARM-NEXT: lsls r0, r2, #30
; ARM-NEXT: ldr r2, [sp, #24] @ 4-byte Reload
; ARM-NEXT: lsrs r2, r2, #2
; ARM-NEXT: adds r0, r0, r2
; ARM-NEXT: add sp, #28
; ARM-NEXT: pop {r4, r5, r6, r7, pc}
%tmp = call i64 @llvm.umul.fix.i64(i64 %x, i64 %y, i32 2)
ret i64 %tmp
}
;; i4 umul.fix with scale 2: the i4 operands are first zero-masked to 4 bits
;; (ands with #15), then the multiply is expanded exactly like the i32 case
;; via __aeabi_lmul followed by the >> 2 recombination.
define i4 @func3(i4 %x, i4 %y) nounwind {
; ARM-LABEL: func3:
; ARM: @ %bb.0:
; ARM-NEXT: .save {r7, lr}
; ARM-NEXT: push {r7, lr}
; ARM-NEXT: movs r2, #15
; ARM-NEXT: ands r0, r2
; ARM-NEXT: ands r2, r1
; ARM-NEXT: movs r1, #0
; ARM-NEXT: mov r3, r1
; ARM-NEXT: bl __aeabi_lmul
; ARM-NEXT: lsrs r0, r0, #2
; ARM-NEXT: lsls r1, r1, #30
; ARM-NEXT: adds r0, r1, r0
; ARM-NEXT: pop {r7, pc}
%tmp = call i4 @llvm.umul.fix.i4(i4 %x, i4 %y, i32 2)
ret i4 %tmp
}
;; These result in regular integer multiplication
;; Scale 0, i32: no fixed point shift is needed, so this lowers to a plain
;; 32-bit muls with no libcall.
define i32 @func4(i32 %x, i32 %y) nounwind {
; ARM-LABEL: func4:
; ARM: @ %bb.0:
; ARM-NEXT: muls r0, r1, r0
; ARM-NEXT: bx lr
%tmp = call i32 @llvm.umul.fix.i32(i32 %x, i32 %y, i32 0)
ret i32 %tmp
}
;; Scale 0, i64: lowers directly to a single __aeabi_lmul libcall (64-bit
;; integer multiply), with no post-shift.
define i64 @func5(i64 %x, i64 %y) nounwind {
; ARM-LABEL: func5:
; ARM: @ %bb.0:
; ARM-NEXT: .save {r7, lr}
; ARM-NEXT: push {r7, lr}
; ARM-NEXT: bl __aeabi_lmul
; ARM-NEXT: pop {r7, pc}
%tmp = call i64 @llvm.umul.fix.i64(i64 %x, i64 %y, i32 0)
ret i64 %tmp
}
;; Scale 0, i4: operands are zero-masked to 4 bits, then multiplied with a
;; plain muls — no libcall or shift required.
define i4 @func6(i4 %x, i4 %y) nounwind {
; ARM-LABEL: func6:
; ARM: @ %bb.0:
; ARM-NEXT: movs r2, #15
; ARM-NEXT: ands r1, r2
; ARM-NEXT: ands r0, r2
; ARM-NEXT: muls r0, r1, r0
; ARM-NEXT: bx lr
%tmp = call i4 @llvm.umul.fix.i4(i4 %x, i4 %y, i32 0)
ret i4 %tmp
}
;; i64 umul.fix with scale 32: the result is the middle 64 bits of the
;; 128-bit product, so after the __aeabi_lmul partial products are summed the
;; two result words are just selected (no shift instructions at the end).
define i64 @func7(i64 %x, i64 %y) nounwind {
; ARM-LABEL: func7:
; ARM: @ %bb.0:
; ARM-NEXT: .save {r4, r5, r6, r7, lr}
; ARM-NEXT: push {r4, r5, r6, r7, lr}
; ARM-NEXT: .pad #20
; ARM-NEXT: sub sp, #20
; ARM-NEXT: str r3, [sp, #4] @ 4-byte Spill
; ARM-NEXT: mov r7, r1
; ARM-NEXT: str r1, [sp] @ 4-byte Spill
; ARM-NEXT: movs r5, #0
; ARM-NEXT: mov r4, r0
; ARM-NEXT: str r0, [sp, #8] @ 4-byte Spill
; ARM-NEXT: mov r1, r5
; ARM-NEXT: mov r6, r2
; ARM-NEXT: str r2, [sp, #16] @ 4-byte Spill
; ARM-NEXT: mov r3, r5
; ARM-NEXT: bl __aeabi_lmul
; ARM-NEXT: str r1, [sp, #12] @ 4-byte Spill
; ARM-NEXT: mov r0, r7
; ARM-NEXT: mov r1, r5
; ARM-NEXT: mov r2, r6
; ARM-NEXT: mov r3, r5
; ARM-NEXT: bl __aeabi_lmul
; ARM-NEXT: mov r7, r1
; ARM-NEXT: ldr r1, [sp, #12] @ 4-byte Reload
; ARM-NEXT: adds r0, r0, r1
; ARM-NEXT: str r0, [sp, #12] @ 4-byte Spill
; ARM-NEXT: adcs r7, r5
; ARM-NEXT: mov r0, r4
; ARM-NEXT: mov r1, r5
; ARM-NEXT: ldr r4, [sp, #4] @ 4-byte Reload
; ARM-NEXT: mov r2, r4
; ARM-NEXT: mov r3, r5
; ARM-NEXT: bl __aeabi_lmul
; ARM-NEXT: mov r6, r1
; ARM-NEXT: ldr r1, [sp, #12] @ 4-byte Reload
; ARM-NEXT: adds r0, r0, r1
; ARM-NEXT: str r0, [sp, #12] @ 4-byte Spill
; ARM-NEXT: adcs r6, r7
; ARM-NEXT: ldr r7, [sp] @ 4-byte Reload
; ARM-NEXT: mov r0, r7
; ARM-NEXT: mov r1, r5
; ARM-NEXT: mov r2, r4
; ARM-NEXT: mov r3, r5
; ARM-NEXT: bl __aeabi_lmul
; ARM-NEXT: adds r6, r0, r6
; ARM-NEXT: ldr r0, [sp, #16] @ 4-byte Reload
; ARM-NEXT: mov r1, r4
; ARM-NEXT: mov r2, r5
; ARM-NEXT: mov r3, r5
; ARM-NEXT: bl __aeabi_lmul
; ARM-NEXT: mov r4, r0
; ARM-NEXT: ldr r0, [sp, #8] @ 4-byte Reload
; ARM-NEXT: mov r1, r7
; ARM-NEXT: mov r2, r5
; ARM-NEXT: mov r3, r5
; ARM-NEXT: bl __aeabi_lmul
; ARM-NEXT: adds r0, r0, r4
; ARM-NEXT: adds r1, r6, r0
; ARM-NEXT: ldr r0, [sp, #12] @ 4-byte Reload
; ARM-NEXT: add sp, #20
; ARM-NEXT: pop {r4, r5, r6, r7, pc}
%tmp = call i64 @llvm.umul.fix.i64(i64 %x, i64 %y, i32 32)
ret i64 %tmp
}
;; i64 umul.fix with scale 63: the result is the 128-bit product shifted
;; right by 63, reassembled at the end from word pairs with lsls #1 /
;; lsrs #31. The partial-product sum here also tracks carries into the high
;; half (the extra adcs chains) since the top bits are needed.
define i64 @func8(i64 %x, i64 %y) nounwind {
; ARM-LABEL: func8:
; ARM: @ %bb.0:
; ARM-NEXT: .save {r4, r5, r6, r7, lr}
; ARM-NEXT: push {r4, r5, r6, r7, lr}
; ARM-NEXT: .pad #28
; ARM-NEXT: sub sp, #28
; ARM-NEXT: str r3, [sp, #24] @ 4-byte Spill
; ARM-NEXT: mov r7, r2
; ARM-NEXT: str r2, [sp, #16] @ 4-byte Spill
; ARM-NEXT: mov r4, r1
; ARM-NEXT: str r1, [sp, #8] @ 4-byte Spill
; ARM-NEXT: movs r5, #0
; ARM-NEXT: mov r6, r0
; ARM-NEXT: str r0, [sp, #12] @ 4-byte Spill
; ARM-NEXT: mov r1, r5
; ARM-NEXT: mov r3, r5
; ARM-NEXT: bl __aeabi_lmul
; ARM-NEXT: str r1, [sp, #20] @ 4-byte Spill
; ARM-NEXT: mov r0, r4
; ARM-NEXT: mov r1, r5
; ARM-NEXT: mov r2, r7
; ARM-NEXT: mov r3, r5
; ARM-NEXT: bl __aeabi_lmul
; ARM-NEXT: mov r7, r1
; ARM-NEXT: ldr r1, [sp, #20] @ 4-byte Reload
; ARM-NEXT: adds r4, r0, r1
; ARM-NEXT: adcs r7, r5
; ARM-NEXT: mov r0, r6
; ARM-NEXT: mov r1, r5
; ARM-NEXT: ldr r6, [sp, #24] @ 4-byte Reload
; ARM-NEXT: mov r2, r6
; ARM-NEXT: mov r3, r5
; ARM-NEXT: bl __aeabi_lmul
; ARM-NEXT: adds r0, r0, r4
; ARM-NEXT: str r0, [sp, #20] @ 4-byte Spill
; ARM-NEXT: adcs r1, r5
; ARM-NEXT: adds r0, r7, r1
; ARM-NEXT: str r0, [sp, #4] @ 4-byte Spill
; ARM-NEXT: mov r4, r5
; ARM-NEXT: adcs r4, r5
; ARM-NEXT: ldr r7, [sp, #8] @ 4-byte Reload
; ARM-NEXT: mov r0, r7
; ARM-NEXT: mov r1, r5
; ARM-NEXT: mov r2, r6
; ARM-NEXT: mov r3, r5
; ARM-NEXT: bl __aeabi_lmul
; ARM-NEXT: mov r6, r1
; ARM-NEXT: ldr r1, [sp, #4] @ 4-byte Reload
; ARM-NEXT: adds r0, r0, r1
; ARM-NEXT: str r0, [sp, #4] @ 4-byte Spill
; ARM-NEXT: adcs r6, r4
; ARM-NEXT: ldr r0, [sp, #16] @ 4-byte Reload
; ARM-NEXT: ldr r1, [sp, #24] @ 4-byte Reload
; ARM-NEXT: mov r2, r5
; ARM-NEXT: mov r3, r5
; ARM-NEXT: bl __aeabi_lmul
; ARM-NEXT: mov r4, r0
; ARM-NEXT: str r1, [sp, #24] @ 4-byte Spill
; ARM-NEXT: ldr r0, [sp, #12] @ 4-byte Reload
; ARM-NEXT: mov r1, r7
; ARM-NEXT: mov r2, r5
; ARM-NEXT: mov r3, r5
; ARM-NEXT: bl __aeabi_lmul
; ARM-NEXT: adds r0, r0, r4
; ARM-NEXT: ldr r2, [sp, #24] @ 4-byte Reload
; ARM-NEXT: adcs r1, r2
; ARM-NEXT: ldr r2, [sp, #4] @ 4-byte Reload
; ARM-NEXT: adds r0, r2, r0
; ARM-NEXT: adcs r1, r6
; ARM-NEXT: lsls r1, r1, #1
; ARM-NEXT: lsrs r2, r0, #31
; ARM-NEXT: adds r1, r1, r2
; ARM-NEXT: lsls r0, r0, #1
; ARM-NEXT: ldr r2, [sp, #20] @ 4-byte Reload
; ARM-NEXT: lsrs r2, r2, #31
; ARM-NEXT: adds r0, r0, r2
; ARM-NEXT: add sp, #28
; ARM-NEXT: pop {r4, r5, r6, r7, pc}
%tmp = call i64 @llvm.umul.fix.i64(i64 %x, i64 %y, i32 63)
ret i64 %tmp
}
;; i64 umul.fix with scale 64: the result is exactly the high 64 bits of the
;; 128-bit product, so the expansion computes the full widening multiply via
;; __aeabi_lmul partial products (with carry propagation into the high half)
;; and returns the top two words directly — no final shift.
define i64 @func9(i64 %x, i64 %y) nounwind {
; ARM-LABEL: func9:
; ARM: @ %bb.0:
; ARM-NEXT: .save {r4, r5, r6, r7, lr}
; ARM-NEXT: push {r4, r5, r6, r7, lr}
; ARM-NEXT: .pad #20
; ARM-NEXT: sub sp, #20
; ARM-NEXT: str r3, [sp, #16] @ 4-byte Spill
; ARM-NEXT: mov r7, r2
; ARM-NEXT: str r2, [sp, #12] @ 4-byte Spill
; ARM-NEXT: mov r4, r1
; ARM-NEXT: str r1, [sp, #4] @ 4-byte Spill
; ARM-NEXT: movs r5, #0
; ARM-NEXT: mov r6, r0
; ARM-NEXT: str r0, [sp, #8] @ 4-byte Spill
; ARM-NEXT: mov r1, r5
; ARM-NEXT: mov r3, r5
; ARM-NEXT: bl __aeabi_lmul
; ARM-NEXT: str r1, [sp] @ 4-byte Spill
; ARM-NEXT: mov r0, r4
; ARM-NEXT: mov r1, r5
; ARM-NEXT: mov r2, r7
; ARM-NEXT: mov r3, r5
; ARM-NEXT: bl __aeabi_lmul
; ARM-NEXT: mov r7, r1
; ARM-NEXT: ldr r1, [sp] @ 4-byte Reload
; ARM-NEXT: adds r4, r0, r1
; ARM-NEXT: adcs r7, r5
; ARM-NEXT: mov r0, r6
; ARM-NEXT: mov r1, r5
; ARM-NEXT: ldr r6, [sp, #16] @ 4-byte Reload
; ARM-NEXT: mov r2, r6
; ARM-NEXT: mov r3, r5
; ARM-NEXT: bl __aeabi_lmul
; ARM-NEXT: adds r0, r0, r4
; ARM-NEXT: adcs r1, r5
; ARM-NEXT: adds r0, r7, r1
; ARM-NEXT: str r0, [sp] @ 4-byte Spill
; ARM-NEXT: mov r4, r5
; ARM-NEXT: adcs r4, r5
; ARM-NEXT: ldr r7, [sp, #4] @ 4-byte Reload
; ARM-NEXT: mov r0, r7
; ARM-NEXT: mov r1, r5
; ARM-NEXT: mov r2, r6
; ARM-NEXT: mov r3, r5
; ARM-NEXT: bl __aeabi_lmul
; ARM-NEXT: mov r6, r1
; ARM-NEXT: ldr r1, [sp] @ 4-byte Reload
; ARM-NEXT: adds r0, r0, r1
; ARM-NEXT: str r0, [sp] @ 4-byte Spill
; ARM-NEXT: adcs r6, r4
; ARM-NEXT: ldr r0, [sp, #12] @ 4-byte Reload
; ARM-NEXT: ldr r1, [sp, #16] @ 4-byte Reload
; ARM-NEXT: mov r2, r5
; ARM-NEXT: mov r3, r5
; ARM-NEXT: bl __aeabi_lmul
; ARM-NEXT: mov r4, r0
; ARM-NEXT: str r1, [sp, #16] @ 4-byte Spill
; ARM-NEXT: ldr r0, [sp, #8] @ 4-byte Reload
; ARM-NEXT: mov r1, r7
; ARM-NEXT: mov r2, r5
; ARM-NEXT: mov r3, r5
; ARM-NEXT: bl __aeabi_lmul
; ARM-NEXT: adds r0, r0, r4
; ARM-NEXT: ldr r2, [sp, #16] @ 4-byte Reload
; ARM-NEXT: adcs r1, r2
; ARM-NEXT: ldr r2, [sp] @ 4-byte Reload
; ARM-NEXT: adds r0, r2, r0
; ARM-NEXT: adcs r1, r6
; ARM-NEXT: add sp, #20
; ARM-NEXT: pop {r4, r5, r6, r7, pc}
%tmp = call i64 @llvm.umul.fix.i64(i64 %x, i64 %y, i32 64)
ret i64 %tmp
}