; The previous expansion of [US]CMP was done using two selects and two
; compares. It produced decent code, but on many platforms it is better to
; implement [US]CMP nodes by performing the following operation:
;
;     [us]cmp(x, y) = (x [us]> y) - (x [us]< y)
;
; This patch adds this new expansion, as well as a hook in TargetLowering to
; allow some targets to still use the select-based approach. AArch64 and
; SystemZ are currently the only targets to prefer the select-based approach,
; but other targets may also start to use it if it provides for better codegen.
; Original file listing: 152 lines, 4.2 KiB, LLVM
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: llc -mtriple=thumbv7-apple-darwin %s -o - | FileCheck %s

; ucmp_8_8: unsigned three-way compare of two zero-extended i8 arguments,
; returning i8. Per the LLVM LangRef, llvm.ucmp yields -1, 0 or 1 for
; x <u y, x == y and x >u y respectively.
;
; Expected Thumb2 sequence (read off the assertions below): a single cmp sets
; the flags; r0 becomes 1 when x is lower (lo) and r2 becomes 1 when x is
; higher (hi); the result is r2 - r0, i.e. (x >u y) - (x <u y).
define i8 @ucmp_8_8(i8 zeroext %x, i8 zeroext %y) nounwind {
; CHECK-LABEL: ucmp_8_8:
; CHECK: @ %bb.0:
; CHECK-NEXT: cmp r0, r1
; CHECK-NEXT: mov.w r0, #0
; CHECK-NEXT: mov.w r2, #0
; CHECK-NEXT: it lo
; CHECK-NEXT: movlo r0, #1
; CHECK-NEXT: it hi
; CHECK-NEXT: movhi r2, #1
; CHECK-NEXT: subs r0, r2, r0
; CHECK-NEXT: bx lr
  %1 = call i8 @llvm.ucmp(i8 %x, i8 %y)
  ret i8 %1
}

; ucmp_8_16: unsigned three-way compare of two zero-extended i16 arguments,
; with the result narrowed to i8 (-1, 0 or 1 per the LLVM LangRef).
;
; Expected Thumb2 sequence (read off the assertions below): one cmp, then
; r0 = (x <u y) via the lo condition and r2 = (x >u y) via the hi condition,
; and finally r0 = r2 - r0.
define i8 @ucmp_8_16(i16 zeroext %x, i16 zeroext %y) nounwind {
; CHECK-LABEL: ucmp_8_16:
; CHECK: @ %bb.0:
; CHECK-NEXT: cmp r0, r1
; CHECK-NEXT: mov.w r0, #0
; CHECK-NEXT: mov.w r2, #0
; CHECK-NEXT: it lo
; CHECK-NEXT: movlo r0, #1
; CHECK-NEXT: it hi
; CHECK-NEXT: movhi r2, #1
; CHECK-NEXT: subs r0, r2, r0
; CHECK-NEXT: bx lr
  %1 = call i8 @llvm.ucmp(i16 %x, i16 %y)
  ret i8 %1
}

; ucmp_8_32: unsigned three-way compare of two i32 arguments, returning i8
; (-1, 0 or 1 per the LLVM LangRef).
;
; Expected Thumb2 sequence (read off the assertions below): one cmp, then
; r0 = (x <u y) via the lo condition and r2 = (x >u y) via the hi condition,
; and finally r0 = r2 - r0.
define i8 @ucmp_8_32(i32 %x, i32 %y) nounwind {
; CHECK-LABEL: ucmp_8_32:
; CHECK: @ %bb.0:
; CHECK-NEXT: cmp r0, r1
; CHECK-NEXT: mov.w r0, #0
; CHECK-NEXT: mov.w r2, #0
; CHECK-NEXT: it lo
; CHECK-NEXT: movlo r0, #1
; CHECK-NEXT: it hi
; CHECK-NEXT: movhi r2, #1
; CHECK-NEXT: subs r0, r2, r0
; CHECK-NEXT: bx lr
  %1 = call i8 @llvm.ucmp(i32 %x, i32 %y)
  ret i8 %1
}

; ucmp_8_64: unsigned three-way compare of two i64 arguments, returning i8
; (-1, 0 or 1 per the LLVM LangRef).
;
; Expected Thumb2 sequence (read off the assertions below): a 64-bit compare
; has no single-instruction form here, so two subtract-with-borrow chains are
; used. The first (r0 - r2, then r1 - r3 with borrow) sets r12 = 1 on lo,
; i.e. x <u y. The second runs the operands the other way round and sets
; r9 = 1 on lo, i.e. x >u y. The result is r9 - r12.
define i8 @ucmp_8_64(i64 %x, i64 %y) nounwind {
; CHECK-LABEL: ucmp_8_64:
; CHECK: @ %bb.0:
; CHECK-NEXT: subs.w r12, r0, r2
; CHECK-NEXT: mov.w r9, #0
; CHECK-NEXT: sbcs.w r12, r1, r3
; CHECK-NEXT: mov.w r12, #0
; CHECK-NEXT: it lo
; CHECK-NEXT: movlo.w r12, #1
; CHECK-NEXT: subs r0, r2, r0
; CHECK-NEXT: sbcs.w r0, r3, r1
; CHECK-NEXT: it lo
; CHECK-NEXT: movlo.w r9, #1
; CHECK-NEXT: sub.w r0, r9, r12
; CHECK-NEXT: bx lr
  %1 = call i8 @llvm.ucmp(i64 %x, i64 %y)
  ret i8 %1
}

; ucmp_8_128: unsigned three-way compare of two i128 arguments, returning i8
; (-1, 0 or 1 per the LLVM LangRef).
;
; Expected Thumb2 sequence (read off the assertions below): r4-r6 and lr are
; pushed, then four words are loaded from the stack above the saved registers
; (three through ldm from sp+16, the fourth through ldr at sp+28). Two
; four-word subtract-with-borrow chains follow: r0-r3 minus the loaded words
; sets r6 = 1 on lo, and the reversed chain sets r5 = 1 on lo. The result is
; r5 - r6, and the pop restores the saved registers and returns through pc.
define i8 @ucmp_8_128(i128 %x, i128 %y) nounwind {
; CHECK-LABEL: ucmp_8_128:
; CHECK: @ %bb.0:
; CHECK-NEXT: push {r4, r5, r6, lr}
; CHECK-NEXT: add.w lr, sp, #16
; CHECK-NEXT: ldr r4, [sp, #28]
; CHECK-NEXT: movs r5, #0
; CHECK-NEXT: ldm.w lr, {r9, r12, lr}
; CHECK-NEXT: subs.w r6, r0, r9
; CHECK-NEXT: sbcs.w r6, r1, r12
; CHECK-NEXT: sbcs.w r6, r2, lr
; CHECK-NEXT: sbcs.w r6, r3, r4
; CHECK-NEXT: mov.w r6, #0
; CHECK-NEXT: it lo
; CHECK-NEXT: movlo r6, #1
; CHECK-NEXT: subs.w r0, r9, r0
; CHECK-NEXT: sbcs.w r0, r12, r1
; CHECK-NEXT: sbcs.w r0, lr, r2
; CHECK-NEXT: sbcs.w r0, r4, r3
; CHECK-NEXT: it lo
; CHECK-NEXT: movlo r5, #1
; CHECK-NEXT: subs r0, r5, r6
; CHECK-NEXT: pop {r4, r5, r6, pc}
  %1 = call i8 @llvm.ucmp(i128 %x, i128 %y)
  ret i8 %1
}

; ucmp_32_32: unsigned three-way compare of two i32 arguments, returning i32
; (-1, 0 or 1 per the LLVM LangRef).
;
; Expected Thumb2 sequence (read off the assertions below): one cmp, then
; r0 = (x <u y) via the lo condition and r2 = (x >u y) via the hi condition,
; and finally r0 = r2 - r0.
define i32 @ucmp_32_32(i32 %x, i32 %y) nounwind {
; CHECK-LABEL: ucmp_32_32:
; CHECK: @ %bb.0:
; CHECK-NEXT: cmp r0, r1
; CHECK-NEXT: mov.w r0, #0
; CHECK-NEXT: mov.w r2, #0
; CHECK-NEXT: it lo
; CHECK-NEXT: movlo r0, #1
; CHECK-NEXT: it hi
; CHECK-NEXT: movhi r2, #1
; CHECK-NEXT: subs r0, r2, r0
; CHECK-NEXT: bx lr
  %1 = call i32 @llvm.ucmp(i32 %x, i32 %y)
  ret i32 %1
}

; ucmp_32_64: unsigned three-way compare of two i64 arguments, returning i32
; (-1, 0 or 1 per the LLVM LangRef).
;
; Expected Thumb2 sequence (read off the assertions below): two
; subtract-with-borrow chains. The first (r0 - r2, then r1 - r3 with borrow)
; sets r12 = 1 on lo, i.e. x <u y. The second runs the operands the other way
; round and sets r9 = 1 on lo, i.e. x >u y. The result is r9 - r12.
define i32 @ucmp_32_64(i64 %x, i64 %y) nounwind {
; CHECK-LABEL: ucmp_32_64:
; CHECK: @ %bb.0:
; CHECK-NEXT: subs.w r12, r0, r2
; CHECK-NEXT: mov.w r9, #0
; CHECK-NEXT: sbcs.w r12, r1, r3
; CHECK-NEXT: mov.w r12, #0
; CHECK-NEXT: it lo
; CHECK-NEXT: movlo.w r12, #1
; CHECK-NEXT: subs r0, r2, r0
; CHECK-NEXT: sbcs.w r0, r3, r1
; CHECK-NEXT: it lo
; CHECK-NEXT: movlo.w r9, #1
; CHECK-NEXT: sub.w r0, r9, r12
; CHECK-NEXT: bx lr
  %1 = call i32 @llvm.ucmp(i64 %x, i64 %y)
  ret i32 %1
}

; ucmp_64_64: unsigned three-way compare of two i64 arguments, returning i64
; (-1, 0 or 1 per the LLVM LangRef).
;
; Expected Thumb2 sequence (read off the assertions below): two
; subtract-with-borrow chains. The first (r0 - r2, then r1 - r3 with borrow)
; sets r12 = 1 on lo, i.e. x <u y. The second runs the operands the other way
; round and sets r9 = 1 on lo, i.e. x >u y. r0 = r9 - r12 is the low word of
; the result, and the arithmetic shift right by 31 copies its sign into r1 to
; form the high word.
define i64 @ucmp_64_64(i64 %x, i64 %y) nounwind {
; CHECK-LABEL: ucmp_64_64:
; CHECK: @ %bb.0:
; CHECK-NEXT: subs.w r12, r0, r2
; CHECK-NEXT: mov.w r9, #0
; CHECK-NEXT: sbcs.w r12, r1, r3
; CHECK-NEXT: mov.w r12, #0
; CHECK-NEXT: it lo
; CHECK-NEXT: movlo.w r12, #1
; CHECK-NEXT: subs r0, r2, r0
; CHECK-NEXT: sbcs.w r0, r3, r1
; CHECK-NEXT: it lo
; CHECK-NEXT: movlo.w r9, #1
; CHECK-NEXT: sub.w r0, r9, r12
; CHECK-NEXT: asrs r1, r0, #31
; CHECK-NEXT: bx lr
  %1 = call i64 @llvm.ucmp(i64 %x, i64 %y)
  ret i64 %1
}