Files
clang-p2996/llvm/test/CodeGen/AArch64/parity.ll
Ties Stuij 82a5f1c62b [AArch64] use CNT for ISD::popcnt and ISD::parity if available
These are the two places where we explicitly want to use CNT in
SelectionDAG when the CSSC feature is available: ISD::popcnt and ISD::parity.

For both, we need to make sure we're emitting optimized code for i32 (and
narrower), i64 and i128. The best option is of course the GPR CNT
instruction. If we don't have CSSC, but we do have NEON, we'll use the NEON
CNT instruction on the vector registers. If all else fails, we'll fall back on
the general GPR popcnt and parity implementations.

spec:
https://developer.arm.com/documentation/ddi0602/2022-09/Base-Instructions/CNT--Count-bits-

Reviewed By: lenary

Differential Revision: https://reviews.llvm.org/D138808
2022-12-02 11:27:14 +00:00

250 lines
6.9 KiB
LLVM

; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -verify-machineinstrs -o - %s -mtriple=aarch64-linux-gnu | FileCheck %s
; RUN: llc -verify-machineinstrs -o - %s -mtriple=aarch64-linux-gnu -mattr=+cssc | FileCheck %s -check-prefix=CHECK-CSSC
; Parity of an i4: the low bit of its population count.
; The input is first masked to 4 bits (#0xf) in both configurations. The
; default run expects an eor/lsr folding sequence; the +cssc run expects a
; single GPR cnt followed by a mask of bit 0.
define i4 @parity_4(i4 %x) {
; CHECK-LABEL: parity_4:
; CHECK: // %bb.0:
; CHECK-NEXT: and w8, w0, #0xf
; CHECK-NEXT: eor w8, w8, w8, lsr #2
; CHECK-NEXT: eor w8, w8, w8, lsr #1
; CHECK-NEXT: and w0, w8, #0x1
; CHECK-NEXT: ret
;
; CHECK-CSSC-LABEL: parity_4:
; CHECK-CSSC: // %bb.0:
; CHECK-CSSC-NEXT: and w8, w0, #0xf
; CHECK-CSSC-NEXT: cnt w8, w8
; CHECK-CSSC-NEXT: and w0, w8, #0x1
; CHECK-CSSC-NEXT: ret
  %popcount = tail call i4 @llvm.ctpop.i4(i4 %x)
  %lowbit = and i4 %popcount, 1
  ret i4 %lowbit
}
; Parity of an i8: the low bit of its population count.
; The input is masked to 8 bits (#0xff) before either lowering. The default
; run folds with three eor/lsr steps (4, 2, 1); the +cssc run uses one GPR cnt.
define i8 @parity_8(i8 %x) {
; CHECK-LABEL: parity_8:
; CHECK: // %bb.0:
; CHECK-NEXT: and w8, w0, #0xff
; CHECK-NEXT: eor w8, w8, w8, lsr #4
; CHECK-NEXT: eor w8, w8, w8, lsr #2
; CHECK-NEXT: eor w8, w8, w8, lsr #1
; CHECK-NEXT: and w0, w8, #0x1
; CHECK-NEXT: ret
;
; CHECK-CSSC-LABEL: parity_8:
; CHECK-CSSC: // %bb.0:
; CHECK-CSSC-NEXT: and w8, w0, #0xff
; CHECK-CSSC-NEXT: cnt w8, w8
; CHECK-CSSC-NEXT: and w0, w8, #0x1
; CHECK-CSSC-NEXT: ret
  %popcount = tail call i8 @llvm.ctpop.i8(i8 %x)
  %lowbit = and i8 %popcount, 1
  ret i8 %lowbit
}
; Parity of an i16: the low bit of its population count.
; The input is masked to 16 bits (#0xffff) first. The default run folds with
; four eor/lsr steps (8, 4, 2, 1); the +cssc run uses one GPR cnt.
define i16 @parity_16(i16 %x) {
; CHECK-LABEL: parity_16:
; CHECK: // %bb.0:
; CHECK-NEXT: and w8, w0, #0xffff
; CHECK-NEXT: eor w8, w8, w8, lsr #8
; CHECK-NEXT: eor w8, w8, w8, lsr #4
; CHECK-NEXT: eor w8, w8, w8, lsr #2
; CHECK-NEXT: eor w8, w8, w8, lsr #1
; CHECK-NEXT: and w0, w8, #0x1
; CHECK-NEXT: ret
;
; CHECK-CSSC-LABEL: parity_16:
; CHECK-CSSC: // %bb.0:
; CHECK-CSSC-NEXT: and w8, w0, #0xffff
; CHECK-CSSC-NEXT: cnt w8, w8
; CHECK-CSSC-NEXT: and w0, w8, #0x1
; CHECK-CSSC-NEXT: ret
  %popcount = tail call i16 @llvm.ctpop.i16(i16 %x)
  %lowbit = and i16 %popcount, 1
  ret i16 %lowbit
}
; Parity of an i17, a non-power-of-two width.
; The input is masked to 17 bits (#0x1ffff) first. The default run needs a
; second scratch register (w9) for the first fold step; the +cssc run is the
; same mask / cnt / mask-bit-0 shape as the narrower widths.
define i17 @parity_17(i17 %x) {
; CHECK-LABEL: parity_17:
; CHECK: // %bb.0:
; CHECK-NEXT: and w8, w0, #0x1ffff
; CHECK-NEXT: eor w9, w8, w8, lsr #16
; CHECK-NEXT: eor w8, w9, w8, lsr #8
; CHECK-NEXT: eor w8, w8, w8, lsr #4
; CHECK-NEXT: eor w8, w8, w8, lsr #2
; CHECK-NEXT: eor w8, w8, w8, lsr #1
; CHECK-NEXT: and w0, w8, #0x1
; CHECK-NEXT: ret
;
; CHECK-CSSC-LABEL: parity_17:
; CHECK-CSSC: // %bb.0:
; CHECK-CSSC-NEXT: and w8, w0, #0x1ffff
; CHECK-CSSC-NEXT: cnt w8, w8
; CHECK-CSSC-NEXT: and w0, w8, #0x1
; CHECK-CSSC-NEXT: ret
  %popcount = tail call i17 @llvm.ctpop.i17(i17 %x)
  %lowbit = and i17 %popcount, 1
  ret i17 %lowbit
}
; Parity of a full-width i32; no input masking is expected.
; The default run folds with five eor/lsr steps (16, 8, 4, 2, 1); the +cssc
; run applies cnt directly to w0.
define i32 @parity_32(i32 %x) {
; CHECK-LABEL: parity_32:
; CHECK: // %bb.0:
; CHECK-NEXT: eor w8, w0, w0, lsr #16
; CHECK-NEXT: eor w8, w8, w8, lsr #8
; CHECK-NEXT: eor w8, w8, w8, lsr #4
; CHECK-NEXT: eor w8, w8, w8, lsr #2
; CHECK-NEXT: eor w8, w8, w8, lsr #1
; CHECK-NEXT: and w0, w8, #0x1
; CHECK-NEXT: ret
;
; CHECK-CSSC-LABEL: parity_32:
; CHECK-CSSC: // %bb.0:
; CHECK-CSSC-NEXT: cnt w8, w0
; CHECK-CSSC-NEXT: and w0, w8, #0x1
; CHECK-CSSC-NEXT: ret
  %popcount = tail call i32 @llvm.ctpop.i32(i32 %x)
  %lowbit = and i32 %popcount, 1
  ret i32 %lowbit
}
; Parity of an i64.
; The default run moves the value into a vector register and uses the
; per-byte cnt plus uaddlv to sum the byte counts; the +cssc run uses the
; 64-bit GPR cnt and masks bit 0 in an x register.
define i64 @parity_64(i64 %x) {
; CHECK-LABEL: parity_64:
; CHECK: // %bb.0:
; CHECK-NEXT: fmov d0, x0
; CHECK-NEXT: cnt v0.8b, v0.8b
; CHECK-NEXT: uaddlv h0, v0.8b
; CHECK-NEXT: fmov w8, s0
; CHECK-NEXT: and w0, w8, #0x1
; CHECK-NEXT: ret
;
; CHECK-CSSC-LABEL: parity_64:
; CHECK-CSSC: // %bb.0:
; CHECK-CSSC-NEXT: cnt x8, x0
; CHECK-CSSC-NEXT: and x0, x8, #0x1
; CHECK-CSSC-NEXT: ret
  %popcount = tail call i64 @llvm.ctpop.i64(i64 %x)
  %lowbit = and i64 %popcount, 1
  ret i64 %lowbit
}
; Parity of an i128, passed in x0 (low half) and x1 (high half).
; The default run packs both halves into one 128-bit vector register and
; uses cnt/uaddlv over 16 bytes. The +cssc run xors the two halves together
; and takes a single 64-bit cnt. In both runs the high result half (x1) is
; set to zero.
define i128 @parity_128(i128 %x) {
; CHECK-LABEL: parity_128:
; CHECK: // %bb.0:
; CHECK-NEXT: fmov d0, x0
; CHECK-NEXT: mov v0.d[1], x1
; CHECK-NEXT: mov x1, xzr
; CHECK-NEXT: cnt v0.16b, v0.16b
; CHECK-NEXT: uaddlv h0, v0.16b
; CHECK-NEXT: fmov w8, s0
; CHECK-NEXT: and w0, w8, #0x1
; CHECK-NEXT: ret
;
; CHECK-CSSC-LABEL: parity_128:
; CHECK-CSSC: // %bb.0:
; CHECK-CSSC-NEXT: eor x8, x0, x1
; CHECK-CSSC-NEXT: mov x1, xzr
; CHECK-CSSC-NEXT: cnt x8, x8
; CHECK-CSSC-NEXT: and x0, x8, #0x1
; CHECK-CSSC-NEXT: ret
  %popcount = tail call i128 @llvm.ctpop.i128(i128 %x)
  %lowbit = and i128 %popcount, 1
  ret i128 %lowbit
}
; Parity of an i64 where the popcount is truncated to i32 before masking.
; The truncation sits between ctpop and the `and 1`; the expected code still
; counts the full 64-bit input and masks bit 0 in a w register.
define i32 @parity_64_trunc(i64 %x) {
; CHECK-LABEL: parity_64_trunc:
; CHECK: // %bb.0:
; CHECK-NEXT: fmov d0, x0
; CHECK-NEXT: cnt v0.8b, v0.8b
; CHECK-NEXT: uaddlv h0, v0.8b
; CHECK-NEXT: fmov w8, s0
; CHECK-NEXT: and w0, w8, #0x1
; CHECK-NEXT: ret
;
; CHECK-CSSC-LABEL: parity_64_trunc:
; CHECK-CSSC: // %bb.0:
; CHECK-CSSC-NEXT: cnt x8, x0
; CHECK-CSSC-NEXT: and w0, w8, #0x1
; CHECK-CSSC-NEXT: ret
  %popcount = tail call i64 @llvm.ctpop.i64(i64 %x)
  %narrow = trunc i64 %popcount to i32
  %lowbit = and i32 %narrow, 1
  ret i32 %lowbit
}
; Parity of an i32 where the popcount is truncated to i8 before masking.
; The expected code in both runs is the same as for the plain i32 parity
; case: the intermediate trunc adds no instructions.
define i8 @parity_32_trunc(i32 %x) {
; CHECK-LABEL: parity_32_trunc:
; CHECK: // %bb.0:
; CHECK-NEXT: eor w8, w0, w0, lsr #16
; CHECK-NEXT: eor w8, w8, w8, lsr #8
; CHECK-NEXT: eor w8, w8, w8, lsr #4
; CHECK-NEXT: eor w8, w8, w8, lsr #2
; CHECK-NEXT: eor w8, w8, w8, lsr #1
; CHECK-NEXT: and w0, w8, #0x1
; CHECK-NEXT: ret
;
; CHECK-CSSC-LABEL: parity_32_trunc:
; CHECK-CSSC: // %bb.0:
; CHECK-CSSC-NEXT: cnt w8, w0
; CHECK-CSSC-NEXT: and w0, w8, #0x1
; CHECK-CSSC-NEXT: ret
  %popcount = tail call i32 @llvm.ctpop.i32(i32 %x)
  %narrow = trunc i32 %popcount to i8
  %lowbit = and i8 %narrow, 1
  ret i8 %lowbit
}
; Parity of an i8 that is zero-extended to i32 before the popcount.
; Only the low 8 bits can be set, and the default run's expected folding
; starts at lsr #4 (the 8-bit sequence) rather than lsr #16. The +cssc run
; masks to 8 bits and uses one GPR cnt.
define i32 @parity_8_zext(i8 %x) {
; CHECK-LABEL: parity_8_zext:
; CHECK: // %bb.0:
; CHECK-NEXT: and w8, w0, #0xff
; CHECK-NEXT: eor w8, w8, w8, lsr #4
; CHECK-NEXT: eor w8, w8, w8, lsr #2
; CHECK-NEXT: eor w8, w8, w8, lsr #1
; CHECK-NEXT: and w0, w8, #0x1
; CHECK-NEXT: ret
;
; CHECK-CSSC-LABEL: parity_8_zext:
; CHECK-CSSC: // %bb.0:
; CHECK-CSSC-NEXT: and w8, w0, #0xff
; CHECK-CSSC-NEXT: cnt w8, w8
; CHECK-CSSC-NEXT: and w0, w8, #0x1
; CHECK-CSSC-NEXT: ret
  %wide = zext i8 %x to i32
  %popcount = tail call i32 @llvm.ctpop.i32(i32 %wide)
  %lowbit = and i32 %popcount, 1
  ret i32 %lowbit
}
; Parity of an i32 whose value is explicitly masked to its low 8 bits (and 255).
; The expected output matches the zero-extended i8 case: the default run
; uses the short 8-bit fold (lsr #4, #2, #1) and the +cssc run uses one GPR cnt.
define i32 @parity_8_mask(i32 %x) {
; CHECK-LABEL: parity_8_mask:
; CHECK: // %bb.0:
; CHECK-NEXT: and w8, w0, #0xff
; CHECK-NEXT: eor w8, w8, w8, lsr #4
; CHECK-NEXT: eor w8, w8, w8, lsr #2
; CHECK-NEXT: eor w8, w8, w8, lsr #1
; CHECK-NEXT: and w0, w8, #0x1
; CHECK-NEXT: ret
;
; CHECK-CSSC-LABEL: parity_8_mask:
; CHECK-CSSC: // %bb.0:
; CHECK-CSSC-NEXT: and w8, w0, #0xff
; CHECK-CSSC-NEXT: cnt w8, w8
; CHECK-CSSC-NEXT: and w0, w8, #0x1
; CHECK-CSSC-NEXT: ret
  %masked = and i32 %x, 255
  %popcount = tail call i32 @llvm.ctpop.i32(i32 %masked)
  %lowbit = and i32 %popcount, 1
  ret i32 %lowbit
}
; Population-count intrinsics, one overload per integer width used by the
; functions in this file.
declare i4 @llvm.ctpop.i4(i4)
declare i8 @llvm.ctpop.i8(i8)
declare i16 @llvm.ctpop.i16(i16)
declare i17 @llvm.ctpop.i17(i17)
declare i32 @llvm.ctpop.i32(i32)
declare i64 @llvm.ctpop.i64(i64)
declare i128 @llvm.ctpop.i128(i128)