This attempts to fold:
```
%1:_(<2 x s32>), %2:_(<2 x s32>) = G_UNMERGE_VALUES %0:_(<4 x s32>)
%3:_(s32), %4:_(s32) = G_UNMERGE_VALUES %1
```
into a single UNMERGE:
```
%3:_(s32), %4:_(s32), %5:_(s32), %6:_(s32) = G_UNMERGE_VALUES %0
```
This transform already exists; this patch alters it so that it occurs when the resulting UNMERGE is considered legal.

At the moment it does not try to transform cases where the result would be extracting a subelement from a vector, as the code is not set up to handle it:
```
%1:_(s32), %2:_(s32) = G_UNMERGE_VALUES %0:_(<2 x s32>)
%3:_(s16), %4:_(s16) = G_UNMERGE_VALUES %1
```
This helps us reduce the number of legalization artefacts, especially from widened vectors padded with undef.
280 lines
7.8 KiB
LLVM
280 lines
7.8 KiB
LLVM
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
|
|
; RUN: llc -mtriple=aarch64 %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-SD
|
|
; RUN: llc -mtriple=aarch64 -global-isel %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-GI
|
|
|
|
; ====== Scalar Tests =====
|
|
|
|
; ====== Scalar bswap.i16 Tests =====
|
|
; bswap on i16 returned as a plain i16 (the return carries no zeroext/signext
; attribute). SelectionDAG is expected to emit a single rev16; GlobalISel is
; expected to emit rev followed by lsr #16.
define i16 @bswap_i16_to_i16_anyext(i16 %a){
; CHECK-SD-LABEL: bswap_i16_to_i16_anyext:
; CHECK-SD: // %bb.0:
; CHECK-SD-NEXT: rev16 w0, w0
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: bswap_i16_to_i16_anyext:
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: rev w8, w0
; CHECK-GI-NEXT: lsr w0, w8, #16
; CHECK-GI-NEXT: ret
  %res = call i16 @llvm.bswap.i16(i16 %a)
  ret i16 %res
}
declare i16 @llvm.bswap.i16(i16)
|
|
|
|
; The zext here is optimised to an any_extend during isel.
|
|
; bswap on i16, zero-extended to i64 and then shifted left by 48. The shift
; pushes every extension bit out of the value, so only the 16 swapped bits
; survive, in bits 63:48 of the result.
define i64 @bswap_i16_to_i64_anyext(i16 %a) {
; CHECK-SD-LABEL: bswap_i16_to_i64_anyext:
; CHECK-SD: // %bb.0:
; CHECK-SD-NEXT: // kill: def $w0 killed $w0 def $x0
; CHECK-SD-NEXT: rev16 x8, x0
; CHECK-SD-NEXT: lsl x0, x8, #48
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: bswap_i16_to_i64_anyext:
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: rev w8, w0
; CHECK-GI-NEXT: lsr w8, w8, #16
; CHECK-GI-NEXT: and x8, x8, #0xffff
; CHECK-GI-NEXT: lsl x0, x8, #48
; CHECK-GI-NEXT: ret
  %res = call i16 @llvm.bswap.i16(i16 %a)
  %ext = zext i16 %res to i64
  %shl = shl i64 %ext, 48
  ret i64 %shl
}
|
|
|
|
; The zext here is optimised to an any_extend during isel.
|
|
; bswap on i16, zero-extended to i128 and then shifted left by 112, leaving
; only the 16 swapped bits in bits 127:112. Both selectors are expected to
; return zero in x0 and the shifted value in x1.
define i128 @bswap_i16_to_i128_anyext(i16 %a) {
; CHECK-SD-LABEL: bswap_i16_to_i128_anyext:
; CHECK-SD: // %bb.0:
; CHECK-SD-NEXT: mov w8, w0
; CHECK-SD-NEXT: mov x0, xzr
; CHECK-SD-NEXT: rev w8, w8
; CHECK-SD-NEXT: lsr w8, w8, #16
; CHECK-SD-NEXT: lsl x1, x8, #48
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: bswap_i16_to_i128_anyext:
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: mov w8, w0
; CHECK-GI-NEXT: mov x0, xzr
; CHECK-GI-NEXT: rev w8, w8
; CHECK-GI-NEXT: lsr w8, w8, #16
; CHECK-GI-NEXT: bfi x8, x8, #32, #32
; CHECK-GI-NEXT: and x8, x8, #0xffff
; CHECK-GI-NEXT: lsl x1, x8, #48
; CHECK-GI-NEXT: ret
  %res = call i16 @llvm.bswap.i16(i16 %a)
  %ext = zext i16 %res to i128
  %shl = shl i128 %ext, 112
  ret i128 %shl
}
|
|
|
|
; bswap on i16 followed by a zext to i32 that is returned directly, so the
; upper 16 bits of the result must be zero. Both selectors share one set of
; checks: rev on the 32-bit register, then lsr #16.
define i32 @bswap_i16_to_i32_zext(i16 %a){
; CHECK-LABEL: bswap_i16_to_i32_zext:
; CHECK: // %bb.0:
; CHECK-NEXT: rev w8, w0
; CHECK-NEXT: lsr w0, w8, #16
; CHECK-NEXT: ret
  %res = call i16 @llvm.bswap.i16(i16 %a)
  %ext = zext i16 %res to i32
  ret i32 %ext
}
|
|
|
|
; ====== Other scalar bswap tests =====
|
|
; bswap on i32: expected to select a single rev on the w register.
define i32 @bswap_i32(i32 %a){
; CHECK-LABEL: bswap_i32:
; CHECK: // %bb.0:
; CHECK-NEXT: rev w0, w0
; CHECK-NEXT: ret
  %res = call i32 @llvm.bswap.i32(i32 %a)
  ret i32 %res
}
declare i32 @llvm.bswap.i32(i32)
|
|
|
|
; bswap on i64: expected to select a single rev on the x register.
define i64 @bswap_i64(i64 %a){
; CHECK-LABEL: bswap_i64:
; CHECK: // %bb.0:
; CHECK-NEXT: rev x0, x0
; CHECK-NEXT: ret
  %res = call i64 @llvm.bswap.i64(i64 %a)
  ret i64 %res
}
declare i64 @llvm.bswap.i64(i64)
|
|
|
|
; bswap on i128: expected to byte-reverse each 64-bit half with rev and
; exchange the halves between x0 and x1.
define i128 @bswap_i128(i128 %a){
; CHECK-LABEL: bswap_i128:
; CHECK: // %bb.0:
; CHECK-NEXT: rev x8, x1
; CHECK-NEXT: rev x1, x0
; CHECK-NEXT: mov x0, x8
; CHECK-NEXT: ret
  %res = call i128 @llvm.bswap.i128(i128 %a)
  ret i128 %res
}
declare i128 @llvm.bswap.i128(i128)
|
|
|
|
; ===== Legal Vector Type Tests =====
|
|
|
|
; bswap on <4 x i16>: expected to select one rev16 on the 8b arrangement.
define <4 x i16> @bswap_v4i16(<4 x i16> %a){
; CHECK-LABEL: bswap_v4i16:
; CHECK: // %bb.0:
; CHECK-NEXT: rev16 v0.8b, v0.8b
; CHECK-NEXT: ret
  %res = call <4 x i16> @llvm.bswap.v4i16(<4 x i16> %a)
  ret <4 x i16> %res
}
declare <4 x i16> @llvm.bswap.v4i16(<4 x i16>)
|
|
|
|
; bswap on <8 x i16>: expected to select one rev16 on the 16b arrangement.
define <8 x i16> @bswap_v8i16(<8 x i16> %a){
; CHECK-LABEL: bswap_v8i16:
; CHECK: // %bb.0:
; CHECK-NEXT: rev16 v0.16b, v0.16b
; CHECK-NEXT: ret
  %res = call <8 x i16> @llvm.bswap.v8i16(<8 x i16> %a)
  ret <8 x i16> %res
}
declare <8 x i16> @llvm.bswap.v8i16(<8 x i16>)
|
|
|
|
; bswap on <2 x i32>: expected to select one rev32 on the 8b arrangement.
define <2 x i32> @bswap_v2i32(<2 x i32> %a){
; CHECK-LABEL: bswap_v2i32:
; CHECK: // %bb.0:
; CHECK-NEXT: rev32 v0.8b, v0.8b
; CHECK-NEXT: ret
  %res = call <2 x i32> @llvm.bswap.v2i32(<2 x i32> %a)
  ret <2 x i32> %res
}
declare <2 x i32> @llvm.bswap.v2i32(<2 x i32>)
|
|
|
|
; bswap on <4 x i32>: expected to select one rev32 on the 16b arrangement.
define <4 x i32> @bswap_v4i32(<4 x i32> %a){
; CHECK-LABEL: bswap_v4i32:
; CHECK: // %bb.0:
; CHECK-NEXT: rev32 v0.16b, v0.16b
; CHECK-NEXT: ret
  %res = call <4 x i32> @llvm.bswap.v4i32(<4 x i32> %a)
  ret <4 x i32> %res
}
declare <4 x i32> @llvm.bswap.v4i32(<4 x i32>)
|
|
|
|
; bswap on <2 x i64>: expected to select one rev64 on the 16b arrangement.
define <2 x i64> @bswap_v2i64(<2 x i64> %a){
; CHECK-LABEL: bswap_v2i64:
; CHECK: // %bb.0:
; CHECK-NEXT: rev64 v0.16b, v0.16b
; CHECK-NEXT: ret
  %res = call <2 x i64> @llvm.bswap.v2i64(<2 x i64> %a)
  ret <2 x i64> %res
}
declare <2 x i64> @llvm.bswap.v2i64(<2 x i64>)
|
|
|
|
; ===== Smaller/Larger Width Vectors with Legal Element Sizes =====
|
|
|
|
; bswap on <2 x i16>, a vector narrower than the legal vector types tested
; earlier in this file. The two selectors are expected to differ:
; SelectionDAG uses rev32 plus ushr #16 on 32-bit lanes, while GlobalISel
; uses uzp1, rev16 and ushll.
define <2 x i16> @bswap_v2i16(<2 x i16> %a){
; CHECK-SD-LABEL: bswap_v2i16:
; CHECK-SD: // %bb.0: // %entry
; CHECK-SD-NEXT: rev32 v0.8b, v0.8b
; CHECK-SD-NEXT: ushr v0.2s, v0.2s, #16
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: bswap_v2i16:
; CHECK-GI: // %bb.0: // %entry
; CHECK-GI-NEXT: uzp1 v0.4h, v0.4h, v0.4h
; CHECK-GI-NEXT: rev16 v0.8b, v0.8b
; CHECK-GI-NEXT: ushll v0.4s, v0.4h, #0
; CHECK-GI-NEXT: // kill: def $d0 killed $d0 killed $q0
; CHECK-GI-NEXT: ret
entry:
  %res = call <2 x i16> @llvm.bswap.v2i16(<2 x i16> %a)
  ret <2 x i16> %res
}
declare <2 x i16> @llvm.bswap.v2i16(<2 x i16>)
|
|
|
|
; bswap on <16 x i16>: expected to be handled as two 128-bit halves, with
; one rev16 on each of v0 and v1.
define <16 x i16> @bswap_v16i16(<16 x i16> %a){
; CHECK-LABEL: bswap_v16i16:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: rev16 v0.16b, v0.16b
; CHECK-NEXT: rev16 v1.16b, v1.16b
; CHECK-NEXT: ret
entry:
  %res = call <16 x i16> @llvm.bswap.v16i16(<16 x i16> %a)
  ret <16 x i16> %res
}
declare <16 x i16> @llvm.bswap.v16i16(<16 x i16>)
|
|
|
|
; bswap on <1 x i32>. SelectionDAG is expected to emit a single vector
; rev32; GlobalISel is expected to move the element to a general-purpose
; register (fmov), apply a scalar rev, and insert it back into lane 0.
define <1 x i32> @bswap_v1i32(<1 x i32> %a){
; CHECK-SD-LABEL: bswap_v1i32:
; CHECK-SD: // %bb.0: // %entry
; CHECK-SD-NEXT: rev32 v0.8b, v0.8b
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: bswap_v1i32:
; CHECK-GI: // %bb.0: // %entry
; CHECK-GI-NEXT: fmov w8, s0
; CHECK-GI-NEXT: rev w8, w8
; CHECK-GI-NEXT: mov v0.s[0], w8
; CHECK-GI-NEXT: // kill: def $d0 killed $d0 killed $q0
; CHECK-GI-NEXT: ret
entry:
  %res = call <1 x i32> @llvm.bswap.v1i32(<1 x i32> %a)
  ret <1 x i32> %res
}
declare <1 x i32> @llvm.bswap.v1i32(<1 x i32>)
|
|
|
|
; bswap on <8 x i32>: expected to be handled as two 128-bit halves, with
; one rev32 on each of v0 and v1.
define <8 x i32> @bswap_v8i32(<8 x i32> %a){
; CHECK-LABEL: bswap_v8i32:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: rev32 v0.16b, v0.16b
; CHECK-NEXT: rev32 v1.16b, v1.16b
; CHECK-NEXT: ret
entry:
  %res = call <8 x i32> @llvm.bswap.v8i32(<8 x i32> %a)
  ret <8 x i32> %res
}
declare <8 x i32> @llvm.bswap.v8i32(<8 x i32>)
|
|
|
|
; bswap on <4 x i64>: expected to be handled as two 128-bit halves, with
; one rev64 on each of v0 and v1.
define <4 x i64> @bswap_v4i64(<4 x i64> %a){
; CHECK-LABEL: bswap_v4i64:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: rev64 v0.16b, v0.16b
; CHECK-NEXT: rev64 v1.16b, v1.16b
; CHECK-NEXT: ret
entry:
  %res = call <4 x i64> @llvm.bswap.v4i64(<4 x i64> %a)
  ret <4 x i64> %res
}
declare <4 x i64> @llvm.bswap.v4i64(<4 x i64>)
|
|
|
|
; ===== Vectors with Non-Pow 2 Widths =====
|
|
|
|
; bswap on <3 x i16> (non-power-of-two element count): expected to select
; one rev16 on the 8b arrangement for both selectors.
define <3 x i16> @bswap_v3i16(<3 x i16> %a){
; CHECK-LABEL: bswap_v3i16:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: rev16 v0.8b, v0.8b
; CHECK-NEXT: ret
entry:
  %res = call <3 x i16> @llvm.bswap.v3i16(<3 x i16> %a)
  ret <3 x i16> %res
}
declare <3 x i16> @llvm.bswap.v3i16(<3 x i16>)
|
|
|
|
; bswap on <7 x i16> (non-power-of-two element count): expected to select
; one rev16 on the 16b arrangement for both selectors.
define <7 x i16> @bswap_v7i16(<7 x i16> %a){
; CHECK-LABEL: bswap_v7i16:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: rev16 v0.16b, v0.16b
; CHECK-NEXT: ret
entry:
  %res = call <7 x i16> @llvm.bswap.v7i16(<7 x i16> %a)
  ret <7 x i16> %res
}
declare <7 x i16> @llvm.bswap.v7i16(<7 x i16>)
|
|
|
|
; bswap on <3 x i32> (non-power-of-two element count): expected to select
; one rev32 on the 16b arrangement for both selectors.
define <3 x i32> @bswap_v3i32(<3 x i32> %a){
; CHECK-LABEL: bswap_v3i32:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: rev32 v0.16b, v0.16b
; CHECK-NEXT: ret
entry:
  %res = call <3 x i32> @llvm.bswap.v3i32(<3 x i32> %a)
  ret <3 x i32> %res
}
declare <3 x i32> @llvm.bswap.v3i32(<3 x i32>)
|