When compiling for an SVE target we can use INDEX to generate constant
fixed-length step vectors, e.g.:
```
uint32x4_t foo() {
  return (uint32x4_t){0, 1, 2, 3};
}
```
Currently:
```
foo():
        adrp    x8, .LCPI1_0
        ldr     q0, [x8, :lo12:.LCPI1_0]
        ret
```
With INDEX:
```
foo():
        index   z0.s, #0, #1
        ret
```
The logic for this was already in `LowerBUILD_VECTOR`, though it was
hidden under a check for `!Subtarget->isNeonAvailable()`. This patch
refactors this to enable the corresponding code path unconditionally for
constant step vectors (as long as we can use SVE for them).
136 lines
4.1 KiB
LLVM
136 lines
4.1 KiB
LLVM
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s | FileCheck %s

; 128-bit vectors

; Constant <16 x i8> step vector 0..15 (start 0, step 1).
; Expected lowering: a single INDEX on byte lanes instead of a constant-pool
; load. The "// kill" line is an assembly comment in llc's output, not an
; instruction (presumably noting that only the q0 part of z0 is returned).
define <16 x i8> @v16i8() #0 {
; CHECK-LABEL: v16i8:
; CHECK: // %bb.0:
; CHECK-NEXT: index z0.b, #0, #1
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
; CHECK-NEXT: ret
  ret <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15>
}

; Constant <8 x i16> step vector 0..7 (start 0, step 1).
; Expected lowering: a single INDEX on halfword lanes.
define <8 x i16> @v8i16() #0 {
; CHECK-LABEL: v8i16:
; CHECK: // %bb.0:
; CHECK-NEXT: index z0.h, #0, #1
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
; CHECK-NEXT: ret
  ret <8 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>
}

; Constant <4 x i32> step vector 0..3 (start 0, step 1).
; Expected lowering: a single INDEX on word lanes.
define <4 x i32> @v4i32() #0 {
; CHECK-LABEL: v4i32:
; CHECK: // %bb.0:
; CHECK-NEXT: index z0.s, #0, #1
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
; CHECK-NEXT: ret
  ret <4 x i32> <i32 0, i32 1, i32 2, i32 3>
}

; Constant <2 x i64> step vector <0, 1> (start 0, step 1).
; Expected lowering: a single INDEX on doubleword lanes.
define <2 x i64> @v2i64() #0 {
; CHECK-LABEL: v2i64:
; CHECK: // %bb.0:
; CHECK-NEXT: index z0.d, #0, #1
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
; CHECK-NEXT: ret
  ret <2 x i64> <i64 0, i64 1>
}

; 64-bit vectors

; Constant <8 x i8> step vector 0..7 (start 0, step 1).
; Same INDEX lowering as the 128-bit cases; the "// kill" comment names $d0
; here rather than $q0.
define <8 x i8> @v8i8() #0 {
; CHECK-LABEL: v8i8:
; CHECK: // %bb.0:
; CHECK-NEXT: index z0.b, #0, #1
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
  ret <8 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7>
}

; Constant <4 x i16> step vector 0..3 (start 0, step 1).
; Expected lowering: a single INDEX on halfword lanes, returned via $d0.
define <4 x i16> @v4i16() #0 {
; CHECK-LABEL: v4i16:
; CHECK: // %bb.0:
; CHECK-NEXT: index z0.h, #0, #1
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
  ret <4 x i16> <i16 0, i16 1, i16 2, i16 3>
}

; Constant <2 x i32> step vector <0, 1> (start 0, step 1).
; Expected lowering: a single INDEX on word lanes, returned via $d0.
define <2 x i32> @v2i32() #0 {
; CHECK-LABEL: v2i32:
; CHECK: // %bb.0:
; CHECK-NEXT: index z0.s, #0, #1
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
  ret <2 x i32> <i32 0, i32 1>
}

; Positive test, non-zero start and non-unit step: <1, 3, 5, 7>.
; Note: This should be INDEX z0.s, #1, #2 (without the ORR).
; The checked output is still correct: INDEX #0, #2 yields only even lanes
; (0, 2, 4, 6), so OR-ing in bit 0 is the same as adding 1.
define <4 x i32> @v4i32_non_zero_non_one() #0 {
; CHECK-LABEL: v4i32_non_zero_non_one:
; CHECK: // %bb.0:
; CHECK-NEXT: index z0.s, #0, #2
; CHECK-NEXT: orr z0.s, z0.s, #0x1
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
; CHECK-NEXT: ret
  ret <4 x i32> <i32 1, i32 3, i32 5, i32 7>
}

; Positive test, non-zero start and non-unit step with negative immediates:
; <-1, -3, -5, -7> (start -1, step -2). Both values are encoded directly as
; INDEX immediates, so a single instruction is expected.
define <4 x i32> @v4i32_neg_immediates() #0 {
; CHECK-LABEL: v4i32_neg_immediates:
; CHECK: // %bb.0:
; CHECK-NEXT: index z0.s, #-1, #-2
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
; CHECK-NEXT: ret
  ret <4 x i32> <i32 -1, i32 -3, i32 -5, i32 -7>
}

; Positive test, start out of immediate range: <16, 17, 18, 19>.
; INDEX immediates are signed 5-bit values (-16..15), so start 16 cannot be
; encoded; the expected code builds 0..3 with INDEX and then adds 16.
define <4 x i32> @v4i32_out_range_start() #0 {
; CHECK-LABEL: v4i32_out_range_start:
; CHECK: // %bb.0:
; CHECK-NEXT: index z0.s, #0, #1
; CHECK-NEXT: add z0.s, z0.s, #16 // =0x10
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
; CHECK-NEXT: ret
  ret <4 x i32> <i32 16, i32 17, i32 18, i32 19>
}

; Positive test, step out of immediate range: <0, 16, 32, 48>.
; Step 16 does not fit the INDEX immediate, so the expected code first
; materialises it in w8 and uses the immediate-start/register-step form.
define <4 x i32> @v4i32_out_range_step() #0 {
; CHECK-LABEL: v4i32_out_range_step:
; CHECK: // %bb.0:
; CHECK-NEXT: mov w8, #16 // =0x10
; CHECK-NEXT: index z0.s, #0, w8
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
; CHECK-NEXT: ret
  ret <4 x i32> <i32 0, i32 16, i32 32, i32 48>
}

; Positive test, start and step both out of immediate range:
; <16, 32, 48, 64> (start 16, step 16). The expected code combines the two
; previous cases: register step via w8, then a vector ADD for the start.
define <4 x i32> @v4i32_out_range_start_step() #0 {
; CHECK-LABEL: v4i32_out_range_start_step:
; CHECK: // %bb.0:
; CHECK-NEXT: mov w8, #16 // =0x10
; CHECK-NEXT: index z0.s, #0, w8
; CHECK-NEXT: add z0.s, z0.s, #16 // =0x10
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
; CHECK-NEXT: ret
  ret <4 x i32> <i32 16, i32 32, i32 48, i32 64>
}

; Negative test, non sequential: <0, 2, 2, 3> has no constant step, so INDEX
; must not be used. The expected code keeps the constant-pool load
; (adrp + ldr of .LCPI12_0).
define <4 x i32> @v4i32_non_sequential() #0 {
; CHECK-LABEL: v4i32_non_sequential:
; CHECK: // %bb.0:
; CHECK-NEXT: adrp x8, .LCPI12_0
; CHECK-NEXT: ldr q0, [x8, :lo12:.LCPI12_0]
; CHECK-NEXT: ret
  ret <4 x i32> <i32 0, i32 2, i32 2, i32 3>
}
|