Files
clang-p2996/llvm/test/CodeGen/AArch64/load-insert-undef.ll
David Green 02a1d311bd [AArch64] Extend and rewrite load zero and load undef patterns (#108185)
The ldr instructions implicitly zero any upper lanes, so we can use them
for insert(zerovec, load, 0) patterns. Likewise insert(undef, load, 0)
or scalar_to_reg can reuse the scalar loads as the top bits are undef.

This patch makes sure there are patterns for each type and for each of
the normal, unaligned, roW and roX addressing modes.
2024-09-19 14:52:52 +01:00

1079 lines
30 KiB
LLVM

; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=aarch64 -mattr=+fullfp16,+bf16,+sve | FileCheck %s
; Loads one i8 and inserts it into lane 0 of a poison <8 x i8>; the expected code is a single "ldr b0, [x0]".
define <8 x i8> @loadv8i8(ptr %p) {
; CHECK-LABEL: loadv8i8:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr b0, [x0]
; CHECK-NEXT: ret
  %elt = load i8, ptr %p
  %vec = insertelement <8 x i8> poison, i8 %elt, i32 0
  ret <8 x i8> %vec
}
; Loads one i8 and inserts it into lane 0 of a poison <16 x i8>; the expected code is a single "ldr b0, [x0]".
define <16 x i8> @loadv16i8(ptr %p) {
; CHECK-LABEL: loadv16i8:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr b0, [x0]
; CHECK-NEXT: ret
  %elt = load i8, ptr %p
  %vec = insertelement <16 x i8> poison, i8 %elt, i32 0
  ret <16 x i8> %vec
}
; Loads one i16 and inserts it into lane 0 of a poison <4 x i16>; the expected code is a single "ldr h0, [x0]".
define <4 x i16> @loadv4i16(ptr %p) {
; CHECK-LABEL: loadv4i16:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr h0, [x0]
; CHECK-NEXT: ret
  %elt = load i16, ptr %p
  %vec = insertelement <4 x i16> poison, i16 %elt, i32 0
  ret <4 x i16> %vec
}
; Loads one i16 and inserts it into lane 0 of a poison <8 x i16>; the expected code is a single "ldr h0, [x0]".
define <8 x i16> @loadv8i16(ptr %p) {
; CHECK-LABEL: loadv8i16:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr h0, [x0]
; CHECK-NEXT: ret
  %elt = load i16, ptr %p
  %vec = insertelement <8 x i16> poison, i16 %elt, i32 0
  ret <8 x i16> %vec
}
; Loads one i32 and inserts it into lane 0 of a poison <2 x i32>; the expected code is a single "ldr s0, [x0]".
define <2 x i32> @loadv2i32(ptr %p) {
; CHECK-LABEL: loadv2i32:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr s0, [x0]
; CHECK-NEXT: ret
  %elt = load i32, ptr %p
  %vec = insertelement <2 x i32> poison, i32 %elt, i32 0
  ret <2 x i32> %vec
}
; Loads one i32 and inserts it into lane 0 of a poison <4 x i32>; the expected code is a single "ldr s0, [x0]".
define <4 x i32> @loadv4i32(ptr %p) {
; CHECK-LABEL: loadv4i32:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr s0, [x0]
; CHECK-NEXT: ret
  %elt = load i32, ptr %p
  %vec = insertelement <4 x i32> poison, i32 %elt, i32 0
  ret <4 x i32> %vec
}
; Loads one i64 and inserts it into lane 0 of a poison <2 x i64>; the expected code is a single "ldr d0, [x0]".
define <2 x i64> @loadv2i64(ptr %p) {
; CHECK-LABEL: loadv2i64:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr d0, [x0]
; CHECK-NEXT: ret
  %elt = load i64, ptr %p
  %vec = insertelement <2 x i64> poison, i64 %elt, i32 0
  ret <2 x i64> %vec
}
; Loads one half and inserts it into lane 0 of a poison <4 x half>; the expected code is a single "ldr h0, [x0]".
define <4 x half> @loadv4f16(ptr %p) {
; CHECK-LABEL: loadv4f16:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr h0, [x0]
; CHECK-NEXT: ret
  %elt = load half, ptr %p
  %vec = insertelement <4 x half> poison, half %elt, i32 0
  ret <4 x half> %vec
}
; Loads one half and inserts it into lane 0 of a poison <8 x half>; the expected code is a single "ldr h0, [x0]".
define <8 x half> @loadv8f16(ptr %p) {
; CHECK-LABEL: loadv8f16:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr h0, [x0]
; CHECK-NEXT: ret
  %elt = load half, ptr %p
  %vec = insertelement <8 x half> poison, half %elt, i32 0
  ret <8 x half> %vec
}
; Loads one bfloat and inserts it into lane 0 of a poison <4 x bfloat>; the expected code is a single "ldr h0, [x0]".
define <4 x bfloat> @loadv4bf16(ptr %p) {
; CHECK-LABEL: loadv4bf16:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr h0, [x0]
; CHECK-NEXT: ret
  %elt = load bfloat, ptr %p
  %vec = insertelement <4 x bfloat> poison, bfloat %elt, i32 0
  ret <4 x bfloat> %vec
}
; Loads one bfloat and inserts it into lane 0 of a poison <8 x bfloat>; the expected code is a single "ldr h0, [x0]".
define <8 x bfloat> @loadv8bf16(ptr %p) {
; CHECK-LABEL: loadv8bf16:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr h0, [x0]
; CHECK-NEXT: ret
  %elt = load bfloat, ptr %p
  %vec = insertelement <8 x bfloat> poison, bfloat %elt, i32 0
  ret <8 x bfloat> %vec
}
; Loads one float and inserts it into lane 0 of a poison <2 x float>; the expected code is a single "ldr s0, [x0]".
define <2 x float> @loadv2f32(ptr %p) {
; CHECK-LABEL: loadv2f32:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr s0, [x0]
; CHECK-NEXT: ret
  %elt = load float, ptr %p
  %vec = insertelement <2 x float> poison, float %elt, i32 0
  ret <2 x float> %vec
}
; Loads one float and inserts it into lane 0 of a poison <4 x float>; the expected code is a single "ldr s0, [x0]".
define <4 x float> @loadv4f32(ptr %p) {
; CHECK-LABEL: loadv4f32:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr s0, [x0]
; CHECK-NEXT: ret
  %elt = load float, ptr %p
  %vec = insertelement <4 x float> poison, float %elt, i32 0
  ret <4 x float> %vec
}
; Loads one double and inserts it into lane 0 of a poison <2 x double>; the expected code is a single "ldr d0, [x0]".
define <2 x double> @loadv2f64(ptr %p) {
; CHECK-LABEL: loadv2f64:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr d0, [x0]
; CHECK-NEXT: ret
  %elt = load double, ptr %p
  %vec = insertelement <2 x double> poison, double %elt, i32 0
  ret <2 x double> %vec
}
; Unscaled
; Loads one i8 from byte offset +1 into lane 0 of a poison <8 x i8>; the expected code is "ldr b0, [x0, #1]".
define <8 x i8> @loadv8i8_offset(ptr %p) {
; CHECK-LABEL: loadv8i8_offset:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr b0, [x0, #1]
; CHECK-NEXT: ret
  %addr = getelementptr inbounds i8, ptr %p, i64 1
  %elt = load i8, ptr %addr
  %vec = insertelement <8 x i8> poison, i8 %elt, i32 0
  ret <8 x i8> %vec
}
; Loads one i8 from byte offset +1 into lane 0 of a poison <16 x i8>; the expected code is "ldr b0, [x0, #1]".
define <16 x i8> @loadv16i8_offset(ptr %p) {
; CHECK-LABEL: loadv16i8_offset:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr b0, [x0, #1]
; CHECK-NEXT: ret
  %addr = getelementptr inbounds i8, ptr %p, i64 1
  %elt = load i8, ptr %addr
  %vec = insertelement <16 x i8> poison, i8 %elt, i32 0
  ret <16 x i8> %vec
}
; Loads one i16 from byte offset +1 into lane 0 of a poison <4 x i16>; the expected code is the unscaled "ldur h0, [x0, #1]".
define <4 x i16> @loadv4i16_offset(ptr %p) {
; CHECK-LABEL: loadv4i16_offset:
; CHECK: // %bb.0:
; CHECK-NEXT: ldur h0, [x0, #1]
; CHECK-NEXT: ret
  %addr = getelementptr inbounds i8, ptr %p, i64 1
  %elt = load i16, ptr %addr
  %vec = insertelement <4 x i16> poison, i16 %elt, i32 0
  ret <4 x i16> %vec
}
; Loads one i16 from byte offset +1 into lane 0 of a poison <8 x i16>; the expected code is the unscaled "ldur h0, [x0, #1]".
define <8 x i16> @loadv8i16_offset(ptr %p) {
; CHECK-LABEL: loadv8i16_offset:
; CHECK: // %bb.0:
; CHECK-NEXT: ldur h0, [x0, #1]
; CHECK-NEXT: ret
  %addr = getelementptr inbounds i8, ptr %p, i64 1
  %elt = load i16, ptr %addr
  %vec = insertelement <8 x i16> poison, i16 %elt, i32 0
  ret <8 x i16> %vec
}
; Loads one i32 from byte offset +1 into lane 0 of a poison <2 x i32>; the expected code is the unscaled "ldur s0, [x0, #1]".
define <2 x i32> @loadv2i32_offset(ptr %p) {
; CHECK-LABEL: loadv2i32_offset:
; CHECK: // %bb.0:
; CHECK-NEXT: ldur s0, [x0, #1]
; CHECK-NEXT: ret
  %addr = getelementptr inbounds i8, ptr %p, i64 1
  %elt = load i32, ptr %addr
  %vec = insertelement <2 x i32> poison, i32 %elt, i32 0
  ret <2 x i32> %vec
}
; Loads one i32 from byte offset +1 into lane 0 of a poison <4 x i32>; the expected code is the unscaled "ldur s0, [x0, #1]".
define <4 x i32> @loadv4i32_offset(ptr %p) {
; CHECK-LABEL: loadv4i32_offset:
; CHECK: // %bb.0:
; CHECK-NEXT: ldur s0, [x0, #1]
; CHECK-NEXT: ret
  %addr = getelementptr inbounds i8, ptr %p, i64 1
  %elt = load i32, ptr %addr
  %vec = insertelement <4 x i32> poison, i32 %elt, i32 0
  ret <4 x i32> %vec
}
; Loads one i64 from byte offset +1 into lane 0 of a poison <2 x i64>; the expected code is the unscaled "ldur d0, [x0, #1]".
define <2 x i64> @loadv2i64_offset(ptr %p) {
; CHECK-LABEL: loadv2i64_offset:
; CHECK: // %bb.0:
; CHECK-NEXT: ldur d0, [x0, #1]
; CHECK-NEXT: ret
  %addr = getelementptr inbounds i8, ptr %p, i64 1
  %elt = load i64, ptr %addr
  %vec = insertelement <2 x i64> poison, i64 %elt, i32 0
  ret <2 x i64> %vec
}
; Loads one half from byte offset +1 into lane 0 of a poison <4 x half>; the expected code is the unscaled "ldur h0, [x0, #1]".
define <4 x half> @loadv4f16_offset(ptr %p) {
; CHECK-LABEL: loadv4f16_offset:
; CHECK: // %bb.0:
; CHECK-NEXT: ldur h0, [x0, #1]
; CHECK-NEXT: ret
  %addr = getelementptr inbounds i8, ptr %p, i64 1
  %elt = load half, ptr %addr
  %vec = insertelement <4 x half> poison, half %elt, i32 0
  ret <4 x half> %vec
}
; Loads one half from byte offset +1 into lane 0 of a poison <8 x half>; the expected code is the unscaled "ldur h0, [x0, #1]".
define <8 x half> @loadv8f16_offset(ptr %p) {
; CHECK-LABEL: loadv8f16_offset:
; CHECK: // %bb.0:
; CHECK-NEXT: ldur h0, [x0, #1]
; CHECK-NEXT: ret
  %addr = getelementptr inbounds i8, ptr %p, i64 1
  %elt = load half, ptr %addr
  %vec = insertelement <8 x half> poison, half %elt, i32 0
  ret <8 x half> %vec
}
; Loads one bfloat from byte offset +1 into lane 0 of a poison <4 x bfloat>; the expected code is the unscaled "ldur h0, [x0, #1]".
define <4 x bfloat> @loadv4bf16_offset(ptr %p) {
; CHECK-LABEL: loadv4bf16_offset:
; CHECK: // %bb.0:
; CHECK-NEXT: ldur h0, [x0, #1]
; CHECK-NEXT: ret
  %addr = getelementptr inbounds i8, ptr %p, i64 1
  %elt = load bfloat, ptr %addr
  %vec = insertelement <4 x bfloat> poison, bfloat %elt, i32 0
  ret <4 x bfloat> %vec
}
; Loads one bfloat from byte offset +1 into lane 0 of a poison <8 x bfloat>; the expected code is the unscaled "ldur h0, [x0, #1]".
define <8 x bfloat> @loadv8bf16_offset(ptr %p) {
; CHECK-LABEL: loadv8bf16_offset:
; CHECK: // %bb.0:
; CHECK-NEXT: ldur h0, [x0, #1]
; CHECK-NEXT: ret
  %addr = getelementptr inbounds i8, ptr %p, i64 1
  %elt = load bfloat, ptr %addr
  %vec = insertelement <8 x bfloat> poison, bfloat %elt, i32 0
  ret <8 x bfloat> %vec
}
; Loads one float from byte offset +1 into lane 0 of a poison <2 x float>; the expected code is the unscaled "ldur s0, [x0, #1]".
define <2 x float> @loadv2f32_offset(ptr %p) {
; CHECK-LABEL: loadv2f32_offset:
; CHECK: // %bb.0:
; CHECK-NEXT: ldur s0, [x0, #1]
; CHECK-NEXT: ret
  %addr = getelementptr inbounds i8, ptr %p, i64 1
  %elt = load float, ptr %addr
  %vec = insertelement <2 x float> poison, float %elt, i32 0
  ret <2 x float> %vec
}
; Loads one float from byte offset +1 into lane 0 of a poison <4 x float>; the expected code is the unscaled "ldur s0, [x0, #1]".
define <4 x float> @loadv4f32_offset(ptr %p) {
; CHECK-LABEL: loadv4f32_offset:
; CHECK: // %bb.0:
; CHECK-NEXT: ldur s0, [x0, #1]
; CHECK-NEXT: ret
  %addr = getelementptr inbounds i8, ptr %p, i64 1
  %elt = load float, ptr %addr
  %vec = insertelement <4 x float> poison, float %elt, i32 0
  ret <4 x float> %vec
}
; Loads one double from byte offset +1 into lane 0 of a poison <2 x double>; the expected code is the unscaled "ldur d0, [x0, #1]".
define <2 x double> @loadv2f64_offset(ptr %p) {
; CHECK-LABEL: loadv2f64_offset:
; CHECK: // %bb.0:
; CHECK-NEXT: ldur d0, [x0, #1]
; CHECK-NEXT: ret
  %addr = getelementptr inbounds i8, ptr %p, i64 1
  %elt = load double, ptr %addr
  %vec = insertelement <2 x double> poison, double %elt, i32 0
  ret <2 x double> %vec
}
; Loads one i8 from byte offset -1 into lane 0 of a poison <8 x i8>; the expected code is "ldur b0, [x0, #-1]".
define <8 x i8> @loadv8i8_noffset(ptr %p) {
; CHECK-LABEL: loadv8i8_noffset:
; CHECK: // %bb.0:
; CHECK-NEXT: ldur b0, [x0, #-1]
; CHECK-NEXT: ret
  %addr = getelementptr inbounds i8, ptr %p, i64 -1
  %elt = load i8, ptr %addr
  %vec = insertelement <8 x i8> poison, i8 %elt, i32 0
  ret <8 x i8> %vec
}
; Loads one i8 from byte offset -1 into lane 0 of a poison <16 x i8>; the expected code is "ldur b0, [x0, #-1]".
define <16 x i8> @loadv16i8_noffset(ptr %p) {
; CHECK-LABEL: loadv16i8_noffset:
; CHECK: // %bb.0:
; CHECK-NEXT: ldur b0, [x0, #-1]
; CHECK-NEXT: ret
  %addr = getelementptr inbounds i8, ptr %p, i64 -1
  %elt = load i8, ptr %addr
  %vec = insertelement <16 x i8> poison, i8 %elt, i32 0
  ret <16 x i8> %vec
}
; Loads one i16 from byte offset -1 into lane 0 of a poison <4 x i16>; the expected code is "ldur h0, [x0, #-1]".
define <4 x i16> @loadv4i16_noffset(ptr %p) {
; CHECK-LABEL: loadv4i16_noffset:
; CHECK: // %bb.0:
; CHECK-NEXT: ldur h0, [x0, #-1]
; CHECK-NEXT: ret
  %addr = getelementptr inbounds i8, ptr %p, i64 -1
  %elt = load i16, ptr %addr
  %vec = insertelement <4 x i16> poison, i16 %elt, i32 0
  ret <4 x i16> %vec
}
; Loads one i16 from byte offset -1 into lane 0 of a poison <8 x i16>; the expected code is "ldur h0, [x0, #-1]".
define <8 x i16> @loadv8i16_noffset(ptr %p) {
; CHECK-LABEL: loadv8i16_noffset:
; CHECK: // %bb.0:
; CHECK-NEXT: ldur h0, [x0, #-1]
; CHECK-NEXT: ret
  %addr = getelementptr inbounds i8, ptr %p, i64 -1
  %elt = load i16, ptr %addr
  %vec = insertelement <8 x i16> poison, i16 %elt, i32 0
  ret <8 x i16> %vec
}
; Loads one i32 from byte offset -1 into lane 0 of a poison <2 x i32>; the expected code is "ldur s0, [x0, #-1]".
define <2 x i32> @loadv2i32_noffset(ptr %p) {
; CHECK-LABEL: loadv2i32_noffset:
; CHECK: // %bb.0:
; CHECK-NEXT: ldur s0, [x0, #-1]
; CHECK-NEXT: ret
  %addr = getelementptr inbounds i8, ptr %p, i64 -1
  %elt = load i32, ptr %addr
  %vec = insertelement <2 x i32> poison, i32 %elt, i32 0
  ret <2 x i32> %vec
}
; Loads one i32 from byte offset -1 into lane 0 of a poison <4 x i32>; the expected code is "ldur s0, [x0, #-1]".
define <4 x i32> @loadv4i32_noffset(ptr %p) {
; CHECK-LABEL: loadv4i32_noffset:
; CHECK: // %bb.0:
; CHECK-NEXT: ldur s0, [x0, #-1]
; CHECK-NEXT: ret
  %addr = getelementptr inbounds i8, ptr %p, i64 -1
  %elt = load i32, ptr %addr
  %vec = insertelement <4 x i32> poison, i32 %elt, i32 0
  ret <4 x i32> %vec
}
; Loads one i64 from byte offset -1 into lane 0 of a poison <2 x i64>; the expected code is "ldur d0, [x0, #-1]".
define <2 x i64> @loadv2i64_noffset(ptr %p) {
; CHECK-LABEL: loadv2i64_noffset:
; CHECK: // %bb.0:
; CHECK-NEXT: ldur d0, [x0, #-1]
; CHECK-NEXT: ret
  %addr = getelementptr inbounds i8, ptr %p, i64 -1
  %elt = load i64, ptr %addr
  %vec = insertelement <2 x i64> poison, i64 %elt, i32 0
  ret <2 x i64> %vec
}
; Loads one half from byte offset -1 into lane 0 of a poison <4 x half>; the expected code is "ldur h0, [x0, #-1]".
define <4 x half> @loadv4f16_noffset(ptr %p) {
; CHECK-LABEL: loadv4f16_noffset:
; CHECK: // %bb.0:
; CHECK-NEXT: ldur h0, [x0, #-1]
; CHECK-NEXT: ret
  %addr = getelementptr inbounds i8, ptr %p, i64 -1
  %elt = load half, ptr %addr
  %vec = insertelement <4 x half> poison, half %elt, i32 0
  ret <4 x half> %vec
}
; Loads one half from byte offset -1 into lane 0 of a poison <8 x half>; the expected code is "ldur h0, [x0, #-1]".
define <8 x half> @loadv8f16_noffset(ptr %p) {
; CHECK-LABEL: loadv8f16_noffset:
; CHECK: // %bb.0:
; CHECK-NEXT: ldur h0, [x0, #-1]
; CHECK-NEXT: ret
  %addr = getelementptr inbounds i8, ptr %p, i64 -1
  %elt = load half, ptr %addr
  %vec = insertelement <8 x half> poison, half %elt, i32 0
  ret <8 x half> %vec
}
; Loads one bfloat from byte offset -1 into lane 0 of a poison <4 x bfloat>; the expected code is "ldur h0, [x0, #-1]".
define <4 x bfloat> @loadv4bf16_noffset(ptr %p) {
; CHECK-LABEL: loadv4bf16_noffset:
; CHECK: // %bb.0:
; CHECK-NEXT: ldur h0, [x0, #-1]
; CHECK-NEXT: ret
  %addr = getelementptr inbounds i8, ptr %p, i64 -1
  %elt = load bfloat, ptr %addr
  %vec = insertelement <4 x bfloat> poison, bfloat %elt, i32 0
  ret <4 x bfloat> %vec
}
; Loads one bfloat from byte offset -1 into lane 0 of a poison <8 x bfloat>; the expected code is "ldur h0, [x0, #-1]".
define <8 x bfloat> @loadv8bf16_noffset(ptr %p) {
; CHECK-LABEL: loadv8bf16_noffset:
; CHECK: // %bb.0:
; CHECK-NEXT: ldur h0, [x0, #-1]
; CHECK-NEXT: ret
  %addr = getelementptr inbounds i8, ptr %p, i64 -1
  %elt = load bfloat, ptr %addr
  %vec = insertelement <8 x bfloat> poison, bfloat %elt, i32 0
  ret <8 x bfloat> %vec
}
; Loads one float from byte offset -1 into lane 0 of a poison <2 x float>; the expected code is "ldur s0, [x0, #-1]".
define <2 x float> @loadv2f32_noffset(ptr %p) {
; CHECK-LABEL: loadv2f32_noffset:
; CHECK: // %bb.0:
; CHECK-NEXT: ldur s0, [x0, #-1]
; CHECK-NEXT: ret
  %addr = getelementptr inbounds i8, ptr %p, i64 -1
  %elt = load float, ptr %addr
  %vec = insertelement <2 x float> poison, float %elt, i32 0
  ret <2 x float> %vec
}
; Loads one float from byte offset -1 into lane 0 of a poison <4 x float>; the expected code is "ldur s0, [x0, #-1]".
define <4 x float> @loadv4f32_noffset(ptr %p) {
; CHECK-LABEL: loadv4f32_noffset:
; CHECK: // %bb.0:
; CHECK-NEXT: ldur s0, [x0, #-1]
; CHECK-NEXT: ret
  %addr = getelementptr inbounds i8, ptr %p, i64 -1
  %elt = load float, ptr %addr
  %vec = insertelement <4 x float> poison, float %elt, i32 0
  ret <4 x float> %vec
}
; Loads one double from byte offset -1 into lane 0 of a poison <2 x double>; the expected code is "ldur d0, [x0, #-1]".
define <2 x double> @loadv2f64_noffset(ptr %p) {
; CHECK-LABEL: loadv2f64_noffset:
; CHECK: // %bb.0:
; CHECK-NEXT: ldur d0, [x0, #-1]
; CHECK-NEXT: ret
  %addr = getelementptr inbounds i8, ptr %p, i64 -1
  %elt = load double, ptr %addr
  %vec = insertelement <2 x double> poison, double %elt, i32 0
  ret <2 x double> %vec
}
; roW addressing modes
; Loads one i8 indexed by a 32-bit %o into lane 0 of a poison <8 x i8>; the expected code is "ldr b0, [x0, w1, sxtw]".
define <8 x i8> @loadv8i8_roW(ptr %p, i32 %o) {
; CHECK-LABEL: loadv8i8_roW:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr b0, [x0, w1, sxtw]
; CHECK-NEXT: ret
  %addr = getelementptr inbounds i8, ptr %p, i32 %o
  %elt = load i8, ptr %addr
  %vec = insertelement <8 x i8> poison, i8 %elt, i32 0
  ret <8 x i8> %vec
}
; Loads one i8 indexed by a 32-bit %o into lane 0 of a poison <16 x i8>; the expected code is "ldr b0, [x0, w1, sxtw]".
define <16 x i8> @loadv16i8_roW(ptr %p, i32 %o) {
; CHECK-LABEL: loadv16i8_roW:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr b0, [x0, w1, sxtw]
; CHECK-NEXT: ret
  %addr = getelementptr inbounds i8, ptr %p, i32 %o
  %elt = load i8, ptr %addr
  %vec = insertelement <16 x i8> poison, i8 %elt, i32 0
  ret <16 x i8> %vec
}
; Loads one i16 indexed by a 32-bit %o into lane 0 of a poison <4 x i16>; the expected code is "ldr h0, [x0, w1, sxtw #1]".
define <4 x i16> @loadv4i16_roW(ptr %p, i32 %o) {
; CHECK-LABEL: loadv4i16_roW:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr h0, [x0, w1, sxtw #1]
; CHECK-NEXT: ret
  %addr = getelementptr inbounds i16, ptr %p, i32 %o
  %elt = load i16, ptr %addr
  %vec = insertelement <4 x i16> poison, i16 %elt, i32 0
  ret <4 x i16> %vec
}
; Loads one i16 indexed by a 32-bit %o into lane 0 of a poison <8 x i16>; the expected code is "ldr h0, [x0, w1, sxtw #1]".
define <8 x i16> @loadv8i16_roW(ptr %p, i32 %o) {
; CHECK-LABEL: loadv8i16_roW:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr h0, [x0, w1, sxtw #1]
; CHECK-NEXT: ret
  %addr = getelementptr inbounds i16, ptr %p, i32 %o
  %elt = load i16, ptr %addr
  %vec = insertelement <8 x i16> poison, i16 %elt, i32 0
  ret <8 x i16> %vec
}
; Loads one i32 indexed by a 32-bit %o into lane 0 of a poison <2 x i32>; the expected code is "ldr s0, [x0, w1, sxtw #2]".
define <2 x i32> @loadv2i32_roW(ptr %p, i32 %o) {
; CHECK-LABEL: loadv2i32_roW:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr s0, [x0, w1, sxtw #2]
; CHECK-NEXT: ret
  %addr = getelementptr inbounds i32, ptr %p, i32 %o
  %elt = load i32, ptr %addr
  %vec = insertelement <2 x i32> poison, i32 %elt, i32 0
  ret <2 x i32> %vec
}
; Loads one i32 indexed by a 32-bit %o into lane 0 of a poison <4 x i32>; the expected code is "ldr s0, [x0, w1, sxtw #2]".
define <4 x i32> @loadv4i32_roW(ptr %p, i32 %o) {
; CHECK-LABEL: loadv4i32_roW:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr s0, [x0, w1, sxtw #2]
; CHECK-NEXT: ret
  %addr = getelementptr inbounds i32, ptr %p, i32 %o
  %elt = load i32, ptr %addr
  %vec = insertelement <4 x i32> poison, i32 %elt, i32 0
  ret <4 x i32> %vec
}
; Loads one i64 indexed by a 32-bit %o into lane 0 of a poison <2 x i64>; the expected code is "ldr d0, [x0, w1, sxtw #3]".
define <2 x i64> @loadv2i64_roW(ptr %p, i32 %o) {
; CHECK-LABEL: loadv2i64_roW:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr d0, [x0, w1, sxtw #3]
; CHECK-NEXT: ret
  %addr = getelementptr inbounds i64, ptr %p, i32 %o
  %elt = load i64, ptr %addr
  %vec = insertelement <2 x i64> poison, i64 %elt, i32 0
  ret <2 x i64> %vec
}
; Loads one half indexed by a 32-bit %o into lane 0 of a poison <4 x half>; the expected code is "ldr h0, [x0, w1, sxtw #1]".
define <4 x half> @loadv4f16_roW(ptr %p, i32 %o) {
; CHECK-LABEL: loadv4f16_roW:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr h0, [x0, w1, sxtw #1]
; CHECK-NEXT: ret
  %addr = getelementptr inbounds half, ptr %p, i32 %o
  %elt = load half, ptr %addr
  %vec = insertelement <4 x half> poison, half %elt, i32 0
  ret <4 x half> %vec
}
; Loads one half indexed by a 32-bit %o into lane 0 of a poison <8 x half>; the expected code is "ldr h0, [x0, w1, sxtw #1]".
define <8 x half> @loadv8f16_roW(ptr %p, i32 %o) {
; CHECK-LABEL: loadv8f16_roW:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr h0, [x0, w1, sxtw #1]
; CHECK-NEXT: ret
  %addr = getelementptr inbounds half, ptr %p, i32 %o
  %elt = load half, ptr %addr
  %vec = insertelement <8 x half> poison, half %elt, i32 0
  ret <8 x half> %vec
}
; Loads one bfloat indexed by a 32-bit %o into lane 0 of a poison <4 x bfloat>; the expected code is "ldr h0, [x0, w1, sxtw #1]".
define <4 x bfloat> @loadv4bf16_roW(ptr %p, i32 %o) {
; CHECK-LABEL: loadv4bf16_roW:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr h0, [x0, w1, sxtw #1]
; CHECK-NEXT: ret
  %addr = getelementptr inbounds bfloat, ptr %p, i32 %o
  %elt = load bfloat, ptr %addr
  %vec = insertelement <4 x bfloat> poison, bfloat %elt, i32 0
  ret <4 x bfloat> %vec
}
; Loads one bfloat indexed by a 32-bit %o into lane 0 of a poison <8 x bfloat>; the expected code is "ldr h0, [x0, w1, sxtw #1]".
define <8 x bfloat> @loadv8bf16_roW(ptr %p, i32 %o) {
; CHECK-LABEL: loadv8bf16_roW:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr h0, [x0, w1, sxtw #1]
; CHECK-NEXT: ret
  %addr = getelementptr inbounds bfloat, ptr %p, i32 %o
  %elt = load bfloat, ptr %addr
  %vec = insertelement <8 x bfloat> poison, bfloat %elt, i32 0
  ret <8 x bfloat> %vec
}
; Loads one float indexed by a 32-bit %o into lane 0 of a poison <2 x float>; the expected code is "ldr s0, [x0, w1, sxtw #2]".
define <2 x float> @loadv2f32_roW(ptr %p, i32 %o) {
; CHECK-LABEL: loadv2f32_roW:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr s0, [x0, w1, sxtw #2]
; CHECK-NEXT: ret
  %addr = getelementptr inbounds float, ptr %p, i32 %o
  %elt = load float, ptr %addr
  %vec = insertelement <2 x float> poison, float %elt, i32 0
  ret <2 x float> %vec
}
; Loads one float indexed by a 32-bit %o into lane 0 of a poison <4 x float>; the expected code is "ldr s0, [x0, w1, sxtw #2]".
define <4 x float> @loadv4f32_roW(ptr %p, i32 %o) {
; CHECK-LABEL: loadv4f32_roW:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr s0, [x0, w1, sxtw #2]
; CHECK-NEXT: ret
  %addr = getelementptr inbounds float, ptr %p, i32 %o
  %elt = load float, ptr %addr
  %vec = insertelement <4 x float> poison, float %elt, i32 0
  ret <4 x float> %vec
}
; Loads one double indexed by a 32-bit %o into lane 0 of a poison <2 x double>; the expected code is "ldr d0, [x0, w1, sxtw #3]".
define <2 x double> @loadv2f64_roW(ptr %p, i32 %o) {
; CHECK-LABEL: loadv2f64_roW:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr d0, [x0, w1, sxtw #3]
; CHECK-NEXT: ret
  %addr = getelementptr inbounds double, ptr %p, i32 %o
  %elt = load double, ptr %addr
  %vec = insertelement <2 x double> poison, double %elt, i32 0
  ret <2 x double> %vec
}
; roX addressing modes
; Loads one i8 indexed by a 64-bit %o into lane 0 of a poison <8 x i8>; the expected code is "ldr b0, [x0, x1]".
define <8 x i8> @loadv8i8_roX(ptr %p, i64 %o) {
; CHECK-LABEL: loadv8i8_roX:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr b0, [x0, x1]
; CHECK-NEXT: ret
  %addr = getelementptr inbounds i8, ptr %p, i64 %o
  %elt = load i8, ptr %addr
  %vec = insertelement <8 x i8> poison, i8 %elt, i32 0
  ret <8 x i8> %vec
}
; Loads one i8 indexed by a 64-bit %o into lane 0 of a poison <16 x i8>; the expected code is "ldr b0, [x0, x1]".
define <16 x i8> @loadv16i8_roX(ptr %p, i64 %o) {
; CHECK-LABEL: loadv16i8_roX:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr b0, [x0, x1]
; CHECK-NEXT: ret
  %addr = getelementptr inbounds i8, ptr %p, i64 %o
  %elt = load i8, ptr %addr
  %vec = insertelement <16 x i8> poison, i8 %elt, i32 0
  ret <16 x i8> %vec
}
; Loads one i16 indexed by a 64-bit %o into lane 0 of a poison <4 x i16>; the expected code is "ldr h0, [x0, x1, lsl #1]".
define <4 x i16> @loadv4i16_roX(ptr %p, i64 %o) {
; CHECK-LABEL: loadv4i16_roX:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr h0, [x0, x1, lsl #1]
; CHECK-NEXT: ret
  %addr = getelementptr inbounds i16, ptr %p, i64 %o
  %elt = load i16, ptr %addr
  %vec = insertelement <4 x i16> poison, i16 %elt, i32 0
  ret <4 x i16> %vec
}
; Loads one i16 indexed by a 64-bit %o into lane 0 of a poison <8 x i16>; the expected code is "ldr h0, [x0, x1, lsl #1]".
define <8 x i16> @loadv8i16_roX(ptr %p, i64 %o) {
; CHECK-LABEL: loadv8i16_roX:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr h0, [x0, x1, lsl #1]
; CHECK-NEXT: ret
  %addr = getelementptr inbounds i16, ptr %p, i64 %o
  %elt = load i16, ptr %addr
  %vec = insertelement <8 x i16> poison, i16 %elt, i32 0
  ret <8 x i16> %vec
}
; Loads one i32 indexed by a 64-bit %o into lane 0 of a poison <2 x i32>; the expected code is "ldr s0, [x0, x1, lsl #2]".
define <2 x i32> @loadv2i32_roX(ptr %p, i64 %o) {
; CHECK-LABEL: loadv2i32_roX:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr s0, [x0, x1, lsl #2]
; CHECK-NEXT: ret
  %addr = getelementptr inbounds i32, ptr %p, i64 %o
  %elt = load i32, ptr %addr
  %vec = insertelement <2 x i32> poison, i32 %elt, i32 0
  ret <2 x i32> %vec
}
; Loads one i32 indexed by a 64-bit %o into lane 0 of a poison <4 x i32>; the expected code is "ldr s0, [x0, x1, lsl #2]".
define <4 x i32> @loadv4i32_roX(ptr %p, i64 %o) {
; CHECK-LABEL: loadv4i32_roX:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr s0, [x0, x1, lsl #2]
; CHECK-NEXT: ret
  %addr = getelementptr inbounds i32, ptr %p, i64 %o
  %elt = load i32, ptr %addr
  %vec = insertelement <4 x i32> poison, i32 %elt, i32 0
  ret <4 x i32> %vec
}
; Loads one i64 indexed by a 64-bit %o into lane 0 of a poison <2 x i64>; the expected code is "ldr d0, [x0, x1, lsl #3]".
define <2 x i64> @loadv2i64_roX(ptr %p, i64 %o) {
; CHECK-LABEL: loadv2i64_roX:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr d0, [x0, x1, lsl #3]
; CHECK-NEXT: ret
  %addr = getelementptr inbounds i64, ptr %p, i64 %o
  %elt = load i64, ptr %addr
  %vec = insertelement <2 x i64> poison, i64 %elt, i32 0
  ret <2 x i64> %vec
}
; Loads one half indexed by a 64-bit %o into lane 0 of a poison <4 x half>; the expected code is "ldr h0, [x0, x1, lsl #1]".
define <4 x half> @loadv4f16_roX(ptr %p, i64 %o) {
; CHECK-LABEL: loadv4f16_roX:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr h0, [x0, x1, lsl #1]
; CHECK-NEXT: ret
  %addr = getelementptr inbounds half, ptr %p, i64 %o
  %elt = load half, ptr %addr
  %vec = insertelement <4 x half> poison, half %elt, i32 0
  ret <4 x half> %vec
}
; Loads one half indexed by a 64-bit %o into lane 0 of a poison <8 x half>; the expected code is "ldr h0, [x0, x1, lsl #1]".
define <8 x half> @loadv8f16_roX(ptr %p, i64 %o) {
; CHECK-LABEL: loadv8f16_roX:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr h0, [x0, x1, lsl #1]
; CHECK-NEXT: ret
  %addr = getelementptr inbounds half, ptr %p, i64 %o
  %elt = load half, ptr %addr
  %vec = insertelement <8 x half> poison, half %elt, i32 0
  ret <8 x half> %vec
}
; Loads one bfloat indexed by a 64-bit %o into lane 0 of a poison <4 x bfloat>; the expected code is "ldr h0, [x0, x1, lsl #1]".
define <4 x bfloat> @loadv4bf16_roX(ptr %p, i64 %o) {
; CHECK-LABEL: loadv4bf16_roX:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr h0, [x0, x1, lsl #1]
; CHECK-NEXT: ret
  %addr = getelementptr inbounds bfloat, ptr %p, i64 %o
  %elt = load bfloat, ptr %addr
  %vec = insertelement <4 x bfloat> poison, bfloat %elt, i32 0
  ret <4 x bfloat> %vec
}
; Loads one bfloat indexed by a 64-bit %o into lane 0 of a poison <8 x bfloat>; the expected code is "ldr h0, [x0, x1, lsl #1]".
define <8 x bfloat> @loadv8bf16_roX(ptr %p, i64 %o) {
; CHECK-LABEL: loadv8bf16_roX:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr h0, [x0, x1, lsl #1]
; CHECK-NEXT: ret
  %addr = getelementptr inbounds bfloat, ptr %p, i64 %o
  %elt = load bfloat, ptr %addr
  %vec = insertelement <8 x bfloat> poison, bfloat %elt, i32 0
  ret <8 x bfloat> %vec
}
; Loads one float indexed by a 64-bit %o into lane 0 of a poison <2 x float>; the expected code is "ldr s0, [x0, x1, lsl #2]".
define <2 x float> @loadv2f32_roX(ptr %p, i64 %o) {
; CHECK-LABEL: loadv2f32_roX:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr s0, [x0, x1, lsl #2]
; CHECK-NEXT: ret
  %addr = getelementptr inbounds float, ptr %p, i64 %o
  %elt = load float, ptr %addr
  %vec = insertelement <2 x float> poison, float %elt, i32 0
  ret <2 x float> %vec
}
; Loads one float indexed by a 64-bit %o into lane 0 of a poison <4 x float>; the expected code is "ldr s0, [x0, x1, lsl #2]".
define <4 x float> @loadv4f32_roX(ptr %p, i64 %o) {
; CHECK-LABEL: loadv4f32_roX:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr s0, [x0, x1, lsl #2]
; CHECK-NEXT: ret
  %addr = getelementptr inbounds float, ptr %p, i64 %o
  %elt = load float, ptr %addr
  %vec = insertelement <4 x float> poison, float %elt, i32 0
  ret <4 x float> %vec
}
; Loads one double indexed by a 64-bit %o into lane 0 of a poison <2 x double>; the expected code is "ldr d0, [x0, x1, lsl #3]".
define <2 x double> @loadv2f64_roX(ptr %p, i64 %o) {
; CHECK-LABEL: loadv2f64_roX:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr d0, [x0, x1, lsl #3]
; CHECK-NEXT: ret
  %addr = getelementptr inbounds double, ptr %p, i64 %o
  %elt = load double, ptr %addr
  %vec = insertelement <2 x double> poison, double %elt, i32 0
  ret <2 x double> %vec
}
; SVE
; Scalable case: loads one i8 into lane 0 of a poison <vscale x 8 x i8>; the checks expect "ldrb w8, [x0]" followed by "fmov s0, w8".
define <vscale x 8 x i8> @loadnxv8i8(ptr %p) {
; CHECK-LABEL: loadnxv8i8:
; CHECK: // %bb.0:
; CHECK-NEXT: ldrb w8, [x0]
; CHECK-NEXT: fmov s0, w8
; CHECK-NEXT: ret
  %elt = load i8, ptr %p
  %vec = insertelement <vscale x 8 x i8> poison, i8 %elt, i32 0
  ret <vscale x 8 x i8> %vec
}
; Scalable case: loads one i8 into lane 0 of a poison <vscale x 16 x i8>; the expected code is a single "ldr b0, [x0]".
define <vscale x 16 x i8> @loadnxv16i8(ptr %p) {
; CHECK-LABEL: loadnxv16i8:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr b0, [x0]
; CHECK-NEXT: ret
  %elt = load i8, ptr %p
  %vec = insertelement <vscale x 16 x i8> poison, i8 %elt, i32 0
  ret <vscale x 16 x i8> %vec
}
; Scalable case: loads one i16 into lane 0 of a poison <vscale x 4 x i16>; the checks expect "ldrh w8, [x0]" followed by "fmov s0, w8".
define <vscale x 4 x i16> @loadnxv4i16(ptr %p) {
; CHECK-LABEL: loadnxv4i16:
; CHECK: // %bb.0:
; CHECK-NEXT: ldrh w8, [x0]
; CHECK-NEXT: fmov s0, w8
; CHECK-NEXT: ret
  %elt = load i16, ptr %p
  %vec = insertelement <vscale x 4 x i16> poison, i16 %elt, i32 0
  ret <vscale x 4 x i16> %vec
}
; Scalable case: loads one i16 into lane 0 of a poison <vscale x 8 x i16>; the expected code is a single "ldr h0, [x0]".
define <vscale x 8 x i16> @loadnxv8i16(ptr %p) {
; CHECK-LABEL: loadnxv8i16:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr h0, [x0]
; CHECK-NEXT: ret
  %elt = load i16, ptr %p
  %vec = insertelement <vscale x 8 x i16> poison, i16 %elt, i32 0
  ret <vscale x 8 x i16> %vec
}
; Scalable case: loads one i32 into lane 0 of a poison <vscale x 2 x i32>; the checks expect "ldr w8, [x0]" followed by "fmov d0, x8".
define <vscale x 2 x i32> @loadnxv2i32(ptr %p) {
; CHECK-LABEL: loadnxv2i32:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr w8, [x0]
; CHECK-NEXT: fmov d0, x8
; CHECK-NEXT: ret
  %elt = load i32, ptr %p
  %vec = insertelement <vscale x 2 x i32> poison, i32 %elt, i32 0
  ret <vscale x 2 x i32> %vec
}
; Scalable case: loads one i32 into lane 0 of a poison <vscale x 4 x i32>; the expected code is a single "ldr s0, [x0]".
define <vscale x 4 x i32> @loadnxv4i32(ptr %p) {
; CHECK-LABEL: loadnxv4i32:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr s0, [x0]
; CHECK-NEXT: ret
  %elt = load i32, ptr %p
  %vec = insertelement <vscale x 4 x i32> poison, i32 %elt, i32 0
  ret <vscale x 4 x i32> %vec
}
; Scalable case: loads one i64 into lane 0 of a poison <vscale x 2 x i64>; the expected code is a single "ldr d0, [x0]".
define <vscale x 2 x i64> @loadnxv2i64(ptr %p) {
; CHECK-LABEL: loadnxv2i64:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr d0, [x0]
; CHECK-NEXT: ret
  %elt = load i64, ptr %p
  %vec = insertelement <vscale x 2 x i64> poison, i64 %elt, i32 0
  ret <vscale x 2 x i64> %vec
}
; Scalable case: loads one half into lane 0 of a poison <vscale x 4 x half>; the expected code is a single "ldr h0, [x0]".
define <vscale x 4 x half> @loadnxv4f16(ptr %p) {
; CHECK-LABEL: loadnxv4f16:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr h0, [x0]
; CHECK-NEXT: ret
  %elt = load half, ptr %p
  %vec = insertelement <vscale x 4 x half> poison, half %elt, i32 0
  ret <vscale x 4 x half> %vec
}
; Scalable case: loads one half into lane 0 of a poison <vscale x 8 x half>; the expected code is a single "ldr h0, [x0]".
define <vscale x 8 x half> @loadnxv8f16(ptr %p) {
; CHECK-LABEL: loadnxv8f16:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr h0, [x0]
; CHECK-NEXT: ret
  %elt = load half, ptr %p
  %vec = insertelement <vscale x 8 x half> poison, half %elt, i32 0
  ret <vscale x 8 x half> %vec
}
; Scalable case: loads one bfloat into lane 0 of a poison <vscale x 4 x bfloat>; the expected code is a single "ldr h0, [x0]".
define <vscale x 4 x bfloat> @loadnxv4bf16(ptr %p) {
; CHECK-LABEL: loadnxv4bf16:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr h0, [x0]
; CHECK-NEXT: ret
  %elt = load bfloat, ptr %p
  %vec = insertelement <vscale x 4 x bfloat> poison, bfloat %elt, i32 0
  ret <vscale x 4 x bfloat> %vec
}
; Scalable case: loads one bfloat into lane 0 of a poison <vscale x 8 x bfloat>; the expected code is a single "ldr h0, [x0]".
define <vscale x 8 x bfloat> @loadnxv8bf16(ptr %p) {
; CHECK-LABEL: loadnxv8bf16:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr h0, [x0]
; CHECK-NEXT: ret
  %elt = load bfloat, ptr %p
  %vec = insertelement <vscale x 8 x bfloat> poison, bfloat %elt, i32 0
  ret <vscale x 8 x bfloat> %vec
}
; Scalable case: loads one float into lane 0 of a poison <vscale x 2 x float>; the expected code is a single "ldr s0, [x0]".
define <vscale x 2 x float> @loadnxv2f32(ptr %p) {
; CHECK-LABEL: loadnxv2f32:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr s0, [x0]
; CHECK-NEXT: ret
  %elt = load float, ptr %p
  %vec = insertelement <vscale x 2 x float> poison, float %elt, i32 0
  ret <vscale x 2 x float> %vec
}
; Scalable case: loads one float into lane 0 of a poison <vscale x 4 x float>; the expected code is a single "ldr s0, [x0]".
define <vscale x 4 x float> @loadnxv4f32(ptr %p) {
; CHECK-LABEL: loadnxv4f32:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr s0, [x0]
; CHECK-NEXT: ret
  %elt = load float, ptr %p
  %vec = insertelement <vscale x 4 x float> poison, float %elt, i32 0
  ret <vscale x 4 x float> %vec
}
; Scalable case: loads one double into lane 0 of a poison <vscale x 2 x double>; the expected code is a single "ldr d0, [x0]".
define <vscale x 2 x double> @loadnxv2f64(ptr %p) {
; CHECK-LABEL: loadnxv2f64:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr d0, [x0]
; CHECK-NEXT: ret
  %elt = load double, ptr %p
  %vec = insertelement <vscale x 2 x double> poison, double %elt, i32 0
  ret <vscale x 2 x double> %vec
}
; Unscaled
; Scalable case: loads one i8 from byte offset +1 into lane 0 of a poison <vscale x 8 x i8>; the checks expect "ldrb w8, [x0, #1]" followed by "fmov s0, w8".
define <vscale x 8 x i8> @loadnxv8i8_offset(ptr %p) {
; CHECK-LABEL: loadnxv8i8_offset:
; CHECK: // %bb.0:
; CHECK-NEXT: ldrb w8, [x0, #1]
; CHECK-NEXT: fmov s0, w8
; CHECK-NEXT: ret
  %addr = getelementptr inbounds i8, ptr %p, i64 1
  %elt = load i8, ptr %addr
  %vec = insertelement <vscale x 8 x i8> poison, i8 %elt, i32 0
  ret <vscale x 8 x i8> %vec
}
; Scalable case: loads one i8 from byte offset +1 into lane 0 of a poison <vscale x 16 x i8>; the expected code is "ldr b0, [x0, #1]".
define <vscale x 16 x i8> @loadnxv16i8_offset(ptr %p) {
; CHECK-LABEL: loadnxv16i8_offset:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr b0, [x0, #1]
; CHECK-NEXT: ret
  %addr = getelementptr inbounds i8, ptr %p, i64 1
  %elt = load i8, ptr %addr
  %vec = insertelement <vscale x 16 x i8> poison, i8 %elt, i32 0
  ret <vscale x 16 x i8> %vec
}
; Scalable case: loads one i16 from byte offset +1 into lane 0 of a poison <vscale x 4 x i16>; the checks expect "ldurh w8, [x0, #1]" followed by "fmov s0, w8".
define <vscale x 4 x i16> @loadnxv4i16_offset(ptr %p) {
; CHECK-LABEL: loadnxv4i16_offset:
; CHECK: // %bb.0:
; CHECK-NEXT: ldurh w8, [x0, #1]
; CHECK-NEXT: fmov s0, w8
; CHECK-NEXT: ret
  %addr = getelementptr inbounds i8, ptr %p, i64 1
  %elt = load i16, ptr %addr
  %vec = insertelement <vscale x 4 x i16> poison, i16 %elt, i32 0
  ret <vscale x 4 x i16> %vec
}
; Scalable case: loads one i16 from byte offset +1 into lane 0 of a poison <vscale x 8 x i16>; the expected code is the unscaled "ldur h0, [x0, #1]".
define <vscale x 8 x i16> @loadnxv8i16_offset(ptr %p) {
; CHECK-LABEL: loadnxv8i16_offset:
; CHECK: // %bb.0:
; CHECK-NEXT: ldur h0, [x0, #1]
; CHECK-NEXT: ret
  %addr = getelementptr inbounds i8, ptr %p, i64 1
  %elt = load i16, ptr %addr
  %vec = insertelement <vscale x 8 x i16> poison, i16 %elt, i32 0
  ret <vscale x 8 x i16> %vec
}
; Scalable case: loads one i32 from byte offset +1 into lane 0 of a poison <vscale x 2 x i32>; the checks expect "ldur w8, [x0, #1]" followed by "fmov d0, x8".
define <vscale x 2 x i32> @loadnxv2i32_offset(ptr %p) {
; CHECK-LABEL: loadnxv2i32_offset:
; CHECK: // %bb.0:
; CHECK-NEXT: ldur w8, [x0, #1]
; CHECK-NEXT: fmov d0, x8
; CHECK-NEXT: ret
  %addr = getelementptr inbounds i8, ptr %p, i64 1
  %elt = load i32, ptr %addr
  %vec = insertelement <vscale x 2 x i32> poison, i32 %elt, i32 0
  ret <vscale x 2 x i32> %vec
}
; Scalable case: loads one i32 from byte offset +1 into lane 0 of a poison <vscale x 4 x i32>; the expected code is the unscaled "ldur s0, [x0, #1]".
define <vscale x 4 x i32> @loadnxv4i32_offset(ptr %p) {
; CHECK-LABEL: loadnxv4i32_offset:
; CHECK: // %bb.0:
; CHECK-NEXT: ldur s0, [x0, #1]
; CHECK-NEXT: ret
  %addr = getelementptr inbounds i8, ptr %p, i64 1
  %elt = load i32, ptr %addr
  %vec = insertelement <vscale x 4 x i32> poison, i32 %elt, i32 0
  ret <vscale x 4 x i32> %vec
}
; Scalable case: loads one i64 from byte offset +1 into lane 0 of a poison <vscale x 2 x i64>; the expected code is the unscaled "ldur d0, [x0, #1]".
define <vscale x 2 x i64> @loadnxv2i64_offset(ptr %p) {
; CHECK-LABEL: loadnxv2i64_offset:
; CHECK: // %bb.0:
; CHECK-NEXT: ldur d0, [x0, #1]
; CHECK-NEXT: ret
  %addr = getelementptr inbounds i8, ptr %p, i64 1
  %elt = load i64, ptr %addr
  %vec = insertelement <vscale x 2 x i64> poison, i64 %elt, i32 0
  ret <vscale x 2 x i64> %vec
}
; Scalable case: loads one half from byte offset +1 into lane 0 of a poison <vscale x 4 x half>; the expected code is the unscaled "ldur h0, [x0, #1]".
define <vscale x 4 x half> @loadnxv4f16_offset(ptr %p) {
; CHECK-LABEL: loadnxv4f16_offset:
; CHECK: // %bb.0:
; CHECK-NEXT: ldur h0, [x0, #1]
; CHECK-NEXT: ret
  %addr = getelementptr inbounds i8, ptr %p, i64 1
  %elt = load half, ptr %addr
  %vec = insertelement <vscale x 4 x half> poison, half %elt, i32 0
  ret <vscale x 4 x half> %vec
}
; Scalable case: loads one half from byte offset +1 into lane 0 of a poison <vscale x 8 x half>; the expected code is the unscaled "ldur h0, [x0, #1]".
define <vscale x 8 x half> @loadnxv8f16_offset(ptr %p) {
; CHECK-LABEL: loadnxv8f16_offset:
; CHECK: // %bb.0:
; CHECK-NEXT: ldur h0, [x0, #1]
; CHECK-NEXT: ret
  %addr = getelementptr inbounds i8, ptr %p, i64 1
  %elt = load half, ptr %addr
  %vec = insertelement <vscale x 8 x half> poison, half %elt, i32 0
  ret <vscale x 8 x half> %vec
}
; Scalable case: loads one bfloat from byte offset +1 into lane 0 of a poison <vscale x 4 x bfloat>; the expected code is the unscaled "ldur h0, [x0, #1]".
define <vscale x 4 x bfloat> @loadnxv4bf16_offset(ptr %p) {
; CHECK-LABEL: loadnxv4bf16_offset:
; CHECK: // %bb.0:
; CHECK-NEXT: ldur h0, [x0, #1]
; CHECK-NEXT: ret
  %addr = getelementptr inbounds i8, ptr %p, i64 1
  %elt = load bfloat, ptr %addr
  %vec = insertelement <vscale x 4 x bfloat> poison, bfloat %elt, i32 0
  ret <vscale x 4 x bfloat> %vec
}
; Scalable case: loads one bfloat from byte offset +1 into lane 0 of a poison <vscale x 8 x bfloat>; the expected code is the unscaled "ldur h0, [x0, #1]".
define <vscale x 8 x bfloat> @loadnxv8bf16_offset(ptr %p) {
; CHECK-LABEL: loadnxv8bf16_offset:
; CHECK: // %bb.0:
; CHECK-NEXT: ldur h0, [x0, #1]
; CHECK-NEXT: ret
  %addr = getelementptr inbounds i8, ptr %p, i64 1
  %elt = load bfloat, ptr %addr
  %vec = insertelement <vscale x 8 x bfloat> poison, bfloat %elt, i32 0
  ret <vscale x 8 x bfloat> %vec
}
; Scalable case: loads one float from byte offset +1 into lane 0 of a poison <vscale x 2 x float>; the expected code is the unscaled "ldur s0, [x0, #1]".
define <vscale x 2 x float> @loadnxv2f32_offset(ptr %p) {
; CHECK-LABEL: loadnxv2f32_offset:
; CHECK: // %bb.0:
; CHECK-NEXT: ldur s0, [x0, #1]
; CHECK-NEXT: ret
  %addr = getelementptr inbounds i8, ptr %p, i64 1
  %elt = load float, ptr %addr
  %vec = insertelement <vscale x 2 x float> poison, float %elt, i32 0
  ret <vscale x 2 x float> %vec
}
; Scalable case: loads one float from byte offset +1 into lane 0 of a poison <vscale x 4 x float>; the expected code is the unscaled "ldur s0, [x0, #1]".
define <vscale x 4 x float> @loadnxv4f32_offset(ptr %p) {
; CHECK-LABEL: loadnxv4f32_offset:
; CHECK: // %bb.0:
; CHECK-NEXT: ldur s0, [x0, #1]
; CHECK-NEXT: ret
  %addr = getelementptr inbounds i8, ptr %p, i64 1
  %elt = load float, ptr %addr
  %vec = insertelement <vscale x 4 x float> poison, float %elt, i32 0
  ret <vscale x 4 x float> %vec
}
; Scalable case: loads one double from byte offset +1 into lane 0 of a poison <vscale x 2 x double>; the expected code is the unscaled "ldur d0, [x0, #1]".
define <vscale x 2 x double> @loadnxv2f64_offset(ptr %p) {
; CHECK-LABEL: loadnxv2f64_offset:
; CHECK: // %bb.0:
; CHECK-NEXT: ldur d0, [x0, #1]
; CHECK-NEXT: ret
  %addr = getelementptr inbounds i8, ptr %p, i64 1
  %elt = load double, ptr %addr
  %vec = insertelement <vscale x 2 x double> poison, double %elt, i32 0
  ret <vscale x 2 x double> %vec
}