Files
clang-p2996/llvm/test/CodeGen/AArch64/sve-insert-element.ll
Sander de Smalen 61510b51c3 Revert "[AArch64] Enable subreg liveness tracking by default."
This reverts commit 9c319d5bb4.

Some issues were discovered with the bootstrap builds, which
seem like they were caused by this commit. I'm reverting to investigate.
2024-12-12 17:22:15 +00:00

619 lines
21 KiB
LLVM

; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve,+bf16 < %s | FileCheck %s
define <vscale x 16 x i8> @test_lane0_16xi8(<vscale x 16 x i8> %a) {
; CHECK-LABEL: test_lane0_16xi8:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.b, vl1
; CHECK-NEXT: mov w8, #30 // =0x1e
; CHECK-NEXT: mov z0.b, p0/m, w8
; CHECK-NEXT: ret
%b = insertelement <vscale x 16 x i8> %a, i8 30, i32 0
ret <vscale x 16 x i8> %b
}
define <vscale x 8 x i16> @test_lane0_8xi16(<vscale x 8 x i16> %a) {
; CHECK-LABEL: test_lane0_8xi16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.h, vl1
; CHECK-NEXT: mov w8, #30 // =0x1e
; CHECK-NEXT: mov z0.h, p0/m, w8
; CHECK-NEXT: ret
%b = insertelement <vscale x 8 x i16> %a, i16 30, i32 0
ret <vscale x 8 x i16> %b
}
define <vscale x 4 x i32> @test_lane0_4xi32(<vscale x 4 x i32> %a) {
; CHECK-LABEL: test_lane0_4xi32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s, vl1
; CHECK-NEXT: mov w8, #30 // =0x1e
; CHECK-NEXT: mov z0.s, p0/m, w8
; CHECK-NEXT: ret
%b = insertelement <vscale x 4 x i32> %a, i32 30, i32 0
ret <vscale x 4 x i32> %b
}
define <vscale x 2 x i64> @test_lane0_2xi64(<vscale x 2 x i64> %a) {
; CHECK-LABEL: test_lane0_2xi64:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d, vl1
; CHECK-NEXT: mov w8, #30 // =0x1e
; CHECK-NEXT: mov z0.d, p0/m, x8
; CHECK-NEXT: ret
%b = insertelement <vscale x 2 x i64> %a, i64 30, i32 0
ret <vscale x 2 x i64> %b
}
define <vscale x 2 x double> @test_lane0_2xf64(<vscale x 2 x double> %a) {
; CHECK-LABEL: test_lane0_2xf64:
; CHECK: // %bb.0:
; CHECK-NEXT: fmov d1, #1.00000000
; CHECK-NEXT: ptrue p0.d, vl1
; CHECK-NEXT: mov z0.d, p0/m, z1.d
; CHECK-NEXT: ret
%b = insertelement <vscale x 2 x double> %a, double 1.0, i32 0
ret <vscale x 2 x double> %b
}
define <vscale x 4 x float> @test_lane0_4xf32(<vscale x 4 x float> %a) {
; CHECK-LABEL: test_lane0_4xf32:
; CHECK: // %bb.0:
; CHECK-NEXT: fmov s1, #1.00000000
; CHECK-NEXT: ptrue p0.s, vl1
; CHECK-NEXT: mov z0.s, p0/m, z1.s
; CHECK-NEXT: ret
%b = insertelement <vscale x 4 x float> %a, float 1.0, i32 0
ret <vscale x 4 x float> %b
}
define <vscale x 8 x half> @test_lane0_8xf16(<vscale x 8 x half> %a) {
; CHECK-LABEL: test_lane0_8xf16:
; CHECK: // %bb.0:
; CHECK-NEXT: fmov h1, #1.00000000
; CHECK-NEXT: ptrue p0.h, vl1
; CHECK-NEXT: mov z0.h, p0/m, z1.h
; CHECK-NEXT: ret
%b = insertelement <vscale x 8 x half> %a, half 1.0, i32 0
ret <vscale x 8 x half> %b
}
define <vscale x 8 x bfloat> @test_lane0_8xbf16(<vscale x 8 x bfloat> %a, bfloat %x) {
; CHECK-LABEL: test_lane0_8xbf16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.h, vl1
; CHECK-NEXT: // kill: def $h1 killed $h1 def $z1
; CHECK-NEXT: mov z0.h, p0/m, z1.h
; CHECK-NEXT: ret
%b = insertelement <vscale x 8 x bfloat> %a, bfloat %x, i32 0
ret <vscale x 8 x bfloat> %b
}
; Undefined lane insert
define <vscale x 2 x i64> @test_lane4_2xi64(<vscale x 2 x i64> %a) {
; CHECK-LABEL: test_lane4_2xi64:
; CHECK: // %bb.0:
; CHECK-NEXT: mov w8, #4 // =0x4
; CHECK-NEXT: index z1.d, #0, #1
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: mov z2.d, x8
; CHECK-NEXT: mov w8, #30 // =0x1e
; CHECK-NEXT: cmpeq p0.d, p0/z, z1.d, z2.d
; CHECK-NEXT: mov z0.d, p0/m, x8
; CHECK-NEXT: ret
%b = insertelement <vscale x 2 x i64> %a, i64 30, i32 4
ret <vscale x 2 x i64> %b
}
; Undefined lane insert
define <vscale x 8 x half> @test_lane9_8xf16(<vscale x 8 x half> %a) {
; CHECK-LABEL: test_lane9_8xf16:
; CHECK: // %bb.0:
; CHECK-NEXT: mov w8, #9 // =0x9
; CHECK-NEXT: index z1.h, #0, #1
; CHECK-NEXT: ptrue p0.h
; CHECK-NEXT: mov z2.h, w8
; CHECK-NEXT: cmpeq p0.h, p0/z, z1.h, z2.h
; CHECK-NEXT: fmov h1, #1.00000000
; CHECK-NEXT: mov z0.h, p0/m, h1
; CHECK-NEXT: ret
%b = insertelement <vscale x 8 x half> %a, half 1.0, i32 9
ret <vscale x 8 x half> %b
}
define <vscale x 8 x bfloat> @test_lane9_8xbf16(<vscale x 8 x bfloat> %a, bfloat %x) {
; CHECK-LABEL: test_lane9_8xbf16:
; CHECK: // %bb.0:
; CHECK-NEXT: mov w8, #9 // =0x9
; CHECK-NEXT: index z2.h, #0, #1
; CHECK-NEXT: ptrue p0.h
; CHECK-NEXT: mov z3.h, w8
; CHECK-NEXT: cmpeq p0.h, p0/z, z2.h, z3.h
; CHECK-NEXT: mov z0.h, p0/m, h1
; CHECK-NEXT: ret
%b = insertelement <vscale x 8 x bfloat> %a, bfloat %x, i32 9
ret <vscale x 8 x bfloat> %b
}
define <vscale x 16 x i8> @test_lane1_16xi8(<vscale x 16 x i8> %a) {
; CHECK-LABEL: test_lane1_16xi8:
; CHECK: // %bb.0:
; CHECK-NEXT: mov w8, #1 // =0x1
; CHECK-NEXT: index z1.b, #0, #1
; CHECK-NEXT: ptrue p0.b
; CHECK-NEXT: mov z2.b, w8
; CHECK-NEXT: mov w8, #30 // =0x1e
; CHECK-NEXT: cmpeq p0.b, p0/z, z1.b, z2.b
; CHECK-NEXT: mov z0.b, p0/m, w8
; CHECK-NEXT: ret
%b = insertelement <vscale x 16 x i8> %a, i8 30, i32 1
ret <vscale x 16 x i8> %b
}
define <vscale x 16 x i8> @test_lanex_16xi8(<vscale x 16 x i8> %a, i32 %x) {
; CHECK-LABEL: test_lanex_16xi8:
; CHECK: // %bb.0:
; CHECK-NEXT: index z1.b, #0, #1
; CHECK-NEXT: mov w8, w0
; CHECK-NEXT: ptrue p0.b
; CHECK-NEXT: mov z2.b, w8
; CHECK-NEXT: mov w8, #30 // =0x1e
; CHECK-NEXT: cmpeq p0.b, p0/z, z1.b, z2.b
; CHECK-NEXT: mov z0.b, p0/m, w8
; CHECK-NEXT: ret
%b = insertelement <vscale x 16 x i8> %a, i8 30, i32 %x
ret <vscale x 16 x i8> %b
}
; Redundant lane insert
define <vscale x 4 x i32> @extract_insert_4xi32(<vscale x 4 x i32> %a) {
; CHECK-LABEL: extract_insert_4xi32:
; CHECK: // %bb.0:
; CHECK-NEXT: ret
%b = extractelement <vscale x 4 x i32> %a, i32 2
%c = insertelement <vscale x 4 x i32> %a, i32 %b, i32 2
ret <vscale x 4 x i32> %c
}
define <vscale x 8 x i16> @test_lane6_undef_8xi16(i16 %a) {
; CHECK-LABEL: test_lane6_undef_8xi16:
; CHECK: // %bb.0:
; CHECK-NEXT: mov w8, #6 // =0x6
; CHECK-NEXT: index z0.h, #0, #1
; CHECK-NEXT: ptrue p0.h
; CHECK-NEXT: mov z1.h, w8
; CHECK-NEXT: cmpeq p0.h, p0/z, z0.h, z1.h
; CHECK-NEXT: mov z0.h, p0/m, w0
; CHECK-NEXT: ret
%b = insertelement <vscale x 8 x i16> undef, i16 %a, i32 6
ret <vscale x 8 x i16> %b
}
define <vscale x 16 x i8> @test_lane0_undef_16xi8(i8 %a) {
; CHECK-LABEL: test_lane0_undef_16xi8:
; CHECK: // %bb.0:
; CHECK-NEXT: fmov s0, w0
; CHECK-NEXT: ret
%b = insertelement <vscale x 16 x i8> undef, i8 %a, i32 0
ret <vscale x 16 x i8> %b
}
define <vscale x 16 x i8> @test_insert0_of_extract0_16xi8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
; CHECK-LABEL: test_insert0_of_extract0_16xi8:
; CHECK: // %bb.0:
; CHECK-NEXT: fmov w8, s1
; CHECK-NEXT: ptrue p0.b, vl1
; CHECK-NEXT: mov z0.b, p0/m, w8
; CHECK-NEXT: ret
%c = extractelement <vscale x 16 x i8> %b, i32 0
%d = insertelement <vscale x 16 x i8> %a, i8 %c, i32 0
ret <vscale x 16 x i8> %d
}
define <vscale x 16 x i8> @test_insert64_of_extract64_16xi8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
; CHECK-LABEL: test_insert64_of_extract64_16xi8:
; CHECK: // %bb.0:
; CHECK-NEXT: mov w8, #64 // =0x40
; CHECK-NEXT: whilels p0.b, xzr, x8
; CHECK-NEXT: mov z2.b, w8
; CHECK-NEXT: lastb w9, p0, z1.b
; CHECK-NEXT: index z1.b, #0, #1
; CHECK-NEXT: ptrue p0.b
; CHECK-NEXT: cmpeq p0.b, p0/z, z1.b, z2.b
; CHECK-NEXT: mov z0.b, p0/m, w9
; CHECK-NEXT: ret
%c = extractelement <vscale x 16 x i8> %b, i32 64
%d = insertelement <vscale x 16 x i8> %a, i8 %c, i32 64
ret <vscale x 16 x i8> %d
}
define <vscale x 16 x i8> @test_insert3_of_extract1_16xi8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
; CHECK-LABEL: test_insert3_of_extract1_16xi8:
; CHECK: // %bb.0:
; CHECK-NEXT: mov w8, #3 // =0x3
; CHECK-NEXT: index z2.b, #0, #1
; CHECK-NEXT: ptrue p0.b
; CHECK-NEXT: mov z3.b, w8
; CHECK-NEXT: umov w8, v1.b[1]
; CHECK-NEXT: cmpeq p0.b, p0/z, z2.b, z3.b
; CHECK-NEXT: mov z0.b, p0/m, w8
; CHECK-NEXT: ret
%c = extractelement <vscale x 16 x i8> %b, i32 1
%d = insertelement <vscale x 16 x i8> %a, i8 %c, i32 3
ret <vscale x 16 x i8> %d
}
define <vscale x 8 x half> @test_insert_into_undef_nxv8f16(half %a) {
; CHECK-LABEL: test_insert_into_undef_nxv8f16:
; CHECK: // %bb.0:
; CHECK-NEXT: // kill: def $h0 killed $h0 def $z0
; CHECK-NEXT: ret
%b = insertelement <vscale x 8 x half> undef, half %a, i32 0
ret <vscale x 8 x half> %b
}
define <vscale x 4 x half> @test_insert_into_undef_nxv4f16(half %a) {
; CHECK-LABEL: test_insert_into_undef_nxv4f16:
; CHECK: // %bb.0:
; CHECK-NEXT: // kill: def $h0 killed $h0 def $z0
; CHECK-NEXT: ret
%b = insertelement <vscale x 4 x half> undef, half %a, i32 0
ret <vscale x 4 x half> %b
}
define <vscale x 2 x half> @test_insert_into_undef_nxv2f16(half %a) {
; CHECK-LABEL: test_insert_into_undef_nxv2f16:
; CHECK: // %bb.0:
; CHECK-NEXT: // kill: def $h0 killed $h0 def $z0
; CHECK-NEXT: ret
%b = insertelement <vscale x 2 x half> undef, half %a, i32 0
ret <vscale x 2 x half> %b
}
define <vscale x 8 x bfloat> @test_insert_into_undef_nxv8bf16(bfloat %a) {
; CHECK-LABEL: test_insert_into_undef_nxv8bf16:
; CHECK: // %bb.0:
; CHECK-NEXT: // kill: def $h0 killed $h0 def $z0
; CHECK-NEXT: ret
%b = insertelement <vscale x 8 x bfloat> undef, bfloat %a, i32 0
ret <vscale x 8 x bfloat> %b
}
define <vscale x 4 x bfloat> @test_insert_into_undef_nxv4bf16(bfloat %a) {
; CHECK-LABEL: test_insert_into_undef_nxv4bf16:
; CHECK: // %bb.0:
; CHECK-NEXT: // kill: def $h0 killed $h0 def $z0
; CHECK-NEXT: ret
%b = insertelement <vscale x 4 x bfloat> undef, bfloat %a, i32 0
ret <vscale x 4 x bfloat> %b
}
define <vscale x 2 x bfloat> @test_insert_into_undef_nxv2bf16(bfloat %a) {
; CHECK-LABEL: test_insert_into_undef_nxv2bf16:
; CHECK: // %bb.0:
; CHECK-NEXT: // kill: def $h0 killed $h0 def $z0
; CHECK-NEXT: ret
%b = insertelement <vscale x 2 x bfloat> undef, bfloat %a, i32 0
ret <vscale x 2 x bfloat> %b
}
define <vscale x 4 x float> @test_insert_into_undef_nxv4f32(float %a) {
; CHECK-LABEL: test_insert_into_undef_nxv4f32:
; CHECK: // %bb.0:
; CHECK-NEXT: // kill: def $s0 killed $s0 def $z0
; CHECK-NEXT: ret
%b = insertelement <vscale x 4 x float> undef, float %a, i32 0
ret <vscale x 4 x float> %b
}
define <vscale x 2 x float> @test_insert_into_undef_nxv2f32(float %a) {
; CHECK-LABEL: test_insert_into_undef_nxv2f32:
; CHECK: // %bb.0:
; CHECK-NEXT: // kill: def $s0 killed $s0 def $z0
; CHECK-NEXT: ret
%b = insertelement <vscale x 2 x float> undef, float %a, i32 0
ret <vscale x 2 x float> %b
}
define <vscale x 2 x double> @test_insert_into_undef_nxv2f64(double %a) {
; CHECK-LABEL: test_insert_into_undef_nxv2f64:
; CHECK: // %bb.0:
; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
; CHECK-NEXT: ret
%b = insertelement <vscale x 2 x double> undef, double %a, i32 0
ret <vscale x 2 x double> %b
}
; Insert scalar at index
define <vscale x 2 x half> @test_insert_with_index_nxv2f16(half %h, i64 %idx) {
; CHECK-LABEL: test_insert_with_index_nxv2f16:
; CHECK: // %bb.0:
; CHECK-NEXT: index z1.d, #0, #1
; CHECK-NEXT: mov z2.d, x0
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: cmpeq p0.d, p0/z, z1.d, z2.d
; CHECK-NEXT: mov z0.h, p0/m, h0
; CHECK-NEXT: ret
%res = insertelement <vscale x 2 x half> undef, half %h, i64 %idx
ret <vscale x 2 x half> %res
}
define <vscale x 4 x half> @test_insert_with_index_nxv4f16(half %h, i64 %idx) {
; CHECK-LABEL: test_insert_with_index_nxv4f16:
; CHECK: // %bb.0:
; CHECK-NEXT: index z1.s, #0, #1
; CHECK-NEXT: mov z2.s, w0
; CHECK-NEXT: ptrue p0.s
; CHECK-NEXT: cmpeq p0.s, p0/z, z1.s, z2.s
; CHECK-NEXT: mov z0.h, p0/m, h0
; CHECK-NEXT: ret
%res = insertelement <vscale x 4 x half> undef, half %h, i64 %idx
ret <vscale x 4 x half> %res
}
define <vscale x 8 x half> @test_insert_with_index_nxv8f16(half %h, i64 %idx) {
; CHECK-LABEL: test_insert_with_index_nxv8f16:
; CHECK: // %bb.0:
; CHECK-NEXT: index z1.h, #0, #1
; CHECK-NEXT: mov z2.h, w0
; CHECK-NEXT: ptrue p0.h
; CHECK-NEXT: cmpeq p0.h, p0/z, z1.h, z2.h
; CHECK-NEXT: mov z0.h, p0/m, h0
; CHECK-NEXT: ret
%res = insertelement <vscale x 8 x half> undef, half %h, i64 %idx
ret <vscale x 8 x half> %res
}
define <vscale x 2 x bfloat> @test_insert_with_index_nxv2bf16(bfloat %h, i64 %idx) {
; CHECK-LABEL: test_insert_with_index_nxv2bf16:
; CHECK: // %bb.0:
; CHECK-NEXT: index z1.d, #0, #1
; CHECK-NEXT: mov z2.d, x0
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: cmpeq p0.d, p0/z, z1.d, z2.d
; CHECK-NEXT: mov z0.h, p0/m, h0
; CHECK-NEXT: ret
%res = insertelement <vscale x 2 x bfloat> undef, bfloat %h, i64 %idx
ret <vscale x 2 x bfloat> %res
}
define <vscale x 4 x bfloat> @test_insert_with_index_nxv4bf16(bfloat %h, i64 %idx) {
; CHECK-LABEL: test_insert_with_index_nxv4bf16:
; CHECK: // %bb.0:
; CHECK-NEXT: index z1.s, #0, #1
; CHECK-NEXT: mov z2.s, w0
; CHECK-NEXT: ptrue p0.s
; CHECK-NEXT: cmpeq p0.s, p0/z, z1.s, z2.s
; CHECK-NEXT: mov z0.h, p0/m, h0
; CHECK-NEXT: ret
%res = insertelement <vscale x 4 x bfloat> undef, bfloat %h, i64 %idx
ret <vscale x 4 x bfloat> %res
}
define <vscale x 8 x bfloat> @test_insert_with_index_nxv8bf16(bfloat %h, i64 %idx) {
; CHECK-LABEL: test_insert_with_index_nxv8bf16:
; CHECK: // %bb.0:
; CHECK-NEXT: index z1.h, #0, #1
; CHECK-NEXT: mov z2.h, w0
; CHECK-NEXT: ptrue p0.h
; CHECK-NEXT: cmpeq p0.h, p0/z, z1.h, z2.h
; CHECK-NEXT: mov z0.h, p0/m, h0
; CHECK-NEXT: ret
%res = insertelement <vscale x 8 x bfloat> undef, bfloat %h, i64 %idx
ret <vscale x 8 x bfloat> %res
}
define <vscale x 2 x float> @test_insert_with_index_nxv2f32(float %f, i64 %idx) {
; CHECK-LABEL: test_insert_with_index_nxv2f32:
; CHECK: // %bb.0:
; CHECK-NEXT: index z1.d, #0, #1
; CHECK-NEXT: mov z2.d, x0
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: cmpeq p0.d, p0/z, z1.d, z2.d
; CHECK-NEXT: mov z0.s, p0/m, s0
; CHECK-NEXT: ret
%res = insertelement <vscale x 2 x float> undef, float %f, i64 %idx
ret <vscale x 2 x float> %res
}
define <vscale x 4 x float> @test_insert_with_index_nxv4f32(float %f, i64 %idx) {
; CHECK-LABEL: test_insert_with_index_nxv4f32:
; CHECK: // %bb.0:
; CHECK-NEXT: index z1.s, #0, #1
; CHECK-NEXT: mov z2.s, w0
; CHECK-NEXT: ptrue p0.s
; CHECK-NEXT: cmpeq p0.s, p0/z, z1.s, z2.s
; CHECK-NEXT: mov z0.s, p0/m, s0
; CHECK-NEXT: ret
%res = insertelement <vscale x 4 x float> undef, float %f, i64 %idx
ret <vscale x 4 x float> %res
}
define <vscale x 2 x double> @test_insert_with_index_nxv2f64(double %d, i64 %idx) {
; CHECK-LABEL: test_insert_with_index_nxv2f64:
; CHECK: // %bb.0:
; CHECK-NEXT: index z1.d, #0, #1
; CHECK-NEXT: mov z2.d, x0
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: cmpeq p0.d, p0/z, z1.d, z2.d
; CHECK-NEXT: mov z0.d, p0/m, d0
; CHECK-NEXT: ret
%res = insertelement <vscale x 2 x double> undef, double %d, i64 %idx
ret <vscale x 2 x double> %res
}
;Predicate insert
define <vscale x 2 x i1> @test_predicate_insert_2xi1_immediate (<vscale x 2 x i1> %val, i1 %elt) {
; CHECK-LABEL: test_predicate_insert_2xi1_immediate:
; CHECK: // %bb.0:
; CHECK-NEXT: mov z0.d, p0/z, #1 // =0x1
; CHECK-NEXT: ptrue p0.d, vl1
; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0
; CHECK-NEXT: mov z0.d, p0/m, x0
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: and z0.d, z0.d, #0x1
; CHECK-NEXT: cmpne p0.d, p0/z, z0.d, #0
; CHECK-NEXT: ret
%res = insertelement <vscale x 2 x i1> %val, i1 %elt, i32 0
ret <vscale x 2 x i1> %res
}
define <vscale x 4 x i1> @test_predicate_insert_4xi1_immediate (<vscale x 4 x i1> %val, i1 %elt) {
; CHECK-LABEL: test_predicate_insert_4xi1_immediate:
; CHECK: // %bb.0:
; CHECK-NEXT: mov w8, #2 // =0x2
; CHECK-NEXT: index z0.s, #0, #1
; CHECK-NEXT: ptrue p1.s
; CHECK-NEXT: mov z1.s, w8
; CHECK-NEXT: cmpeq p2.s, p1/z, z0.s, z1.s
; CHECK-NEXT: mov z0.s, p0/z, #1 // =0x1
; CHECK-NEXT: mov z0.s, p2/m, w0
; CHECK-NEXT: and z0.s, z0.s, #0x1
; CHECK-NEXT: cmpne p0.s, p1/z, z0.s, #0
; CHECK-NEXT: ret
%res = insertelement <vscale x 4 x i1> %val, i1 %elt, i32 2
ret <vscale x 4 x i1> %res
}
define <vscale x 8 x i1> @test_predicate_insert_8xi1_immediate (<vscale x 8 x i1> %val, i32 %idx) {
; CHECK-LABEL: test_predicate_insert_8xi1_immediate:
; CHECK: // %bb.0:
; CHECK-NEXT: index z0.h, #0, #1
; CHECK-NEXT: mov w8, w0
; CHECK-NEXT: ptrue p1.h
; CHECK-NEXT: mov z1.h, w8
; CHECK-NEXT: mov w8, #1 // =0x1
; CHECK-NEXT: cmpeq p2.h, p1/z, z0.h, z1.h
; CHECK-NEXT: mov z0.h, p0/z, #1 // =0x1
; CHECK-NEXT: mov z0.h, p2/m, w8
; CHECK-NEXT: and z0.h, z0.h, #0x1
; CHECK-NEXT: cmpne p0.h, p1/z, z0.h, #0
; CHECK-NEXT: ret
%res = insertelement <vscale x 8 x i1> %val, i1 1, i32 %idx
ret <vscale x 8 x i1> %res
}
define <vscale x 16 x i1> @test_predicate_insert_16xi1_immediate (<vscale x 16 x i1> %val) {
; CHECK-LABEL: test_predicate_insert_16xi1_immediate:
; CHECK: // %bb.0:
; CHECK-NEXT: mov w8, #4 // =0x4
; CHECK-NEXT: index z0.b, #0, #1
; CHECK-NEXT: ptrue p1.b
; CHECK-NEXT: mov z1.b, w8
; CHECK-NEXT: mov w8, wzr
; CHECK-NEXT: cmpeq p2.b, p1/z, z0.b, z1.b
; CHECK-NEXT: mov z0.b, p0/z, #1 // =0x1
; CHECK-NEXT: mov z0.b, p2/m, w8
; CHECK-NEXT: and z0.b, z0.b, #0x1
; CHECK-NEXT: cmpne p0.b, p1/z, z0.b, #0
; CHECK-NEXT: ret
%res = insertelement <vscale x 16 x i1> %val, i1 0, i32 4
ret <vscale x 16 x i1> %res
}
define <vscale x 2 x i1> @test_predicate_insert_2xi1(<vscale x 2 x i1> %val, i1 %elt, i32 %idx) {
; CHECK-LABEL: test_predicate_insert_2xi1:
; CHECK: // %bb.0:
; CHECK-NEXT: index z0.d, #0, #1
; CHECK-NEXT: mov w8, w1
; CHECK-NEXT: ptrue p1.d
; CHECK-NEXT: mov z1.d, x8
; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0
; CHECK-NEXT: cmpeq p2.d, p1/z, z0.d, z1.d
; CHECK-NEXT: mov z0.d, p0/z, #1 // =0x1
; CHECK-NEXT: mov z0.d, p2/m, x0
; CHECK-NEXT: and z0.d, z0.d, #0x1
; CHECK-NEXT: cmpne p0.d, p1/z, z0.d, #0
; CHECK-NEXT: ret
%res = insertelement <vscale x 2 x i1> %val, i1 %elt, i32 %idx
ret <vscale x 2 x i1> %res
}
define <vscale x 4 x i1> @test_predicate_insert_4xi1(<vscale x 4 x i1> %val, i1 %elt, i32 %idx) {
; CHECK-LABEL: test_predicate_insert_4xi1:
; CHECK: // %bb.0:
; CHECK-NEXT: index z0.s, #0, #1
; CHECK-NEXT: mov w8, w1
; CHECK-NEXT: ptrue p1.s
; CHECK-NEXT: mov z1.s, w8
; CHECK-NEXT: cmpeq p2.s, p1/z, z0.s, z1.s
; CHECK-NEXT: mov z0.s, p0/z, #1 // =0x1
; CHECK-NEXT: mov z0.s, p2/m, w0
; CHECK-NEXT: and z0.s, z0.s, #0x1
; CHECK-NEXT: cmpne p0.s, p1/z, z0.s, #0
; CHECK-NEXT: ret
%res = insertelement <vscale x 4 x i1> %val, i1 %elt, i32 %idx
ret <vscale x 4 x i1> %res
}
define <vscale x 8 x i1> @test_predicate_insert_8xi1(<vscale x 8 x i1> %val, i1 %elt, i32 %idx) {
; CHECK-LABEL: test_predicate_insert_8xi1:
; CHECK: // %bb.0:
; CHECK-NEXT: index z0.h, #0, #1
; CHECK-NEXT: mov w8, w1
; CHECK-NEXT: ptrue p1.h
; CHECK-NEXT: mov z1.h, w8
; CHECK-NEXT: cmpeq p2.h, p1/z, z0.h, z1.h
; CHECK-NEXT: mov z0.h, p0/z, #1 // =0x1
; CHECK-NEXT: mov z0.h, p2/m, w0
; CHECK-NEXT: and z0.h, z0.h, #0x1
; CHECK-NEXT: cmpne p0.h, p1/z, z0.h, #0
; CHECK-NEXT: ret
%res = insertelement <vscale x 8 x i1> %val, i1 %elt, i32 %idx
ret <vscale x 8 x i1> %res
}
define <vscale x 16 x i1> @test_predicate_insert_16xi1(<vscale x 16 x i1> %val, i1 %elt, i32 %idx) {
; CHECK-LABEL: test_predicate_insert_16xi1:
; CHECK: // %bb.0:
; CHECK-NEXT: index z0.b, #0, #1
; CHECK-NEXT: mov w8, w1
; CHECK-NEXT: ptrue p1.b
; CHECK-NEXT: mov z1.b, w8
; CHECK-NEXT: cmpeq p2.b, p1/z, z0.b, z1.b
; CHECK-NEXT: mov z0.b, p0/z, #1 // =0x1
; CHECK-NEXT: mov z0.b, p2/m, w0
; CHECK-NEXT: and z0.b, z0.b, #0x1
; CHECK-NEXT: cmpne p0.b, p1/z, z0.b, #0
; CHECK-NEXT: ret
%res = insertelement <vscale x 16 x i1> %val, i1 %elt, i32 %idx
ret <vscale x 16 x i1> %res
}
define <vscale x 32 x i1> @test_predicate_insert_32xi1(<vscale x 32 x i1> %val, i1 %elt, i32 %idx) uwtable {
; CHECK-LABEL: test_predicate_insert_32xi1:
; CHECK: // %bb.0:
; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: .cfi_offset w29, -16
; CHECK-NEXT: addvl sp, sp, #-2
; CHECK-NEXT: .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 16 * VG
; CHECK-NEXT: rdvl x8, #2
; CHECK-NEXT: mov z0.b, p1/z, #1 // =0x1
; CHECK-NEXT: mov z1.b, p0/z, #1 // =0x1
; CHECK-NEXT: sub x8, x8, #1
; CHECK-NEXT: mov w9, w1
; CHECK-NEXT: ptrue p1.b
; CHECK-NEXT: cmp x9, x8
; CHECK-NEXT: csel x8, x9, x8, lo
; CHECK-NEXT: mov x9, sp
; CHECK-NEXT: st1b { z0.b }, p1, [sp, #1, mul vl]
; CHECK-NEXT: st1b { z1.b }, p1, [sp]
; CHECK-NEXT: strb w0, [x9, x8]
; CHECK-NEXT: ld1b { z0.b }, p1/z, [sp]
; CHECK-NEXT: ld1b { z1.b }, p1/z, [sp, #1, mul vl]
; CHECK-NEXT: and z0.b, z0.b, #0x1
; CHECK-NEXT: and z1.b, z1.b, #0x1
; CHECK-NEXT: cmpne p0.b, p1/z, z0.b, #0
; CHECK-NEXT: cmpne p1.b, p1/z, z1.b, #0
; CHECK-NEXT: addvl sp, sp, #2
; CHECK-NEXT: .cfi_def_cfa wsp, 16
; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT: .cfi_def_cfa_offset 0
; CHECK-NEXT: .cfi_restore w29
; CHECK-NEXT: ret
%res = insertelement <vscale x 32 x i1> %val, i1 %elt, i32 %idx
ret <vscale x 32 x i1> %res
}