; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve,+bf16 < %s | FileCheck %s
define <vscale x 16 x i8> @test_lane0_16xi8(<vscale x 16 x i8> %a) {
; CHECK-LABEL: test_lane0_16xi8:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.b, vl1
; CHECK-NEXT: mov w8, #30 // =0x1e
; CHECK-NEXT: mov z0.b, p0/m, w8
; CHECK-NEXT: ret
%b = insertelement <vscale x 16 x i8> %a, i8 30, i32 0
ret <vscale x 16 x i8> %b
}
define <vscale x 8 x i16> @test_lane0_8xi16(<vscale x 8 x i16> %a) {
; CHECK-LABEL: test_lane0_8xi16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.h, vl1
; CHECK-NEXT: mov w8, #30 // =0x1e
; CHECK-NEXT: mov z0.h, p0/m, w8
; CHECK-NEXT: ret
%b = insertelement <vscale x 8 x i16> %a, i16 30, i32 0
ret <vscale x 8 x i16> %b
}
define <vscale x 4 x i32> @test_lane0_4xi32(<vscale x 4 x i32> %a) {
; CHECK-LABEL: test_lane0_4xi32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s, vl1
; CHECK-NEXT: mov w8, #30 // =0x1e
; CHECK-NEXT: mov z0.s, p0/m, w8
; CHECK-NEXT: ret
%b = insertelement <vscale x 4 x i32> %a, i32 30, i32 0
ret <vscale x 4 x i32> %b
}
define <vscale x 2 x i64> @test_lane0_2xi64(<vscale x 2 x i64> %a) {
; CHECK-LABEL: test_lane0_2xi64:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d, vl1
; CHECK-NEXT: mov w8, #30 // =0x1e
; CHECK-NEXT: mov z0.d, p0/m, x8
; CHECK-NEXT: ret
%b = insertelement <vscale x 2 x i64> %a, i64 30, i32 0
ret <vscale x 2 x i64> %b
}
define <vscale x 2 x double> @test_lane0_2xf64(<vscale x 2 x double> %a) {
; CHECK-LABEL: test_lane0_2xf64:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d, vl1
; CHECK-NEXT: fmov d1, #1.00000000
; CHECK-NEXT: mov z0.d, p0/m, z1.d
; CHECK-NEXT: ret
%b = insertelement <vscale x 2 x double> %a, double 1.0, i32 0
ret <vscale x 2 x double> %b
}
define <vscale x 4 x float> @test_lane0_4xf32(<vscale x 4 x float> %a) {
; CHECK-LABEL: test_lane0_4xf32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s, vl1
; CHECK-NEXT: fmov s1, #1.00000000
; CHECK-NEXT: mov z0.s, p0/m, z1.s
; CHECK-NEXT: ret
%b = insertelement <vscale x 4 x float> %a, float 1.0, i32 0
ret <vscale x 4 x float> %b
}
define <vscale x 8 x half> @test_lane0_8xf16(<vscale x 8 x half> %a) {
; CHECK-LABEL: test_lane0_8xf16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.h, vl1
; CHECK-NEXT: fmov h1, #1.00000000
; CHECK-NEXT: mov z0.h, p0/m, z1.h
; CHECK-NEXT: ret
%b = insertelement <vscale x 8 x half> %a, half 1.0, i32 0
ret <vscale x 8 x half> %b
}
define <vscale x 8 x bfloat> @test_lane0_8xbf16(<vscale x 8 x bfloat> %a, bfloat %x) {
; CHECK-LABEL: test_lane0_8xbf16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.h, vl1
; CHECK-NEXT: // kill: def $h1 killed $h1 def $z1
; CHECK-NEXT: mov z0.h, p0/m, z1.h
; CHECK-NEXT: ret
%b = insertelement <vscale x 8 x bfloat> %a, bfloat %x, i32 0
ret <vscale x 8 x bfloat> %b
}
; Undefined lane insert
define <vscale x 2 x i64> @test_lane4_2xi64(<vscale x 2 x i64> %a) {
; CHECK-LABEL: test_lane4_2xi64:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: mov w8, #4 // =0x4
; CHECK-NEXT: index z1.d, #0, #1
; CHECK-NEXT: mov z2.d, x8
; CHECK-NEXT: mov w8, #30 // =0x1e
; CHECK-NEXT: cmpeq p0.d, p0/z, z1.d, z2.d
; CHECK-NEXT: mov z0.d, p0/m, x8
; CHECK-NEXT: ret
%b = insertelement <vscale x 2 x i64> %a, i64 30, i32 4
ret <vscale x 2 x i64> %b
}
; Undefined lane insert
define <vscale x 8 x half> @test_lane9_8xf16(<vscale x 8 x half> %a) {
; CHECK-LABEL: test_lane9_8xf16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.h
; CHECK-NEXT: mov w8, #9 // =0x9
; CHECK-NEXT: index z1.h, #0, #1
; CHECK-NEXT: mov z2.h, w8
; CHECK-NEXT: cmpeq p0.h, p0/z, z1.h, z2.h
; CHECK-NEXT: fmov h1, #1.00000000
; CHECK-NEXT: mov z0.h, p0/m, h1
; CHECK-NEXT: ret
%b = insertelement <vscale x 8 x half> %a, half 1.0, i32 9
ret <vscale x 8 x half> %b
}
define <vscale x 8 x bfloat> @test_lane9_8xbf16(<vscale x 8 x bfloat> %a, bfloat %x) {
; CHECK-LABEL: test_lane9_8xbf16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.h
; CHECK-NEXT: mov w8, #9 // =0x9
; CHECK-NEXT: index z2.h, #0, #1
; CHECK-NEXT: mov z3.h, w8
; CHECK-NEXT: cmpeq p0.h, p0/z, z2.h, z3.h
; CHECK-NEXT: mov z0.h, p0/m, h1
; CHECK-NEXT: ret
%b = insertelement <vscale x 8 x bfloat> %a, bfloat %x, i32 9
ret <vscale x 8 x bfloat> %b
}
define <vscale x 16 x i8> @test_lane1_16xi8(<vscale x 16 x i8> %a) {
; CHECK-LABEL: test_lane1_16xi8:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.b
; CHECK-NEXT: mov w8, #1 // =0x1
; CHECK-NEXT: index z1.b, #0, #1
; CHECK-NEXT: mov z2.b, w8
; CHECK-NEXT: mov w8, #30 // =0x1e
; CHECK-NEXT: cmpeq p0.b, p0/z, z1.b, z2.b
; CHECK-NEXT: mov z0.b, p0/m, w8
; CHECK-NEXT: ret
%b = insertelement <vscale x 16 x i8> %a, i8 30, i32 1
ret <vscale x 16 x i8> %b
}
define <vscale x 16 x i8> @test_lanex_16xi8(<vscale x 16 x i8> %a, i32 %x) {
; CHECK-LABEL: test_lanex_16xi8:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.b
; CHECK-NEXT: mov w8, w0
; CHECK-NEXT: index z1.b, #0, #1
; CHECK-NEXT: mov z2.b, w8
; CHECK-NEXT: mov w8, #30 // =0x1e
; CHECK-NEXT: cmpeq p0.b, p0/z, z1.b, z2.b
; CHECK-NEXT: mov z0.b, p0/m, w8
; CHECK-NEXT: ret
%b = insertelement <vscale x 16 x i8> %a, i8 30, i32 %x
ret <vscale x 16 x i8> %b
}
; Redundant lane insert
define <vscale x 4 x i32> @extract_insert_4xi32(<vscale x 4 x i32> %a) {
; CHECK-LABEL: extract_insert_4xi32:
; CHECK: // %bb.0:
; CHECK-NEXT: ret
%b = extractelement <vscale x 4 x i32> %a, i32 2
%c = insertelement <vscale x 4 x i32> %a, i32 %b, i32 2
ret <vscale x 4 x i32> %c
}
define <vscale x 8 x i16> @test_lane6_undef_8xi16(i16 %a) {
; CHECK-LABEL: test_lane6_undef_8xi16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.h
; CHECK-NEXT: mov w8, #6 // =0x6
; CHECK-NEXT: index z0.h, #0, #1
; CHECK-NEXT: mov z1.h, w8
; CHECK-NEXT: cmpeq p0.h, p0/z, z0.h, z1.h
; CHECK-NEXT: mov z0.h, p0/m, w0
; CHECK-NEXT: ret
%b = insertelement <vscale x 8 x i16> undef, i16 %a, i32 6
ret <vscale x 8 x i16> %b
}
define <vscale x 16 x i8> @test_lane0_undef_16xi8(i8 %a) {
; CHECK-LABEL: test_lane0_undef_16xi8:
; CHECK: // %bb.0:
; CHECK-NEXT: fmov s0, w0
; CHECK-NEXT: ret
%b = insertelement <vscale x 16 x i8> undef, i8 %a, i32 0
ret <vscale x 16 x i8> %b
}
define <vscale x 16 x i8> @test_insert0_of_extract0_16xi8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
; CHECK-LABEL: test_insert0_of_extract0_16xi8:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.b, vl1
; CHECK-NEXT: fmov w8, s1
; CHECK-NEXT: mov z0.b, p0/m, w8
; CHECK-NEXT: ret
%c = extractelement <vscale x 16 x i8> %b, i32 0
%d = insertelement <vscale x 16 x i8> %a, i8 %c, i32 0
ret <vscale x 16 x i8> %d
}
define <vscale x 16 x i8> @test_insert64_of_extract64_16xi8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
; CHECK-LABEL: test_insert64_of_extract64_16xi8:
; CHECK: // %bb.0:
; CHECK-NEXT: mov w8, #64 // =0x40
; CHECK-NEXT: ptrue p1.b
; CHECK-NEXT: whilels p0.b, xzr, x8
; CHECK-NEXT: mov z2.b, w8
; CHECK-NEXT: lastb w9, p0, z1.b
; CHECK-NEXT: index z1.b, #0, #1
; CHECK-NEXT: cmpeq p0.b, p1/z, z1.b, z2.b
; CHECK-NEXT: mov z0.b, p0/m, w9
; CHECK-NEXT: ret
%c = extractelement <vscale x 16 x i8> %b, i32 64
%d = insertelement <vscale x 16 x i8> %a, i8 %c, i32 64
ret <vscale x 16 x i8> %d
}
define <vscale x 16 x i8> @test_insert3_of_extract1_16xi8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
; CHECK-LABEL: test_insert3_of_extract1_16xi8:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.b
; CHECK-NEXT: mov w8, #3 // =0x3
; CHECK-NEXT: index z2.b, #0, #1
; CHECK-NEXT: mov z3.b, w8
; CHECK-NEXT: umov w8, v1.b[1]
; CHECK-NEXT: cmpeq p0.b, p0/z, z2.b, z3.b
; CHECK-NEXT: mov z0.b, p0/m, w8
; CHECK-NEXT: ret
%c = extractelement <vscale x 16 x i8> %b, i32 1
%d = insertelement <vscale x 16 x i8> %a, i8 %c, i32 3
ret <vscale x 16 x i8> %d
}
define <vscale x 8 x half> @test_insert_into_undef_nxv8f16(half %a) {
; CHECK-LABEL: test_insert_into_undef_nxv8f16:
; CHECK: // %bb.0:
; CHECK-NEXT: // kill: def $h0 killed $h0 def $z0
; CHECK-NEXT: ret
%b = insertelement <vscale x 8 x half> undef, half %a, i32 0
ret <vscale x 8 x half> %b
}
define <vscale x 4 x half> @test_insert_into_undef_nxv4f16(half %a) {
; CHECK-LABEL: test_insert_into_undef_nxv4f16:
; CHECK: // %bb.0:
; CHECK-NEXT: // kill: def $h0 killed $h0 def $z0
; CHECK-NEXT: ret
%b = insertelement <vscale x 4 x half> undef, half %a, i32 0
ret <vscale x 4 x half> %b
}
define <vscale x 2 x half> @test_insert_into_undef_nxv2f16(half %a) {
; CHECK-LABEL: test_insert_into_undef_nxv2f16:
; CHECK: // %bb.0:
; CHECK-NEXT: // kill: def $h0 killed $h0 def $z0
; CHECK-NEXT: ret
%b = insertelement <vscale x 2 x half> undef, half %a, i32 0
ret <vscale x 2 x half> %b
}
define <vscale x 8 x bfloat> @test_insert_into_undef_nxv8bf16(bfloat %a) {
; CHECK-LABEL: test_insert_into_undef_nxv8bf16:
; CHECK: // %bb.0:
; CHECK-NEXT: // kill: def $h0 killed $h0 def $z0
; CHECK-NEXT: ret
%b = insertelement <vscale x 8 x bfloat> undef, bfloat %a, i32 0
ret <vscale x 8 x bfloat> %b
}
define <vscale x 4 x bfloat> @test_insert_into_undef_nxv4bf16(bfloat %a) {
; CHECK-LABEL: test_insert_into_undef_nxv4bf16:
; CHECK: // %bb.0:
; CHECK-NEXT: // kill: def $h0 killed $h0 def $z0
; CHECK-NEXT: ret
%b = insertelement <vscale x 4 x bfloat> undef, bfloat %a, i32 0
ret <vscale x 4 x bfloat> %b
}
define <vscale x 2 x bfloat> @test_insert_into_undef_nxv2bf16(bfloat %a) {
; CHECK-LABEL: test_insert_into_undef_nxv2bf16:
; CHECK: // %bb.0:
; CHECK-NEXT: // kill: def $h0 killed $h0 def $z0
; CHECK-NEXT: ret
%b = insertelement <vscale x 2 x bfloat> undef, bfloat %a, i32 0
ret <vscale x 2 x bfloat> %b
}
define <vscale x 4 x float> @test_insert_into_undef_nxv4f32(float %a) {
; CHECK-LABEL: test_insert_into_undef_nxv4f32:
; CHECK: // %bb.0:
; CHECK-NEXT: // kill: def $s0 killed $s0 def $z0
; CHECK-NEXT: ret
%b = insertelement <vscale x 4 x float> undef, float %a, i32 0
ret <vscale x 4 x float> %b
}
define <vscale x 2 x float> @test_insert_into_undef_nxv2f32(float %a) {
; CHECK-LABEL: test_insert_into_undef_nxv2f32:
; CHECK: // %bb.0:
; CHECK-NEXT: // kill: def $s0 killed $s0 def $z0
; CHECK-NEXT: ret
%b = insertelement <vscale x 2 x float> undef, float %a, i32 0
ret <vscale x 2 x float> %b
}
define <vscale x 2 x double> @test_insert_into_undef_nxv2f64(double %a) {
; CHECK-LABEL: test_insert_into_undef_nxv2f64:
; CHECK: // %bb.0:
; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
; CHECK-NEXT: ret
%b = insertelement <vscale x 2 x double> undef, double %a, i32 0
ret <vscale x 2 x double> %b
}
; Insert scalar at index
define <vscale x 2 x half> @test_insert_with_index_nxv2f16(half %h, i64 %idx) {
; CHECK-LABEL: test_insert_with_index_nxv2f16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: index z1.d, #0, #1
; CHECK-NEXT: mov z2.d, x0
; CHECK-NEXT: cmpeq p0.d, p0/z, z1.d, z2.d
; CHECK-NEXT: mov z0.h, p0/m, h0
; CHECK-NEXT: ret
%res = insertelement <vscale x 2 x half> undef, half %h, i64 %idx
ret <vscale x 2 x half> %res
}
define <vscale x 4 x half> @test_insert_with_index_nxv4f16(half %h, i64 %idx) {
; CHECK-LABEL: test_insert_with_index_nxv4f16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s
; CHECK-NEXT: index z1.s, #0, #1
; CHECK-NEXT: mov z2.s, w0
; CHECK-NEXT: cmpeq p0.s, p0/z, z1.s, z2.s
; CHECK-NEXT: mov z0.h, p0/m, h0
; CHECK-NEXT: ret
%res = insertelement <vscale x 4 x half> undef, half %h, i64 %idx
ret <vscale x 4 x half> %res
}
define <vscale x 8 x half> @test_insert_with_index_nxv8f16(half %h, i64 %idx) {
; CHECK-LABEL: test_insert_with_index_nxv8f16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.h
; CHECK-NEXT: index z1.h, #0, #1
; CHECK-NEXT: mov z2.h, w0
; CHECK-NEXT: cmpeq p0.h, p0/z, z1.h, z2.h
; CHECK-NEXT: mov z0.h, p0/m, h0
; CHECK-NEXT: ret
%res = insertelement <vscale x 8 x half> undef, half %h, i64 %idx
ret <vscale x 8 x half> %res
}
define <vscale x 2 x bfloat> @test_insert_with_index_nxv2bf16(bfloat %h, i64 %idx) {
; CHECK-LABEL: test_insert_with_index_nxv2bf16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: index z1.d, #0, #1
; CHECK-NEXT: mov z2.d, x0
; CHECK-NEXT: cmpeq p0.d, p0/z, z1.d, z2.d
; CHECK-NEXT: mov z0.h, p0/m, h0
; CHECK-NEXT: ret
%res = insertelement <vscale x 2 x bfloat> undef, bfloat %h, i64 %idx
ret <vscale x 2 x bfloat> %res
}
define <vscale x 4 x bfloat> @test_insert_with_index_nxv4bf16(bfloat %h, i64 %idx) {
; CHECK-LABEL: test_insert_with_index_nxv4bf16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s
; CHECK-NEXT: index z1.s, #0, #1
; CHECK-NEXT: mov z2.s, w0
; CHECK-NEXT: cmpeq p0.s, p0/z, z1.s, z2.s
; CHECK-NEXT: mov z0.h, p0/m, h0
; CHECK-NEXT: ret
%res = insertelement <vscale x 4 x bfloat> undef, bfloat %h, i64 %idx
ret <vscale x 4 x bfloat> %res
}
define <vscale x 8 x bfloat> @test_insert_with_index_nxv8bf16(bfloat %h, i64 %idx) {
; CHECK-LABEL: test_insert_with_index_nxv8bf16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.h
; CHECK-NEXT: index z1.h, #0, #1
; CHECK-NEXT: mov z2.h, w0
; CHECK-NEXT: cmpeq p0.h, p0/z, z1.h, z2.h
; CHECK-NEXT: mov z0.h, p0/m, h0
; CHECK-NEXT: ret
%res = insertelement <vscale x 8 x bfloat> undef, bfloat %h, i64 %idx
ret <vscale x 8 x bfloat> %res
}
define <vscale x 2 x float> @test_insert_with_index_nxv2f32(float %f, i64 %idx) {
; CHECK-LABEL: test_insert_with_index_nxv2f32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: index z1.d, #0, #1
; CHECK-NEXT: mov z2.d, x0
; CHECK-NEXT: cmpeq p0.d, p0/z, z1.d, z2.d
; CHECK-NEXT: mov z0.s, p0/m, s0
; CHECK-NEXT: ret
%res = insertelement <vscale x 2 x float> undef, float %f, i64 %idx
ret <vscale x 2 x float> %res
}
define <vscale x 4 x float> @test_insert_with_index_nxv4f32(float %f, i64 %idx) {
; CHECK-LABEL: test_insert_with_index_nxv4f32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s
; CHECK-NEXT: index z1.s, #0, #1
; CHECK-NEXT: mov z2.s, w0
; CHECK-NEXT: cmpeq p0.s, p0/z, z1.s, z2.s
; CHECK-NEXT: mov z0.s, p0/m, s0
; CHECK-NEXT: ret
%res = insertelement <vscale x 4 x float> undef, float %f, i64 %idx
ret <vscale x 4 x float> %res
}
define <vscale x 2 x double> @test_insert_with_index_nxv2f64(double %d, i64 %idx) {
; CHECK-LABEL: test_insert_with_index_nxv2f64:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: index z1.d, #0, #1
; CHECK-NEXT: mov z2.d, x0
; CHECK-NEXT: cmpeq p0.d, p0/z, z1.d, z2.d
; CHECK-NEXT: mov z0.d, p0/m, d0
; CHECK-NEXT: ret
%res = insertelement <vscale x 2 x double> undef, double %d, i64 %idx
ret <vscale x 2 x double> %res
}
; Predicate insert
define <vscale x 2 x i1> @test_predicate_insert_2xi1_immediate (<vscale x 2 x i1> %val, i1 %elt) {
; CHECK-LABEL: test_predicate_insert_2xi1_immediate:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p1.d, vl1
; CHECK-NEXT: mov z0.d, p0/z, #1 // =0x1
; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: mov z0.d, p1/m, x0
; CHECK-NEXT: and z0.d, z0.d, #0x1
; CHECK-NEXT: cmpne p0.d, p0/z, z0.d, #0
; CHECK-NEXT: ret
%res = insertelement <vscale x 2 x i1> %val, i1 %elt, i32 0
ret <vscale x 2 x i1> %res
}
define <vscale x 4 x i1> @test_predicate_insert_4xi1_immediate (<vscale x 4 x i1> %val, i1 %elt) {
; CHECK-LABEL: test_predicate_insert_4xi1_immediate:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p1.s
; CHECK-NEXT: mov w8, #2 // =0x2
; CHECK-NEXT: index z0.s, #0, #1
; CHECK-NEXT: mov z1.s, w8
; CHECK-NEXT: cmpeq p2.s, p1/z, z0.s, z1.s
; CHECK-NEXT: mov z0.s, p0/z, #1 // =0x1
; CHECK-NEXT: mov z0.s, p2/m, w0
; CHECK-NEXT: and z0.s, z0.s, #0x1
; CHECK-NEXT: cmpne p0.s, p1/z, z0.s, #0
; CHECK-NEXT: ret
%res = insertelement <vscale x 4 x i1> %val, i1 %elt, i32 2
ret <vscale x 4 x i1> %res
}
define <vscale x 8 x i1> @test_predicate_insert_8xi1_immediate (<vscale x 8 x i1> %val, i32 %idx) {
; CHECK-LABEL: test_predicate_insert_8xi1_immediate:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p1.h
; CHECK-NEXT: mov w8, w0
; CHECK-NEXT: index z0.h, #0, #1
; CHECK-NEXT: mov z1.h, w8
; CHECK-NEXT: mov w8, #1 // =0x1
; CHECK-NEXT: cmpeq p2.h, p1/z, z0.h, z1.h
; CHECK-NEXT: mov z0.h, p0/z, #1 // =0x1
; CHECK-NEXT: mov z0.h, p2/m, w8
; CHECK-NEXT: and z0.h, z0.h, #0x1
; CHECK-NEXT: cmpne p0.h, p1/z, z0.h, #0
; CHECK-NEXT: ret
%res = insertelement <vscale x 8 x i1> %val, i1 1, i32 %idx
ret <vscale x 8 x i1> %res
}
define <vscale x 16 x i1> @test_predicate_insert_16xi1_immediate (<vscale x 16 x i1> %val) {
; CHECK-LABEL: test_predicate_insert_16xi1_immediate:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p1.b
; CHECK-NEXT: mov w8, #4 // =0x4
; CHECK-NEXT: index z0.b, #0, #1
; CHECK-NEXT: mov z1.b, w8
; CHECK-NEXT: mov w8, wzr
; CHECK-NEXT: cmpeq p2.b, p1/z, z0.b, z1.b
; CHECK-NEXT: mov z0.b, p0/z, #1 // =0x1
; CHECK-NEXT: mov z0.b, p2/m, w8
; CHECK-NEXT: and z0.b, z0.b, #0x1
; CHECK-NEXT: cmpne p0.b, p1/z, z0.b, #0
; CHECK-NEXT: ret
%res = insertelement <vscale x 16 x i1> %val, i1 0, i32 4
ret <vscale x 16 x i1> %res
}
define <vscale x 2 x i1> @test_predicate_insert_2xi1(<vscale x 2 x i1> %val, i1 %elt, i32 %idx) {
; CHECK-LABEL: test_predicate_insert_2xi1:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p1.d
; CHECK-NEXT: mov w8, w1
; CHECK-NEXT: index z0.d, #0, #1
; CHECK-NEXT: mov z1.d, x8
; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0
; CHECK-NEXT: cmpeq p2.d, p1/z, z0.d, z1.d
; CHECK-NEXT: mov z0.d, p0/z, #1 // =0x1
; CHECK-NEXT: mov z0.d, p2/m, x0
; CHECK-NEXT: and z0.d, z0.d, #0x1
; CHECK-NEXT: cmpne p0.d, p1/z, z0.d, #0
; CHECK-NEXT: ret
%res = insertelement <vscale x 2 x i1> %val, i1 %elt, i32 %idx
ret <vscale x 2 x i1> %res
}
define <vscale x 4 x i1> @test_predicate_insert_4xi1(<vscale x 4 x i1> %val, i1 %elt, i32 %idx) {
; CHECK-LABEL: test_predicate_insert_4xi1:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p1.s
; CHECK-NEXT: mov w8, w1
; CHECK-NEXT: index z0.s, #0, #1
; CHECK-NEXT: mov z1.s, w8
; CHECK-NEXT: cmpeq p2.s, p1/z, z0.s, z1.s
; CHECK-NEXT: mov z0.s, p0/z, #1 // =0x1
; CHECK-NEXT: mov z0.s, p2/m, w0
; CHECK-NEXT: and z0.s, z0.s, #0x1
; CHECK-NEXT: cmpne p0.s, p1/z, z0.s, #0
; CHECK-NEXT: ret
%res = insertelement <vscale x 4 x i1> %val, i1 %elt, i32 %idx
ret <vscale x 4 x i1> %res
}
define <vscale x 8 x i1> @test_predicate_insert_8xi1(<vscale x 8 x i1> %val, i1 %elt, i32 %idx) {
; CHECK-LABEL: test_predicate_insert_8xi1:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p1.h
; CHECK-NEXT: mov w8, w1
; CHECK-NEXT: index z0.h, #0, #1
; CHECK-NEXT: mov z1.h, w8
; CHECK-NEXT: cmpeq p2.h, p1/z, z0.h, z1.h
; CHECK-NEXT: mov z0.h, p0/z, #1 // =0x1
; CHECK-NEXT: mov z0.h, p2/m, w0
; CHECK-NEXT: and z0.h, z0.h, #0x1
; CHECK-NEXT: cmpne p0.h, p1/z, z0.h, #0
; CHECK-NEXT: ret
%res = insertelement <vscale x 8 x i1> %val, i1 %elt, i32 %idx
ret <vscale x 8 x i1> %res
}
define <vscale x 16 x i1> @test_predicate_insert_16xi1(<vscale x 16 x i1> %val, i1 %elt, i32 %idx) {
; CHECK-LABEL: test_predicate_insert_16xi1:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p1.b
; CHECK-NEXT: mov w8, w1
; CHECK-NEXT: index z0.b, #0, #1
; CHECK-NEXT: mov z1.b, w8
; CHECK-NEXT: cmpeq p2.b, p1/z, z0.b, z1.b
; CHECK-NEXT: mov z0.b, p0/z, #1 // =0x1
; CHECK-NEXT: mov z0.b, p2/m, w0
; CHECK-NEXT: and z0.b, z0.b, #0x1
; CHECK-NEXT: cmpne p0.b, p1/z, z0.b, #0
; CHECK-NEXT: ret
%res = insertelement <vscale x 16 x i1> %val, i1 %elt, i32 %idx
ret <vscale x 16 x i1> %res
}
define <vscale x 32 x i1> @test_predicate_insert_32xi1(<vscale x 32 x i1> %val, i1 %elt, i32 %idx) uwtable {
; CHECK-LABEL: test_predicate_insert_32xi1:
; CHECK: // %bb.0:
; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: .cfi_offset w29, -16
; CHECK-NEXT: addvl sp, sp, #-2
; CHECK-NEXT: .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 16 * VG
; CHECK-NEXT: ptrue p2.b
; CHECK-NEXT: mov x8, #-1 // =0xffffffffffffffff
; CHECK-NEXT: mov z0.b, p1/z, #1 // =0x1
; CHECK-NEXT: mov z1.b, p0/z, #1 // =0x1
; CHECK-NEXT: addvl x8, x8, #2
; CHECK-NEXT: mov w9, w1
; CHECK-NEXT: cmp x9, x8
; CHECK-NEXT: csel x8, x9, x8, lo
; CHECK-NEXT: mov x9, sp
; CHECK-NEXT: st1b { z0.b }, p2, [sp, #1, mul vl]
; CHECK-NEXT: st1b { z1.b }, p2, [sp]
; CHECK-NEXT: strb w0, [x9, x8]
; CHECK-NEXT: ld1b { z0.b }, p2/z, [sp]
; CHECK-NEXT: ld1b { z1.b }, p2/z, [sp, #1, mul vl]
; CHECK-NEXT: and z0.b, z0.b, #0x1
; CHECK-NEXT: and z1.b, z1.b, #0x1
; CHECK-NEXT: cmpne p0.b, p2/z, z0.b, #0
; CHECK-NEXT: cmpne p1.b, p2/z, z1.b, #0
; CHECK-NEXT: addvl sp, sp, #2
; CHECK-NEXT: .cfi_def_cfa wsp, 16
; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT: .cfi_def_cfa_offset 0
; CHECK-NEXT: .cfi_restore w29
; CHECK-NEXT: ret
%res = insertelement <vscale x 32 x i1> %val, i1 %elt, i32 %idx
ret <vscale x 32 x i1> %res
}