Files
clang-p2996/llvm/test/CodeGen/AArch64/predicated-add-sub.ll
Harvin Iriawan db158c7c83 [AArch64] Update generic sched model to A510
Refresh of the generic scheduling model to use A510 instead of A55.
  Main benefits are to the little core, and introducing SVE scheduling information.
  Changes tested on various OoO cores, no performance degradation is seen.

  Differential Revision: https://reviews.llvm.org/D156799
2023-08-21 12:25:15 +01:00

451 lines
16 KiB
LLVM

; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s | FileCheck %s
target triple = "aarch64-unknown-linux"
; a + zext(v): the i1 extend folds into a merging predicated add of splat(1).
; i8 in 8 lanes is illegal, so lanes are promoted to .h elements.
define <vscale x 8 x i8> @zext.add.8xi8(<vscale x 8 x i8> %a, <vscale x 8 x i1> %v) #0 {
; CHECK-LABEL: zext.add.8xi8:
; CHECK: // %bb.0:
; CHECK-NEXT: mov z1.h, #1 // =0x1
; CHECK-NEXT: add z0.h, p0/m, z0.h, z1.h
; CHECK-NEXT: ret
  %extend = zext <vscale x 8 x i1> %v to <vscale x 8 x i8>
  %result = add <vscale x 8 x i8> %a, %extend
  ret <vscale x 8 x i8> %result
}
; a + zext(v) with illegal 4 x i16: promoted to .s lanes, predicated add of splat(1).
define <vscale x 4 x i16> @zext.add.4xi16(<vscale x 4 x i16> %a, <vscale x 4 x i1> %v) #0 {
; CHECK-LABEL: zext.add.4xi16:
; CHECK: // %bb.0:
; CHECK-NEXT: mov z1.s, #1 // =0x1
; CHECK-NEXT: add z0.s, p0/m, z0.s, z1.s
; CHECK-NEXT: ret
  %extend = zext <vscale x 4 x i1> %v to <vscale x 4 x i16>
  %result = add <vscale x 4 x i16> %a, %extend
  ret <vscale x 4 x i16> %result
}
; a + zext(v) with illegal 2 x i32: promoted to .d lanes, predicated add of splat(1).
define <vscale x 2 x i32> @zext.add.2xi32(<vscale x 2 x i32> %a, <vscale x 2 x i1> %v) #0 {
; CHECK-LABEL: zext.add.2xi32:
; CHECK: // %bb.0:
; CHECK-NEXT: mov z1.d, #1 // =0x1
; CHECK-NEXT: add z0.d, p0/m, z0.d, z1.d
; CHECK-NEXT: ret
  %extend = zext <vscale x 2 x i1> %v to <vscale x 2 x i32>
  %result = add <vscale x 2 x i32> %a, %extend
  ret <vscale x 2 x i32> %result
}
; a + zext(v) with legal 16 x i8: single predicated .b add of splat(1), no extend emitted.
define <vscale x 16 x i8> @zext.add.16xi8(<vscale x 16 x i8> %a, <vscale x 16 x i1> %v) #0 {
; CHECK-LABEL: zext.add.16xi8:
; CHECK: // %bb.0:
; CHECK-NEXT: mov z1.b, #1 // =0x1
; CHECK-NEXT: add z0.b, p0/m, z0.b, z1.b
; CHECK-NEXT: ret
  %extend = zext <vscale x 16 x i1> %v to <vscale x 16 x i8>
  %result = add <vscale x 16 x i8> %a, %extend
  ret <vscale x 16 x i8> %result
}
; a + zext(v) with legal 8 x i16: single predicated .h add of splat(1).
define <vscale x 8 x i16> @zext.add.8xi16(<vscale x 8 x i16> %a, <vscale x 8 x i1> %v) #0 {
; CHECK-LABEL: zext.add.8xi16:
; CHECK: // %bb.0:
; CHECK-NEXT: mov z1.h, #1 // =0x1
; CHECK-NEXT: add z0.h, p0/m, z0.h, z1.h
; CHECK-NEXT: ret
  %extend = zext <vscale x 8 x i1> %v to <vscale x 8 x i16>
  %result = add <vscale x 8 x i16> %a, %extend
  ret <vscale x 8 x i16> %result
}
; a + zext(v) with legal 4 x i32: single predicated .s add of splat(1).
define <vscale x 4 x i32> @zext.add.4xi32(<vscale x 4 x i32> %a, <vscale x 4 x i1> %v) #0 {
; CHECK-LABEL: zext.add.4xi32:
; CHECK: // %bb.0:
; CHECK-NEXT: mov z1.s, #1 // =0x1
; CHECK-NEXT: add z0.s, p0/m, z0.s, z1.s
; CHECK-NEXT: ret
  %extend = zext <vscale x 4 x i1> %v to <vscale x 4 x i32>
  %result = add <vscale x 4 x i32> %a, %extend
  ret <vscale x 4 x i32> %result
}
; a + zext(v) with legal 2 x i64: single predicated .d add of splat(1).
define <vscale x 2 x i64> @zext.add.2xi64(<vscale x 2 x i64> %a, <vscale x 2 x i1> %v) #0 {
; CHECK-LABEL: zext.add.2xi64:
; CHECK: // %bb.0:
; CHECK-NEXT: mov z1.d, #1 // =0x1
; CHECK-NEXT: add z0.d, p0/m, z0.d, z1.d
; CHECK-NEXT: ret
  %extend = zext <vscale x 2 x i1> %v to <vscale x 2 x i64>
  %result = add <vscale x 2 x i64> %a, %extend
  ret <vscale x 2 x i64> %result
}
; a + zext(v) split across two Z registers: the 8 x i1 predicate is widened
; with punpklo/punpkhi and each half gets its own predicated add of splat(1).
define <vscale x 8 x i32> @zext.add.8xi32(<vscale x 8 x i32> %a, <vscale x 8 x i1> %v) #0 {
; CHECK-LABEL: zext.add.8xi32:
; CHECK: // %bb.0:
; CHECK-NEXT: punpkhi p1.h, p0.b
; CHECK-NEXT: mov z2.s, #1 // =0x1
; CHECK-NEXT: punpklo p0.h, p0.b
; CHECK-NEXT: add z1.s, p1/m, z1.s, z2.s
; CHECK-NEXT: add z0.s, p0/m, z0.s, z2.s
; CHECK-NEXT: ret
  %extend = zext <vscale x 8 x i1> %v to <vscale x 8 x i32>
  %result = add <vscale x 8 x i32> %a, %extend
  ret <vscale x 8 x i32> %result
}
; a + zext(v) split across four Z registers: the 16 x i1 predicate is unpacked
; twice (punpklo/punpkhi tree) and each quarter gets a predicated add of splat(1).
define <vscale x 16 x i32> @zext.add.16xi32(<vscale x 16 x i32> %a, <vscale x 16 x i1> %v) #0 {
; CHECK-LABEL: zext.add.16xi32:
; CHECK: // %bb.0:
; CHECK-NEXT: punpkhi p1.h, p0.b
; CHECK-NEXT: mov z4.s, #1 // =0x1
; CHECK-NEXT: punpklo p0.h, p0.b
; CHECK-NEXT: punpkhi p2.h, p1.b
; CHECK-NEXT: punpklo p1.h, p1.b
; CHECK-NEXT: punpklo p3.h, p0.b
; CHECK-NEXT: punpkhi p0.h, p0.b
; CHECK-NEXT: add z3.s, p2/m, z3.s, z4.s
; CHECK-NEXT: add z2.s, p1/m, z2.s, z4.s
; CHECK-NEXT: add z0.s, p3/m, z0.s, z4.s
; CHECK-NEXT: add z1.s, p0/m, z1.s, z4.s
; CHECK-NEXT: ret
  %extend = zext <vscale x 16 x i1> %v to <vscale x 16 x i32>
  %result = add <vscale x 16 x i32> %a, %extend
  ret <vscale x 16 x i32> %result
}
; a - zext(v): rewritten as a predicated add of splat(-1) (subtracting 1 where active).
; i8 in 8 lanes is promoted to .h elements.
define <vscale x 8 x i8> @zext.sub.8xi8(<vscale x 8 x i8> %a, <vscale x 8 x i1> %v) #0 {
; CHECK-LABEL: zext.sub.8xi8:
; CHECK: // %bb.0:
; CHECK-NEXT: mov z1.h, #-1 // =0xffffffffffffffff
; CHECK-NEXT: add z0.h, p0/m, z0.h, z1.h
; CHECK-NEXT: ret
  %extend = zext <vscale x 8 x i1> %v to <vscale x 8 x i8>
  %result = sub <vscale x 8 x i8> %a, %extend
  ret <vscale x 8 x i8> %result
}
; a - zext(v) with illegal 4 x i16: promoted to .s lanes, predicated add of splat(-1).
define <vscale x 4 x i16> @zext.sub.4xi16(<vscale x 4 x i16> %a, <vscale x 4 x i1> %v) #0 {
; CHECK-LABEL: zext.sub.4xi16:
; CHECK: // %bb.0:
; CHECK-NEXT: mov z1.s, #-1 // =0xffffffffffffffff
; CHECK-NEXT: add z0.s, p0/m, z0.s, z1.s
; CHECK-NEXT: ret
  %extend = zext <vscale x 4 x i1> %v to <vscale x 4 x i16>
  %result = sub <vscale x 4 x i16> %a, %extend
  ret <vscale x 4 x i16> %result
}
; a - zext(v) with illegal 2 x i32: promoted to .d lanes, predicated add of splat(-1).
define <vscale x 2 x i32> @zext.sub.2xi32(<vscale x 2 x i32> %a, <vscale x 2 x i1> %v) #0 {
; CHECK-LABEL: zext.sub.2xi32:
; CHECK: // %bb.0:
; CHECK-NEXT: mov z1.d, #-1 // =0xffffffffffffffff
; CHECK-NEXT: add z0.d, p0/m, z0.d, z1.d
; CHECK-NEXT: ret
  %extend = zext <vscale x 2 x i1> %v to <vscale x 2 x i32>
  %result = sub <vscale x 2 x i32> %a, %extend
  ret <vscale x 2 x i32> %result
}
; a - zext(v) with legal 16 x i8: single predicated .b add of splat(-1).
define <vscale x 16 x i8> @zext.sub.16xi8(<vscale x 16 x i8> %a, <vscale x 16 x i1> %v) #0 {
; CHECK-LABEL: zext.sub.16xi8:
; CHECK: // %bb.0:
; CHECK-NEXT: mov z1.b, #-1 // =0xffffffffffffffff
; CHECK-NEXT: add z0.b, p0/m, z0.b, z1.b
; CHECK-NEXT: ret
  %extend = zext <vscale x 16 x i1> %v to <vscale x 16 x i8>
  %result = sub <vscale x 16 x i8> %a, %extend
  ret <vscale x 16 x i8> %result
}
; a - zext(v) with legal 8 x i16: single predicated .h add of splat(-1).
define <vscale x 8 x i16> @zext.sub.8xi16(<vscale x 8 x i16> %a, <vscale x 8 x i1> %v) #0 {
; CHECK-LABEL: zext.sub.8xi16:
; CHECK: // %bb.0:
; CHECK-NEXT: mov z1.h, #-1 // =0xffffffffffffffff
; CHECK-NEXT: add z0.h, p0/m, z0.h, z1.h
; CHECK-NEXT: ret
  %extend = zext <vscale x 8 x i1> %v to <vscale x 8 x i16>
  %result = sub <vscale x 8 x i16> %a, %extend
  ret <vscale x 8 x i16> %result
}
; a - zext(v) with legal 4 x i32: single predicated .s add of splat(-1).
define <vscale x 4 x i32> @zext.sub.4xi32(<vscale x 4 x i32> %a, <vscale x 4 x i1> %v) #0 {
; CHECK-LABEL: zext.sub.4xi32:
; CHECK: // %bb.0:
; CHECK-NEXT: mov z1.s, #-1 // =0xffffffffffffffff
; CHECK-NEXT: add z0.s, p0/m, z0.s, z1.s
; CHECK-NEXT: ret
  %extend = zext <vscale x 4 x i1> %v to <vscale x 4 x i32>
  %result = sub <vscale x 4 x i32> %a, %extend
  ret <vscale x 4 x i32> %result
}
; a - zext(v) with legal 2 x i64: single predicated .d add of splat(-1).
define <vscale x 2 x i64> @zext.sub.2xi64(<vscale x 2 x i64> %a, <vscale x 2 x i1> %v) #0 {
; CHECK-LABEL: zext.sub.2xi64:
; CHECK: // %bb.0:
; CHECK-NEXT: mov z1.d, #-1 // =0xffffffffffffffff
; CHECK-NEXT: add z0.d, p0/m, z0.d, z1.d
; CHECK-NEXT: ret
  %extend = zext <vscale x 2 x i1> %v to <vscale x 2 x i64>
  %result = sub <vscale x 2 x i64> %a, %extend
  ret <vscale x 2 x i64> %result
}
; a - zext(v) split across two Z registers: predicate halves via punpklo/punpkhi,
; each half lowered as a predicated add of splat(-1).
define <vscale x 8 x i32> @zext.sub.8xi32(<vscale x 8 x i32> %a, <vscale x 8 x i1> %v) #0 {
; CHECK-LABEL: zext.sub.8xi32:
; CHECK: // %bb.0:
; CHECK-NEXT: punpklo p1.h, p0.b
; CHECK-NEXT: mov z2.s, #-1 // =0xffffffffffffffff
; CHECK-NEXT: punpkhi p0.h, p0.b
; CHECK-NEXT: add z0.s, p1/m, z0.s, z2.s
; CHECK-NEXT: add z1.s, p0/m, z1.s, z2.s
; CHECK-NEXT: ret
  %extend = zext <vscale x 8 x i1> %v to <vscale x 8 x i32>
  %result = sub <vscale x 8 x i32> %a, %extend
  ret <vscale x 8 x i32> %result
}
; a - zext(v) split across four Z registers: two levels of predicate unpacking,
; then a predicated add of splat(-1) per quarter.
define <vscale x 16 x i32> @zext.sub.16xi32(<vscale x 16 x i32> %a, <vscale x 16 x i1> %v) #0 {
; CHECK-LABEL: zext.sub.16xi32:
; CHECK: // %bb.0:
; CHECK-NEXT: punpklo p1.h, p0.b
; CHECK-NEXT: mov z4.s, #-1 // =0xffffffffffffffff
; CHECK-NEXT: punpkhi p0.h, p0.b
; CHECK-NEXT: punpklo p2.h, p1.b
; CHECK-NEXT: punpkhi p1.h, p1.b
; CHECK-NEXT: punpklo p3.h, p0.b
; CHECK-NEXT: punpkhi p0.h, p0.b
; CHECK-NEXT: add z0.s, p2/m, z0.s, z4.s
; CHECK-NEXT: add z1.s, p1/m, z1.s, z4.s
; CHECK-NEXT: add z2.s, p3/m, z2.s, z4.s
; CHECK-NEXT: add z3.s, p0/m, z3.s, z4.s
; CHECK-NEXT: ret
  %extend = zext <vscale x 16 x i1> %v to <vscale x 16 x i32>
  %result = sub <vscale x 16 x i32> %a, %extend
  ret <vscale x 16 x i32> %result
}
; a + sext(v): sext(i1) is -1 where active, so this lowers to a predicated add
; of splat(-1). i8 in 8 lanes is promoted to .h elements.
define <vscale x 8 x i8> @sext.add.8xi8(<vscale x 8 x i8> %a, <vscale x 8 x i1> %v) #0 {
; CHECK-LABEL: sext.add.8xi8:
; CHECK: // %bb.0:
; CHECK-NEXT: mov z1.h, #-1 // =0xffffffffffffffff
; CHECK-NEXT: add z0.h, p0/m, z0.h, z1.h
; CHECK-NEXT: ret
  %extend = sext <vscale x 8 x i1> %v to <vscale x 8 x i8>
  %result = add <vscale x 8 x i8> %a, %extend
  ret <vscale x 8 x i8> %result
}
; a + sext(v) with illegal 4 x i16: promoted to .s lanes, predicated add of splat(-1).
define <vscale x 4 x i16> @sext.add.4xi16(<vscale x 4 x i16> %a, <vscale x 4 x i1> %v) #0 {
; CHECK-LABEL: sext.add.4xi16:
; CHECK: // %bb.0:
; CHECK-NEXT: mov z1.s, #-1 // =0xffffffffffffffff
; CHECK-NEXT: add z0.s, p0/m, z0.s, z1.s
; CHECK-NEXT: ret
  %extend = sext <vscale x 4 x i1> %v to <vscale x 4 x i16>
  %result = add <vscale x 4 x i16> %a, %extend
  ret <vscale x 4 x i16> %result
}
; a + sext(v) with illegal 2 x i32: promoted to .d lanes, predicated add of splat(-1).
define <vscale x 2 x i32> @sext.add.2xi32(<vscale x 2 x i32> %a, <vscale x 2 x i1> %v) #0 {
; CHECK-LABEL: sext.add.2xi32:
; CHECK: // %bb.0:
; CHECK-NEXT: mov z1.d, #-1 // =0xffffffffffffffff
; CHECK-NEXT: add z0.d, p0/m, z0.d, z1.d
; CHECK-NEXT: ret
  %extend = sext <vscale x 2 x i1> %v to <vscale x 2 x i32>
  %result = add <vscale x 2 x i32> %a, %extend
  ret <vscale x 2 x i32> %result
}
; a + sext(v) with legal 16 x i8: single predicated .b add of splat(-1).
define <vscale x 16 x i8> @sext.add.16xi8(<vscale x 16 x i8> %a, <vscale x 16 x i1> %v) #0 {
; CHECK-LABEL: sext.add.16xi8:
; CHECK: // %bb.0:
; CHECK-NEXT: mov z1.b, #-1 // =0xffffffffffffffff
; CHECK-NEXT: add z0.b, p0/m, z0.b, z1.b
; CHECK-NEXT: ret
  %extend = sext <vscale x 16 x i1> %v to <vscale x 16 x i8>
  %result = add <vscale x 16 x i8> %a, %extend
  ret <vscale x 16 x i8> %result
}
; a + sext(v) with legal 8 x i16: single predicated .h add of splat(-1).
define <vscale x 8 x i16> @sext.add.8xi16(<vscale x 8 x i16> %a, <vscale x 8 x i1> %v) #0 {
; CHECK-LABEL: sext.add.8xi16:
; CHECK: // %bb.0:
; CHECK-NEXT: mov z1.h, #-1 // =0xffffffffffffffff
; CHECK-NEXT: add z0.h, p0/m, z0.h, z1.h
; CHECK-NEXT: ret
  %extend = sext <vscale x 8 x i1> %v to <vscale x 8 x i16>
  %result = add <vscale x 8 x i16> %a, %extend
  ret <vscale x 8 x i16> %result
}
; a + sext(v) with legal 4 x i32: single predicated .s add of splat(-1).
define <vscale x 4 x i32> @sext.add.4xi32(<vscale x 4 x i32> %a, <vscale x 4 x i1> %v) #0 {
; CHECK-LABEL: sext.add.4xi32:
; CHECK: // %bb.0:
; CHECK-NEXT: mov z1.s, #-1 // =0xffffffffffffffff
; CHECK-NEXT: add z0.s, p0/m, z0.s, z1.s
; CHECK-NEXT: ret
  %extend = sext <vscale x 4 x i1> %v to <vscale x 4 x i32>
  %result = add <vscale x 4 x i32> %a, %extend
  ret <vscale x 4 x i32> %result
}
; a + sext(v) with legal 2 x i64: single predicated .d add of splat(-1).
define <vscale x 2 x i64> @sext.add.2xi64(<vscale x 2 x i64> %a, <vscale x 2 x i1> %v) #0 {
; CHECK-LABEL: sext.add.2xi64:
; CHECK: // %bb.0:
; CHECK-NEXT: mov z1.d, #-1 // =0xffffffffffffffff
; CHECK-NEXT: add z0.d, p0/m, z0.d, z1.d
; CHECK-NEXT: ret
  %extend = sext <vscale x 2 x i1> %v to <vscale x 2 x i64>
  %result = add <vscale x 2 x i64> %a, %extend
  ret <vscale x 2 x i64> %result
}
; a + sext(v) split across two Z registers: predicate halves via punpklo/punpkhi,
; each half lowered as a predicated add of splat(-1).
define <vscale x 8 x i32> @sext.add.8xi32(<vscale x 8 x i32> %a, <vscale x 8 x i1> %v) #0 {
; CHECK-LABEL: sext.add.8xi32:
; CHECK: // %bb.0:
; CHECK-NEXT: punpkhi p1.h, p0.b
; CHECK-NEXT: mov z2.s, #-1 // =0xffffffffffffffff
; CHECK-NEXT: punpklo p0.h, p0.b
; CHECK-NEXT: add z1.s, p1/m, z1.s, z2.s
; CHECK-NEXT: add z0.s, p0/m, z0.s, z2.s
; CHECK-NEXT: ret
  %extend = sext <vscale x 8 x i1> %v to <vscale x 8 x i32>
  %result = add <vscale x 8 x i32> %a, %extend
  ret <vscale x 8 x i32> %result
}
; a + sext(v) split across four Z registers: two levels of predicate unpacking,
; then a predicated add of splat(-1) per quarter.
define <vscale x 16 x i32> @sext.add.16xi32(<vscale x 16 x i32> %a, <vscale x 16 x i1> %v) #0 {
; CHECK-LABEL: sext.add.16xi32:
; CHECK: // %bb.0:
; CHECK-NEXT: punpkhi p1.h, p0.b
; CHECK-NEXT: mov z4.s, #-1 // =0xffffffffffffffff
; CHECK-NEXT: punpklo p0.h, p0.b
; CHECK-NEXT: punpkhi p2.h, p1.b
; CHECK-NEXT: punpklo p1.h, p1.b
; CHECK-NEXT: punpklo p3.h, p0.b
; CHECK-NEXT: punpkhi p0.h, p0.b
; CHECK-NEXT: add z3.s, p2/m, z3.s, z4.s
; CHECK-NEXT: add z2.s, p1/m, z2.s, z4.s
; CHECK-NEXT: add z0.s, p3/m, z0.s, z4.s
; CHECK-NEXT: add z1.s, p0/m, z1.s, z4.s
; CHECK-NEXT: ret
  %extend = sext <vscale x 16 x i1> %v to <vscale x 16 x i32>
  %result = add <vscale x 16 x i32> %a, %extend
  ret <vscale x 16 x i32> %result
}
; a - sext(v): lowered as a predicated sub of splat(-1) (adds 1 where active).
; i8 in 8 lanes is promoted to .h elements.
define <vscale x 8 x i8> @sext.sub.8xi8(<vscale x 8 x i8> %a, <vscale x 8 x i1> %v) #0 {
; CHECK-LABEL: sext.sub.8xi8:
; CHECK: // %bb.0:
; CHECK-NEXT: mov z1.h, #-1 // =0xffffffffffffffff
; CHECK-NEXT: sub z0.h, p0/m, z0.h, z1.h
; CHECK-NEXT: ret
  %extend = sext <vscale x 8 x i1> %v to <vscale x 8 x i8>
  %result = sub <vscale x 8 x i8> %a, %extend
  ret <vscale x 8 x i8> %result
}
; a - sext(v) with illegal 4 x i16: promoted to .s lanes, predicated sub of splat(-1).
define <vscale x 4 x i16> @sext.sub.4xi16(<vscale x 4 x i16> %a, <vscale x 4 x i1> %v) #0 {
; CHECK-LABEL: sext.sub.4xi16:
; CHECK: // %bb.0:
; CHECK-NEXT: mov z1.s, #-1 // =0xffffffffffffffff
; CHECK-NEXT: sub z0.s, p0/m, z0.s, z1.s
; CHECK-NEXT: ret
  %extend = sext <vscale x 4 x i1> %v to <vscale x 4 x i16>
  %result = sub <vscale x 4 x i16> %a, %extend
  ret <vscale x 4 x i16> %result
}
; a - sext(v) with illegal 2 x i32: promoted to .d lanes, predicated sub of splat(-1).
define <vscale x 2 x i32> @sext.sub.2xi32(<vscale x 2 x i32> %a, <vscale x 2 x i1> %v) #0 {
; CHECK-LABEL: sext.sub.2xi32:
; CHECK: // %bb.0:
; CHECK-NEXT: mov z1.d, #-1 // =0xffffffffffffffff
; CHECK-NEXT: sub z0.d, p0/m, z0.d, z1.d
; CHECK-NEXT: ret
  %extend = sext <vscale x 2 x i1> %v to <vscale x 2 x i32>
  %result = sub <vscale x 2 x i32> %a, %extend
  ret <vscale x 2 x i32> %result
}
; a - sext(v) with legal 16 x i8: single predicated .b sub of splat(-1).
define <vscale x 16 x i8> @sext.sub.16xi8(<vscale x 16 x i8> %a, <vscale x 16 x i1> %v) #0 {
; CHECK-LABEL: sext.sub.16xi8:
; CHECK: // %bb.0:
; CHECK-NEXT: mov z1.b, #-1 // =0xffffffffffffffff
; CHECK-NEXT: sub z0.b, p0/m, z0.b, z1.b
; CHECK-NEXT: ret
  %extend = sext <vscale x 16 x i1> %v to <vscale x 16 x i8>
  %result = sub <vscale x 16 x i8> %a, %extend
  ret <vscale x 16 x i8> %result
}
; a - sext(v) with legal 8 x i16: single predicated .h sub of splat(-1).
define <vscale x 8 x i16> @sext.sub.8xi16(<vscale x 8 x i16> %a, <vscale x 8 x i1> %v) #0 {
; CHECK-LABEL: sext.sub.8xi16:
; CHECK: // %bb.0:
; CHECK-NEXT: mov z1.h, #-1 // =0xffffffffffffffff
; CHECK-NEXT: sub z0.h, p0/m, z0.h, z1.h
; CHECK-NEXT: ret
  %extend = sext <vscale x 8 x i1> %v to <vscale x 8 x i16>
  %result = sub <vscale x 8 x i16> %a, %extend
  ret <vscale x 8 x i16> %result
}
; a - sext(v) with legal 4 x i32: single predicated .s sub of splat(-1).
define <vscale x 4 x i32> @sext.sub.4xi32(<vscale x 4 x i32> %a, <vscale x 4 x i1> %v) #0 {
; CHECK-LABEL: sext.sub.4xi32:
; CHECK: // %bb.0:
; CHECK-NEXT: mov z1.s, #-1 // =0xffffffffffffffff
; CHECK-NEXT: sub z0.s, p0/m, z0.s, z1.s
; CHECK-NEXT: ret
  %extend = sext <vscale x 4 x i1> %v to <vscale x 4 x i32>
  %result = sub <vscale x 4 x i32> %a, %extend
  ret <vscale x 4 x i32> %result
}
; a - sext(v) with legal 2 x i64: single predicated .d sub of splat(-1).
define <vscale x 2 x i64> @sext.sub.2xi64(<vscale x 2 x i64> %a, <vscale x 2 x i1> %v) #0 {
; CHECK-LABEL: sext.sub.2xi64:
; CHECK: // %bb.0:
; CHECK-NEXT: mov z1.d, #-1 // =0xffffffffffffffff
; CHECK-NEXT: sub z0.d, p0/m, z0.d, z1.d
; CHECK-NEXT: ret
  %extend = sext <vscale x 2 x i1> %v to <vscale x 2 x i64>
  %result = sub <vscale x 2 x i64> %a, %extend
  ret <vscale x 2 x i64> %result
}
; a - sext(v) split across two Z registers: predicate halves via punpklo/punpkhi,
; each half lowered as a predicated sub of splat(-1).
define <vscale x 8 x i32> @sext.sub.8xi32(<vscale x 8 x i32> %a, <vscale x 8 x i1> %v) #0 {
; CHECK-LABEL: sext.sub.8xi32:
; CHECK: // %bb.0:
; CHECK-NEXT: punpkhi p1.h, p0.b
; CHECK-NEXT: mov z2.s, #-1 // =0xffffffffffffffff
; CHECK-NEXT: punpklo p0.h, p0.b
; CHECK-NEXT: sub z1.s, p1/m, z1.s, z2.s
; CHECK-NEXT: sub z0.s, p0/m, z0.s, z2.s
; CHECK-NEXT: ret
  %extend = sext <vscale x 8 x i1> %v to <vscale x 8 x i32>
  %result = sub <vscale x 8 x i32> %a, %extend
  ret <vscale x 8 x i32> %result
}
; a - sext(v) split across four Z registers: two levels of predicate unpacking,
; then a predicated sub of splat(-1) per quarter.
define <vscale x 16 x i32> @sext.sub.16xi32(<vscale x 16 x i32> %a, <vscale x 16 x i1> %v) #0 {
; CHECK-LABEL: sext.sub.16xi32:
; CHECK: // %bb.0:
; CHECK-NEXT: punpkhi p1.h, p0.b
; CHECK-NEXT: mov z4.s, #-1 // =0xffffffffffffffff
; CHECK-NEXT: punpklo p0.h, p0.b
; CHECK-NEXT: punpkhi p2.h, p1.b
; CHECK-NEXT: punpklo p1.h, p1.b
; CHECK-NEXT: punpklo p3.h, p0.b
; CHECK-NEXT: punpkhi p0.h, p0.b
; CHECK-NEXT: sub z3.s, p2/m, z3.s, z4.s
; CHECK-NEXT: sub z2.s, p1/m, z2.s, z4.s
; CHECK-NEXT: sub z0.s, p3/m, z0.s, z4.s
; CHECK-NEXT: sub z1.s, p0/m, z1.s, z4.s
; CHECK-NEXT: ret
  %extend = sext <vscale x 16 x i1> %v to <vscale x 16 x i32>
  %result = sub <vscale x 16 x i32> %a, %extend
  ret <vscale x 16 x i32> %result
}
; All functions above are tagged #0 so SVE is enabled and scalable vector types are legal.
attributes #0 = { "target-features"="+sve" }