; Reapplied after an earlier revert: the revert was prompted by a hang on a
; Hexagon bot, which turned out to be a Hexagon backend bug in how
; splat_vectors are legalized (used there for fixed-length vectors). The
; implicit-truncate support was removed to hide that bug and unblock the rest
; of the change. This is the SelectionDAG equivalent of D136470 (alternate to
; D128159): track a single demanded lane for scalable vectors, standing in for
; an unknown number of runtime lanes, with SPLAT_VECTOR as the base case.
; STEP_VECTOR is left to a follow-up patch.
; Differential Revision: https://reviews.llvm.org/D137140
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve -verify-machineinstrs < %s | FileCheck %s
declare { <vscale x 2 x i8>, <vscale x 2 x i1> } @llvm.umul.with.overflow.nxv2i8(<vscale x 2 x i8>, <vscale x 2 x i8>)

; umulo on <vscale x 2 x i8>: lanes whose i8 multiply overflowed are zeroed by
; the select. i8 is promoted, so overflow is detected by shifting the wide
; product right by 8 and comparing against zero.
define <vscale x 2 x i8> @umulo_nxv2i8(<vscale x 2 x i8> %x, <vscale x 2 x i8> %y) {
; CHECK-LABEL: umulo_nxv2i8:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: and z1.d, z1.d, #0xff
; CHECK-NEXT: and z0.d, z0.d, #0xff
; CHECK-NEXT: mul z0.d, p0/m, z0.d, z1.d
; CHECK-NEXT: lsr z1.d, z0.d, #8
; CHECK-NEXT: cmpne p0.d, p0/z, z1.d, #0
; CHECK-NEXT: mov z0.d, p0/m, #0 // =0x0
; CHECK-NEXT: ret
  %a = call { <vscale x 2 x i8>, <vscale x 2 x i1> } @llvm.umul.with.overflow.nxv2i8(<vscale x 2 x i8> %x, <vscale x 2 x i8> %y)
  %b = extractvalue { <vscale x 2 x i8>, <vscale x 2 x i1> } %a, 0
  %c = extractvalue { <vscale x 2 x i8>, <vscale x 2 x i1> } %a, 1
  %d = select <vscale x 2 x i1> %c, <vscale x 2 x i8> zeroinitializer, <vscale x 2 x i8> %b
  ret <vscale x 2 x i8> %d
}
|
|
|
|
declare { <vscale x 4 x i8>, <vscale x 4 x i1> } @llvm.umul.with.overflow.nxv4i8(<vscale x 4 x i8>, <vscale x 4 x i8>)

; umulo on <vscale x 4 x i8>: overflowed lanes are zeroed; i8 is promoted to
; the 32-bit container, overflow detected via lsr #8 of the wide product.
define <vscale x 4 x i8> @umulo_nxv4i8(<vscale x 4 x i8> %x, <vscale x 4 x i8> %y) {
; CHECK-LABEL: umulo_nxv4i8:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s
; CHECK-NEXT: and z1.s, z1.s, #0xff
; CHECK-NEXT: and z0.s, z0.s, #0xff
; CHECK-NEXT: mul z0.s, p0/m, z0.s, z1.s
; CHECK-NEXT: lsr z1.s, z0.s, #8
; CHECK-NEXT: cmpne p0.s, p0/z, z1.s, #0
; CHECK-NEXT: mov z0.s, p0/m, #0 // =0x0
; CHECK-NEXT: ret
  %a = call { <vscale x 4 x i8>, <vscale x 4 x i1> } @llvm.umul.with.overflow.nxv4i8(<vscale x 4 x i8> %x, <vscale x 4 x i8> %y)
  %b = extractvalue { <vscale x 4 x i8>, <vscale x 4 x i1> } %a, 0
  %c = extractvalue { <vscale x 4 x i8>, <vscale x 4 x i1> } %a, 1
  %d = select <vscale x 4 x i1> %c, <vscale x 4 x i8> zeroinitializer, <vscale x 4 x i8> %b
  ret <vscale x 4 x i8> %d
}
|
|
|
|
declare { <vscale x 8 x i8>, <vscale x 8 x i1> } @llvm.umul.with.overflow.nxv8i8(<vscale x 8 x i8>, <vscale x 8 x i8>)

; umulo on <vscale x 8 x i8> in a 16-bit container: overflow is the OR of the
; umulh result being nonzero and the low product's high byte being nonzero.
define <vscale x 8 x i8> @umulo_nxv8i8(<vscale x 8 x i8> %x, <vscale x 8 x i8> %y) {
; CHECK-LABEL: umulo_nxv8i8:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.h
; CHECK-NEXT: and z1.h, z1.h, #0xff
; CHECK-NEXT: and z0.h, z0.h, #0xff
; CHECK-NEXT: movprfx z2, z0
; CHECK-NEXT: mul z2.h, p0/m, z2.h, z1.h
; CHECK-NEXT: umulh z0.h, p0/m, z0.h, z1.h
; CHECK-NEXT: lsr z1.h, z2.h, #8
; CHECK-NEXT: cmpne p1.h, p0/z, z0.h, #0
; CHECK-NEXT: cmpne p0.h, p0/z, z1.h, #0
; CHECK-NEXT: sel p0.b, p0, p0.b, p1.b
; CHECK-NEXT: mov z2.h, p0/m, #0 // =0x0
; CHECK-NEXT: mov z0.d, z2.d
; CHECK-NEXT: ret
  %a = call { <vscale x 8 x i8>, <vscale x 8 x i1> } @llvm.umul.with.overflow.nxv8i8(<vscale x 8 x i8> %x, <vscale x 8 x i8> %y)
  %b = extractvalue { <vscale x 8 x i8>, <vscale x 8 x i1> } %a, 0
  %c = extractvalue { <vscale x 8 x i8>, <vscale x 8 x i1> } %a, 1
  %d = select <vscale x 8 x i1> %c, <vscale x 8 x i8> zeroinitializer, <vscale x 8 x i8> %b
  ret <vscale x 8 x i8> %d
}
|
|
|
|
declare { <vscale x 16 x i8>, <vscale x 16 x i1> } @llvm.umul.with.overflow.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i8>)

; umulo on <vscale x 16 x i8> (legal element type): overflow iff umulh != 0.
define <vscale x 16 x i8> @umulo_nxv16i8(<vscale x 16 x i8> %x, <vscale x 16 x i8> %y) {
; CHECK-LABEL: umulo_nxv16i8:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.b
; CHECK-NEXT: movprfx z2, z0
; CHECK-NEXT: umulh z2.b, p0/m, z2.b, z1.b
; CHECK-NEXT: mul z0.b, p0/m, z0.b, z1.b
; CHECK-NEXT: cmpne p0.b, p0/z, z2.b, #0
; CHECK-NEXT: mov z0.b, p0/m, #0 // =0x0
; CHECK-NEXT: ret
  %a = call { <vscale x 16 x i8>, <vscale x 16 x i1> } @llvm.umul.with.overflow.nxv16i8(<vscale x 16 x i8> %x, <vscale x 16 x i8> %y)
  %b = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i1> } %a, 0
  %c = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i1> } %a, 1
  %d = select <vscale x 16 x i1> %c, <vscale x 16 x i8> zeroinitializer, <vscale x 16 x i8> %b
  ret <vscale x 16 x i8> %d
}
|
|
|
|
declare { <vscale x 32 x i8>, <vscale x 32 x i1> } @llvm.umul.with.overflow.nxv32i8(<vscale x 32 x i8>, <vscale x 32 x i8>)

; umulo on <vscale x 32 x i8>: split into two legal nxv16i8 halves, each
; using the umulh != 0 overflow check.
define <vscale x 32 x i8> @umulo_nxv32i8(<vscale x 32 x i8> %x, <vscale x 32 x i8> %y) {
; CHECK-LABEL: umulo_nxv32i8:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.b
; CHECK-NEXT: movprfx z4, z1
; CHECK-NEXT: mul z4.b, p0/m, z4.b, z3.b
; CHECK-NEXT: umulh z1.b, p0/m, z1.b, z3.b
; CHECK-NEXT: movprfx z3, z0
; CHECK-NEXT: umulh z3.b, p0/m, z3.b, z2.b
; CHECK-NEXT: cmpne p1.b, p0/z, z1.b, #0
; CHECK-NEXT: mul z0.b, p0/m, z0.b, z2.b
; CHECK-NEXT: cmpne p0.b, p0/z, z3.b, #0
; CHECK-NEXT: mov z4.b, p1/m, #0 // =0x0
; CHECK-NEXT: mov z0.b, p0/m, #0 // =0x0
; CHECK-NEXT: mov z1.d, z4.d
; CHECK-NEXT: ret
  %a = call { <vscale x 32 x i8>, <vscale x 32 x i1> } @llvm.umul.with.overflow.nxv32i8(<vscale x 32 x i8> %x, <vscale x 32 x i8> %y)
  %b = extractvalue { <vscale x 32 x i8>, <vscale x 32 x i1> } %a, 0
  %c = extractvalue { <vscale x 32 x i8>, <vscale x 32 x i1> } %a, 1
  %d = select <vscale x 32 x i1> %c, <vscale x 32 x i8> zeroinitializer, <vscale x 32 x i8> %b
  ret <vscale x 32 x i8> %d
}
|
|
|
|
declare { <vscale x 64 x i8>, <vscale x 64 x i1> } @llvm.umul.with.overflow.nxv64i8(<vscale x 64 x i8>, <vscale x 64 x i8>)

; umulo on <vscale x 64 x i8>: split into four legal nxv16i8 quarters.
define <vscale x 64 x i8> @umulo_nxv64i8(<vscale x 64 x i8> %x, <vscale x 64 x i8> %y) {
; CHECK-LABEL: umulo_nxv64i8:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.b
; CHECK-NEXT: movprfx z24, z3
; CHECK-NEXT: mul z24.b, p0/m, z24.b, z7.b
; CHECK-NEXT: umulh z3.b, p0/m, z3.b, z7.b
; CHECK-NEXT: cmpne p1.b, p0/z, z3.b, #0
; CHECK-NEXT: movprfx z3, z2
; CHECK-NEXT: umulh z3.b, p0/m, z3.b, z6.b
; CHECK-NEXT: cmpne p2.b, p0/z, z3.b, #0
; CHECK-NEXT: movprfx z3, z1
; CHECK-NEXT: mul z3.b, p0/m, z3.b, z5.b
; CHECK-NEXT: umulh z1.b, p0/m, z1.b, z5.b
; CHECK-NEXT: mul z2.b, p0/m, z2.b, z6.b
; CHECK-NEXT: cmpne p3.b, p0/z, z1.b, #0
; CHECK-NEXT: movprfx z1, z0
; CHECK-NEXT: umulh z1.b, p0/m, z1.b, z4.b
; CHECK-NEXT: mul z0.b, p0/m, z0.b, z4.b
; CHECK-NEXT: cmpne p0.b, p0/z, z1.b, #0
; CHECK-NEXT: mov z3.b, p3/m, #0 // =0x0
; CHECK-NEXT: mov z24.b, p1/m, #0 // =0x0
; CHECK-NEXT: mov z0.b, p0/m, #0 // =0x0
; CHECK-NEXT: mov z2.b, p2/m, #0 // =0x0
; CHECK-NEXT: mov z1.d, z3.d
; CHECK-NEXT: mov z3.d, z24.d
; CHECK-NEXT: ret
  %a = call { <vscale x 64 x i8>, <vscale x 64 x i1> } @llvm.umul.with.overflow.nxv64i8(<vscale x 64 x i8> %x, <vscale x 64 x i8> %y)
  %b = extractvalue { <vscale x 64 x i8>, <vscale x 64 x i1> } %a, 0
  %c = extractvalue { <vscale x 64 x i8>, <vscale x 64 x i1> } %a, 1
  %d = select <vscale x 64 x i1> %c, <vscale x 64 x i8> zeroinitializer, <vscale x 64 x i8> %b
  ret <vscale x 64 x i8> %d
}
|
|
|
|
declare { <vscale x 2 x i16>, <vscale x 2 x i1> } @llvm.umul.with.overflow.nxv2i16(<vscale x 2 x i16>, <vscale x 2 x i16>)

; umulo on <vscale x 2 x i16>: promoted to the 64-bit container; overflow
; detected via lsr #16 of the wide product.
define <vscale x 2 x i16> @umulo_nxv2i16(<vscale x 2 x i16> %x, <vscale x 2 x i16> %y) {
; CHECK-LABEL: umulo_nxv2i16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: and z1.d, z1.d, #0xffff
; CHECK-NEXT: and z0.d, z0.d, #0xffff
; CHECK-NEXT: mul z0.d, p0/m, z0.d, z1.d
; CHECK-NEXT: lsr z1.d, z0.d, #16
; CHECK-NEXT: cmpne p0.d, p0/z, z1.d, #0
; CHECK-NEXT: mov z0.d, p0/m, #0 // =0x0
; CHECK-NEXT: ret
  %a = call { <vscale x 2 x i16>, <vscale x 2 x i1> } @llvm.umul.with.overflow.nxv2i16(<vscale x 2 x i16> %x, <vscale x 2 x i16> %y)
  %b = extractvalue { <vscale x 2 x i16>, <vscale x 2 x i1> } %a, 0
  %c = extractvalue { <vscale x 2 x i16>, <vscale x 2 x i1> } %a, 1
  %d = select <vscale x 2 x i1> %c, <vscale x 2 x i16> zeroinitializer, <vscale x 2 x i16> %b
  ret <vscale x 2 x i16> %d
}
|
|
|
|
declare { <vscale x 4 x i16>, <vscale x 4 x i1> } @llvm.umul.with.overflow.nxv4i16(<vscale x 4 x i16>, <vscale x 4 x i16>)

; umulo on <vscale x 4 x i16>: promoted to the 32-bit container; overflow
; detected via lsr #16 of the wide product.
define <vscale x 4 x i16> @umulo_nxv4i16(<vscale x 4 x i16> %x, <vscale x 4 x i16> %y) {
; CHECK-LABEL: umulo_nxv4i16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s
; CHECK-NEXT: and z1.s, z1.s, #0xffff
; CHECK-NEXT: and z0.s, z0.s, #0xffff
; CHECK-NEXT: mul z0.s, p0/m, z0.s, z1.s
; CHECK-NEXT: lsr z1.s, z0.s, #16
; CHECK-NEXT: cmpne p0.s, p0/z, z1.s, #0
; CHECK-NEXT: mov z0.s, p0/m, #0 // =0x0
; CHECK-NEXT: ret
  %a = call { <vscale x 4 x i16>, <vscale x 4 x i1> } @llvm.umul.with.overflow.nxv4i16(<vscale x 4 x i16> %x, <vscale x 4 x i16> %y)
  %b = extractvalue { <vscale x 4 x i16>, <vscale x 4 x i1> } %a, 0
  %c = extractvalue { <vscale x 4 x i16>, <vscale x 4 x i1> } %a, 1
  %d = select <vscale x 4 x i1> %c, <vscale x 4 x i16> zeroinitializer, <vscale x 4 x i16> %b
  ret <vscale x 4 x i16> %d
}
|
|
|
|
declare { <vscale x 8 x i16>, <vscale x 8 x i1> } @llvm.umul.with.overflow.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>)

; umulo on <vscale x 8 x i16> (legal element type): overflow iff umulh != 0.
define <vscale x 8 x i16> @umulo_nxv8i16(<vscale x 8 x i16> %x, <vscale x 8 x i16> %y) {
; CHECK-LABEL: umulo_nxv8i16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.h
; CHECK-NEXT: movprfx z2, z0
; CHECK-NEXT: umulh z2.h, p0/m, z2.h, z1.h
; CHECK-NEXT: mul z0.h, p0/m, z0.h, z1.h
; CHECK-NEXT: cmpne p0.h, p0/z, z2.h, #0
; CHECK-NEXT: mov z0.h, p0/m, #0 // =0x0
; CHECK-NEXT: ret
  %a = call { <vscale x 8 x i16>, <vscale x 8 x i1> } @llvm.umul.with.overflow.nxv8i16(<vscale x 8 x i16> %x, <vscale x 8 x i16> %y)
  %b = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i1> } %a, 0
  %c = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i1> } %a, 1
  %d = select <vscale x 8 x i1> %c, <vscale x 8 x i16> zeroinitializer, <vscale x 8 x i16> %b
  ret <vscale x 8 x i16> %d
}
|
|
|
|
declare { <vscale x 16 x i16>, <vscale x 16 x i1> } @llvm.umul.with.overflow.nxv16i16(<vscale x 16 x i16>, <vscale x 16 x i16>)

; umulo on <vscale x 16 x i16>: split into two legal nxv8i16 halves.
define <vscale x 16 x i16> @umulo_nxv16i16(<vscale x 16 x i16> %x, <vscale x 16 x i16> %y) {
; CHECK-LABEL: umulo_nxv16i16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.h
; CHECK-NEXT: movprfx z4, z1
; CHECK-NEXT: mul z4.h, p0/m, z4.h, z3.h
; CHECK-NEXT: umulh z1.h, p0/m, z1.h, z3.h
; CHECK-NEXT: movprfx z3, z0
; CHECK-NEXT: umulh z3.h, p0/m, z3.h, z2.h
; CHECK-NEXT: cmpne p1.h, p0/z, z1.h, #0
; CHECK-NEXT: mul z0.h, p0/m, z0.h, z2.h
; CHECK-NEXT: cmpne p0.h, p0/z, z3.h, #0
; CHECK-NEXT: mov z4.h, p1/m, #0 // =0x0
; CHECK-NEXT: mov z0.h, p0/m, #0 // =0x0
; CHECK-NEXT: mov z1.d, z4.d
; CHECK-NEXT: ret
  %a = call { <vscale x 16 x i16>, <vscale x 16 x i1> } @llvm.umul.with.overflow.nxv16i16(<vscale x 16 x i16> %x, <vscale x 16 x i16> %y)
  %b = extractvalue { <vscale x 16 x i16>, <vscale x 16 x i1> } %a, 0
  %c = extractvalue { <vscale x 16 x i16>, <vscale x 16 x i1> } %a, 1
  %d = select <vscale x 16 x i1> %c, <vscale x 16 x i16> zeroinitializer, <vscale x 16 x i16> %b
  ret <vscale x 16 x i16> %d
}
|
|
|
|
declare { <vscale x 32 x i16>, <vscale x 32 x i1> } @llvm.umul.with.overflow.nxv32i16(<vscale x 32 x i16>, <vscale x 32 x i16>)

; umulo on <vscale x 32 x i16>: split into four legal nxv8i16 quarters.
define <vscale x 32 x i16> @umulo_nxv32i16(<vscale x 32 x i16> %x, <vscale x 32 x i16> %y) {
; CHECK-LABEL: umulo_nxv32i16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.h
; CHECK-NEXT: movprfx z24, z3
; CHECK-NEXT: mul z24.h, p0/m, z24.h, z7.h
; CHECK-NEXT: umulh z3.h, p0/m, z3.h, z7.h
; CHECK-NEXT: cmpne p1.h, p0/z, z3.h, #0
; CHECK-NEXT: movprfx z3, z2
; CHECK-NEXT: umulh z3.h, p0/m, z3.h, z6.h
; CHECK-NEXT: cmpne p2.h, p0/z, z3.h, #0
; CHECK-NEXT: movprfx z3, z1
; CHECK-NEXT: mul z3.h, p0/m, z3.h, z5.h
; CHECK-NEXT: umulh z1.h, p0/m, z1.h, z5.h
; CHECK-NEXT: mul z2.h, p0/m, z2.h, z6.h
; CHECK-NEXT: cmpne p3.h, p0/z, z1.h, #0
; CHECK-NEXT: movprfx z1, z0
; CHECK-NEXT: umulh z1.h, p0/m, z1.h, z4.h
; CHECK-NEXT: mul z0.h, p0/m, z0.h, z4.h
; CHECK-NEXT: cmpne p0.h, p0/z, z1.h, #0
; CHECK-NEXT: mov z3.h, p3/m, #0 // =0x0
; CHECK-NEXT: mov z24.h, p1/m, #0 // =0x0
; CHECK-NEXT: mov z0.h, p0/m, #0 // =0x0
; CHECK-NEXT: mov z2.h, p2/m, #0 // =0x0
; CHECK-NEXT: mov z1.d, z3.d
; CHECK-NEXT: mov z3.d, z24.d
; CHECK-NEXT: ret
  %a = call { <vscale x 32 x i16>, <vscale x 32 x i1> } @llvm.umul.with.overflow.nxv32i16(<vscale x 32 x i16> %x, <vscale x 32 x i16> %y)
  %b = extractvalue { <vscale x 32 x i16>, <vscale x 32 x i1> } %a, 0
  %c = extractvalue { <vscale x 32 x i16>, <vscale x 32 x i1> } %a, 1
  %d = select <vscale x 32 x i1> %c, <vscale x 32 x i16> zeroinitializer, <vscale x 32 x i16> %b
  ret <vscale x 32 x i16> %d
}
|
|
|
|
declare { <vscale x 2 x i32>, <vscale x 2 x i1> } @llvm.umul.with.overflow.nxv2i32(<vscale x 2 x i32>, <vscale x 2 x i32>)

; umulo on <vscale x 2 x i32>: promoted to the 64-bit container; overflow
; detected via lsr #32 of the wide product.
define <vscale x 2 x i32> @umulo_nxv2i32(<vscale x 2 x i32> %x, <vscale x 2 x i32> %y) {
; CHECK-LABEL: umulo_nxv2i32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: and z1.d, z1.d, #0xffffffff
; CHECK-NEXT: and z0.d, z0.d, #0xffffffff
; CHECK-NEXT: mul z0.d, p0/m, z0.d, z1.d
; CHECK-NEXT: lsr z1.d, z0.d, #32
; CHECK-NEXT: cmpne p0.d, p0/z, z1.d, #0
; CHECK-NEXT: mov z0.d, p0/m, #0 // =0x0
; CHECK-NEXT: ret
  %a = call { <vscale x 2 x i32>, <vscale x 2 x i1> } @llvm.umul.with.overflow.nxv2i32(<vscale x 2 x i32> %x, <vscale x 2 x i32> %y)
  %b = extractvalue { <vscale x 2 x i32>, <vscale x 2 x i1> } %a, 0
  %c = extractvalue { <vscale x 2 x i32>, <vscale x 2 x i1> } %a, 1
  %d = select <vscale x 2 x i1> %c, <vscale x 2 x i32> zeroinitializer, <vscale x 2 x i32> %b
  ret <vscale x 2 x i32> %d
}
|
|
|
|
declare { <vscale x 4 x i32>, <vscale x 4 x i1> } @llvm.umul.with.overflow.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>)

; umulo on <vscale x 4 x i32> (legal element type): overflow iff umulh != 0.
define <vscale x 4 x i32> @umulo_nxv4i32(<vscale x 4 x i32> %x, <vscale x 4 x i32> %y) {
; CHECK-LABEL: umulo_nxv4i32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s
; CHECK-NEXT: movprfx z2, z0
; CHECK-NEXT: umulh z2.s, p0/m, z2.s, z1.s
; CHECK-NEXT: mul z0.s, p0/m, z0.s, z1.s
; CHECK-NEXT: cmpne p0.s, p0/z, z2.s, #0
; CHECK-NEXT: mov z0.s, p0/m, #0 // =0x0
; CHECK-NEXT: ret
  %a = call { <vscale x 4 x i32>, <vscale x 4 x i1> } @llvm.umul.with.overflow.nxv4i32(<vscale x 4 x i32> %x, <vscale x 4 x i32> %y)
  %b = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i1> } %a, 0
  %c = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i1> } %a, 1
  %d = select <vscale x 4 x i1> %c, <vscale x 4 x i32> zeroinitializer, <vscale x 4 x i32> %b
  ret <vscale x 4 x i32> %d
}
|
|
|
|
declare { <vscale x 8 x i32>, <vscale x 8 x i1> } @llvm.umul.with.overflow.nxv8i32(<vscale x 8 x i32>, <vscale x 8 x i32>)

; umulo on <vscale x 8 x i32>: split into two legal nxv4i32 halves.
define <vscale x 8 x i32> @umulo_nxv8i32(<vscale x 8 x i32> %x, <vscale x 8 x i32> %y) {
; CHECK-LABEL: umulo_nxv8i32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s
; CHECK-NEXT: movprfx z4, z1
; CHECK-NEXT: mul z4.s, p0/m, z4.s, z3.s
; CHECK-NEXT: umulh z1.s, p0/m, z1.s, z3.s
; CHECK-NEXT: movprfx z3, z0
; CHECK-NEXT: umulh z3.s, p0/m, z3.s, z2.s
; CHECK-NEXT: cmpne p1.s, p0/z, z1.s, #0
; CHECK-NEXT: mul z0.s, p0/m, z0.s, z2.s
; CHECK-NEXT: cmpne p0.s, p0/z, z3.s, #0
; CHECK-NEXT: mov z4.s, p1/m, #0 // =0x0
; CHECK-NEXT: mov z0.s, p0/m, #0 // =0x0
; CHECK-NEXT: mov z1.d, z4.d
; CHECK-NEXT: ret
  %a = call { <vscale x 8 x i32>, <vscale x 8 x i1> } @llvm.umul.with.overflow.nxv8i32(<vscale x 8 x i32> %x, <vscale x 8 x i32> %y)
  %b = extractvalue { <vscale x 8 x i32>, <vscale x 8 x i1> } %a, 0
  %c = extractvalue { <vscale x 8 x i32>, <vscale x 8 x i1> } %a, 1
  %d = select <vscale x 8 x i1> %c, <vscale x 8 x i32> zeroinitializer, <vscale x 8 x i32> %b
  ret <vscale x 8 x i32> %d
}
|
|
|
|
declare { <vscale x 16 x i32>, <vscale x 16 x i1> } @llvm.umul.with.overflow.nxv16i32(<vscale x 16 x i32>, <vscale x 16 x i32>)

; umulo on <vscale x 16 x i32>: split into four legal nxv4i32 quarters.
define <vscale x 16 x i32> @umulo_nxv16i32(<vscale x 16 x i32> %x, <vscale x 16 x i32> %y) {
; CHECK-LABEL: umulo_nxv16i32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s
; CHECK-NEXT: movprfx z24, z3
; CHECK-NEXT: mul z24.s, p0/m, z24.s, z7.s
; CHECK-NEXT: umulh z3.s, p0/m, z3.s, z7.s
; CHECK-NEXT: cmpne p1.s, p0/z, z3.s, #0
; CHECK-NEXT: movprfx z3, z2
; CHECK-NEXT: umulh z3.s, p0/m, z3.s, z6.s
; CHECK-NEXT: cmpne p2.s, p0/z, z3.s, #0
; CHECK-NEXT: movprfx z3, z1
; CHECK-NEXT: mul z3.s, p0/m, z3.s, z5.s
; CHECK-NEXT: umulh z1.s, p0/m, z1.s, z5.s
; CHECK-NEXT: mul z2.s, p0/m, z2.s, z6.s
; CHECK-NEXT: cmpne p3.s, p0/z, z1.s, #0
; CHECK-NEXT: movprfx z1, z0
; CHECK-NEXT: umulh z1.s, p0/m, z1.s, z4.s
; CHECK-NEXT: mul z0.s, p0/m, z0.s, z4.s
; CHECK-NEXT: cmpne p0.s, p0/z, z1.s, #0
; CHECK-NEXT: mov z3.s, p3/m, #0 // =0x0
; CHECK-NEXT: mov z24.s, p1/m, #0 // =0x0
; CHECK-NEXT: mov z0.s, p0/m, #0 // =0x0
; CHECK-NEXT: mov z2.s, p2/m, #0 // =0x0
; CHECK-NEXT: mov z1.d, z3.d
; CHECK-NEXT: mov z3.d, z24.d
; CHECK-NEXT: ret
  %a = call { <vscale x 16 x i32>, <vscale x 16 x i1> } @llvm.umul.with.overflow.nxv16i32(<vscale x 16 x i32> %x, <vscale x 16 x i32> %y)
  %b = extractvalue { <vscale x 16 x i32>, <vscale x 16 x i1> } %a, 0
  %c = extractvalue { <vscale x 16 x i32>, <vscale x 16 x i1> } %a, 1
  %d = select <vscale x 16 x i1> %c, <vscale x 16 x i32> zeroinitializer, <vscale x 16 x i32> %b
  ret <vscale x 16 x i32> %d
}
|
|
|
|
declare { <vscale x 2 x i64>, <vscale x 2 x i1> } @llvm.umul.with.overflow.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>)

; umulo on <vscale x 2 x i64> (legal element type): overflow iff umulh != 0.
define <vscale x 2 x i64> @umulo_nxv2i64(<vscale x 2 x i64> %x, <vscale x 2 x i64> %y) {
; CHECK-LABEL: umulo_nxv2i64:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: movprfx z2, z0
; CHECK-NEXT: umulh z2.d, p0/m, z2.d, z1.d
; CHECK-NEXT: mul z0.d, p0/m, z0.d, z1.d
; CHECK-NEXT: cmpne p0.d, p0/z, z2.d, #0
; CHECK-NEXT: mov z0.d, p0/m, #0 // =0x0
; CHECK-NEXT: ret
  %a = call { <vscale x 2 x i64>, <vscale x 2 x i1> } @llvm.umul.with.overflow.nxv2i64(<vscale x 2 x i64> %x, <vscale x 2 x i64> %y)
  %b = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i1> } %a, 0
  %c = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i1> } %a, 1
  %d = select <vscale x 2 x i1> %c, <vscale x 2 x i64> zeroinitializer, <vscale x 2 x i64> %b
  ret <vscale x 2 x i64> %d
}
|
|
|
|
declare { <vscale x 4 x i64>, <vscale x 4 x i1> } @llvm.umul.with.overflow.nxv4i64(<vscale x 4 x i64>, <vscale x 4 x i64>)

; umulo on <vscale x 4 x i64>: split into two legal nxv2i64 halves.
define <vscale x 4 x i64> @umulo_nxv4i64(<vscale x 4 x i64> %x, <vscale x 4 x i64> %y) {
; CHECK-LABEL: umulo_nxv4i64:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: movprfx z4, z1
; CHECK-NEXT: mul z4.d, p0/m, z4.d, z3.d
; CHECK-NEXT: umulh z1.d, p0/m, z1.d, z3.d
; CHECK-NEXT: movprfx z3, z0
; CHECK-NEXT: umulh z3.d, p0/m, z3.d, z2.d
; CHECK-NEXT: cmpne p1.d, p0/z, z1.d, #0
; CHECK-NEXT: mul z0.d, p0/m, z0.d, z2.d
; CHECK-NEXT: cmpne p0.d, p0/z, z3.d, #0
; CHECK-NEXT: mov z4.d, p1/m, #0 // =0x0
; CHECK-NEXT: mov z0.d, p0/m, #0 // =0x0
; CHECK-NEXT: mov z1.d, z4.d
; CHECK-NEXT: ret
  %a = call { <vscale x 4 x i64>, <vscale x 4 x i1> } @llvm.umul.with.overflow.nxv4i64(<vscale x 4 x i64> %x, <vscale x 4 x i64> %y)
  %b = extractvalue { <vscale x 4 x i64>, <vscale x 4 x i1> } %a, 0
  %c = extractvalue { <vscale x 4 x i64>, <vscale x 4 x i1> } %a, 1
  %d = select <vscale x 4 x i1> %c, <vscale x 4 x i64> zeroinitializer, <vscale x 4 x i64> %b
  ret <vscale x 4 x i64> %d
}
|
|
|
|
declare { <vscale x 8 x i64>, <vscale x 8 x i1> } @llvm.umul.with.overflow.nxv8i64(<vscale x 8 x i64>, <vscale x 8 x i64>)

; umulo on <vscale x 8 x i64>: split into four legal nxv2i64 quarters.
define <vscale x 8 x i64> @umulo_nxv8i64(<vscale x 8 x i64> %x, <vscale x 8 x i64> %y) {
; CHECK-LABEL: umulo_nxv8i64:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: movprfx z24, z3
; CHECK-NEXT: mul z24.d, p0/m, z24.d, z7.d
; CHECK-NEXT: umulh z3.d, p0/m, z3.d, z7.d
; CHECK-NEXT: cmpne p1.d, p0/z, z3.d, #0
; CHECK-NEXT: movprfx z3, z2
; CHECK-NEXT: umulh z3.d, p0/m, z3.d, z6.d
; CHECK-NEXT: cmpne p2.d, p0/z, z3.d, #0
; CHECK-NEXT: movprfx z3, z1
; CHECK-NEXT: mul z3.d, p0/m, z3.d, z5.d
; CHECK-NEXT: umulh z1.d, p0/m, z1.d, z5.d
; CHECK-NEXT: mul z2.d, p0/m, z2.d, z6.d
; CHECK-NEXT: cmpne p3.d, p0/z, z1.d, #0
; CHECK-NEXT: movprfx z1, z0
; CHECK-NEXT: umulh z1.d, p0/m, z1.d, z4.d
; CHECK-NEXT: mul z0.d, p0/m, z0.d, z4.d
; CHECK-NEXT: cmpne p0.d, p0/z, z1.d, #0
; CHECK-NEXT: mov z3.d, p3/m, #0 // =0x0
; CHECK-NEXT: mov z24.d, p1/m, #0 // =0x0
; CHECK-NEXT: mov z0.d, p0/m, #0 // =0x0
; CHECK-NEXT: mov z2.d, p2/m, #0 // =0x0
; CHECK-NEXT: mov z1.d, z3.d
; CHECK-NEXT: mov z3.d, z24.d
; CHECK-NEXT: ret
  %a = call { <vscale x 8 x i64>, <vscale x 8 x i1> } @llvm.umul.with.overflow.nxv8i64(<vscale x 8 x i64> %x, <vscale x 8 x i64> %y)
  %b = extractvalue { <vscale x 8 x i64>, <vscale x 8 x i1> } %a, 0
  %c = extractvalue { <vscale x 8 x i64>, <vscale x 8 x i1> } %a, 1
  %d = select <vscale x 8 x i1> %c, <vscale x 8 x i64> zeroinitializer, <vscale x 8 x i64> %b
  ret <vscale x 8 x i64> %d
}