[AArch64][GlobalISel] Combine G_UNMERGE(G_DUPLANE16) -> G_DUPLANE16 (#142731)
Legalizing shuffle-vector splats whose source and result vector sizes differ generates G_UNMERGE(G_DUPLANE16). The G_DUPLANE operations can handle different vector sizes (for example, a 64-bit result from a 128-bit source), so we can combine away the unmerge.
This commit is contained in:
@@ -172,6 +172,21 @@ def form_duplane : GICombineRule <
|
||||
(apply [{ applyDupLane(*${root}, MRI, B, ${matchinfo}); }])
|
||||
>;
|
||||
|
||||
// Clean up G_UNMERGE(G_DUPLANE16) -> G_DUPLANE16
|
||||
class unmerge_duplane<Instruction Op> : GICombineRule <
|
||||
(defs root:$root),
|
||||
(match (Op $a, $src, $c),
|
||||
(G_UNMERGE_VALUES $d1, $d2, $a):$root,
|
||||
[{ return MRI.getType(${d1}.getReg()).getSizeInBits() == 64; }]),
|
||||
(apply (GIReplaceReg $d2, $d1), (Op $d1, $src, $c))
|
||||
>;
|
||||
def unmerge_duplane8 : unmerge_duplane<G_DUPLANE8>;
|
||||
def unmerge_duplane16 : unmerge_duplane<G_DUPLANE16>;
|
||||
def unmerge_duplane32 : unmerge_duplane<G_DUPLANE32>;
|
||||
// G_DUPLANE64 is not included, as its unmerged result is scalar.
|
||||
def unmerge_duplanes : GICombineGroup<[unmerge_duplane8, unmerge_duplane16,
|
||||
unmerge_duplane32]>;
|
||||
|
||||
def shuffle_vector_lowering : GICombineGroup<[dup, form_duplane, rev, ext, zip,
|
||||
uzp, trn, fullrev, shuf_to_ins]>;
|
||||
|
||||
@@ -325,7 +340,8 @@ def AArch64PostLegalizerLowering
|
||||
lower_vector_fcmp, form_truncstore,
|
||||
vector_sext_inreg_to_shift,
|
||||
unmerge_ext_to_unmerge, lower_mulv2s64,
|
||||
vector_unmerge_lowering, insertelt_nonconst]> {
|
||||
vector_unmerge_lowering, insertelt_nonconst,
|
||||
unmerge_duplanes]> {
|
||||
}
|
||||
|
||||
// Post-legalization combines which are primarily optimizations.
|
||||
|
||||
@@ -0,0 +1,91 @@
|
||||
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
|
||||
# RUN: llc -mtriple aarch64 -run-pass=aarch64-postlegalizer-lowering -verify-machineinstrs %s -o - | FileCheck %s
|
||||
|
||||
---
|
||||
name: unmerge_dup8
|
||||
legalized: true
|
||||
body: |
|
||||
bb.1.entry:
|
||||
; CHECK-LABEL: name: unmerge_dup8
|
||||
; CHECK: [[COPY:%[0-9]+]]:_(<16 x s8>) = COPY $q0
|
||||
; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
|
||||
; CHECK-NEXT: [[DUPLANE8_:%[0-9]+]]:_(<8 x s8>) = G_DUPLANE8 [[COPY]], [[C]](s64)
|
||||
; CHECK-NEXT: $d0 = COPY [[DUPLANE8_]](<8 x s8>)
|
||||
; CHECK-NEXT: $d1 = COPY [[DUPLANE8_]](<8 x s8>)
|
||||
; CHECK-NEXT: RET_ReallyLR implicit $x0
|
||||
%0:_(<16 x s8>) = COPY $q0
|
||||
%1:_(s64) = G_CONSTANT i64 1
|
||||
%2:_(<16 x s8>) = G_DUPLANE8 %0, %1
|
||||
%3:_(<8 x s8>), %4:_(<8 x s8>) = G_UNMERGE_VALUES %2
|
||||
$d0 = COPY %3
|
||||
$d1 = COPY %4
|
||||
RET_ReallyLR implicit $x0
|
||||
|
||||
...
|
||||
---
|
||||
name: unmerge_dup16
|
||||
legalized: true
|
||||
body: |
|
||||
bb.1.entry:
|
||||
; CHECK-LABEL: name: unmerge_dup16
|
||||
; CHECK: [[COPY:%[0-9]+]]:_(<8 x s16>) = COPY $q0
|
||||
; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
|
||||
; CHECK-NEXT: [[DUPLANE16_:%[0-9]+]]:_(<4 x s16>) = G_DUPLANE16 [[COPY]], [[C]](s64)
|
||||
; CHECK-NEXT: $d0 = COPY [[DUPLANE16_]](<4 x s16>)
|
||||
; CHECK-NEXT: $d1 = COPY [[DUPLANE16_]](<4 x s16>)
|
||||
; CHECK-NEXT: RET_ReallyLR implicit $x0
|
||||
%0:_(<8 x s16>) = COPY $q0
|
||||
%1:_(s64) = G_CONSTANT i64 1
|
||||
%2:_(<8 x s16>) = G_DUPLANE16 %0, %1
|
||||
%3:_(<4 x s16>), %4:_(<4 x s16>) = G_UNMERGE_VALUES %2
|
||||
$d0 = COPY %3
|
||||
$d1 = COPY %4
|
||||
RET_ReallyLR implicit $x0
|
||||
|
||||
...
|
||||
---
|
||||
name: unmerge_dup32
|
||||
legalized: true
|
||||
body: |
|
||||
bb.1.entry:
|
||||
; CHECK-LABEL: name: unmerge_dup32
|
||||
; CHECK: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $q0
|
||||
; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
|
||||
; CHECK-NEXT: [[DUPLANE32_:%[0-9]+]]:_(<2 x s32>) = G_DUPLANE32 [[COPY]], [[C]](s64)
|
||||
; CHECK-NEXT: $d0 = COPY [[DUPLANE32_]](<2 x s32>)
|
||||
; CHECK-NEXT: $d1 = COPY [[DUPLANE32_]](<2 x s32>)
|
||||
; CHECK-NEXT: RET_ReallyLR implicit $x0
|
||||
%0:_(<4 x s32>) = COPY $q0
|
||||
%1:_(s64) = G_CONSTANT i64 1
|
||||
%2:_(<4 x s32>) = G_DUPLANE32 %0, %1
|
||||
%3:_(<2 x s32>), %4:_(<2 x s32>) = G_UNMERGE_VALUES %2
|
||||
$d0 = COPY %3
|
||||
$d1 = COPY %4
|
||||
RET_ReallyLR implicit $x0
|
||||
|
||||
...
|
||||
---
|
||||
name: unmerge_dup64
|
||||
legalized: true
|
||||
body: |
|
||||
bb.1.entry:
|
||||
; CHECK-LABEL: name: unmerge_dup64
|
||||
; CHECK: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $q0
|
||||
; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
|
||||
; CHECK-NEXT: [[DUPLANE64_:%[0-9]+]]:_(<2 x s64>) = G_DUPLANE64 [[COPY]], [[C]](s64)
|
||||
; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
|
||||
; CHECK-NEXT: [[EVEC:%[0-9]+]]:_(s64) = G_EXTRACT_VECTOR_ELT [[DUPLANE64_]](<2 x s64>), [[C1]](s64)
|
||||
; CHECK-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
|
||||
; CHECK-NEXT: [[EVEC1:%[0-9]+]]:_(s64) = G_EXTRACT_VECTOR_ELT [[DUPLANE64_]](<2 x s64>), [[C2]](s64)
|
||||
; CHECK-NEXT: $d0 = COPY [[EVEC]](s64)
|
||||
; CHECK-NEXT: $d1 = COPY [[EVEC1]](s64)
|
||||
; CHECK-NEXT: RET_ReallyLR implicit $x0
|
||||
%0:_(<2 x s64>) = COPY $q0
|
||||
%1:_(s64) = G_CONSTANT i64 1
|
||||
%2:_(<2 x s64>) = G_DUPLANE64 %0, %1
|
||||
%3:_(s64), %4:_(s64) = G_UNMERGE_VALUES %2
|
||||
$d0 = COPY %3
|
||||
$d1 = COPY %4
|
||||
RET_ReallyLR implicit $x0
|
||||
|
||||
...
|
||||
@@ -401,16 +401,10 @@ define <4 x i16> @test_build_illegal(<4 x i32> %in) {
|
||||
; SelectionDAGBuilder here. We then added a DUPLANE on top of that, preventing
|
||||
; the formation of an indexed-by-7 MLS.
|
||||
define <4 x i16> @test_high_splat(<4 x i16> %a, <4 x i16> %b, <8 x i16> %v) #0 {
|
||||
; CHECK-SD-LABEL: test_high_splat:
|
||||
; CHECK-SD: // %bb.0: // %entry
|
||||
; CHECK-SD-NEXT: mls.4h v0, v1, v2[7]
|
||||
; CHECK-SD-NEXT: ret
|
||||
;
|
||||
; CHECK-GI-LABEL: test_high_splat:
|
||||
; CHECK-GI: // %bb.0: // %entry
|
||||
; CHECK-GI-NEXT: dup.8h v2, v2[7]
|
||||
; CHECK-GI-NEXT: mls.4h v0, v2, v1
|
||||
; CHECK-GI-NEXT: ret
|
||||
; CHECK-LABEL: test_high_splat:
|
||||
; CHECK: // %bb.0: // %entry
|
||||
; CHECK-NEXT: mls.4h v0, v1, v2[7]
|
||||
; CHECK-NEXT: ret
|
||||
entry:
|
||||
%shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> <i32 7, i32 7, i32 7, i32 7>
|
||||
%mul = mul <4 x i16> %shuffle, %b
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@@ -807,46 +807,28 @@ define <2 x i64> @test_vdupq_lane_s64(<1 x i64> %v1) #0 {
|
||||
}
|
||||
|
||||
define <8 x i8> @test_vdup_laneq_s8(<16 x i8> %v1) #0 {
|
||||
; CHECK-SD-LABEL: test_vdup_laneq_s8:
|
||||
; CHECK-SD: // %bb.0:
|
||||
; CHECK-SD-NEXT: dup v0.8b, v0.b[5]
|
||||
; CHECK-SD-NEXT: ret
|
||||
;
|
||||
; CHECK-GI-LABEL: test_vdup_laneq_s8:
|
||||
; CHECK-GI: // %bb.0:
|
||||
; CHECK-GI-NEXT: dup v0.16b, v0.b[5]
|
||||
; CHECK-GI-NEXT: // kill: def $d0 killed $d0 killed $q0
|
||||
; CHECK-GI-NEXT: ret
|
||||
; CHECK-LABEL: test_vdup_laneq_s8:
|
||||
; CHECK: // %bb.0:
|
||||
; CHECK-NEXT: dup v0.8b, v0.b[5]
|
||||
; CHECK-NEXT: ret
|
||||
%shuffle = shufflevector <16 x i8> %v1, <16 x i8> undef, <8 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
|
||||
ret <8 x i8> %shuffle
|
||||
}
|
||||
|
||||
define <4 x i16> @test_vdup_laneq_s16(<8 x i16> %v1) #0 {
|
||||
; CHECK-SD-LABEL: test_vdup_laneq_s16:
|
||||
; CHECK-SD: // %bb.0:
|
||||
; CHECK-SD-NEXT: dup v0.4h, v0.h[2]
|
||||
; CHECK-SD-NEXT: ret
|
||||
;
|
||||
; CHECK-GI-LABEL: test_vdup_laneq_s16:
|
||||
; CHECK-GI: // %bb.0:
|
||||
; CHECK-GI-NEXT: dup v0.8h, v0.h[2]
|
||||
; CHECK-GI-NEXT: // kill: def $d0 killed $d0 killed $q0
|
||||
; CHECK-GI-NEXT: ret
|
||||
; CHECK-LABEL: test_vdup_laneq_s16:
|
||||
; CHECK: // %bb.0:
|
||||
; CHECK-NEXT: dup v0.4h, v0.h[2]
|
||||
; CHECK-NEXT: ret
|
||||
%shuffle = shufflevector <8 x i16> %v1, <8 x i16> undef, <4 x i32> <i32 2, i32 2, i32 2, i32 2>
|
||||
ret <4 x i16> %shuffle
|
||||
}
|
||||
|
||||
define <2 x i32> @test_vdup_laneq_s32(<4 x i32> %v1) #0 {
|
||||
; CHECK-SD-LABEL: test_vdup_laneq_s32:
|
||||
; CHECK-SD: // %bb.0:
|
||||
; CHECK-SD-NEXT: dup v0.2s, v0.s[1]
|
||||
; CHECK-SD-NEXT: ret
|
||||
;
|
||||
; CHECK-GI-LABEL: test_vdup_laneq_s32:
|
||||
; CHECK-GI: // %bb.0:
|
||||
; CHECK-GI-NEXT: dup v0.4s, v0.s[1]
|
||||
; CHECK-GI-NEXT: // kill: def $d0 killed $d0 killed $q0
|
||||
; CHECK-GI-NEXT: ret
|
||||
; CHECK-LABEL: test_vdup_laneq_s32:
|
||||
; CHECK: // %bb.0:
|
||||
; CHECK-NEXT: dup v0.2s, v0.s[1]
|
||||
; CHECK-NEXT: ret
|
||||
%shuffle = shufflevector <4 x i32> %v1, <4 x i32> undef, <2 x i32> <i32 1, i32 1>
|
||||
ret <2 x i32> %shuffle
|
||||
}
|
||||
|
||||
@@ -569,16 +569,10 @@ define i32 @test_sqrdmlsh_extract_i32(i32 %acc, i32 %mhs, <4 x i32> %rhs) {
|
||||
; Using sqrdmlah intrinsics
|
||||
|
||||
define <4 x i16> @test_vqrdmlah_laneq_s16(<4 x i16> %a, <4 x i16> %b, <8 x i16> %v) {
|
||||
; CHECK-SD-LABEL: test_vqrdmlah_laneq_s16:
|
||||
; CHECK-SD: // %bb.0: // %entry
|
||||
; CHECK-SD-NEXT: sqrdmlah v0.4h, v1.4h, v2.h[7]
|
||||
; CHECK-SD-NEXT: ret
|
||||
;
|
||||
; CHECK-GI-LABEL: test_vqrdmlah_laneq_s16:
|
||||
; CHECK-GI: // %bb.0: // %entry
|
||||
; CHECK-GI-NEXT: dup v2.8h, v2.h[7]
|
||||
; CHECK-GI-NEXT: sqrdmlah v0.4h, v1.4h, v2.4h
|
||||
; CHECK-GI-NEXT: ret
|
||||
; CHECK-LABEL: test_vqrdmlah_laneq_s16:
|
||||
; CHECK: // %bb.0: // %entry
|
||||
; CHECK-NEXT: sqrdmlah v0.4h, v1.4h, v2.h[7]
|
||||
; CHECK-NEXT: ret
|
||||
entry:
|
||||
%lane = shufflevector <8 x i16> %v, <8 x i16> poison, <4 x i32> <i32 7, i32 7, i32 7, i32 7>
|
||||
%vqrdmlah_v3.i = tail call <4 x i16> @llvm.aarch64.neon.sqrdmlah.v4i16(<4 x i16> %a, <4 x i16> %b, <4 x i16> %lane) #4
|
||||
@@ -586,16 +580,10 @@ entry:
|
||||
}
|
||||
|
||||
define <2 x i32> @test_vqrdmlah_laneq_s32(<2 x i32> %a, <2 x i32> %b, <4 x i32> %v) {
|
||||
; CHECK-SD-LABEL: test_vqrdmlah_laneq_s32:
|
||||
; CHECK-SD: // %bb.0: // %entry
|
||||
; CHECK-SD-NEXT: sqrdmlah v0.2s, v1.2s, v2.s[3]
|
||||
; CHECK-SD-NEXT: ret
|
||||
;
|
||||
; CHECK-GI-LABEL: test_vqrdmlah_laneq_s32:
|
||||
; CHECK-GI: // %bb.0: // %entry
|
||||
; CHECK-GI-NEXT: dup v2.4s, v2.s[3]
|
||||
; CHECK-GI-NEXT: sqrdmlah v0.2s, v1.2s, v2.2s
|
||||
; CHECK-GI-NEXT: ret
|
||||
; CHECK-LABEL: test_vqrdmlah_laneq_s32:
|
||||
; CHECK: // %bb.0: // %entry
|
||||
; CHECK-NEXT: sqrdmlah v0.2s, v1.2s, v2.s[3]
|
||||
; CHECK-NEXT: ret
|
||||
entry:
|
||||
%lane = shufflevector <4 x i32> %v, <4 x i32> poison, <2 x i32> <i32 3, i32 3>
|
||||
%vqrdmlah_v3.i = tail call <2 x i32> @llvm.aarch64.neon.sqrdmlah.v2i32(<2 x i32> %a, <2 x i32> %b, <2 x i32> %lane) #4
|
||||
@@ -700,22 +688,13 @@ entry:
|
||||
}
|
||||
|
||||
define i16 @test_vqrdmlahh_laneq_s16(i16 %a, i16 %b, <8 x i16> %c) {
|
||||
; CHECK-SD-LABEL: test_vqrdmlahh_laneq_s16:
|
||||
; CHECK-SD: // %bb.0: // %entry
|
||||
; CHECK-SD-NEXT: fmov s1, w0
|
||||
; CHECK-SD-NEXT: fmov s2, w1
|
||||
; CHECK-SD-NEXT: sqrdmlah v1.4h, v2.4h, v0.h[7]
|
||||
; CHECK-SD-NEXT: umov w0, v1.h[0]
|
||||
; CHECK-SD-NEXT: ret
|
||||
;
|
||||
; CHECK-GI-LABEL: test_vqrdmlahh_laneq_s16:
|
||||
; CHECK-GI: // %bb.0: // %entry
|
||||
; CHECK-GI-NEXT: dup v0.8h, v0.h[7]
|
||||
; CHECK-GI-NEXT: fmov s1, w0
|
||||
; CHECK-GI-NEXT: fmov s2, w1
|
||||
; CHECK-GI-NEXT: sqrdmlah v1.4h, v2.4h, v0.4h
|
||||
; CHECK-GI-NEXT: umov w0, v1.h[0]
|
||||
; CHECK-GI-NEXT: ret
|
||||
; CHECK-LABEL: test_vqrdmlahh_laneq_s16:
|
||||
; CHECK: // %bb.0: // %entry
|
||||
; CHECK-NEXT: fmov s1, w0
|
||||
; CHECK-NEXT: fmov s2, w1
|
||||
; CHECK-NEXT: sqrdmlah v1.4h, v2.4h, v0.h[7]
|
||||
; CHECK-NEXT: umov w0, v1.h[0]
|
||||
; CHECK-NEXT: ret
|
||||
entry:
|
||||
%0 = insertelement <4 x i16> undef, i16 %a, i64 0
|
||||
%1 = insertelement <4 x i16> undef, i16 %b, i64 0
|
||||
@@ -740,16 +719,10 @@ entry:
|
||||
}
|
||||
|
||||
define <4 x i16> @test_vqrdmlsh_laneq_s16(<4 x i16> %a, <4 x i16> %b, <8 x i16> %v) {
|
||||
; CHECK-SD-LABEL: test_vqrdmlsh_laneq_s16:
|
||||
; CHECK-SD: // %bb.0: // %entry
|
||||
; CHECK-SD-NEXT: sqrdmlsh v0.4h, v1.4h, v2.h[7]
|
||||
; CHECK-SD-NEXT: ret
|
||||
;
|
||||
; CHECK-GI-LABEL: test_vqrdmlsh_laneq_s16:
|
||||
; CHECK-GI: // %bb.0: // %entry
|
||||
; CHECK-GI-NEXT: dup v2.8h, v2.h[7]
|
||||
; CHECK-GI-NEXT: sqrdmlsh v0.4h, v1.4h, v2.4h
|
||||
; CHECK-GI-NEXT: ret
|
||||
; CHECK-LABEL: test_vqrdmlsh_laneq_s16:
|
||||
; CHECK: // %bb.0: // %entry
|
||||
; CHECK-NEXT: sqrdmlsh v0.4h, v1.4h, v2.h[7]
|
||||
; CHECK-NEXT: ret
|
||||
entry:
|
||||
%lane = shufflevector <8 x i16> %v, <8 x i16> poison, <4 x i32> <i32 7, i32 7, i32 7, i32 7>
|
||||
%vqrdmlsh_v3.i = tail call <4 x i16> @llvm.aarch64.neon.sqrdmlsh.v4i16(<4 x i16> %a, <4 x i16> %b, <4 x i16> %lane) #4
|
||||
@@ -757,16 +730,10 @@ entry:
|
||||
}
|
||||
|
||||
define <2 x i32> @test_vqrdmlsh_laneq_s32(<2 x i32> %a, <2 x i32> %b, <4 x i32> %v) {
|
||||
; CHECK-SD-LABEL: test_vqrdmlsh_laneq_s32:
|
||||
; CHECK-SD: // %bb.0: // %entry
|
||||
; CHECK-SD-NEXT: sqrdmlsh v0.2s, v1.2s, v2.s[3]
|
||||
; CHECK-SD-NEXT: ret
|
||||
;
|
||||
; CHECK-GI-LABEL: test_vqrdmlsh_laneq_s32:
|
||||
; CHECK-GI: // %bb.0: // %entry
|
||||
; CHECK-GI-NEXT: dup v2.4s, v2.s[3]
|
||||
; CHECK-GI-NEXT: sqrdmlsh v0.2s, v1.2s, v2.2s
|
||||
; CHECK-GI-NEXT: ret
|
||||
; CHECK-LABEL: test_vqrdmlsh_laneq_s32:
|
||||
; CHECK: // %bb.0: // %entry
|
||||
; CHECK-NEXT: sqrdmlsh v0.2s, v1.2s, v2.s[3]
|
||||
; CHECK-NEXT: ret
|
||||
entry:
|
||||
%lane = shufflevector <4 x i32> %v, <4 x i32> poison, <2 x i32> <i32 3, i32 3>
|
||||
%vqrdmlsh_v3.i = tail call <2 x i32> @llvm.aarch64.neon.sqrdmlsh.v2i32(<2 x i32> %a, <2 x i32> %b, <2 x i32> %lane) #4
|
||||
@@ -871,22 +838,13 @@ entry:
|
||||
}
|
||||
|
||||
define i16 @test_vqrdmlshh_laneq_s16(i16 %a, i16 %b, <8 x i16> %c) {
|
||||
; CHECK-SD-LABEL: test_vqrdmlshh_laneq_s16:
|
||||
; CHECK-SD: // %bb.0: // %entry
|
||||
; CHECK-SD-NEXT: fmov s1, w0
|
||||
; CHECK-SD-NEXT: fmov s2, w1
|
||||
; CHECK-SD-NEXT: sqrdmlsh v1.4h, v2.4h, v0.h[7]
|
||||
; CHECK-SD-NEXT: umov w0, v1.h[0]
|
||||
; CHECK-SD-NEXT: ret
|
||||
;
|
||||
; CHECK-GI-LABEL: test_vqrdmlshh_laneq_s16:
|
||||
; CHECK-GI: // %bb.0: // %entry
|
||||
; CHECK-GI-NEXT: dup v0.8h, v0.h[7]
|
||||
; CHECK-GI-NEXT: fmov s1, w0
|
||||
; CHECK-GI-NEXT: fmov s2, w1
|
||||
; CHECK-GI-NEXT: sqrdmlsh v1.4h, v2.4h, v0.4h
|
||||
; CHECK-GI-NEXT: umov w0, v1.h[0]
|
||||
; CHECK-GI-NEXT: ret
|
||||
; CHECK-LABEL: test_vqrdmlshh_laneq_s16:
|
||||
; CHECK: // %bb.0: // %entry
|
||||
; CHECK-NEXT: fmov s1, w0
|
||||
; CHECK-NEXT: fmov s2, w1
|
||||
; CHECK-NEXT: sqrdmlsh v1.4h, v2.4h, v0.h[7]
|
||||
; CHECK-NEXT: umov w0, v1.h[0]
|
||||
; CHECK-NEXT: ret
|
||||
entry:
|
||||
%0 = insertelement <4 x i16> undef, i16 %a, i64 0
|
||||
%1 = insertelement <4 x i16> undef, i16 %b, i64 0
|
||||
|
||||
Reference in New Issue
Block a user