Similar to d39b4ce3ce
Using "eabi" or "gnueabi" for aarch64 targets is a common mistake that the
Clang driver warns about. We want to avoid these triples elsewhere as well, so
just use the plain "aarch64" triple without any other components.
355 lines
14 KiB
LLVM
355 lines
14 KiB
LLVM
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=aarch64 -mattr=+v8.6a,+neon < %s | FileCheck %s
; RUN: llc -mtriple=aarch64 -mattr=+v8.6a,+neon,+bf16 < %s | FileCheck %s
; RUN: llc -mtriple=aarch64 -mattr=+v8.6a,+neon,+fullfp16,+bf16 < %s | FileCheck %s

; All three llc invocations above feed the same default FileCheck prefix, so
; the expected assembly must be identical with and without +bf16 / +fullfp16.

; Two-member aggregates used as the return type of the zip/uzp/trn tests.
; Despite the "float16" in their names, the element type is bfloat.
%struct.float16x4x2_t = type { [2 x <4 x bfloat>] }
%struct.float16x8x2_t = type { [2 x <8 x bfloat>] }

; Interleaves two <4 x bfloat> vectors: mask <0,4,1,5> pairs up the low lanes
; and mask <2,6,3,7> the high lanes. Both results are returned in one struct.
; Expected lowering is a zip1/zip2 pair.
define dso_local %struct.float16x4x2_t @test_vzip_bf16(<4 x bfloat> %a, <4 x bfloat> %b) {
; CHECK-LABEL: test_vzip_bf16:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: zip1 v2.4h, v0.4h, v1.4h
; CHECK-NEXT: zip2 v1.4h, v0.4h, v1.4h
; CHECK-NEXT: fmov d0, d2
; CHECK-NEXT: ret
entry:
  %vzip.i = shufflevector <4 x bfloat> %a, <4 x bfloat> %b, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
  %vzip1.i = shufflevector <4 x bfloat> %a, <4 x bfloat> %b, <4 x i32> <i32 2, i32 6, i32 3, i32 7>
  %.fca.0.0.insert = insertvalue %struct.float16x4x2_t undef, <4 x bfloat> %vzip.i, 0, 0
  %.fca.0.1.insert = insertvalue %struct.float16x4x2_t %.fca.0.0.insert, <4 x bfloat> %vzip1.i, 0, 1
  ret %struct.float16x4x2_t %.fca.0.1.insert
}

; 8-lane interleave of two <8 x bfloat> vectors: the first mask alternates
; lanes 0..3 of %a with lanes 0..3 of %b, the second does the same for lanes
; 4..7. Expected lowering is a zip1/zip2 pair on .8h.
define dso_local %struct.float16x8x2_t @test_vzipq_bf16(<8 x bfloat> %a, <8 x bfloat> %b) {
; CHECK-LABEL: test_vzipq_bf16:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: zip1 v2.8h, v0.8h, v1.8h
; CHECK-NEXT: zip2 v1.8h, v0.8h, v1.8h
; CHECK-NEXT: mov v0.16b, v2.16b
; CHECK-NEXT: ret
entry:
  %vzip.i = shufflevector <8 x bfloat> %a, <8 x bfloat> %b, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11>
  %vzip1.i = shufflevector <8 x bfloat> %a, <8 x bfloat> %b, <8 x i32> <i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
  %.fca.0.0.insert = insertvalue %struct.float16x8x2_t undef, <8 x bfloat> %vzip.i, 0, 0
  %.fca.0.1.insert = insertvalue %struct.float16x8x2_t %.fca.0.0.insert, <8 x bfloat> %vzip1.i, 0, 1
  ret %struct.float16x8x2_t %.fca.0.1.insert
}

; De-interleaves the concatenation of two <4 x bfloat> vectors: mask <0,2,4,6>
; keeps the even lanes and mask <1,3,5,7> the odd lanes. Expected lowering is
; a uzp1/uzp2 pair.
define dso_local %struct.float16x4x2_t @test_vuzp_bf16(<4 x bfloat> %a, <4 x bfloat> %b) {
; CHECK-LABEL: test_vuzp_bf16:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: uzp1 v2.4h, v0.4h, v1.4h
; CHECK-NEXT: uzp2 v1.4h, v0.4h, v1.4h
; CHECK-NEXT: fmov d0, d2
; CHECK-NEXT: ret
entry:
  %vuzp.i = shufflevector <4 x bfloat> %a, <4 x bfloat> %b, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
  %vuzp1.i = shufflevector <4 x bfloat> %a, <4 x bfloat> %b, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
  %.fca.0.0.insert = insertvalue %struct.float16x4x2_t undef, <4 x bfloat> %vuzp.i, 0, 0
  %.fca.0.1.insert = insertvalue %struct.float16x4x2_t %.fca.0.0.insert, <4 x bfloat> %vuzp1.i, 0, 1
  ret %struct.float16x4x2_t %.fca.0.1.insert
}

; 8-lane de-interleave of two <8 x bfloat> vectors: even lanes (0,2,..,14) and
; odd lanes (1,3,..,15) of the concatenation. Expected lowering is a uzp1/uzp2
; pair on .8h.
define dso_local %struct.float16x8x2_t @test_vuzpq_bf16(<8 x bfloat> %a, <8 x bfloat> %b) {
; CHECK-LABEL: test_vuzpq_bf16:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: uzp1 v2.8h, v0.8h, v1.8h
; CHECK-NEXT: uzp2 v1.8h, v0.8h, v1.8h
; CHECK-NEXT: mov v0.16b, v2.16b
; CHECK-NEXT: ret
entry:
  %vuzp.i = shufflevector <8 x bfloat> %a, <8 x bfloat> %b, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
  %vuzp1.i = shufflevector <8 x bfloat> %a, <8 x bfloat> %b, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
  %.fca.0.0.insert = insertvalue %struct.float16x8x2_t undef, <8 x bfloat> %vuzp.i, 0, 0
  %.fca.0.1.insert = insertvalue %struct.float16x8x2_t %.fca.0.0.insert, <8 x bfloat> %vuzp1.i, 0, 1
  ret %struct.float16x8x2_t %.fca.0.1.insert
}

; Transposes lane pairs of two <4 x bfloat> vectors: mask <0,4,2,6> takes the
; even lanes of %a and %b, mask <1,5,3,7> the odd lanes. Expected lowering is
; a trn1/trn2 pair.
define dso_local %struct.float16x4x2_t @test_vtrn_bf16(<4 x bfloat> %a, <4 x bfloat> %b) {
; CHECK-LABEL: test_vtrn_bf16:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: trn1 v2.4h, v0.4h, v1.4h
; CHECK-NEXT: trn2 v1.4h, v0.4h, v1.4h
; CHECK-NEXT: fmov d0, d2
; CHECK-NEXT: ret
entry:
  %vtrn.i = shufflevector <4 x bfloat> %a, <4 x bfloat> %b, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
  %vtrn1.i = shufflevector <4 x bfloat> %a, <4 x bfloat> %b, <4 x i32> <i32 1, i32 5, i32 3, i32 7>
  %.fca.0.0.insert = insertvalue %struct.float16x4x2_t undef, <4 x bfloat> %vtrn.i, 0, 0
  %.fca.0.1.insert = insertvalue %struct.float16x4x2_t %.fca.0.0.insert, <4 x bfloat> %vtrn1.i, 0, 1
  ret %struct.float16x4x2_t %.fca.0.1.insert
}

; 8-lane transpose of two <8 x bfloat> vectors: the first mask pairs even
; lanes of %a with even lanes of %b, the second pairs the odd lanes. Expected
; lowering is a trn1/trn2 pair on .8h.
define dso_local %struct.float16x8x2_t @test_vtrnq_bf16(<8 x bfloat> %a, <8 x bfloat> %b) {
; CHECK-LABEL: test_vtrnq_bf16:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: trn1 v2.8h, v0.8h, v1.8h
; CHECK-NEXT: trn2 v1.8h, v0.8h, v1.8h
; CHECK-NEXT: mov v0.16b, v2.16b
; CHECK-NEXT: ret
entry:
  %vtrn.i = shufflevector <8 x bfloat> %a, <8 x bfloat> %b, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
  %vtrn1.i = shufflevector <8 x bfloat> %a, <8 x bfloat> %b, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
  %.fca.0.0.insert = insertvalue %struct.float16x8x2_t undef, <8 x bfloat> %vtrn.i, 0, 0
  %.fca.0.1.insert = insertvalue %struct.float16x8x2_t %.fca.0.0.insert, <8 x bfloat> %vtrn1.i, 0, 1
  ret %struct.float16x8x2_t %.fca.0.1.insert
}

; The bfloat scalar arrives coerced into a float: its low 16 bits are
; extracted (bitcast to i32, trunc to i16, bitcast to bfloat) and splatted
; across four lanes. Expected lowering is a single dup from lane 0.
define dso_local <4 x bfloat> @test_vmov_n_bf16(float %a.coerce) {
; CHECK-LABEL: test_vmov_n_bf16:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: // kill: def $s0 killed $s0 def $q0
; CHECK-NEXT: dup v0.4h, v0.h[0]
; CHECK-NEXT: ret
entry:
  %0 = bitcast float %a.coerce to i32
  %tmp.0.extract.trunc = trunc i32 %0 to i16
  %1 = bitcast i16 %tmp.0.extract.trunc to bfloat
  %vecinit = insertelement <4 x bfloat> undef, bfloat %1, i32 0
  %vecinit4 = shufflevector <4 x bfloat> %vecinit, <4 x bfloat> undef, <4 x i32> zeroinitializer
  ret <4 x bfloat> %vecinit4
}

; 8-lane splat of a bfloat that arrives coerced into a float: the low 16 bits
; are extracted and broadcast with a zero-mask shuffle. Expected lowering is
; a single dup from lane 0 on .8h.
define dso_local <8 x bfloat> @test_vmovq_n_bf16(float %a.coerce) {
; CHECK-LABEL: test_vmovq_n_bf16:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: // kill: def $s0 killed $s0 def $q0
; CHECK-NEXT: dup v0.8h, v0.h[0]
; CHECK-NEXT: ret
entry:
  %0 = bitcast float %a.coerce to i32
  %tmp.0.extract.trunc = trunc i32 %0 to i16
  %1 = bitcast i16 %tmp.0.extract.trunc to bfloat
  %vecinit = insertelement <8 x bfloat> undef, bfloat %1, i32 0
  %vecinit8 = shufflevector <8 x bfloat> %vecinit, <8 x bfloat> undef, <8 x i32> zeroinitializer
  ret <8 x bfloat> %vecinit8
}

; 4-lane splat of a bfloat passed coerced into a float. The body matches
; test_vmov_n_bf16 instruction for instruction; only the function name
; differs. Expected lowering is a single dup from lane 0.
define dso_local <4 x bfloat> @test_vdup_n_bf16(float %a.coerce) {
; CHECK-LABEL: test_vdup_n_bf16:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: // kill: def $s0 killed $s0 def $q0
; CHECK-NEXT: dup v0.4h, v0.h[0]
; CHECK-NEXT: ret
entry:
  %0 = bitcast float %a.coerce to i32
  %tmp.0.extract.trunc = trunc i32 %0 to i16
  %1 = bitcast i16 %tmp.0.extract.trunc to bfloat
  %vecinit = insertelement <4 x bfloat> undef, bfloat %1, i32 0
  %vecinit4 = shufflevector <4 x bfloat> %vecinit, <4 x bfloat> undef, <4 x i32> zeroinitializer
  ret <4 x bfloat> %vecinit4
}

; 8-lane splat of a bfloat passed coerced into a float. The body matches
; test_vmovq_n_bf16 instruction for instruction; only the function name
; differs. Expected lowering is a single dup from lane 0 on .8h.
define dso_local <8 x bfloat> @test_vdupq_n_bf16(float %a.coerce) {
; CHECK-LABEL: test_vdupq_n_bf16:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: // kill: def $s0 killed $s0 def $q0
; CHECK-NEXT: dup v0.8h, v0.h[0]
; CHECK-NEXT: ret
entry:
  %0 = bitcast float %a.coerce to i32
  %tmp.0.extract.trunc = trunc i32 %0 to i16
  %1 = bitcast i16 %tmp.0.extract.trunc to bfloat
  %vecinit = insertelement <8 x bfloat> undef, bfloat %1, i32 0
  %vecinit8 = shufflevector <8 x bfloat> %vecinit, <8 x bfloat> undef, <8 x i32> zeroinitializer
  ret <8 x bfloat> %vecinit8
}

; Broadcasts lane 3 of a <4 x bfloat> vector to all four result lanes.
; Expected lowering is a dup that reads v0.h[3].
define dso_local <4 x bfloat> @test_vdup_lane_bf16(<4 x bfloat> %a) {
; CHECK-LABEL: test_vdup_lane_bf16:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
; CHECK-NEXT: dup v0.4h, v0.h[3]
; CHECK-NEXT: ret
entry:
  %shuffle = shufflevector <4 x bfloat> %a, <4 x bfloat> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
  ret <4 x bfloat> %shuffle
}

; Broadcasts lane 3 of a <4 x bfloat> vector into a widened <8 x bfloat>
; result. Expected lowering is a dup on .8h that reads v0.h[3].
define dso_local <8 x bfloat> @test_vdupq_lane_bf16(<4 x bfloat> %a) {
; CHECK-LABEL: test_vdupq_lane_bf16:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
; CHECK-NEXT: dup v0.8h, v0.h[3]
; CHECK-NEXT: ret
entry:
  %shuffle = shufflevector <4 x bfloat> %a, <4 x bfloat> undef, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
  ret <8 x bfloat> %shuffle
}

; Mask <2,3,4,5> takes the last two lanes of %a followed by the first two
; lanes of %b. Expected lowering is one ext with byte offset 4 (two 16-bit
; lanes).
define dso_local <4 x bfloat> @test_vext_bf16(<4 x bfloat> %a, <4 x bfloat> %b) {
; CHECK-LABEL: test_vext_bf16:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: ext v0.8b, v0.8b, v1.8b, #4
; CHECK-NEXT: ret
entry:
  %vext = shufflevector <4 x bfloat> %a, <4 x bfloat> %b, <4 x i32> <i32 2, i32 3, i32 4, i32 5>
  ret <4 x bfloat> %vext
}

; Mask <5..12> takes the last three lanes of %a followed by the first five
; lanes of %b. Expected lowering is one ext with byte offset 10 (five 16-bit
; lanes).
define dso_local <8 x bfloat> @test_vextq_bf16(<8 x bfloat> %a, <8 x bfloat> %b) {
; CHECK-LABEL: test_vextq_bf16:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: ext v0.16b, v0.16b, v1.16b, #10
; CHECK-NEXT: ret
entry:
  %vext = shufflevector <8 x bfloat> %a, <8 x bfloat> %b, <8 x i32> <i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12>
  ret <8 x bfloat> %vext
}

; Extracts the upper half (lanes 4..7) of an <8 x bfloat> vector as a
; <4 x bfloat>. Expected lowering rotates the register onto itself with
; ext #8 and returns the low 64 bits.
define dso_local <4 x bfloat> @test_vext_aligned_bf16(<8 x bfloat> %a) {
; CHECK-LABEL: test_vext_aligned_bf16:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: ext v0.16b, v0.16b, v0.16b, #8
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
; CHECK-NEXT: ret
entry:
  %vext = shufflevector <8 x bfloat> %a, <8 x bfloat> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  ret <4 x bfloat> %vext
}

; Extracts lanes 3..6 of an <8 x bfloat> vector, a window that straddles the
; 64-bit halves. Expected lowering rotates the register onto itself with
; ext #6 (three 16-bit lanes) and returns the low 64 bits.
define dso_local <4 x bfloat> @test_vext_unaligned_bf16(<8 x bfloat> %a) {
; CHECK-LABEL: test_vext_unaligned_bf16:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: ext v0.16b, v0.16b, v0.16b, #6
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
; CHECK-NEXT: ret
entry:
  %vext = shufflevector <8 x bfloat> %a, <8 x bfloat> undef, <4 x i32> <i32 3, i32 4, i32 5, i32 6>
  ret <4 x bfloat> %vext
}

; Picks every third lane of a <32 x bfloat> vector starting at lane 0
; (0,3,6,..,21). Expected lowering is two chained two-register tbl lookups
; whose index vectors are loaded from the constant pool.
define <8 x bfloat> @shuffle3step0_bf16(<32 x bfloat> %src) {
; CHECK-LABEL: shuffle3step0_bf16:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: adrp x8, .LCPI16_0
; CHECK-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
; CHECK-NEXT: mov v3.16b, v2.16b
; CHECK-NEXT: ldr q4, [x8, :lo12:.LCPI16_0]
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
; CHECK-NEXT: adrp x8, .LCPI16_1
; CHECK-NEXT: tbl v2.16b, { v0.16b, v1.16b }, v4.16b
; CHECK-NEXT: ldr q0, [x8, :lo12:.LCPI16_1]
; CHECK-NEXT: tbl v0.16b, { v2.16b, v3.16b }, v0.16b
; CHECK-NEXT: ret
entry:
  %s1 = shufflevector <32 x bfloat> %src, <32 x bfloat> undef, <8 x i32> <i32 0, i32 3, i32 6, i32 9, i32 12, i32 15, i32 18, i32 21>
  ret <8 x bfloat> %s1
}

; Picks every third lane of a <32 x bfloat> vector starting at lane 1
; (1,4,7,..,22). Expected lowering is two chained two-register tbl lookups
; whose index vectors are loaded from the constant pool.
define <8 x bfloat> @shuffle3step1_bf16(<32 x bfloat> %src) {
; CHECK-LABEL: shuffle3step1_bf16:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: adrp x8, .LCPI17_0
; CHECK-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
; CHECK-NEXT: mov v3.16b, v2.16b
; CHECK-NEXT: ldr q4, [x8, :lo12:.LCPI17_0]
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
; CHECK-NEXT: adrp x8, .LCPI17_1
; CHECK-NEXT: tbl v2.16b, { v0.16b, v1.16b }, v4.16b
; CHECK-NEXT: ldr q0, [x8, :lo12:.LCPI17_1]
; CHECK-NEXT: tbl v0.16b, { v2.16b, v3.16b }, v0.16b
; CHECK-NEXT: ret
entry:
  %s1 = shufflevector <32 x bfloat> %src, <32 x bfloat> undef, <8 x i32> <i32 1, i32 4, i32 7, i32 10, i32 13, i32 16, i32 19, i32 22>
  ret <8 x bfloat> %s1
}

; Picks every third lane of a <32 x bfloat> vector starting at lane 2
; (2,5,8,..,23). Expected lowering is two chained two-register tbl lookups
; whose index vectors are loaded from the constant pool.
define <8 x bfloat> @shuffle3step2_bf16(<32 x bfloat> %src) {
; CHECK-LABEL: shuffle3step2_bf16:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: adrp x8, .LCPI18_0
; CHECK-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
; CHECK-NEXT: mov v3.16b, v2.16b
; CHECK-NEXT: ldr q4, [x8, :lo12:.LCPI18_0]
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
; CHECK-NEXT: adrp x8, .LCPI18_1
; CHECK-NEXT: tbl v2.16b, { v0.16b, v1.16b }, v4.16b
; CHECK-NEXT: ldr q0, [x8, :lo12:.LCPI18_1]
; CHECK-NEXT: tbl v0.16b, { v2.16b, v3.16b }, v0.16b
; CHECK-NEXT: ret
entry:
  %s1 = shufflevector <32 x bfloat> %src, <32 x bfloat> undef, <8 x i32> <i32 2, i32 5, i32 8, i32 11, i32 14, i32 17, i32 20, i32 23>
  ret <8 x bfloat> %s1
}

; Reverses all four lanes of a <4 x bfloat> vector (mask <3,2,1,0>).
; Expected lowering is a single rev64 on .4h.
define dso_local <4 x bfloat> @test_vrev64_bf16(<4 x bfloat> %a) {
; CHECK-LABEL: test_vrev64_bf16:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: rev64 v0.4h, v0.4h
; CHECK-NEXT: ret
entry:
  %shuffle.i = shufflevector <4 x bfloat> %a, <4 x bfloat> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
  ret <4 x bfloat> %shuffle.i
}

; Reverses the lane order inside each group of four lanes of an <8 x bfloat>
; vector (mask <3,2,1,0,7,6,5,4>). Expected lowering is a single rev64 on .8h.
define dso_local <8 x bfloat> @test_vrev64q_bf16(<8 x bfloat> %a) {
; CHECK-LABEL: test_vrev64q_bf16:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: rev64 v0.8h, v0.8h
; CHECK-NEXT: ret
entry:
  %shuffle.i = shufflevector <8 x bfloat> %a, <8 x bfloat> undef, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4>
  ret <8 x bfloat> %shuffle.i
}

; Swaps each adjacent pair of lanes in a <4 x bfloat> vector (mask <1,0,3,2>).
; Expected lowering is a single rev32 on .4h.
define dso_local <4 x bfloat> @test_vrev32_bf16(<4 x bfloat> %a) {
; CHECK-LABEL: test_vrev32_bf16:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: rev32 v0.4h, v0.4h
; CHECK-NEXT: ret
entry:
  %shuffle.i = shufflevector <4 x bfloat> %a, <4 x bfloat> undef, <4 x i32> <i32 1, i32 0, i32 3, i32 2>
  ret <4 x bfloat> %shuffle.i
}

; Swaps each adjacent pair of lanes in an <8 x bfloat> vector
; (mask <1,0,3,2,5,4,7,6>). Expected lowering is a single rev32 on .8h.
define dso_local <8 x bfloat> @test_vrev32q_bf16(<8 x bfloat> %a) {
; CHECK-LABEL: test_vrev32q_bf16:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: rev32 v0.8h, v0.8h
; CHECK-NEXT: ret
entry:
  %shuffle.i = shufflevector <8 x bfloat> %a, <8 x bfloat> undef, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6>
  ret <8 x bfloat> %shuffle.i
}

; Loads one bfloat from %b and writes it into each of the four lanes with a
; chain of insertelement instructions. Expected lowering folds the load and
; the splat into a single ld1r on .4h.
define <4 x bfloat> @test_vld_dup1_4xbfloat(ptr %b) {
; CHECK-LABEL: test_vld_dup1_4xbfloat:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: ld1r { v0.4h }, [x0]
; CHECK-NEXT: ret
entry:
  %b1 = load bfloat, ptr %b, align 2
  %vecinit = insertelement <4 x bfloat> undef, bfloat %b1, i32 0
  %vecinit2 = insertelement <4 x bfloat> %vecinit, bfloat %b1, i32 1
  %vecinit3 = insertelement <4 x bfloat> %vecinit2, bfloat %b1, i32 2
  %vecinit4 = insertelement <4 x bfloat> %vecinit3, bfloat %b1, i32 3
  ret <4 x bfloat> %vecinit4
}

; Loads one bfloat from %b, inserts it into lane 0 and broadcasts it to all
; eight lanes with a zero-mask shuffle. Expected lowering folds the load and
; the splat into a single ld1r on .8h.
define <8 x bfloat> @test_vld_dup1_8xbfloat(ptr %b) local_unnamed_addr {
; CHECK-LABEL: test_vld_dup1_8xbfloat:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: ld1r { v0.8h }, [x0]
; CHECK-NEXT: ret
entry:
  %b1 = load bfloat, ptr %b, align 2
  %vecinit = insertelement <8 x bfloat> undef, bfloat %b1, i32 0
  %vecinit8 = shufflevector <8 x bfloat> %vecinit, <8 x bfloat> undef, <8 x i32> zeroinitializer
  ret <8 x bfloat> %vecinit8
}

; Concatenates a <4 x bfloat> vector with itself: both shuffle operands are
; %a and the mask is the identity over all eight source lanes. Expected
; lowering copies the low 64-bit element into the high one.
define <8 x bfloat> @test_shufflevector8xbfloat(<4 x bfloat> %a) {
; CHECK-LABEL: test_shufflevector8xbfloat:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
; CHECK-NEXT: mov v0.d[1], v0.d[0]
; CHECK-NEXT: ret
entry:
  %r = shufflevector <4 x bfloat> %a, <4 x bfloat> %a, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  ret <8 x bfloat> %r
}
