Files
clang-p2996/llvm/test/CodeGen/WebAssembly/simd-load-store-alignment.ll
Thomas Lively b69374ca58 [WebAssembly] Legalize vector types by widening
The default legalization of unsupported vector types is to promote the integers
in each lane, which leads to extra sign or zero extending and masking when
moving data into and out of vectors. Switch our preferred type legalization from
the default to vector widening, which keeps the data in the low lanes of the
vector rather than in the low bits of each lane. The unused high lanes can be
ignored.

Half-wide vectors are now loaded from memory into the low 64 bits of the v128
rather than spread out among the lanes. As a result, v128.load64_splat is a much
more common operation, so add new patterns to support it.

Differential Revision: https://reviews.llvm.org/D107502
2021-08-19 12:07:33 -07:00

1527 lines
50 KiB
LLVM

; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -verify-machineinstrs -mattr=+simd128 | FileCheck %s
; Test loads and stores with custom alignment values.
target triple = "wasm32-unknown-unknown"
; ==============================================================================
; 16 x i8
; ==============================================================================
; 1 is less than the default alignment for v128.load so p2align=0 is emitted.
define <16 x i8> @load_v16i8_a1(<16 x i8> *%p) {
; CHECK-LABEL: load_v16i8_a1:
; CHECK: .functype load_v16i8_a1 (i32) -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 0
; CHECK-NEXT: v128.load 0:p2align=0
; CHECK-NEXT: # fallthrough-return
%v = load <16 x i8>, <16 x i8>* %p, align 1
ret <16 x i8> %v
}
; 4 is less than the default alignment for v128.load so p2align=2 is emitted.
define <16 x i8> @load_v16i8_a4(<16 x i8> *%p) {
; CHECK-LABEL: load_v16i8_a4:
; CHECK: .functype load_v16i8_a4 (i32) -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 0
; CHECK-NEXT: v128.load 0:p2align=2
; CHECK-NEXT: # fallthrough-return
%v = load <16 x i8>, <16 x i8>* %p, align 4
ret <16 x i8> %v
}
; 16 is the default alignment for v128 so no attribute is needed.
define <16 x i8> @load_v16i8_a16(<16 x i8> *%p) {
; CHECK-LABEL: load_v16i8_a16:
; CHECK: .functype load_v16i8_a16 (i32) -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 0
; CHECK-NEXT: v128.load 0
; CHECK-NEXT: # fallthrough-return
%v = load <16 x i8>, <16 x i8>* %p, align 16
ret <16 x i8> %v
}
; 32 is greater than the default alignment so it is ignored.
define <16 x i8> @load_v16i8_a32(<16 x i8> *%p) {
; CHECK-LABEL: load_v16i8_a32:
; CHECK: .functype load_v16i8_a32 (i32) -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 0
; CHECK-NEXT: v128.load 0
; CHECK-NEXT: # fallthrough-return
%v = load <16 x i8>, <16 x i8>* %p, align 32
ret <16 x i8> %v
}
; 1 is less than the default alignment for v128.store so p2align=0 is emitted.
define void @store_v16i8_a1(<16 x i8> *%p, <16 x i8> %v) {
; CHECK-LABEL: store_v16i8_a1:
; CHECK: .functype store_v16i8_a1 (i32, v128) -> ()
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 0
; CHECK-NEXT: local.get 1
; CHECK-NEXT: v128.store 0:p2align=0
; CHECK-NEXT: # fallthrough-return
store <16 x i8> %v, <16 x i8>* %p, align 1
ret void
}
; 4 is less than the default alignment for v128.store so p2align=2 is emitted.
define void @store_v16i8_a4(<16 x i8> *%p, <16 x i8> %v) {
; CHECK-LABEL: store_v16i8_a4:
; CHECK: .functype store_v16i8_a4 (i32, v128) -> ()
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 0
; CHECK-NEXT: local.get 1
; CHECK-NEXT: v128.store 0:p2align=2
; CHECK-NEXT: # fallthrough-return
store <16 x i8> %v, <16 x i8>* %p, align 4
ret void
}
; 16 is the default alignment for v128 so no attribute is needed.
define void @store_v16i8_a16(<16 x i8> *%p, <16 x i8> %v) {
; CHECK-LABEL: store_v16i8_a16:
; CHECK: .functype store_v16i8_a16 (i32, v128) -> ()
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 0
; CHECK-NEXT: local.get 1
; CHECK-NEXT: v128.store 0
; CHECK-NEXT: # fallthrough-return
store <16 x i8> %v, <16 x i8>* %p, align 16
ret void
}
; 32 is greater than the default alignment so it is ignored.
define void @store_v16i8_a32(<16 x i8> *%p, <16 x i8> %v) {
; CHECK-LABEL: store_v16i8_a32:
; CHECK: .functype store_v16i8_a32 (i32, v128) -> ()
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 0
; CHECK-NEXT: local.get 1
; CHECK-NEXT: v128.store 0
; CHECK-NEXT: # fallthrough-return
store <16 x i8> %v, <16 x i8>* %p, align 32
ret void
}
; 1 is the default alignment for v128.load8_splat so no attribute is needed.
define <16 x i8> @load_splat_v16i8_a1(i8* %p) {
; CHECK-LABEL: load_splat_v16i8_a1:
; CHECK: .functype load_splat_v16i8_a1 (i32) -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 0
; CHECK-NEXT: v128.load8_splat 0
; CHECK-NEXT: # fallthrough-return
%e = load i8, i8* %p, align 1
%v1 = insertelement <16 x i8> undef, i8 %e, i32 0
%v2 = shufflevector <16 x i8> %v1, <16 x i8> undef, <16 x i32> zeroinitializer
ret <16 x i8> %v2
}
; 2 is greater than the default alignment so it is ignored.
define <16 x i8> @load_splat_v16i8_a2(i8* %p) {
; CHECK-LABEL: load_splat_v16i8_a2:
; CHECK: .functype load_splat_v16i8_a2 (i32) -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 0
; CHECK-NEXT: v128.load8_splat 0
; CHECK-NEXT: # fallthrough-return
%e = load i8, i8* %p, align 2
%v1 = insertelement <16 x i8> undef, i8 %e, i32 0
%v2 = shufflevector <16 x i8> %v1, <16 x i8> undef, <16 x i32> zeroinitializer
ret <16 x i8> %v2
}
; 1 is the default alignment for v128.load8_lane so no attribute is needed.
define <16 x i8> @load_lane_i8_a1(i8* %p, <16 x i8> %v) {
; CHECK-LABEL: load_lane_i8_a1:
; CHECK: .functype load_lane_i8_a1 (i32, v128) -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 0
; CHECK-NEXT: local.get 1
; CHECK-NEXT: v128.load8_lane 0, 0
; CHECK-NEXT: # fallthrough-return
%e = load i8, i8* %p, align 1
%v1 = insertelement <16 x i8> %v, i8 %e, i32 0
ret <16 x i8> %v1
}
; 2 is greater than the default alignment so it is ignored.
define <16 x i8> @load_lane_i8_a2(i8* %p, <16 x i8> %v) {
; CHECK-LABEL: load_lane_i8_a2:
; CHECK: .functype load_lane_i8_a2 (i32, v128) -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 0
; CHECK-NEXT: local.get 1
; CHECK-NEXT: v128.load8_lane 0, 0
; CHECK-NEXT: # fallthrough-return
%e = load i8, i8* %p, align 2
%v1 = insertelement <16 x i8> %v, i8 %e, i32 0
ret <16 x i8> %v1
}
; 1 is the default alignment for v128.store8_lane so no attribute is needed.
define void @store_lane_i8_a1(<16 x i8> %v, i8* %p) {
; CHECK-LABEL: store_lane_i8_a1:
; CHECK: .functype store_lane_i8_a1 (v128, i32) -> ()
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 1
; CHECK-NEXT: local.get 0
; CHECK-NEXT: v128.store8_lane 0, 0
; CHECK-NEXT: # fallthrough-return
%x = extractelement <16 x i8> %v, i32 0
store i8 %x, i8* %p, align 1
ret void
}
; 2 is greater than the default alignment so it is ignored.
define void @store_lane_i8_a2(<16 x i8> %v, i8* %p) {
; CHECK-LABEL: store_lane_i8_a2:
; CHECK: .functype store_lane_i8_a2 (v128, i32) -> ()
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 1
; CHECK-NEXT: local.get 0
; CHECK-NEXT: v128.store8_lane 0, 0
; CHECK-NEXT: # fallthrough-return
%x = extractelement <16 x i8> %v, i32 0
store i8 %x, i8* %p, align 2
ret void
}
; ==============================================================================
; 8 x i16
; ==============================================================================
; 1 is less than the default alignment for v128.load so p2align=0 is emitted.
define <8 x i16> @load_v8i16_a1(<8 x i16> *%p) {
; CHECK-LABEL: load_v8i16_a1:
; CHECK: .functype load_v8i16_a1 (i32) -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 0
; CHECK-NEXT: v128.load 0:p2align=0
; CHECK-NEXT: # fallthrough-return
%v = load <8 x i16>, <8 x i16>* %p, align 1
ret <8 x i16> %v
}
; 4 is less than the default alignment for v128.load so p2align=2 is emitted.
define <8 x i16> @load_v8i16_a4(<8 x i16> *%p) {
; CHECK-LABEL: load_v8i16_a4:
; CHECK: .functype load_v8i16_a4 (i32) -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 0
; CHECK-NEXT: v128.load 0:p2align=2
; CHECK-NEXT: # fallthrough-return
%v = load <8 x i16>, <8 x i16>* %p, align 4
ret <8 x i16> %v
}
; 16 is the default alignment for v128 so no attribute is needed.
define <8 x i16> @load_v8i16_a16(<8 x i16> *%p) {
; CHECK-LABEL: load_v8i16_a16:
; CHECK: .functype load_v8i16_a16 (i32) -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 0
; CHECK-NEXT: v128.load 0
; CHECK-NEXT: # fallthrough-return
%v = load <8 x i16>, <8 x i16>* %p, align 16
ret <8 x i16> %v
}
; 32 is greater than the default alignment so it is ignored.
define <8 x i16> @load_v8i16_a32(<8 x i16> *%p) {
; CHECK-LABEL: load_v8i16_a32:
; CHECK: .functype load_v8i16_a32 (i32) -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 0
; CHECK-NEXT: v128.load 0
; CHECK-NEXT: # fallthrough-return
%v = load <8 x i16>, <8 x i16>* %p, align 32
ret <8 x i16> %v
}
; 1 is less than the default alignment for v128.store so p2align=0 is emitted.
define void @store_v8i16_a1(<8 x i16> *%p, <8 x i16> %v) {
; CHECK-LABEL: store_v8i16_a1:
; CHECK: .functype store_v8i16_a1 (i32, v128) -> ()
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 0
; CHECK-NEXT: local.get 1
; CHECK-NEXT: v128.store 0:p2align=0
; CHECK-NEXT: # fallthrough-return
store <8 x i16> %v, <8 x i16>* %p, align 1
ret void
}
; 4 is less than the default alignment for v128.store so p2align=2 is emitted.
define void @store_v8i16_a4(<8 x i16> *%p, <8 x i16> %v) {
; CHECK-LABEL: store_v8i16_a4:
; CHECK: .functype store_v8i16_a4 (i32, v128) -> ()
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 0
; CHECK-NEXT: local.get 1
; CHECK-NEXT: v128.store 0:p2align=2
; CHECK-NEXT: # fallthrough-return
store <8 x i16> %v, <8 x i16>* %p, align 4
ret void
}
; 16 is the default alignment for v128 so no attribute is needed.
define void @store_v8i16_a16(<8 x i16> *%p, <8 x i16> %v) {
; CHECK-LABEL: store_v8i16_a16:
; CHECK: .functype store_v8i16_a16 (i32, v128) -> ()
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 0
; CHECK-NEXT: local.get 1
; CHECK-NEXT: v128.store 0
; CHECK-NEXT: # fallthrough-return
store <8 x i16> %v, <8 x i16>* %p, align 16
ret void
}
; 32 is greater than the default alignment so it is ignored.
define void @store_v8i16_a32(<8 x i16> *%p, <8 x i16> %v) {
; CHECK-LABEL: store_v8i16_a32:
; CHECK: .functype store_v8i16_a32 (i32, v128) -> ()
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 0
; CHECK-NEXT: local.get 1
; CHECK-NEXT: v128.store 0
; CHECK-NEXT: # fallthrough-return
store <8 x i16> %v, <8 x i16>* %p, align 32
ret void
}
; 1 is less than the default alignment for v128.load64_zero so p2align=0 is
; emitted.
define <8 x i8> @load_ext_v8i16_a1(<8 x i8>* %p) {
; CHECK-LABEL: load_ext_v8i16_a1:
; CHECK: .functype load_ext_v8i16_a1 (i32) -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 0
; CHECK-NEXT: v128.load64_zero 0:p2align=0
; CHECK-NEXT: # fallthrough-return
%v = load <8 x i8>, <8 x i8>* %p, align 1
ret <8 x i8> %v
}
; 2 is less than the default alignment for v128.load64_zero so p2align=1 is
; emitted.
define <8 x i8> @load_ext_v8i16_a2(<8 x i8>* %p) {
; CHECK-LABEL: load_ext_v8i16_a2:
; CHECK: .functype load_ext_v8i16_a2 (i32) -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 0
; CHECK-NEXT: v128.load64_zero 0:p2align=1
; CHECK-NEXT: # fallthrough-return
%v = load <8 x i8>, <8 x i8>* %p, align 2
ret <8 x i8> %v
}
; 4 is less than the default alignment for v128.load64_zero so p2align=2 is
; emitted.
define <8 x i8> @load_ext_v8i16_a4(<8 x i8>* %p) {
; CHECK-LABEL: load_ext_v8i16_a4:
; CHECK: .functype load_ext_v8i16_a4 (i32) -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 0
; CHECK-NEXT: v128.load64_zero 0:p2align=2
; CHECK-NEXT: # fallthrough-return
%v = load <8 x i8>, <8 x i8>* %p, align 4
ret <8 x i8> %v
}
; 8 is the default alignment for v128.load64_zero so no attribute is needed.
define <8 x i8> @load_ext_v8i16_a8(<8 x i8>* %p) {
; CHECK-LABEL: load_ext_v8i16_a8:
; CHECK: .functype load_ext_v8i16_a8 (i32) -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 0
; CHECK-NEXT: v128.load64_zero 0
; CHECK-NEXT: # fallthrough-return
%v = load <8 x i8>, <8 x i8>* %p, align 8
ret <8 x i8> %v
}
; 16 is greater than the default alignment for v128.load64_zero; the load is
; emitted as a full-width v128.load, which needs no attribute at this alignment.
define <8 x i8> @load_ext_v8i16_a16(<8 x i8>* %p) {
; CHECK-LABEL: load_ext_v8i16_a16:
; CHECK: .functype load_ext_v8i16_a16 (i32) -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 0
; CHECK-NEXT: v128.load 0
; CHECK-NEXT: # fallthrough-return
%v = load <8 x i8>, <8 x i8>* %p, align 16
ret <8 x i8> %v
}
; 1 is less than the default alignment for i16x8.load8x8_s so p2align=0 is
; emitted.
define <8 x i16> @load_sext_v8i16_a1(<8 x i8>* %p) {
; CHECK-LABEL: load_sext_v8i16_a1:
; CHECK: .functype load_sext_v8i16_a1 (i32) -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 0
; CHECK-NEXT: i16x8.load8x8_s 0:p2align=0
; CHECK-NEXT: # fallthrough-return
%v = load <8 x i8>, <8 x i8>* %p, align 1
%v2 = sext <8 x i8> %v to <8 x i16>
ret <8 x i16> %v2
}
; 2 is less than the default alignment for i16x8.load8x8_s so p2align=1 is
; emitted.
define <8 x i16> @load_sext_v8i16_a2(<8 x i8>* %p) {
; CHECK-LABEL: load_sext_v8i16_a2:
; CHECK: .functype load_sext_v8i16_a2 (i32) -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 0
; CHECK-NEXT: i16x8.load8x8_s 0:p2align=1
; CHECK-NEXT: # fallthrough-return
%v = load <8 x i8>, <8 x i8>* %p, align 2
%v2 = sext <8 x i8> %v to <8 x i16>
ret <8 x i16> %v2
}
; 4 is less than the default alignment for i16x8.load8x8_s so p2align=2 is
; emitted.
define <8 x i16> @load_sext_v8i16_a4(<8 x i8>* %p) {
; CHECK-LABEL: load_sext_v8i16_a4:
; CHECK: .functype load_sext_v8i16_a4 (i32) -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 0
; CHECK-NEXT: i16x8.load8x8_s 0:p2align=2
; CHECK-NEXT: # fallthrough-return
%v = load <8 x i8>, <8 x i8>* %p, align 4
%v2 = sext <8 x i8> %v to <8 x i16>
ret <8 x i16> %v2
}
; 8 is the default alignment for v128 extending load so no attribute is needed.
define <8 x i16> @load_sext_v8i16_a8(<8 x i8>* %p) {
; CHECK-LABEL: load_sext_v8i16_a8:
; CHECK: .functype load_sext_v8i16_a8 (i32) -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 0
; CHECK-NEXT: i16x8.load8x8_s 0
; CHECK-NEXT: # fallthrough-return
%v = load <8 x i8>, <8 x i8>* %p, align 8
%v2 = sext <8 x i8> %v to <8 x i16>
ret <8 x i16> %v2
}
; 16 is greater than the default alignment so it is ignored.
define <8 x i16> @load_sext_v8i16_a16(<8 x i8>* %p) {
; CHECK-LABEL: load_sext_v8i16_a16:
; CHECK: .functype load_sext_v8i16_a16 (i32) -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 0
; CHECK-NEXT: i16x8.load8x8_s 0
; CHECK-NEXT: # fallthrough-return
%v = load <8 x i8>, <8 x i8>* %p, align 16
%v2 = sext <8 x i8> %v to <8 x i16>
ret <8 x i16> %v2
}
; 1 is less than the default alignment for v128.load16_splat so p2align=0 is
; emitted.
define <8 x i16> @load_splat_v8i16_a1(i16* %p) {
; CHECK-LABEL: load_splat_v8i16_a1:
; CHECK: .functype load_splat_v8i16_a1 (i32) -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 0
; CHECK-NEXT: v128.load16_splat 0:p2align=0
; CHECK-NEXT: # fallthrough-return
%e = load i16, i16* %p, align 1
%v1 = insertelement <8 x i16> undef, i16 %e, i32 0
%v2 = shufflevector <8 x i16> %v1, <8 x i16> undef, <8 x i32> zeroinitializer
ret <8 x i16> %v2
}
; 2 is the default alignment for v128.load16_splat so no attribute is needed.
define <8 x i16> @load_splat_v8i16_a2(i16* %p) {
; CHECK-LABEL: load_splat_v8i16_a2:
; CHECK: .functype load_splat_v8i16_a2 (i32) -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 0
; CHECK-NEXT: v128.load16_splat 0
; CHECK-NEXT: # fallthrough-return
%e = load i16, i16* %p, align 2
%v1 = insertelement <8 x i16> undef, i16 %e, i32 0
%v2 = shufflevector <8 x i16> %v1, <8 x i16> undef, <8 x i32> zeroinitializer
ret <8 x i16> %v2
}
; 4 is greater than the default alignment so it is ignored.
define <8 x i16> @load_splat_v8i16_a4(i16* %p) {
; CHECK-LABEL: load_splat_v8i16_a4:
; CHECK: .functype load_splat_v8i16_a4 (i32) -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 0
; CHECK-NEXT: v128.load16_splat 0
; CHECK-NEXT: # fallthrough-return
%e = load i16, i16* %p, align 4
%v1 = insertelement <8 x i16> undef, i16 %e, i32 0
%v2 = shufflevector <8 x i16> %v1, <8 x i16> undef, <8 x i32> zeroinitializer
ret <8 x i16> %v2
}
; 1 is less than the default alignment for v128.load16_lane so p2align=0 is
; emitted.
define <8 x i16> @load_lane_i16_a1(i16* %p, <8 x i16> %v) {
; CHECK-LABEL: load_lane_i16_a1:
; CHECK: .functype load_lane_i16_a1 (i32, v128) -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 0
; CHECK-NEXT: local.get 1
; CHECK-NEXT: v128.load16_lane 0:p2align=0, 0
; CHECK-NEXT: # fallthrough-return
%e = load i16, i16* %p, align 1
%v1 = insertelement <8 x i16> %v, i16 %e, i32 0
ret <8 x i16> %v1
}
; 2 is the default alignment for v128.load16_lane so no attribute is needed.
define <8 x i16> @load_lane_i16_a2(i16* %p, <8 x i16> %v) {
; CHECK-LABEL: load_lane_i16_a2:
; CHECK: .functype load_lane_i16_a2 (i32, v128) -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 0
; CHECK-NEXT: local.get 1
; CHECK-NEXT: v128.load16_lane 0, 0
; CHECK-NEXT: # fallthrough-return
%e = load i16, i16* %p, align 2
%v1 = insertelement <8 x i16> %v, i16 %e, i32 0
ret <8 x i16> %v1
}
; 4 is greater than the default alignment so it is ignored.
define <8 x i16> @load_lane_i16_a4(i16* %p, <8 x i16> %v) {
; CHECK-LABEL: load_lane_i16_a4:
; CHECK: .functype load_lane_i16_a4 (i32, v128) -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 0
; CHECK-NEXT: local.get 1
; CHECK-NEXT: v128.load16_lane 0, 0
; CHECK-NEXT: # fallthrough-return
%e = load i16, i16* %p, align 4
%v1 = insertelement <8 x i16> %v, i16 %e, i32 0
ret <8 x i16> %v1
}
; 1 is less than the default alignment for v128.store16_lane so p2align=0 is
; emitted.
define void @store_lane_i16_a1(<8 x i16> %v, i16* %p) {
; CHECK-LABEL: store_lane_i16_a1:
; CHECK: .functype store_lane_i16_a1 (v128, i32) -> ()
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 1
; CHECK-NEXT: local.get 0
; CHECK-NEXT: v128.store16_lane 0:p2align=0, 0
; CHECK-NEXT: # fallthrough-return
%x = extractelement <8 x i16> %v, i32 0
store i16 %x, i16* %p, align 1
ret void
}
; 2 is the default alignment for v128.store16_lane so no attribute is needed.
define void @store_lane_i16_a2(<8 x i16> %v, i16* %p) {
; CHECK-LABEL: store_lane_i16_a2:
; CHECK: .functype store_lane_i16_a2 (v128, i32) -> ()
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 1
; CHECK-NEXT: local.get 0
; CHECK-NEXT: v128.store16_lane 0, 0
; CHECK-NEXT: # fallthrough-return
%x = extractelement <8 x i16> %v, i32 0
store i16 %x, i16* %p, align 2
ret void
}
; 4 is greater than the default alignment so it is ignored.
define void @store_lane_i16_a4(<8 x i16> %v, i16* %p) {
; CHECK-LABEL: store_lane_i16_a4:
; CHECK: .functype store_lane_i16_a4 (v128, i32) -> ()
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 1
; CHECK-NEXT: local.get 0
; CHECK-NEXT: v128.store16_lane 0, 0
; CHECK-NEXT: # fallthrough-return
%x = extractelement <8 x i16> %v, i32 0
store i16 %x, i16* %p, align 4
ret void
}
; ==============================================================================
; 4 x i32
; ==============================================================================
; 1 is less than the default alignment for v128.load so p2align=0 is emitted.
define <4 x i32> @load_v4i32_a1(<4 x i32> *%p) {
; CHECK-LABEL: load_v4i32_a1:
; CHECK: .functype load_v4i32_a1 (i32) -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 0
; CHECK-NEXT: v128.load 0:p2align=0
; CHECK-NEXT: # fallthrough-return
%v = load <4 x i32>, <4 x i32>* %p, align 1
ret <4 x i32> %v
}
; 4 is less than the default alignment for v128.load so p2align=2 is emitted.
define <4 x i32> @load_v4i32_a4(<4 x i32> *%p) {
; CHECK-LABEL: load_v4i32_a4:
; CHECK: .functype load_v4i32_a4 (i32) -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 0
; CHECK-NEXT: v128.load 0:p2align=2
; CHECK-NEXT: # fallthrough-return
%v = load <4 x i32>, <4 x i32>* %p, align 4
ret <4 x i32> %v
}
; 16 is the default alignment for v128 so no attribute is needed.
define <4 x i32> @load_v4i32_a16(<4 x i32> *%p) {
; CHECK-LABEL: load_v4i32_a16:
; CHECK: .functype load_v4i32_a16 (i32) -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 0
; CHECK-NEXT: v128.load 0
; CHECK-NEXT: # fallthrough-return
%v = load <4 x i32>, <4 x i32>* %p, align 16
ret <4 x i32> %v
}
; 32 is greater than the default alignment so it is ignored.
define <4 x i32> @load_v4i32_a32(<4 x i32> *%p) {
; CHECK-LABEL: load_v4i32_a32:
; CHECK: .functype load_v4i32_a32 (i32) -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 0
; CHECK-NEXT: v128.load 0
; CHECK-NEXT: # fallthrough-return
%v = load <4 x i32>, <4 x i32>* %p, align 32
ret <4 x i32> %v
}
; 1 is less than the default alignment for v128.store so p2align=0 is emitted.
define void @store_v4i32_a1(<4 x i32> *%p, <4 x i32> %v) {
; CHECK-LABEL: store_v4i32_a1:
; CHECK: .functype store_v4i32_a1 (i32, v128) -> ()
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 0
; CHECK-NEXT: local.get 1
; CHECK-NEXT: v128.store 0:p2align=0
; CHECK-NEXT: # fallthrough-return
store <4 x i32> %v, <4 x i32>* %p, align 1
ret void
}
; 4 is less than the default alignment for v128.store so p2align=2 is emitted.
define void @store_v4i32_a4(<4 x i32> *%p, <4 x i32> %v) {
; CHECK-LABEL: store_v4i32_a4:
; CHECK: .functype store_v4i32_a4 (i32, v128) -> ()
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 0
; CHECK-NEXT: local.get 1
; CHECK-NEXT: v128.store 0:p2align=2
; CHECK-NEXT: # fallthrough-return
store <4 x i32> %v, <4 x i32>* %p, align 4
ret void
}
; 16 is the default alignment for v128 so no attribute is needed.
define void @store_v4i32_a16(<4 x i32> *%p, <4 x i32> %v) {
; CHECK-LABEL: store_v4i32_a16:
; CHECK: .functype store_v4i32_a16 (i32, v128) -> ()
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 0
; CHECK-NEXT: local.get 1
; CHECK-NEXT: v128.store 0
; CHECK-NEXT: # fallthrough-return
store <4 x i32> %v, <4 x i32>* %p, align 16
ret void
}
; 32 is greater than the default alignment so it is ignored.
define void @store_v4i32_a32(<4 x i32> *%p, <4 x i32> %v) {
; CHECK-LABEL: store_v4i32_a32:
; CHECK: .functype store_v4i32_a32 (i32, v128) -> ()
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 0
; CHECK-NEXT: local.get 1
; CHECK-NEXT: v128.store 0
; CHECK-NEXT: # fallthrough-return
store <4 x i32> %v, <4 x i32>* %p, align 32
ret void
}
; 1 is less than the default alignment for v128.load64_zero so p2align=0 is
; emitted.
define <4 x i16> @load_ext_v4i32_a1(<4 x i16>* %p) {
; CHECK-LABEL: load_ext_v4i32_a1:
; CHECK: .functype load_ext_v4i32_a1 (i32) -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 0
; CHECK-NEXT: v128.load64_zero 0:p2align=0
; CHECK-NEXT: # fallthrough-return
%v = load <4 x i16>, <4 x i16>* %p, align 1
ret <4 x i16> %v
}
; 2 is less than the default alignment for v128.load64_zero so p2align=1 is
; emitted.
define <4 x i16> @load_ext_v4i32_a2(<4 x i16>* %p) {
; CHECK-LABEL: load_ext_v4i32_a2:
; CHECK: .functype load_ext_v4i32_a2 (i32) -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 0
; CHECK-NEXT: v128.load64_zero 0:p2align=1
; CHECK-NEXT: # fallthrough-return
%v = load <4 x i16>, <4 x i16>* %p, align 2
ret <4 x i16> %v
}
; 4 is less than the default alignment for v128.load64_zero so p2align=2 is
; emitted.
define <4 x i16> @load_ext_v4i32_a4(<4 x i16>* %p) {
; CHECK-LABEL: load_ext_v4i32_a4:
; CHECK: .functype load_ext_v4i32_a4 (i32) -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 0
; CHECK-NEXT: v128.load64_zero 0:p2align=2
; CHECK-NEXT: # fallthrough-return
%v = load <4 x i16>, <4 x i16>* %p, align 4
ret <4 x i16> %v
}
; 8 is the default alignment for v128.load64_zero so no attribute is needed.
define <4 x i16> @load_ext_v4i32_a8(<4 x i16>* %p) {
; CHECK-LABEL: load_ext_v4i32_a8:
; CHECK: .functype load_ext_v4i32_a8 (i32) -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 0
; CHECK-NEXT: v128.load64_zero 0
; CHECK-NEXT: # fallthrough-return
%v = load <4 x i16>, <4 x i16>* %p, align 8
ret <4 x i16> %v
}
; 16 is greater than the default alignment for v128.load64_zero; the load is
; emitted as a full-width v128.load, which needs no attribute at this alignment.
define <4 x i16> @load_ext_v4i32_a16(<4 x i16>* %p) {
; CHECK-LABEL: load_ext_v4i32_a16:
; CHECK: .functype load_ext_v4i32_a16 (i32) -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 0
; CHECK-NEXT: v128.load 0
; CHECK-NEXT: # fallthrough-return
%v = load <4 x i16>, <4 x i16>* %p, align 16
ret <4 x i16> %v
}
; 1 is less than the default alignment for i32x4.load16x4_s so p2align=0 is
; emitted.
define <4 x i32> @load_sext_v4i32_a1(<4 x i16>* %p) {
; CHECK-LABEL: load_sext_v4i32_a1:
; CHECK: .functype load_sext_v4i32_a1 (i32) -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 0
; CHECK-NEXT: i32x4.load16x4_s 0:p2align=0
; CHECK-NEXT: # fallthrough-return
%v = load <4 x i16>, <4 x i16>* %p, align 1
%v2 = sext <4 x i16> %v to <4 x i32>
ret <4 x i32> %v2
}
; 2 is less than the default alignment for i32x4.load16x4_s so p2align=1 is
; emitted.
define <4 x i32> @load_sext_v4i32_a2(<4 x i16>* %p) {
; CHECK-LABEL: load_sext_v4i32_a2:
; CHECK: .functype load_sext_v4i32_a2 (i32) -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 0
; CHECK-NEXT: i32x4.load16x4_s 0:p2align=1
; CHECK-NEXT: # fallthrough-return
%v = load <4 x i16>, <4 x i16>* %p, align 2
%v2 = sext <4 x i16> %v to <4 x i32>
ret <4 x i32> %v2
}
; 4 is less than the default alignment for i32x4.load16x4_s so p2align=2 is
; emitted.
define <4 x i32> @load_sext_v4i32_a4(<4 x i16>* %p) {
; CHECK-LABEL: load_sext_v4i32_a4:
; CHECK: .functype load_sext_v4i32_a4 (i32) -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 0
; CHECK-NEXT: i32x4.load16x4_s 0:p2align=2
; CHECK-NEXT: # fallthrough-return
%v = load <4 x i16>, <4 x i16>* %p, align 4
%v2 = sext <4 x i16> %v to <4 x i32>
ret <4 x i32> %v2
}
; 8 is the default alignment for v128 extending load so no attribute is needed.
define <4 x i32> @load_sext_v4i32_a8(<4 x i16>* %p) {
; CHECK-LABEL: load_sext_v4i32_a8:
; CHECK: .functype load_sext_v4i32_a8 (i32) -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 0
; CHECK-NEXT: i32x4.load16x4_s 0
; CHECK-NEXT: # fallthrough-return
%v = load <4 x i16>, <4 x i16>* %p, align 8
%v2 = sext <4 x i16> %v to <4 x i32>
ret <4 x i32> %v2
}
; 16 is greater than the default alignment so it is ignored.
define <4 x i32> @load_sext_v4i32_a16(<4 x i16>* %p) {
; CHECK-LABEL: load_sext_v4i32_a16:
; CHECK: .functype load_sext_v4i32_a16 (i32) -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 0
; CHECK-NEXT: i32x4.load16x4_s 0
; CHECK-NEXT: # fallthrough-return
%v = load <4 x i16>, <4 x i16>* %p, align 16
%v2 = sext <4 x i16> %v to <4 x i32>
ret <4 x i32> %v2
}
; 1 is less than the default alignment for v128.load32_splat so p2align=0 is
; emitted.
define <4 x i32> @load_splat_v4i32_a1(i32* %addr) {
; CHECK-LABEL: load_splat_v4i32_a1:
; CHECK: .functype load_splat_v4i32_a1 (i32) -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 0
; CHECK-NEXT: v128.load32_splat 0:p2align=0
; CHECK-NEXT: # fallthrough-return
%e = load i32, i32* %addr, align 1
%v1 = insertelement <4 x i32> undef, i32 %e, i32 0
%v2 = shufflevector <4 x i32> %v1, <4 x i32> undef, <4 x i32> zeroinitializer
ret <4 x i32> %v2
}
; 2 is less than the default alignment for v128.load32_splat so p2align=1 is
; emitted.
define <4 x i32> @load_splat_v4i32_a2(i32* %addr) {
; CHECK-LABEL: load_splat_v4i32_a2:
; CHECK: .functype load_splat_v4i32_a2 (i32) -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 0
; CHECK-NEXT: v128.load32_splat 0:p2align=1
; CHECK-NEXT: # fallthrough-return
%e = load i32, i32* %addr, align 2
%v1 = insertelement <4 x i32> undef, i32 %e, i32 0
%v2 = shufflevector <4 x i32> %v1, <4 x i32> undef, <4 x i32> zeroinitializer
ret <4 x i32> %v2
}
; 4 is the default alignment for v128.load32_splat so no attribute is needed.
define <4 x i32> @load_splat_v4i32_a4(i32* %addr) {
; CHECK-LABEL: load_splat_v4i32_a4:
; CHECK: .functype load_splat_v4i32_a4 (i32) -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 0
; CHECK-NEXT: v128.load32_splat 0
; CHECK-NEXT: # fallthrough-return
%e = load i32, i32* %addr, align 4
%v1 = insertelement <4 x i32> undef, i32 %e, i32 0
%v2 = shufflevector <4 x i32> %v1, <4 x i32> undef, <4 x i32> zeroinitializer
ret <4 x i32> %v2
}
; 8 is greater than the default alignment so it is ignored.
define <4 x i32> @load_splat_v4i32_a8(i32* %addr) {
; CHECK-LABEL: load_splat_v4i32_a8:
; CHECK: .functype load_splat_v4i32_a8 (i32) -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 0
; CHECK-NEXT: v128.load32_splat 0
; CHECK-NEXT: # fallthrough-return
%e = load i32, i32* %addr, align 8
%v1 = insertelement <4 x i32> undef, i32 %e, i32 0
%v2 = shufflevector <4 x i32> %v1, <4 x i32> undef, <4 x i32> zeroinitializer
ret <4 x i32> %v2
}
; 1 is less than the default alignment for v128.load32_lane so p2align=0 is
; emitted.
define <4 x i32> @load_lane_i32_a1(i32* %p, <4 x i32> %v) {
; CHECK-LABEL: load_lane_i32_a1:
; CHECK: .functype load_lane_i32_a1 (i32, v128) -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 0
; CHECK-NEXT: local.get 1
; CHECK-NEXT: v128.load32_lane 0:p2align=0, 0
; CHECK-NEXT: # fallthrough-return
%e = load i32, i32* %p, align 1
%v1 = insertelement <4 x i32> %v, i32 %e, i32 0
ret <4 x i32> %v1
}
; 2 is less than the default alignment for v128.load32_lane so p2align=1 is
; emitted.
define <4 x i32> @load_lane_i32_a2(i32* %p, <4 x i32> %v) {
; CHECK-LABEL: load_lane_i32_a2:
; CHECK: .functype load_lane_i32_a2 (i32, v128) -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 0
; CHECK-NEXT: local.get 1
; CHECK-NEXT: v128.load32_lane 0:p2align=1, 0
; CHECK-NEXT: # fallthrough-return
%e = load i32, i32* %p, align 2
%v1 = insertelement <4 x i32> %v, i32 %e, i32 0
ret <4 x i32> %v1
}
; 4 is the default alignment for v128.load32_lane so no attribute is needed.
define <4 x i32> @load_lane_i32_a4(i32* %p, <4 x i32> %v) {
; CHECK-LABEL: load_lane_i32_a4:
; CHECK: .functype load_lane_i32_a4 (i32, v128) -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 0
; CHECK-NEXT: local.get 1
; CHECK-NEXT: v128.load32_lane 0, 0
; CHECK-NEXT: # fallthrough-return
%e = load i32, i32* %p, align 4
%v1 = insertelement <4 x i32> %v, i32 %e, i32 0
ret <4 x i32> %v1
}
; 8 is greater than the default alignment so it is ignored.
define <4 x i32> @load_lane_i32_a8(i32* %p, <4 x i32> %v) {
; CHECK-LABEL: load_lane_i32_a8:
; CHECK: .functype load_lane_i32_a8 (i32, v128) -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 0
; CHECK-NEXT: local.get 1
; CHECK-NEXT: v128.load32_lane 0, 0
; CHECK-NEXT: # fallthrough-return
%e = load i32, i32* %p, align 8
%v1 = insertelement <4 x i32> %v, i32 %e, i32 0
ret <4 x i32> %v1
}
; 1 is less than the default alignment for v128.store32_lane so p2align=0 is
; emitted.
define void @store_lane_i32_a1(<4 x i32> %v, i32* %p) {
; CHECK-LABEL: store_lane_i32_a1:
; CHECK: .functype store_lane_i32_a1 (v128, i32) -> ()
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 1
; CHECK-NEXT: local.get 0
; CHECK-NEXT: v128.store32_lane 0:p2align=0, 0
; CHECK-NEXT: # fallthrough-return
%x = extractelement <4 x i32> %v, i32 0
store i32 %x, i32* %p, align 1
ret void
}
; 2 is less than the default alignment for v128.store32_lane so p2align=1 is
; emitted.
define void @store_lane_i32_a2(<4 x i32> %v, i32* %p) {
; CHECK-LABEL: store_lane_i32_a2:
; CHECK: .functype store_lane_i32_a2 (v128, i32) -> ()
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 1
; CHECK-NEXT: local.get 0
; CHECK-NEXT: v128.store32_lane 0:p2align=1, 0
; CHECK-NEXT: # fallthrough-return
%x = extractelement <4 x i32> %v, i32 0
store i32 %x, i32* %p, align 2
ret void
}
; 4 is the default alignment for v128.store32_lane so no attribute is needed.
define void @store_lane_i32_a4(<4 x i32> %v, i32* %p) {
; CHECK-LABEL: store_lane_i32_a4:
; CHECK: .functype store_lane_i32_a4 (v128, i32) -> ()
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 1
; CHECK-NEXT: local.get 0
; CHECK-NEXT: v128.store32_lane 0, 0
; CHECK-NEXT: # fallthrough-return
%x = extractelement <4 x i32> %v, i32 0
store i32 %x, i32* %p, align 4
ret void
}
; 8 is greater than the default alignment so it is ignored.
define void @store_lane_i32_a8(<4 x i32> %v, i32* %p) {
; CHECK-LABEL: store_lane_i32_a8:
; CHECK: .functype store_lane_i32_a8 (v128, i32) -> ()
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 1
; CHECK-NEXT: local.get 0
; CHECK-NEXT: v128.store32_lane 0, 0
; CHECK-NEXT: # fallthrough-return
%x = extractelement <4 x i32> %v, i32 0
store i32 %x, i32* %p, align 8
ret void
}
; 1 is less than the default alignment for v128.load32_zero so p2align=0 is
; emitted.
define <4 x i32> @load_zero_i32_a1(i32* %p) {
; CHECK-LABEL: load_zero_i32_a1:
; CHECK: .functype load_zero_i32_a1 (i32) -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 0
; CHECK-NEXT: v128.load32_zero 0:p2align=0
; CHECK-NEXT: # fallthrough-return
%x = load i32, i32* %p, align 1
%v = insertelement <4 x i32> zeroinitializer, i32 %x, i32 0
ret <4 x i32> %v
}
; 2 is less than the default alignment for v128.load32_zero so p2align=1 is
; emitted.
define <4 x i32> @load_zero_i32_a2(i32* %p) {
; CHECK-LABEL: load_zero_i32_a2:
; CHECK: .functype load_zero_i32_a2 (i32) -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 0
; CHECK-NEXT: v128.load32_zero 0:p2align=1
; CHECK-NEXT: # fallthrough-return
%x = load i32, i32* %p, align 2
%v = insertelement <4 x i32> zeroinitializer, i32 %x, i32 0
ret <4 x i32> %v
}
; 4 is the default alignment for v128.load32_zero so no attribute is needed.
define <4 x i32> @load_zero_i32_a4(i32* %p) {
; CHECK-LABEL: load_zero_i32_a4:
; CHECK: .functype load_zero_i32_a4 (i32) -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 0
; CHECK-NEXT: v128.load32_zero 0
; CHECK-NEXT: # fallthrough-return
%x = load i32, i32* %p, align 4
%v = insertelement <4 x i32> zeroinitializer, i32 %x, i32 0
ret <4 x i32> %v
}
; 8 is greater than the default alignment so it is ignored.
define <4 x i32> @load_zero_i32_a8(i32* %p) {
; CHECK-LABEL: load_zero_i32_a8:
; CHECK: .functype load_zero_i32_a8 (i32) -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 0
; CHECK-NEXT: v128.load32_zero 0
; CHECK-NEXT: # fallthrough-return
%x = load i32, i32* %p, align 8
%v = insertelement <4 x i32> zeroinitializer, i32 %x, i32 0
ret <4 x i32> %v
}
; ==============================================================================
; 2 x i64
; ==============================================================================
; 1 is less than the default alignment for v128.load so p2align=0 is emitted.
define <2 x i64> @load_v2i64_a1(<2 x i64> *%p) {
; CHECK-LABEL: load_v2i64_a1:
; CHECK: .functype load_v2i64_a1 (i32) -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 0
; CHECK-NEXT: v128.load 0:p2align=0
; CHECK-NEXT: # fallthrough-return
%v = load <2 x i64>, <2 x i64>* %p, align 1
ret <2 x i64> %v
}
; 4 is less than the default alignment for v128.load so p2align=2 is emitted.
define <2 x i64> @load_v2i64_a4(<2 x i64> *%p) {
; CHECK-LABEL: load_v2i64_a4:
; CHECK: .functype load_v2i64_a4 (i32) -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 0
; CHECK-NEXT: v128.load 0:p2align=2
; CHECK-NEXT: # fallthrough-return
%v = load <2 x i64>, <2 x i64>* %p, align 4
ret <2 x i64> %v
}
; 16 is the default alignment for v128 so no attribute is needed.
; Whole-vector <2 x i64> load with align 16; the expected v128.load carries no
; p2align attribute.
define <2 x i64> @load_v2i64_a16(<2 x i64> *%p) {
; CHECK-LABEL: load_v2i64_a16:
; CHECK: .functype load_v2i64_a16 (i32) -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 0
; CHECK-NEXT: v128.load 0
; CHECK-NEXT: # fallthrough-return
%v = load <2 x i64>, <2 x i64>* %p, align 16
ret <2 x i64> %v
}
; 32 is greater than the default alignment so it is ignored.
; Whole-vector <2 x i64> load with align 32; the expected output is the same as
; for align 16 (no p2align attribute).
define <2 x i64> @load_v2i64_a32(<2 x i64> *%p) {
; CHECK-LABEL: load_v2i64_a32:
; CHECK: .functype load_v2i64_a32 (i32) -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 0
; CHECK-NEXT: v128.load 0
; CHECK-NEXT: # fallthrough-return
%v = load <2 x i64>, <2 x i64>* %p, align 32
ret <2 x i64> %v
}
; Whole-vector <2 x i64> store with align 1; the expected v128.store carries
; p2align=0.
define void @store_v2i64_a1(<2 x i64> *%p, <2 x i64> %v) {
; CHECK-LABEL: store_v2i64_a1:
; CHECK: .functype store_v2i64_a1 (i32, v128) -> ()
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 0
; CHECK-NEXT: local.get 1
; CHECK-NEXT: v128.store 0:p2align=0
; CHECK-NEXT: # fallthrough-return
store <2 x i64> %v, <2 x i64>* %p, align 1
ret void
}
; Whole-vector <2 x i64> store with align 4; the expected v128.store carries
; p2align=2.
define void @store_v2i64_a4(<2 x i64> *%p, <2 x i64> %v) {
; CHECK-LABEL: store_v2i64_a4:
; CHECK: .functype store_v2i64_a4 (i32, v128) -> ()
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 0
; CHECK-NEXT: local.get 1
; CHECK-NEXT: v128.store 0:p2align=2
; CHECK-NEXT: # fallthrough-return
store <2 x i64> %v, <2 x i64>* %p, align 4
ret void
}
; 16 is the default alignment for v128 so no attribute is needed.
; Whole-vector <2 x i64> store with align 16; the expected v128.store carries no
; p2align attribute.
define void @store_v2i64_a16(<2 x i64> *%p, <2 x i64> %v) {
; CHECK-LABEL: store_v2i64_a16:
; CHECK: .functype store_v2i64_a16 (i32, v128) -> ()
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 0
; CHECK-NEXT: local.get 1
; CHECK-NEXT: v128.store 0
; CHECK-NEXT: # fallthrough-return
store <2 x i64> %v, <2 x i64>* %p, align 16
ret void
}
; 32 is greater than the default alignment so it is ignored.
; Whole-vector <2 x i64> store with align 32; the expected output is the same as
; for align 16 (no p2align attribute).
define void @store_v2i64_a32(<2 x i64> *%p, <2 x i64> %v) {
; CHECK-LABEL: store_v2i64_a32:
; CHECK: .functype store_v2i64_a32 (i32, v128) -> ()
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 0
; CHECK-NEXT: local.get 1
; CHECK-NEXT: v128.store 0
; CHECK-NEXT: # fallthrough-return
store <2 x i64> %v, <2 x i64>* %p, align 32
ret void
}
; Scalar i64 load with align 1 broadcast to both lanes; the expected
; v128.load64_splat carries p2align=0.
define <2 x i64> @load_splat_v2i64_a1(i64* %p) {
; CHECK-LABEL: load_splat_v2i64_a1:
; CHECK: .functype load_splat_v2i64_a1 (i32) -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 0
; CHECK-NEXT: v128.load64_splat 0:p2align=0
; CHECK-NEXT: # fallthrough-return
%e = load i64, i64* %p, align 1
%v1 = insertelement <2 x i64> undef, i64 %e, i32 0
; The all-zero shuffle mask copies lane 0 of %v1 into every result lane.
%v2 = shufflevector <2 x i64> %v1, <2 x i64> undef, <2 x i32> zeroinitializer
ret <2 x i64> %v2
}
; Scalar i64 load with align 2 broadcast to both lanes; the expected
; v128.load64_splat carries p2align=1.
define <2 x i64> @load_splat_v2i64_a2(i64* %p) {
; CHECK-LABEL: load_splat_v2i64_a2:
; CHECK: .functype load_splat_v2i64_a2 (i32) -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 0
; CHECK-NEXT: v128.load64_splat 0:p2align=1
; CHECK-NEXT: # fallthrough-return
%e = load i64, i64* %p, align 2
%v1 = insertelement <2 x i64> undef, i64 %e, i32 0
; The all-zero shuffle mask copies lane 0 of %v1 into every result lane.
%v2 = shufflevector <2 x i64> %v1, <2 x i64> undef, <2 x i32> zeroinitializer
ret <2 x i64> %v2
}
; Scalar i64 load with align 4 broadcast to both lanes; the expected
; v128.load64_splat carries p2align=2.
define <2 x i64> @load_splat_v2i64_a4(i64* %p) {
; CHECK-LABEL: load_splat_v2i64_a4:
; CHECK: .functype load_splat_v2i64_a4 (i32) -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 0
; CHECK-NEXT: v128.load64_splat 0:p2align=2
; CHECK-NEXT: # fallthrough-return
%e = load i64, i64* %p, align 4
%v1 = insertelement <2 x i64> undef, i64 %e, i32 0
; The all-zero shuffle mask copies lane 0 of %v1 into every result lane.
%v2 = shufflevector <2 x i64> %v1, <2 x i64> undef, <2 x i32> zeroinitializer
ret <2 x i64> %v2
}
; 8 is the default alignment for v128.load64_splat so no attribute is needed.
; Scalar i64 load with align 8 broadcast to both lanes; the expected
; v128.load64_splat carries no p2align attribute.
define <2 x i64> @load_splat_v2i64_a8(i64* %p) {
; CHECK-LABEL: load_splat_v2i64_a8:
; CHECK: .functype load_splat_v2i64_a8 (i32) -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 0
; CHECK-NEXT: v128.load64_splat 0
; CHECK-NEXT: # fallthrough-return
%e = load i64, i64* %p, align 8
%v1 = insertelement <2 x i64> undef, i64 %e, i32 0
; The all-zero shuffle mask copies lane 0 of %v1 into every result lane.
%v2 = shufflevector <2 x i64> %v1, <2 x i64> undef, <2 x i32> zeroinitializer
ret <2 x i64> %v2
}
; 16 is greater than the default alignment so it is ignored.
; Scalar i64 load with align 16 broadcast to both lanes; the expected output is
; the same as for align 8 (no p2align attribute).
define <2 x i64> @load_splat_v2i64_a16(i64* %p) {
; CHECK-LABEL: load_splat_v2i64_a16:
; CHECK: .functype load_splat_v2i64_a16 (i32) -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 0
; CHECK-NEXT: v128.load64_splat 0
; CHECK-NEXT: # fallthrough-return
%e = load i64, i64* %p, align 16
%v1 = insertelement <2 x i64> undef, i64 %e, i32 0
; The all-zero shuffle mask copies lane 0 of %v1 into every result lane.
%v2 = shufflevector <2 x i64> %v1, <2 x i64> undef, <2 x i32> zeroinitializer
ret <2 x i64> %v2
}
; Scalar i64 load with align 1 inserted into lane 0 of the incoming vector %v;
; the expected v128.load64_lane carries p2align=0.
define <2 x i64> @load_lane_i64_a1(i64* %p, <2 x i64> %v) {
; CHECK-LABEL: load_lane_i64_a1:
; CHECK: .functype load_lane_i64_a1 (i32, v128) -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 0
; CHECK-NEXT: local.get 1
; CHECK-NEXT: v128.load64_lane 0:p2align=0, 0
; CHECK-NEXT: # fallthrough-return
%e = load i64, i64* %p, align 1
%v1 = insertelement <2 x i64> %v, i64 %e, i32 0
ret <2 x i64> %v1
}
; Scalar i64 load with align 2 inserted into lane 0 of the incoming vector %v;
; the expected v128.load64_lane carries p2align=1.
define <2 x i64> @load_lane_i64_a2(i64* %p, <2 x i64> %v) {
; CHECK-LABEL: load_lane_i64_a2:
; CHECK: .functype load_lane_i64_a2 (i32, v128) -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 0
; CHECK-NEXT: local.get 1
; CHECK-NEXT: v128.load64_lane 0:p2align=1, 0
; CHECK-NEXT: # fallthrough-return
%e = load i64, i64* %p, align 2
%v1 = insertelement <2 x i64> %v, i64 %e, i32 0
ret <2 x i64> %v1
}
; Scalar i64 load with align 4 inserted into lane 0 of the incoming vector %v;
; the expected v128.load64_lane carries p2align=2.
define <2 x i64> @load_lane_i64_a4(i64* %p, <2 x i64> %v) {
; CHECK-LABEL: load_lane_i64_a4:
; CHECK: .functype load_lane_i64_a4 (i32, v128) -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 0
; CHECK-NEXT: local.get 1
; CHECK-NEXT: v128.load64_lane 0:p2align=2, 0
; CHECK-NEXT: # fallthrough-return
%e = load i64, i64* %p, align 4
%v1 = insertelement <2 x i64> %v, i64 %e, i32 0
ret <2 x i64> %v1
}
; 8 is the default alignment for v128.load64_lane so no attribute is needed.
; Scalar i64 load with align 8 inserted into lane 0 of the incoming vector %v;
; the expected v128.load64_lane carries no p2align attribute.
define <2 x i64> @load_lane_i64_a8(i64* %p, <2 x i64> %v) {
; CHECK-LABEL: load_lane_i64_a8:
; CHECK: .functype load_lane_i64_a8 (i32, v128) -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 0
; CHECK-NEXT: local.get 1
; CHECK-NEXT: v128.load64_lane 0, 0
; CHECK-NEXT: # fallthrough-return
%e = load i64, i64* %p, align 8
%v1 = insertelement <2 x i64> %v, i64 %e, i32 0
ret <2 x i64> %v1
}
; 16 is greater than the default alignment so it is ignored.
; Scalar i64 load with align 16 inserted into lane 0 of the incoming vector %v;
; the expected output is the same as for align 8 (no p2align attribute).
define <2 x i64> @load_lane_i64_a16(i64* %p, <2 x i64> %v) {
; CHECK-LABEL: load_lane_i64_a16:
; CHECK: .functype load_lane_i64_a16 (i32, v128) -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 0
; CHECK-NEXT: local.get 1
; CHECK-NEXT: v128.load64_lane 0, 0
; CHECK-NEXT: # fallthrough-return
%e = load i64, i64* %p, align 16
%v1 = insertelement <2 x i64> %v, i64 %e, i32 0
ret <2 x i64> %v1
}
; Lane 0 of %v is extracted and stored with align 1; the expected
; v128.store64_lane carries p2align=0. The vector is argument 0 here, so the
; pointer (argument 1) is pushed first.
define void @store_lane_i64_a1(<2 x i64> %v, i64* %p) {
; CHECK-LABEL: store_lane_i64_a1:
; CHECK: .functype store_lane_i64_a1 (v128, i32) -> ()
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 1
; CHECK-NEXT: local.get 0
; CHECK-NEXT: v128.store64_lane 0:p2align=0, 0
; CHECK-NEXT: # fallthrough-return
%x = extractelement <2 x i64> %v, i32 0
store i64 %x, i64* %p, align 1
ret void
}
; Lane 0 of %v is extracted and stored with align 2; the expected
; v128.store64_lane carries p2align=1. The vector is argument 0 here, so the
; pointer (argument 1) is pushed first.
define void @store_lane_i64_a2(<2 x i64> %v, i64* %p) {
; CHECK-LABEL: store_lane_i64_a2:
; CHECK: .functype store_lane_i64_a2 (v128, i32) -> ()
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 1
; CHECK-NEXT: local.get 0
; CHECK-NEXT: v128.store64_lane 0:p2align=1, 0
; CHECK-NEXT: # fallthrough-return
%x = extractelement <2 x i64> %v, i32 0
store i64 %x, i64* %p, align 2
ret void
}
; Lane 0 of %v is extracted and stored with align 4; the expected
; v128.store64_lane carries p2align=2. The vector is argument 0 here, so the
; pointer (argument 1) is pushed first.
define void @store_lane_i64_a4(<2 x i64> %v, i64* %p) {
; CHECK-LABEL: store_lane_i64_a4:
; CHECK: .functype store_lane_i64_a4 (v128, i32) -> ()
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 1
; CHECK-NEXT: local.get 0
; CHECK-NEXT: v128.store64_lane 0:p2align=2, 0
; CHECK-NEXT: # fallthrough-return
%x = extractelement <2 x i64> %v, i32 0
store i64 %x, i64* %p, align 4
ret void
}
; 8 is the default alignment for v128.store64_lane so no attribute is needed.
; Lane 0 of %v is extracted and stored with align 8; the expected
; v128.store64_lane carries no p2align attribute. The vector is argument 0
; here, so the pointer (argument 1) is pushed first.
define void @store_lane_i64_a8(<2 x i64> %v, i64* %p) {
; CHECK-LABEL: store_lane_i64_a8:
; CHECK: .functype store_lane_i64_a8 (v128, i32) -> ()
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 1
; CHECK-NEXT: local.get 0
; CHECK-NEXT: v128.store64_lane 0, 0
; CHECK-NEXT: # fallthrough-return
%x = extractelement <2 x i64> %v, i32 0
store i64 %x, i64* %p, align 8
ret void
}
; 16 is greater than the default alignment so it is ignored.
; Lane 0 of %v is extracted and stored with align 16; the expected output is
; the same as for align 8 (no p2align attribute). The vector is argument 0
; here, so the pointer (argument 1) is pushed first.
define void @store_lane_i64_a16(<2 x i64> %v, i64* %p) {
; CHECK-LABEL: store_lane_i64_a16:
; CHECK: .functype store_lane_i64_a16 (v128, i32) -> ()
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 1
; CHECK-NEXT: local.get 0
; CHECK-NEXT: v128.store64_lane 0, 0
; CHECK-NEXT: # fallthrough-return
%x = extractelement <2 x i64> %v, i32 0
store i64 %x, i64* %p, align 16
ret void
}
; Loads an i64 with align 1 into lane 0 of a zeroed <2 x i64>; the expected
; v128.load64_zero carries p2align=0.
define <2 x i64> @load_zero_i64_a1(i64* %p) {
; CHECK-LABEL: load_zero_i64_a1:
; CHECK: .functype load_zero_i64_a1 (i32) -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 0
; CHECK-NEXT: v128.load64_zero 0:p2align=0
; CHECK-NEXT: # fallthrough-return
%x = load i64, i64* %p, align 1
; Inserting into zeroinitializer leaves the other lane zero.
%v = insertelement <2 x i64> zeroinitializer, i64 %x, i32 0
ret <2 x i64> %v
}
; Loads an i64 with align 2 into lane 0 of a zeroed <2 x i64>; the expected
; v128.load64_zero carries p2align=1.
define <2 x i64> @load_zero_i64_a2(i64* %p) {
; CHECK-LABEL: load_zero_i64_a2:
; CHECK: .functype load_zero_i64_a2 (i32) -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 0
; CHECK-NEXT: v128.load64_zero 0:p2align=1
; CHECK-NEXT: # fallthrough-return
%x = load i64, i64* %p, align 2
; Inserting into zeroinitializer leaves the other lane zero.
%v = insertelement <2 x i64> zeroinitializer, i64 %x, i32 0
ret <2 x i64> %v
}
; Loads an i64 with align 4 into lane 0 of a zeroed <2 x i64>; the expected
; v128.load64_zero carries p2align=2.
define <2 x i64> @load_zero_i64_a4(i64* %p) {
; CHECK-LABEL: load_zero_i64_a4:
; CHECK: .functype load_zero_i64_a4 (i32) -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 0
; CHECK-NEXT: v128.load64_zero 0:p2align=2
; CHECK-NEXT: # fallthrough-return
%x = load i64, i64* %p, align 4
; Inserting into zeroinitializer leaves the other lane zero.
%v = insertelement <2 x i64> zeroinitializer, i64 %x, i32 0
ret <2 x i64> %v
}
; 8 is the default alignment for v128.load64_zero so no attribute is needed.
; Loads an i64 with align 8 into lane 0 of a zeroed <2 x i64>; the expected
; v128.load64_zero carries no p2align attribute.
define <2 x i64> @load_zero_i64_a8(i64* %p) {
; CHECK-LABEL: load_zero_i64_a8:
; CHECK: .functype load_zero_i64_a8 (i32) -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 0
; CHECK-NEXT: v128.load64_zero 0
; CHECK-NEXT: # fallthrough-return
%x = load i64, i64* %p, align 8
; Inserting into zeroinitializer leaves the other lane zero.
%v = insertelement <2 x i64> zeroinitializer, i64 %x, i32 0
ret <2 x i64> %v
}
; 16 is greater than the default alignment so it is ignored.
; Loads an i64 with align 16 into lane 0 of a zeroed <2 x i64>; the expected
; output is the same as for align 8 (no p2align attribute).
define <2 x i64> @load_zero_i64_a16(i64* %p) {
; CHECK-LABEL: load_zero_i64_a16:
; CHECK: .functype load_zero_i64_a16 (i32) -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 0
; CHECK-NEXT: v128.load64_zero 0
; CHECK-NEXT: # fallthrough-return
%x = load i64, i64* %p, align 16
; Inserting into zeroinitializer leaves the other lane zero.
%v = insertelement <2 x i64> zeroinitializer, i64 %x, i32 0
ret <2 x i64> %v
}
; ==============================================================================
; 4 x float
; ==============================================================================
; Whole-vector <4 x float> load with align 1; the expected v128.load carries
; p2align=0.
define <4 x float> @load_v4f32_a1(<4 x float> *%p) {
; CHECK-LABEL: load_v4f32_a1:
; CHECK: .functype load_v4f32_a1 (i32) -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 0
; CHECK-NEXT: v128.load 0:p2align=0
; CHECK-NEXT: # fallthrough-return
%v = load <4 x float>, <4 x float>* %p, align 1
ret <4 x float> %v
}
; Whole-vector <4 x float> load with align 4; the expected v128.load carries
; p2align=2.
define <4 x float> @load_v4f32_a4(<4 x float> *%p) {
; CHECK-LABEL: load_v4f32_a4:
; CHECK: .functype load_v4f32_a4 (i32) -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 0
; CHECK-NEXT: v128.load 0:p2align=2
; CHECK-NEXT: # fallthrough-return
%v = load <4 x float>, <4 x float>* %p, align 4
ret <4 x float> %v
}
; 16 is the default alignment for v128 so no attribute is needed.
; Whole-vector <4 x float> load with align 16; the expected v128.load carries
; no p2align attribute.
define <4 x float> @load_v4f32_a16(<4 x float> *%p) {
; CHECK-LABEL: load_v4f32_a16:
; CHECK: .functype load_v4f32_a16 (i32) -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 0
; CHECK-NEXT: v128.load 0
; CHECK-NEXT: # fallthrough-return
%v = load <4 x float>, <4 x float>* %p, align 16
ret <4 x float> %v
}
; 32 is greater than the default alignment so it is ignored.
; Whole-vector <4 x float> load with align 32; the expected output is the same
; as for align 16 (no p2align attribute).
define <4 x float> @load_v4f32_a32(<4 x float> *%p) {
; CHECK-LABEL: load_v4f32_a32:
; CHECK: .functype load_v4f32_a32 (i32) -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 0
; CHECK-NEXT: v128.load 0
; CHECK-NEXT: # fallthrough-return
%v = load <4 x float>, <4 x float>* %p, align 32
ret <4 x float> %v
}
; Whole-vector <4 x float> store with align 1; the expected v128.store carries
; p2align=0.
define void @store_v4f32_a1(<4 x float> *%p, <4 x float> %v) {
; CHECK-LABEL: store_v4f32_a1:
; CHECK: .functype store_v4f32_a1 (i32, v128) -> ()
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 0
; CHECK-NEXT: local.get 1
; CHECK-NEXT: v128.store 0:p2align=0
; CHECK-NEXT: # fallthrough-return
store <4 x float> %v, <4 x float>* %p, align 1
ret void
}
; Whole-vector <4 x float> store with align 4; the expected v128.store carries
; p2align=2.
define void @store_v4f32_a4(<4 x float> *%p, <4 x float> %v) {
; CHECK-LABEL: store_v4f32_a4:
; CHECK: .functype store_v4f32_a4 (i32, v128) -> ()
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 0
; CHECK-NEXT: local.get 1
; CHECK-NEXT: v128.store 0:p2align=2
; CHECK-NEXT: # fallthrough-return
store <4 x float> %v, <4 x float>* %p, align 4
ret void
}
; 16 is the default alignment for v128 so no attribute is needed.
; Whole-vector <4 x float> store with align 16; the expected v128.store carries
; no p2align attribute.
define void @store_v4f32_a16(<4 x float> *%p, <4 x float> %v) {
; CHECK-LABEL: store_v4f32_a16:
; CHECK: .functype store_v4f32_a16 (i32, v128) -> ()
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 0
; CHECK-NEXT: local.get 1
; CHECK-NEXT: v128.store 0
; CHECK-NEXT: # fallthrough-return
store <4 x float> %v, <4 x float>* %p, align 16
ret void
}
; 32 is greater than the default alignment so it is ignored.
; Whole-vector <4 x float> store with align 32; the expected output is the same
; as for align 16 (no p2align attribute).
define void @store_v4f32_a32(<4 x float> *%p, <4 x float> %v) {
; CHECK-LABEL: store_v4f32_a32:
; CHECK: .functype store_v4f32_a32 (i32, v128) -> ()
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 0
; CHECK-NEXT: local.get 1
; CHECK-NEXT: v128.store 0
; CHECK-NEXT: # fallthrough-return
store <4 x float> %v, <4 x float>* %p, align 32
ret void
}
; ==============================================================================
; 2 x double
; ==============================================================================
; Whole-vector <2 x double> load with align 1; the expected v128.load carries
; p2align=0.
define <2 x double> @load_v2f64_a1(<2 x double> *%p) {
; CHECK-LABEL: load_v2f64_a1:
; CHECK: .functype load_v2f64_a1 (i32) -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 0
; CHECK-NEXT: v128.load 0:p2align=0
; CHECK-NEXT: # fallthrough-return
%v = load <2 x double>, <2 x double>* %p, align 1
ret <2 x double> %v
}
; Whole-vector <2 x double> load with align 4; the expected v128.load carries
; p2align=2.
define <2 x double> @load_v2f64_a4(<2 x double> *%p) {
; CHECK-LABEL: load_v2f64_a4:
; CHECK: .functype load_v2f64_a4 (i32) -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 0
; CHECK-NEXT: v128.load 0:p2align=2
; CHECK-NEXT: # fallthrough-return
%v = load <2 x double>, <2 x double>* %p, align 4
ret <2 x double> %v
}
; 16 is the default alignment for v128 so no attribute is needed.
; Whole-vector <2 x double> load with align 16; the expected v128.load carries
; no p2align attribute.
define <2 x double> @load_v2f64_a16(<2 x double> *%p) {
; CHECK-LABEL: load_v2f64_a16:
; CHECK: .functype load_v2f64_a16 (i32) -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 0
; CHECK-NEXT: v128.load 0
; CHECK-NEXT: # fallthrough-return
%v = load <2 x double>, <2 x double>* %p, align 16
ret <2 x double> %v
}
; 32 is greater than the default alignment so it is ignored.
; Whole-vector <2 x double> load with align 32; the expected output is the same
; as for align 16 (no p2align attribute).
define <2 x double> @load_v2f64_a32(<2 x double> *%p) {
; CHECK-LABEL: load_v2f64_a32:
; CHECK: .functype load_v2f64_a32 (i32) -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 0
; CHECK-NEXT: v128.load 0
; CHECK-NEXT: # fallthrough-return
%v = load <2 x double>, <2 x double>* %p, align 32
ret <2 x double> %v
}
; Whole-vector <2 x double> store with align 1; the expected v128.store carries
; p2align=0.
define void @store_v2f64_a1(<2 x double> *%p, <2 x double> %v) {
; CHECK-LABEL: store_v2f64_a1:
; CHECK: .functype store_v2f64_a1 (i32, v128) -> ()
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 0
; CHECK-NEXT: local.get 1
; CHECK-NEXT: v128.store 0:p2align=0
; CHECK-NEXT: # fallthrough-return
store <2 x double> %v, <2 x double>* %p, align 1
ret void
}
; Whole-vector <2 x double> store with align 4; the expected v128.store carries
; p2align=2.
define void @store_v2f64_a4(<2 x double> *%p, <2 x double> %v) {
; CHECK-LABEL: store_v2f64_a4:
; CHECK: .functype store_v2f64_a4 (i32, v128) -> ()
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 0
; CHECK-NEXT: local.get 1
; CHECK-NEXT: v128.store 0:p2align=2
; CHECK-NEXT: # fallthrough-return
store <2 x double> %v, <2 x double>* %p, align 4
ret void
}
; 16 is the default alignment for v128 so no attribute is needed.
; Whole-vector <2 x double> store with align 16; the expected v128.store
; carries no p2align attribute.
define void @store_v2f64_a16(<2 x double> *%p, <2 x double> %v) {
; CHECK-LABEL: store_v2f64_a16:
; CHECK: .functype store_v2f64_a16 (i32, v128) -> ()
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 0
; CHECK-NEXT: local.get 1
; CHECK-NEXT: v128.store 0
; CHECK-NEXT: # fallthrough-return
store <2 x double> %v, <2 x double>* %p, align 16
ret void
}
; 32 is greater than the default alignment so it is ignored.
; Whole-vector <2 x double> store with align 32; the expected output is the
; same as for align 16 (no p2align attribute).
define void @store_v2f64_a32(<2 x double> *%p, <2 x double> %v) {
; CHECK-LABEL: store_v2f64_a32:
; CHECK: .functype store_v2f64_a32 (i32, v128) -> ()
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 0
; CHECK-NEXT: local.get 1
; CHECK-NEXT: v128.store 0
; CHECK-NEXT: # fallthrough-return
store <2 x double> %v, <2 x double>* %p, align 32
ret void
}