; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt < %s -passes=memcpyopt -S -verify-memoryssa | FileCheck %s

; Check that a call featuring a scalable-vector byval argument fed by a memcpy
; doesn't crash the compiler. It previously assumed the byval type's size could
; be represented as a known constant amount.
define void @byval_caller(ptr %P) {
; CHECK-LABEL: @byval_caller(
; CHECK-NEXT:    [[A:%.*]] = alloca i8, align 1
; CHECK-NEXT:    call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[A]], ptr align 4 [[P:%.*]], i64 8, i1 false)
; CHECK-NEXT:    call void @byval_callee(ptr byval(<vscale x 1 x i8>) align 1 [[A]])
; CHECK-NEXT:    ret void
;
  %a = alloca i8
  call void @llvm.memcpy.p0.p0.i64(ptr align 4 %a, ptr align 4 %P, i64 8, i1 false)
  call void @byval_callee(ptr align 1 byval(<vscale x 1 x i8>) %a)
  ret void
}

declare void @llvm.memcpy.p0.p0.i64(ptr align 4, ptr align 4, i64, i1)
declare void @byval_callee(ptr align 1 byval(<vscale x 1 x i8>))

; Check that two scalable-vector stores (overlapping, with a constant offset)
; do not crash the compiler when checking whether or not they can be merged into
; a single memset. There was previously an assumption that the stored values'
; sizes could be represented by a known constant amount.
define void @merge_stores_both_scalable(ptr %ptr) {
; CHECK-LABEL: @merge_stores_both_scalable(
; CHECK-NEXT:    store <vscale x 1 x i8> zeroinitializer, ptr [[PTR:%.*]], align 1
; CHECK-NEXT:    [[PTR_NEXT:%.*]] = getelementptr i8, ptr [[PTR]], i64 1
; CHECK-NEXT:    store <vscale x 1 x i8> zeroinitializer, ptr [[PTR_NEXT]], align 1
; CHECK-NEXT:    ret void
;
  store <vscale x 1 x i8> zeroinitializer, ptr %ptr
  %ptr.next = getelementptr i8, ptr %ptr, i64 1
  store <vscale x 1 x i8> zeroinitializer, ptr %ptr.next
  ret void
}

; As above, but where the base store is scalable and the subsequent store(s) are not.
define void @merge_stores_first_scalable(ptr %ptr) {
; CHECK-LABEL: @merge_stores_first_scalable(
; CHECK-NEXT:    store <vscale x 1 x i8> zeroinitializer, ptr [[PTR:%.*]], align 1
; CHECK-NEXT:    [[PTR_NEXT:%.*]] = getelementptr i8, ptr [[PTR]], i64 1
; CHECK-NEXT:    store i8 0, ptr [[PTR_NEXT]], align 1
; CHECK-NEXT:    ret void
;
  store <vscale x 1 x i8> zeroinitializer, ptr %ptr
  %ptr.next = getelementptr i8, ptr %ptr, i64 1
  store i8 zeroinitializer, ptr %ptr.next
  ret void
}

; As above, but where the base store is not scalable and the subsequent store(s) are.
define void @merge_stores_second_scalable(ptr %ptr) {
; CHECK-LABEL: @merge_stores_second_scalable(
; CHECK-NEXT:    store i8 0, ptr [[PTR:%.*]], align 1
; CHECK-NEXT:    [[PTR_NEXT:%.*]] = getelementptr i8, ptr [[PTR]], i64 1
; CHECK-NEXT:    store <vscale x 1 x i8> zeroinitializer, ptr [[PTR_NEXT]], align 1
; CHECK-NEXT:    ret void
;
  store i8 zeroinitializer, ptr %ptr
  %ptr.next = getelementptr i8, ptr %ptr, i64 1
  store <vscale x 1 x i8> zeroinitializer, ptr %ptr.next
  ret void
}

; Check that the call-slot optimization doesn't crash when encountering scalable types.
define void @callslotoptzn(<vscale x 4 x float> %val, ptr %out) {
; CHECK-LABEL: @callslotoptzn(
; CHECK-NEXT:    [[ALLOC:%.*]] = alloca <vscale x 4 x float>, align 16
; CHECK-NEXT:    [[IDX:%.*]] = tail call <vscale x 4 x i32> @llvm.experimental.stepvector.nxv4i32()
; CHECK-NEXT:    [[STRIDE:%.*]] = getelementptr inbounds float, ptr [[ALLOC]], <vscale x 4 x i32> [[IDX]]
; CHECK-NEXT:    call void @llvm.masked.scatter.nxv4f32.nxv4p0(<vscale x 4 x float> [[VAL:%.*]], <vscale x 4 x ptr> [[STRIDE]], i32 4, <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer))
; CHECK-NEXT:    [[LI:%.*]] = load <vscale x 4 x float>, ptr [[ALLOC]], align 4
; CHECK-NEXT:    store <vscale x 4 x float> [[LI]], ptr [[OUT:%.*]], align 4
; CHECK-NEXT:    ret void
;
  %alloc = alloca <vscale x 4 x float>, align 16
  %idx = tail call <vscale x 4 x i32> @llvm.experimental.stepvector.nxv4i32()
  %stride = getelementptr inbounds float, ptr %alloc, <vscale x 4 x i32> %idx
  call void @llvm.masked.scatter.nxv4f32.nxv4p0(<vscale x 4 x float> %val, <vscale x 4 x ptr> %stride, i32 4, <vscale x 4 x i1> splat (i1 true))
  %li = load <vscale x 4 x float>, ptr %alloc, align 4
  store <vscale x 4 x float> %li, ptr %out, align 4
  ret void
}
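
; Aggregate types containing scalable vectors, used by the memmove tests below.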
%0 = type { <vscale x 8 x i8> }
%1 = type { <vscale x 8 x i8>, <vscale x 8 x i8> }
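
; The scalable-vector load/store pair below is expected to remain a plain load
; and store rather than being rewritten as a memmove.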
define void @memmove_vector(ptr %a, ptr %b) {
; CHECK-LABEL: @memmove_vector(
; CHECK-NEXT:    [[V:%.*]] = load <vscale x 8 x i8>, ptr [[A:%.*]], align 1
; CHECK-NEXT:    store <vscale x 8 x i8> [[V]], ptr [[B:%.*]], align 1
; CHECK-NEXT:    ret void
;
  %v = load <vscale x 8 x i8>, ptr %a, align 1
  store <vscale x 8 x i8> %v, ptr %b, align 1
  ret void
}
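
; A load/store of an aggregate wrapping a single scalable vector is expected to
; become a memmove whose length is vscale * 8 bytes.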
define void @memmove_agg1(ptr %a, ptr %b) {
; CHECK-LABEL: @memmove_agg1(
; CHECK-NEXT:    [[TMP1:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT:    [[TMP2:%.*]] = mul i64 [[TMP1]], 8
; CHECK-NEXT:    call void @llvm.memmove.p0.p0.i64(ptr align 1 [[B:%.*]], ptr align 1 [[A:%.*]], i64 [[TMP2]], i1 false)
; CHECK-NEXT:    ret void
;
  %v = load %0, ptr %a, align 1
  store %0 %v, ptr %b, align 1
  ret void
}
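
; As above, but with two scalable-vector fields, so the expected memmove length
; is vscale * 16 bytes.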
define void @memmove_agg2(ptr %a, ptr %b) {
; CHECK-LABEL: @memmove_agg2(
; CHECK-NEXT:    [[TMP1:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT:    [[TMP2:%.*]] = mul i64 [[TMP1]], 16
; CHECK-NEXT:    call void @llvm.memmove.p0.p0.i64(ptr align 1 [[B:%.*]], ptr align 1 [[A:%.*]], i64 [[TMP2]], i1 false)
; CHECK-NEXT:    ret void
;
  %v = load %1, ptr %a, align 1
  store %1 %v, ptr %b, align 1
  ret void
}

declare <vscale x 4 x i32> @llvm.experimental.stepvector.nxv4i32()
declare void @llvm.masked.scatter.nxv4f32.nxv4p0(<vscale x 4 x float>, <vscale x 4 x ptr>, i32, <vscale x 4 x i1>)