[AArch64] Mark neon.stN intrinsics as writeonly (#145289)

I found this peculiar comment in EarlyCSE:

1c78d8d9d7/llvm/lib/Transforms/Scalar/EarlyCSE.cpp (L1620-L1624)

Looking back over history, this seems to be referring to the
aarch64.neon.stN intrinsics, which are indeed not marked writeonly
(though the ldN intrinsics are readonly).

Possibly I'm missing something special about these intrinsics, but I
think it is safe to mark them as writeonly.
This commit is contained in:
Nikita Popov
2025-07-01 15:56:02 +02:00
committed by GitHub
parent 1fe993c251
commit a6592ddf4e
5 changed files with 30 additions and 19 deletions

View File

@@ -612,7 +612,7 @@ let TargetPrefix = "aarch64" in { // All intrinsics start with "llvm.aarch64.".
[IntrReadMem, IntrArgMemOnly]>;
class AdvSIMD_1Vec_Store_Lane_Intrinsic
: DefaultAttrsIntrinsic<[], [llvm_anyvector_ty, llvm_i64_ty, llvm_anyptr_ty],
[IntrArgMemOnly, NoCapture<ArgIndex<2>>]>;
[IntrWriteMem, IntrArgMemOnly, NoCapture<ArgIndex<2>>]>;
class AdvSIMD_2Vec_Load_Intrinsic
: DefaultAttrsIntrinsic<[LLVMMatchType<0>, llvm_anyvector_ty],
@@ -626,11 +626,11 @@ let TargetPrefix = "aarch64" in { // All intrinsics start with "llvm.aarch64.".
class AdvSIMD_2Vec_Store_Intrinsic
: DefaultAttrsIntrinsic<[], [llvm_anyvector_ty, LLVMMatchType<0>,
llvm_anyptr_ty],
[IntrArgMemOnly, NoCapture<ArgIndex<2>>]>;
[IntrWriteMem, IntrArgMemOnly, NoCapture<ArgIndex<2>>]>;
class AdvSIMD_2Vec_Store_Lane_Intrinsic
: DefaultAttrsIntrinsic<[], [llvm_anyvector_ty, LLVMMatchType<0>,
llvm_i64_ty, llvm_anyptr_ty],
[IntrArgMemOnly, NoCapture<ArgIndex<3>>]>;
[IntrWriteMem, IntrArgMemOnly, NoCapture<ArgIndex<3>>]>;
class AdvSIMD_3Vec_Load_Intrinsic
: DefaultAttrsIntrinsic<[LLVMMatchType<0>, LLVMMatchType<0>, llvm_anyvector_ty],
@@ -644,12 +644,12 @@ let TargetPrefix = "aarch64" in { // All intrinsics start with "llvm.aarch64.".
class AdvSIMD_3Vec_Store_Intrinsic
: DefaultAttrsIntrinsic<[], [llvm_anyvector_ty, LLVMMatchType<0>,
LLVMMatchType<0>, llvm_anyptr_ty],
[IntrArgMemOnly, NoCapture<ArgIndex<3>>]>;
[IntrWriteMem, IntrArgMemOnly, NoCapture<ArgIndex<3>>]>;
class AdvSIMD_3Vec_Store_Lane_Intrinsic
: DefaultAttrsIntrinsic<[], [llvm_anyvector_ty,
LLVMMatchType<0>, LLVMMatchType<0>,
llvm_i64_ty, llvm_anyptr_ty],
[IntrArgMemOnly, NoCapture<ArgIndex<4>>]>;
[IntrWriteMem, IntrArgMemOnly, NoCapture<ArgIndex<4>>]>;
class AdvSIMD_4Vec_Load_Intrinsic
: DefaultAttrsIntrinsic<[LLVMMatchType<0>, LLVMMatchType<0>,
@@ -667,12 +667,12 @@ let TargetPrefix = "aarch64" in { // All intrinsics start with "llvm.aarch64.".
: DefaultAttrsIntrinsic<[], [llvm_anyvector_ty, LLVMMatchType<0>,
LLVMMatchType<0>, LLVMMatchType<0>,
llvm_anyptr_ty],
[IntrArgMemOnly, NoCapture<ArgIndex<4>>]>;
[IntrWriteMem, IntrArgMemOnly, NoCapture<ArgIndex<4>>]>;
class AdvSIMD_4Vec_Store_Lane_Intrinsic
: DefaultAttrsIntrinsic<[], [llvm_anyvector_ty, LLVMMatchType<0>,
LLVMMatchType<0>, LLVMMatchType<0>,
llvm_i64_ty, llvm_anyptr_ty],
[IntrArgMemOnly, NoCapture<ArgIndex<5>>]>;
[IntrWriteMem, IntrArgMemOnly, NoCapture<ArgIndex<5>>]>;
}
// Memory ops

View File

@@ -742,32 +742,33 @@ def int_arm_neon_vld4dup : DefaultAttrsIntrinsic<
// Interleaving vector stores from N-element structures.
// Source operands are: the address, the N vectors, and the alignment.
def int_arm_neon_vst1 : DefaultAttrsIntrinsic<
[], [llvm_anyptr_ty, llvm_anyvector_ty, llvm_i32_ty], [IntrArgMemOnly]>;
[], [llvm_anyptr_ty, llvm_anyvector_ty, llvm_i32_ty],
[IntrWriteMem, IntrArgMemOnly]>;
def int_arm_neon_vst2 : DefaultAttrsIntrinsic<
[], [llvm_anyptr_ty, llvm_anyvector_ty, LLVMMatchType<1>, llvm_i32_ty],
[IntrArgMemOnly]>;
[IntrWriteMem, IntrArgMemOnly]>;
def int_arm_neon_vst3 : DefaultAttrsIntrinsic<
[],
[llvm_anyptr_ty, llvm_anyvector_ty, LLVMMatchType<1>, LLVMMatchType<1>,
llvm_i32_ty],
[IntrArgMemOnly]>;
[IntrWriteMem, IntrArgMemOnly]>;
def int_arm_neon_vst4 : DefaultAttrsIntrinsic<
[],
[llvm_anyptr_ty, llvm_anyvector_ty, LLVMMatchType<1>, LLVMMatchType<1>,
LLVMMatchType<1>, llvm_i32_ty],
[IntrArgMemOnly]>;
[IntrWriteMem, IntrArgMemOnly]>;
def int_arm_neon_vst1x2 : DefaultAttrsIntrinsic<
[], [llvm_anyptr_ty, llvm_anyvector_ty, LLVMMatchType<1>],
[IntrArgMemOnly, NoCapture<ArgIndex<0>>]>;
[IntrWriteMem, IntrArgMemOnly, NoCapture<ArgIndex<0>>]>;
def int_arm_neon_vst1x3 : DefaultAttrsIntrinsic<
[], [llvm_anyptr_ty, llvm_anyvector_ty, LLVMMatchType<1>, LLVMMatchType<1>],
[IntrArgMemOnly, NoCapture<ArgIndex<0>>]>;
[IntrWriteMem, IntrArgMemOnly, NoCapture<ArgIndex<0>>]>;
def int_arm_neon_vst1x4 : DefaultAttrsIntrinsic<
[],
[llvm_anyptr_ty, llvm_anyvector_ty, LLVMMatchType<1>, LLVMMatchType<1>,
LLVMMatchType<1>],
[IntrArgMemOnly, NoCapture<ArgIndex<0>>]>;
[IntrWriteMem, IntrArgMemOnly, NoCapture<ArgIndex<0>>]>;
// Vector store N-element structure from one lane.
// Source operands are: the address, the N vectors, the lane number, and
@@ -776,17 +777,17 @@ def int_arm_neon_vst2lane : DefaultAttrsIntrinsic<
[],
[llvm_anyptr_ty, llvm_anyvector_ty, LLVMMatchType<1>, llvm_i32_ty,
llvm_i32_ty],
[IntrArgMemOnly]>;
[IntrWriteMem, IntrArgMemOnly]>;
def int_arm_neon_vst3lane : DefaultAttrsIntrinsic<
[],
[llvm_anyptr_ty, llvm_anyvector_ty, LLVMMatchType<1>, LLVMMatchType<1>,
llvm_i32_ty, llvm_i32_ty],
[IntrArgMemOnly]>;
[IntrWriteMem, IntrArgMemOnly]>;
def int_arm_neon_vst4lane : DefaultAttrsIntrinsic<
[],
[llvm_anyptr_ty, llvm_anyvector_ty, LLVMMatchType<1>, LLVMMatchType<1>,
LLVMMatchType<1>, llvm_i32_ty, llvm_i32_ty],
[IntrArgMemOnly]>;
[IntrWriteMem, IntrArgMemOnly]>;
// Vector bitwise select.
def int_arm_neon_vbsl : DefaultAttrsIntrinsic<

View File

@@ -24,7 +24,7 @@ entry:
; CHECK: Just Ref: Ptr: i8* %p <-> %a = call <8 x i16> @llvm.arm.neon.vld1.v8i16.p0(ptr %p, i32 16)
; CHECK: NoModRef: Ptr: i8* %q <-> %a = call <8 x i16> @llvm.arm.neon.vld1.v8i16.p0(ptr %p, i32 16)
; CHECK: NoModRef: Ptr: i8* %p <-> call void @llvm.arm.neon.vst1.p0.v8i16(ptr %q, <8 x i16> %y, i32 16)
; CHECK: Both ModRef: Ptr: i8* %q <-> call void @llvm.arm.neon.vst1.p0.v8i16(ptr %q, <8 x i16> %y, i32 16)
; CHECK: Just Mod: Ptr: i8* %q <-> call void @llvm.arm.neon.vst1.p0.v8i16(ptr %q, <8 x i16> %y, i32 16)
; CHECK: Just Ref: Ptr: i8* %p <-> %b = call <8 x i16> @llvm.arm.neon.vld1.v8i16.p0(ptr %p, i32 16)
; CHECK: NoModRef: Ptr: i8* %q <-> %b = call <8 x i16> @llvm.arm.neon.vld1.v8i16.p0(ptr %p, i32 16)
; CHECK: NoModRef: %a = call <8 x i16> @llvm.arm.neon.vld1.v8i16.p0(ptr %p, i32 16) #{{[0-9]+}} <-> call void @llvm.arm.neon.vst1.p0.v8i16(ptr %q, <8 x i16> %y, i32 16)

View File

@@ -27,5 +27,5 @@ declare <8 x i16> @llvm.arm.neon.vld1.v8i16.p0(ptr, i32) nounwind readonly
declare void @llvm.arm.neon.vst1.p0.v8i16(ptr, <8 x i16>, i32) nounwind
; CHECK: attributes #0 = { nocallback nofree nosync nounwind willreturn memory(argmem: read) }
; CHECK: attributes #1 = { nocallback nofree nosync nounwind willreturn memory(argmem: readwrite) }
; CHECK: attributes #1 = { nocallback nofree nosync nounwind willreturn memory(argmem: write) }
; CHECK: attributes [[ATTR]] = { nounwind }

View File

@@ -22,5 +22,15 @@ declare <4 x i32> @llvm.aarch64.neon.shadd.v4i32(<4 x i32>, <4 x i32>)
; CHECK: declare <vscale x 4 x i32> @llvm.aarch64.sve.dup.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i1>, i32) [[NO_CALLBACK_NOFREE_NOSYNC_NOUNWIND_READNONE_WILLRETURN]]
declare <vscale x 4 x i32> @llvm.aarch64.sve.dup.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i1>, i32)
; CHECK: declare void @llvm.aarch64.neon.st2.v4i32.p0(<4 x i32>, <4 x i32>, ptr captures(none)) [[NO_CALLBACK_NOFREE_NOSYNC_NOUNWIND_WRITEONLY_WILLRETURN:#[0-9]+]]
declare void @llvm.aarch64.neon.st2.v4i32.p0(<4 x i32>, <4 x i32>, ptr)
; CHECK: declare void @llvm.aarch64.neon.st2lane.v4i32.p0(<4 x i32>, <4 x i32>, i64, ptr captures(none)) [[NO_CALLBACK_NOFREE_NOSYNC_NOUNWIND_WRITEONLY_WILLRETURN:#[0-9]+]]
declare void @llvm.aarch64.neon.st2lane.v4i32.p0(<4 x i32>, <4 x i32>, i64, ptr)
; CHECK: declare void @llvm.aarch64.neon.st1x2.v8i16.p0(<8 x i16>, <8 x i16>, ptr captures(none)) [[NO_CALLBACK_NOFREE_NOSYNC_NOUNWIND_WRITEONLY_WILLRETURN:#[0-9]+]]
declare void @llvm.aarch64.neon.st1x2.v8i16.p0(<8 x i16>, <8 x i16>, ptr)
; CHECK: attributes [[NOFREE_NOUNWIND_WILLRETURN]] = { nofree nounwind willreturn }
; CHECK: attributes [[NO_CALLBACK_NOFREE_NOSYNC_NOUNWIND_READNONE_WILLRETURN]] = { nocallback nofree nosync nounwind willreturn memory(none) }
; CHECK: attributes [[NO_CALLBACK_NOFREE_NOSYNC_NOUNWIND_WRITEONLY_WILLRETURN]] = { nocallback nofree nosync nounwind willreturn memory(argmem: write) }