From a6592ddf4ed9d819b9074ff9a29ed988e826e53d Mon Sep 17 00:00:00 2001 From: Nikita Popov Date: Tue, 1 Jul 2025 15:56:02 +0200 Subject: [PATCH] [AArch64] Mark neon.stN intrinsics as writeonly (#145289) I found this peculiar comment in EarlyCSE: https://github.com/llvm/llvm-project/blob/1c78d8d9d7bcb4b20910047ad7db35f177a17c8c/llvm/lib/Transforms/Scalar/EarlyCSE.cpp#L1620-L1624 Looking back over history, this seems to be referring to the aarch64.neon.stN intrinsics, which are indeed not marked writeonly (though the ldN intrinsics are readonly). Possibly I'm missing something special about these intrinsics, but I think it is safe to mark them as writeonly. --- llvm/include/llvm/IR/IntrinsicsAArch64.td | 14 ++++++------- llvm/include/llvm/IR/IntrinsicsARM.td | 21 ++++++++++--------- llvm/test/Analysis/BasicAA/cs-cs-arm.ll | 2 +- llvm/test/Analysis/BasicAA/intrinsics-arm.ll | 2 +- .../aarch64-intrinsics-attributes.ll | 10 +++++++++ 5 files changed, 30 insertions(+), 19 deletions(-) diff --git a/llvm/include/llvm/IR/IntrinsicsAArch64.td b/llvm/include/llvm/IR/IntrinsicsAArch64.td index 0ec5f5163118..3606bbe29eb9 100644 --- a/llvm/include/llvm/IR/IntrinsicsAArch64.td +++ b/llvm/include/llvm/IR/IntrinsicsAArch64.td @@ -612,7 +612,7 @@ let TargetPrefix = "aarch64" in { // All intrinsics start with "llvm.aarch64.". [IntrReadMem, IntrArgMemOnly]>; class AdvSIMD_1Vec_Store_Lane_Intrinsic : DefaultAttrsIntrinsic<[], [llvm_anyvector_ty, llvm_i64_ty, llvm_anyptr_ty], - [IntrArgMemOnly, NoCapture>]>; + [IntrWriteMem, IntrArgMemOnly, NoCapture>]>; class AdvSIMD_2Vec_Load_Intrinsic : DefaultAttrsIntrinsic<[LLVMMatchType<0>, llvm_anyvector_ty], @@ -626,11 +626,11 @@ let TargetPrefix = "aarch64" in { // All intrinsics start with "llvm.aarch64.". class AdvSIMD_2Vec_Store_Intrinsic : DefaultAttrsIntrinsic<[], [llvm_anyvector_ty, LLVMMatchType<0>, llvm_anyptr_ty], - [IntrArgMemOnly, NoCapture>]>; + [IntrWriteMem, IntrArgMemOnly, NoCapture>]>; class AdvSIMD_2Vec_Store_Lane_Intrinsic : DefaultAttrsIntrinsic<[], [llvm_anyvector_ty, LLVMMatchType<0>, llvm_i64_ty, llvm_anyptr_ty], - [IntrArgMemOnly, NoCapture>]>; + [IntrWriteMem, IntrArgMemOnly, NoCapture>]>; class AdvSIMD_3Vec_Load_Intrinsic : DefaultAttrsIntrinsic<[LLVMMatchType<0>, LLVMMatchType<0>, llvm_anyvector_ty], @@ -644,12 +644,12 @@ let TargetPrefix = "aarch64" in { // All intrinsics start with "llvm.aarch64.". class AdvSIMD_3Vec_Store_Intrinsic : DefaultAttrsIntrinsic<[], [llvm_anyvector_ty, LLVMMatchType<0>, LLVMMatchType<0>, llvm_anyptr_ty], - [IntrArgMemOnly, NoCapture>]>; + [IntrWriteMem, IntrArgMemOnly, NoCapture>]>; class AdvSIMD_3Vec_Store_Lane_Intrinsic : DefaultAttrsIntrinsic<[], [llvm_anyvector_ty, LLVMMatchType<0>, LLVMMatchType<0>, llvm_i64_ty, llvm_anyptr_ty], - [IntrArgMemOnly, NoCapture>]>; + [IntrWriteMem, IntrArgMemOnly, NoCapture>]>; class AdvSIMD_4Vec_Load_Intrinsic : DefaultAttrsIntrinsic<[LLVMMatchType<0>, LLVMMatchType<0>, @@ -667,12 +667,12 @@ let TargetPrefix = "aarch64" in { // All intrinsics start with "llvm.aarch64.". : DefaultAttrsIntrinsic<[], [llvm_anyvector_ty, LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>, llvm_anyptr_ty], - [IntrArgMemOnly, NoCapture>]>; + [IntrWriteMem, IntrArgMemOnly, NoCapture>]>; class AdvSIMD_4Vec_Store_Lane_Intrinsic : DefaultAttrsIntrinsic<[], [llvm_anyvector_ty, LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>, llvm_i64_ty, llvm_anyptr_ty], - [IntrArgMemOnly, NoCapture>]>; + [IntrWriteMem, IntrArgMemOnly, NoCapture>]>; } // Memory ops diff --git a/llvm/include/llvm/IR/IntrinsicsARM.td b/llvm/include/llvm/IR/IntrinsicsARM.td index dbc9477d4d4d..537cfe90a466 100644 --- a/llvm/include/llvm/IR/IntrinsicsARM.td +++ b/llvm/include/llvm/IR/IntrinsicsARM.td @@ -742,32 +742,33 @@ def int_arm_neon_vld4dup : DefaultAttrsIntrinsic< // Interleaving vector stores from N-element structures. // Source operands are: the address, the N vectors, and the alignment. def int_arm_neon_vst1 : DefaultAttrsIntrinsic< - [], [llvm_anyptr_ty, llvm_anyvector_ty, llvm_i32_ty], [IntrArgMemOnly]>; + [], [llvm_anyptr_ty, llvm_anyvector_ty, llvm_i32_ty], + [IntrWriteMem, IntrArgMemOnly]>; def int_arm_neon_vst2 : DefaultAttrsIntrinsic< [], [llvm_anyptr_ty, llvm_anyvector_ty, LLVMMatchType<1>, llvm_i32_ty], - [IntrArgMemOnly]>; + [IntrWriteMem, IntrArgMemOnly]>; def int_arm_neon_vst3 : DefaultAttrsIntrinsic< [], [llvm_anyptr_ty, llvm_anyvector_ty, LLVMMatchType<1>, LLVMMatchType<1>, llvm_i32_ty], - [IntrArgMemOnly]>; + [IntrWriteMem, IntrArgMemOnly]>; def int_arm_neon_vst4 : DefaultAttrsIntrinsic< [], [llvm_anyptr_ty, llvm_anyvector_ty, LLVMMatchType<1>, LLVMMatchType<1>, LLVMMatchType<1>, llvm_i32_ty], - [IntrArgMemOnly]>; + [IntrWriteMem, IntrArgMemOnly]>; def int_arm_neon_vst1x2 : DefaultAttrsIntrinsic< [], [llvm_anyptr_ty, llvm_anyvector_ty, LLVMMatchType<1>], - [IntrArgMemOnly, NoCapture>]>; + [IntrWriteMem, IntrArgMemOnly, NoCapture>]>; def int_arm_neon_vst1x3 : DefaultAttrsIntrinsic< [], [llvm_anyptr_ty, llvm_anyvector_ty, LLVMMatchType<1>, LLVMMatchType<1>], - [IntrArgMemOnly, NoCapture>]>; + [IntrWriteMem, IntrArgMemOnly, NoCapture>]>; def int_arm_neon_vst1x4 : DefaultAttrsIntrinsic< [], [llvm_anyptr_ty, llvm_anyvector_ty, LLVMMatchType<1>, LLVMMatchType<1>, LLVMMatchType<1>], - [IntrArgMemOnly, NoCapture>]>; + [IntrWriteMem, IntrArgMemOnly, NoCapture>]>; // Vector store N-element structure from one lane. // Source operands are: the address, the N vectors, the lane number, and @@ -776,17 +777,17 @@ def int_arm_neon_vst2lane : DefaultAttrsIntrinsic< [], [llvm_anyptr_ty, llvm_anyvector_ty, LLVMMatchType<1>, llvm_i32_ty, llvm_i32_ty], - [IntrArgMemOnly]>; + [IntrWriteMem, IntrArgMemOnly]>; def int_arm_neon_vst3lane : DefaultAttrsIntrinsic< [], [llvm_anyptr_ty, llvm_anyvector_ty, LLVMMatchType<1>, LLVMMatchType<1>, llvm_i32_ty, llvm_i32_ty], - [IntrArgMemOnly]>; + [IntrWriteMem, IntrArgMemOnly]>; def int_arm_neon_vst4lane : DefaultAttrsIntrinsic< [], [llvm_anyptr_ty, llvm_anyvector_ty, LLVMMatchType<1>, LLVMMatchType<1>, LLVMMatchType<1>, llvm_i32_ty, llvm_i32_ty], - [IntrArgMemOnly]>; + [IntrWriteMem, IntrArgMemOnly]>; // Vector bitwise select. def int_arm_neon_vbsl : DefaultAttrsIntrinsic< diff --git a/llvm/test/Analysis/BasicAA/cs-cs-arm.ll b/llvm/test/Analysis/BasicAA/cs-cs-arm.ll index 43e7be2ee20e..ac5e0db00111 100644 --- a/llvm/test/Analysis/BasicAA/cs-cs-arm.ll +++ b/llvm/test/Analysis/BasicAA/cs-cs-arm.ll @@ -24,7 +24,7 @@ entry: ; CHECK: Just Ref: Ptr: i8* %p <-> %a = call <8 x i16> @llvm.arm.neon.vld1.v8i16.p0(ptr %p, i32 16) ; CHECK: NoModRef: Ptr: i8* %q <-> %a = call <8 x i16> @llvm.arm.neon.vld1.v8i16.p0(ptr %p, i32 16) ; CHECK: NoModRef: Ptr: i8* %p <-> call void @llvm.arm.neon.vst1.p0.v8i16(ptr %q, <8 x i16> %y, i32 16) -; CHECK: Both ModRef: Ptr: i8* %q <-> call void @llvm.arm.neon.vst1.p0.v8i16(ptr %q, <8 x i16> %y, i32 16) +; CHECK: Just Mod: Ptr: i8* %q <-> call void @llvm.arm.neon.vst1.p0.v8i16(ptr %q, <8 x i16> %y, i32 16) ; CHECK: Just Ref: Ptr: i8* %p <-> %b = call <8 x i16> @llvm.arm.neon.vld1.v8i16.p0(ptr %p, i32 16) ; CHECK: NoModRef: Ptr: i8* %q <-> %b = call <8 x i16> @llvm.arm.neon.vld1.v8i16.p0(ptr %p, i32 16) ; CHECK: NoModRef: %a = call <8 x i16> @llvm.arm.neon.vld1.v8i16.p0(ptr %p, i32 16) #{{[0-9]+}} <-> call void @llvm.arm.neon.vst1.p0.v8i16(ptr %q, <8 x i16> %y, i32 16) diff --git a/llvm/test/Analysis/BasicAA/intrinsics-arm.ll b/llvm/test/Analysis/BasicAA/intrinsics-arm.ll index 7772cca2777f..b7c64f18cdfa 100644 --- a/llvm/test/Analysis/BasicAA/intrinsics-arm.ll +++ b/llvm/test/Analysis/BasicAA/intrinsics-arm.ll @@ -27,5 +27,5 @@ declare <8 x i16> @llvm.arm.neon.vld1.v8i16.p0(ptr, i32) nounwind readonly declare void @llvm.arm.neon.vst1.p0.v8i16(ptr, <8 x i16>, i32) nounwind ; CHECK: attributes #0 = { nocallback nofree nosync nounwind willreturn memory(argmem: read) } -; CHECK: attributes #1 = { nocallback nofree nosync nounwind willreturn memory(argmem: readwrite) } +; CHECK: attributes #1 = { nocallback nofree nosync nounwind willreturn memory(argmem: write) } ; CHECK: attributes [[ATTR]] = { nounwind } diff --git a/llvm/test/Assembler/aarch64-intrinsics-attributes.ll b/llvm/test/Assembler/aarch64-intrinsics-attributes.ll index 31b7101fba11..33f2758a4b18 100644 --- a/llvm/test/Assembler/aarch64-intrinsics-attributes.ll +++ b/llvm/test/Assembler/aarch64-intrinsics-attributes.ll @@ -22,5 +22,15 @@ declare <4 x i32> @llvm.aarch64.neon.shadd.v4i32(<4 x i32>, <4 x i32>) ; CHECK: declare @llvm.aarch64.sve.dup.nxv4i32(, , i32) [[NO_CALLBACK_NOFREE_NOSYNC_NOUNWIND_READNONE_WILLRETURN]] declare @llvm.aarch64.sve.dup.nxv4i32(, , i32) +; CHECK: declare void @llvm.aarch64.neon.st2.v4i32.p0(<4 x i32>, <4 x i32>, ptr captures(none)) [[NO_CALLBACK_NOFREE_NOSYNC_NOUNWIND_WRITEONLY_WILLRETURN:#[0-9]+]] +declare void @llvm.aarch64.neon.st2.v4i32.p0(<4 x i32>, <4 x i32>, ptr) + +; CHECK: declare void @llvm.aarch64.neon.st2lane.v4i32.p0(<4 x i32>, <4 x i32>, i64, ptr captures(none)) [[NO_CALLBACK_NOFREE_NOSYNC_NOUNWIND_WRITEONLY_WILLRETURN:#[0-9]+]] +declare void @llvm.aarch64.neon.st2lane.v4i32.p0(<4 x i32>, <4 x i32>, i64, ptr) + +; CHECK: declare void @llvm.aarch64.neon.st1x2.v8i16.p0(<8 x i16>, <8 x i16>, ptr captures(none)) [[NO_CALLBACK_NOFREE_NOSYNC_NOUNWIND_WRITEONLY_WILLRETURN:#[0-9]+]] +declare void @llvm.aarch64.neon.st1x2.v8i16.p0(<8 x i16>, <8 x i16>, ptr) + ; CHECK: attributes [[NOFREE_NOUNWIND_WILLRETURN]] = { nofree nounwind willreturn } ; CHECK: attributes [[NO_CALLBACK_NOFREE_NOSYNC_NOUNWIND_READNONE_WILLRETURN]] = { nocallback nofree nosync nounwind willreturn memory(none) } +; CHECK: attributes [[NO_CALLBACK_NOFREE_NOSYNC_NOUNWIND_WRITEONLY_WILLRETURN]] = { nocallback nofree nosync nounwind willreturn memory(argmem: write) }