This is a long-standing problem that resurfaces once in a while [0].
There might actually be two problems because I'm not 100% sure if the
issue underlying https://reviews.llvm.org/D115302 would be solved by
this or not. Anyway.
In 2008 we thought intrinsics do not read/write globals passed to them:
d4133ac315
This is not correct, given that intrinsics can synchronize threads and
thereby make the effects of other threads visible.
NOTE: I did not yet modify any tests but only tried out the reproducer
of https://github.com/llvm/llvm-project/issues/54851.
Fixes: https://github.com/llvm/llvm-project/issues/54851
[0] https://discourse.llvm.org/t/bug-gvn-memdep-bug-in-the-presence-of-intrinsics/59402
Differential Revision: https://reviews.llvm.org/D123531
39 lines
1.3 KiB
LLVM
39 lines
1.3 KiB
LLVM
; RUN: opt -globals-aa -gvn -S < %s | FileCheck %s
|
|
; RUN: opt -aa-pipeline=basic-aa,globals-aa -passes='require<globals-aa>,gvn' -S < %s | FileCheck %s
|
|
;
|
|
; Functions w/o `nosync` attribute may communicate via memory and must be
|
|
; treated conservatively. Taken from https://reviews.llvm.org/D115302.
|
|
|
|
; Module-level setup: the test is built for the 64-bit NVPTX (CUDA) target.
target datalayout = "e-i64:64-i128:128-v16:16-v32:32-n16:32:64"
|
|
target triple = "nvptx64-nvidia-cuda"
|
|
|
|
; @s is the only global in this test: an internal-linkage i32 placed in
; address space 3 (shared memory in the NVPTX backend's address-space
; numbering). Every function below stores to it, executes a barrier intrinsic,
; and then loads it back.
@s = internal local_unnamed_addr addrspace(3) global i32 undef, align 4
|
|
|
|
; bar_sync: stores the argument to @s, executes the llvm.nvvm.bar.sync barrier,
; then loads @s again and returns the loaded value.
; The directives below require the store, the barrier call, and the load to all
; remain, in that order, in the optimized output; in particular the load must
; not be replaced by the value stored before the barrier.
; CHECK-LABEL: @bar_sync
|
|
; CHECK: store
|
|
; CHECK: tail call void @llvm.nvvm.bar.sync(i32 0)
|
|
; CHECK: load
|
|
define dso_local i32 @bar_sync(i32 %0) local_unnamed_addr {
|
|
; Write the argument to @s through a pointer addrspacecast from address
; space 3 to the default address space.
store i32 %0, i32* addrspacecast (i32 addrspace(3)* @s to i32*), align 4
|
|
; Barrier intrinsic; its declaration carries no nosync attribute, so it may
; synchronize with other threads that access @s.
tail call void @llvm.nvvm.bar.sync(i32 0)
|
|
; Reload @s after the barrier. The result is %2 because the unnamed entry
; block implicitly takes the number %1.
%2 = load i32, i32* addrspacecast (i32 addrspace(3)* @s to i32*), align 4
|
|
ret i32 %2
|
|
}
|
|
|
|
; Barrier intrinsic called by @bar_sync. It uses attribute group #0
; (convergent nounwind, defined at the end of this file), which does not
; include nosync, so per the note at the top of this file the call must be
; treated conservatively.
declare void @llvm.nvvm.bar.sync(i32) #0
|
|
|
|
; barrier0: same store / barrier / load pattern as @bar_sync, but using the
; argument-less llvm.nvvm.barrier0 intrinsic as the barrier.
; The directives below require the store, the barrier call, and the load to all
; remain, in that order, in the optimized output; in particular the load must
; not be replaced by the value stored before the barrier.
; CHECK-LABEL: @barrier0
|
|
; CHECK: store
|
|
; CHECK: tail call void @llvm.nvvm.barrier0()
|
|
; CHECK: load
|
|
define dso_local i32 @barrier0(i32 %0) local_unnamed_addr {
|
|
; Write the argument to @s through a pointer addrspacecast from address
; space 3 to the default address space.
store i32 %0, i32* addrspacecast (i32 addrspace(3)* @s to i32*), align 4
|
|
; Barrier intrinsic; its declaration carries no nosync attribute, so it may
; synchronize with other threads that access @s.
tail call void @llvm.nvvm.barrier0()
|
|
; Reload @s after the barrier. The result is %2 because the unnamed entry
; block implicitly takes the number %1.
%2 = load i32, i32* addrspacecast (i32 addrspace(3)* @s to i32*), align 4
|
|
ret i32 %2
|
|
}
|
|
|
|
; Barrier intrinsic called by @barrier0; it shares attribute group #0 with
; llvm.nvvm.bar.sync.
declare void @llvm.nvvm.barrier0() #0
|
|
|
|
; Attribute group used by both barrier declarations. It deliberately contains
; only convergent and nounwind: there is no nosync and no memory-effect
; attribute, which is the situation this test exercises.
attributes #0 = { convergent nounwind }
|