Files
clang-p2996/llvm/test/CodeGen/AMDGPU/convergent-inlineasm.ll
Eli Friedman 1b39328d74 [CodeGen] Fix MachineInstr::isSafeToMove handling of inline asm. (#126807)
Even if an inline asm doesn't have memory effects, we can't assume it's
safe to speculate: it could trap, or cause undefined behavior. At the
LLVM IR level, this is handled correctly: we don't speculate inline asm
(unless it's marked "speculatable", but I don't think anyone does that).
Codegen also needs to respect this restriction.

This change stops Early If Conversion and similar passes from
speculating an INLINEASM MachineInstr.

Some uses of isSafeToMove probably could be switched to a different API:
isSafeToMove assumes you're hoisting, but we could handle some forms of
sinking more aggressively. But I'll leave that for a followup, if it
turns out to be relevant.

See also discussion on gcc bugtracker
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=102150 .
2025-02-25 15:29:12 -08:00

47 lines
1.5 KiB
LLVM

; Test driver: compile this file with llc for the amdgcn--amdhsa triple, with
; the machine verifier enabled, and match the emitted assembly against the
; check lines in this file that use the GCN prefix.
; RUN: llc -mtriple=amdgcn--amdhsa -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
; Intrinsic declaration shared by both kernels below; it carries attribute
; group #0. Going by its name it yields the work-item ID along x; each kernel
; only compares its result against the constant 8.
declare i32 @llvm.amdgcn.workitem.id.x() #0
; Case 1: the inline asm call site is tagged with attribute group #1, which
; this file defines as convergent. The check lines below require, in order:
; the kernel label, the entry block marker, the v_cmp_ne_u32_e64 text emitted
; by the inline asm, an s_cbranch_execz, and then the comment marker of a later
; basic block. So the asm has to be emitted ahead of the branch instead of
; inside the conditionally executed block.
; GCN-LABEL: {{^}}convergent_inlineasm:
; GCN: %bb.0:
; GCN: v_cmp_ne_u32_e64
; GCN: s_cbranch_execz
; GCN: ; %bb.{{[0-9]+}}:
define amdgpu_kernel void @convergent_inlineasm(ptr addrspace(1) nocapture %arg) {
bb:
%tmp = call i32 @llvm.amdgcn.workitem.id.x()
; Inline asm with an "=s" i64 output and a "v" input fed the constant i32 1.
; The trailing #1 is what makes this particular call convergent.
%tmp1 = tail call i64 asm "v_cmp_ne_u32_e64 $0, 0, $1", "=s,v"(i32 1) #1
; bb3 is entered only when %tmp equals 8; otherwise control goes straight to bb5.
%tmp2 = icmp eq i32 %tmp, 8
br i1 %tmp2, label %bb3, label %bb5
bb3: ; preds = %bb
; NOTE(review): %tmp4 is computed but never used; the store below writes
; through %arg itself. Presumably a leftover from an earlier form of the
; test - confirm before "fixing", since it would change the generated code.
%tmp4 = getelementptr i64, ptr addrspace(1) %arg, i32 %tmp
; The only use of the asm result %tmp1 is this store in the conditional block.
store i64 %tmp1, ptr addrspace(1) %arg, align 8
br label %bb5
bb5: ; preds = %bb3, %bb
ret void
}
; Case 2: same IR as the convergent kernel above, except that the inline asm
; call site has no attribute group attached. The check lines still require the
; v_cmp_ne_u32_e64 text to appear before the s_cbranch_execz, i.e. the asm is
; expected to stay ahead of the branch even without the convergent marking.
; NOTE(review): presumably this reflects codegen no longer treating inline asm
; as safe to move into the block of its only user - confirm against the
; MachineInstr::isSafeToMove change this test was updated with.
; GCN-LABEL: {{^}}nonconvergent_inlineasm:
; GCN: v_cmp_ne_u32_e64
; GCN: s_cbranch_execz
define amdgpu_kernel void @nonconvergent_inlineasm(ptr addrspace(1) nocapture %arg) {
bb:
%tmp = call i32 @llvm.amdgcn.workitem.id.x()
; Inline asm with an "=s" i64 output and a "v" input fed the constant i32 1.
; Unlike the convergent kernel, no attribute group follows the call.
%tmp1 = tail call i64 asm "v_cmp_ne_u32_e64 $0, 0, $1", "=s,v"(i32 1)
; bb3 is entered only when %tmp equals 8; otherwise control goes straight to bb5.
%tmp2 = icmp eq i32 %tmp, 8
br i1 %tmp2, label %bb3, label %bb5
bb3: ; preds = %bb
; NOTE(review): %tmp4 is computed but never used; the store below writes
; through %arg itself. Presumably a leftover from an earlier form of the
; test - confirm before "fixing", since it would change the generated code.
%tmp4 = getelementptr i64, ptr addrspace(1) %arg, i32 %tmp
; The only use of the asm result %tmp1 is this store in the conditional block.
store i64 %tmp1, ptr addrspace(1) %arg, align 8
br label %bb5
bb5: ; preds = %bb3, %bb
ret void
}
; #0 is attached to the @llvm.amdgcn.workitem.id.x declaration.
attributes #0 = { nounwind readnone }
; #1 is attached only to the inline asm call in @convergent_inlineasm; it is
; #0 plus convergent, which is the single difference between the two kernels.
attributes #1 = { convergent nounwind readnone }