Even if an inline asm doesn't have memory effects, we can't assume it's safe to speculate: it could trap, or cause undefined behavior. At the LLVM IR level, this is handled correctly: we don't speculate inline asm (unless it's marked "speculatable", but I don't think anyone does that). Codegen also needs to respect this restriction. This change stops Early If Conversion and similar passes from speculating an INLINEASM MachineInstr. Some uses of isSafeToMove probably could be switched to a different API: isSafeToMove assumes you're hoisting, but we could handle some forms of sinking more aggressively. But I'll leave that for a followup, if it turns out to be relevant. See also discussion on gcc bugtracker https://gcc.gnu.org/bugzilla/show_bug.cgi?id=102150 .
47 lines
1.5 KiB
LLVM
47 lines
1.5 KiB
LLVM
; RUN: llc -mtriple=amdgcn--amdhsa -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
|
|
|
|
declare i32 @llvm.amdgcn.workitem.id.x() #0
|
|
; GCN-LABEL: {{^}}convergent_inlineasm:
|
|
; GCN: %bb.0:
|
|
; GCN: v_cmp_ne_u32_e64
|
|
; GCN: s_cbranch_execz
|
|
; GCN: ; %bb.{{[0-9]+}}:
|
|
define amdgpu_kernel void @convergent_inlineasm(ptr addrspace(1) nocapture %arg) {
|
|
bb:
|
|
%tmp = call i32 @llvm.amdgcn.workitem.id.x()
|
|
%tmp1 = tail call i64 asm "v_cmp_ne_u32_e64 $0, 0, $1", "=s,v"(i32 1) #1
|
|
%tmp2 = icmp eq i32 %tmp, 8
|
|
br i1 %tmp2, label %bb3, label %bb5
|
|
|
|
bb3: ; preds = %bb
|
|
%tmp4 = getelementptr i64, ptr addrspace(1) %arg, i32 %tmp
|
|
store i64 %tmp1, ptr addrspace(1) %arg, align 8
|
|
br label %bb5
|
|
|
|
bb5: ; preds = %bb3, %bb
|
|
ret void
|
|
}
|
|
|
|
; GCN-LABEL: {{^}}nonconvergent_inlineasm:
|
|
; GCN: v_cmp_ne_u32_e64
|
|
; GCN: s_cbranch_execz
|
|
|
|
define amdgpu_kernel void @nonconvergent_inlineasm(ptr addrspace(1) nocapture %arg) {
|
|
bb:
|
|
%tmp = call i32 @llvm.amdgcn.workitem.id.x()
|
|
%tmp1 = tail call i64 asm "v_cmp_ne_u32_e64 $0, 0, $1", "=s,v"(i32 1)
|
|
%tmp2 = icmp eq i32 %tmp, 8
|
|
br i1 %tmp2, label %bb3, label %bb5
|
|
|
|
bb3: ; preds = %bb
|
|
%tmp4 = getelementptr i64, ptr addrspace(1) %arg, i32 %tmp
|
|
store i64 %tmp1, ptr addrspace(1) %arg, align 8
|
|
br label %bb5
|
|
|
|
bb5: ; preds = %bb3, %bb
|
|
ret void
|
|
}
|
|
|
|
attributes #0 = { nounwind readnone }
|
|
attributes #1 = { convergent nounwind readnone }
|