Files
clang-p2996/llvm/test/CodeGen/AMDGPU/convergence-tokens.ll
Sameer Sahasrabuddhe 421557974a [AMDGPU] Use glue for convergence tokens at call-like operations (#86766)
The earlier implementation on AMDGPU used explicit token operands at
SI_CALL and SI_CALL_ISEL. This is now replaced with CONVERGENCECTRL_GLUE
operands, with the following effects:

- The treatment of tokens at call-like operations is now consistent with
the treatment at intrinsics.
- Support for tail calls using implicit tokens at SI_TCRETURN "just
works".
- The extra parameter at call-like instructions is eliminated, thus
restoring those instructions and their handling to the original state.

The new glue node is placed after the existing glue node for the
outgoing call parameters, which seems to not interfere with selection of
the call-like nodes.
2024-04-01 10:51:13 +05:30

115 lines
5.1 KiB
LLVM

; RUN: llc --amdgpu-disable-structurizer -stop-after=amdgpu-isel -mtriple=amdgcn-- -mcpu=gfx900 -verify-machineinstrs -o - %s | FileCheck --check-prefixes=CHECK,ISEL %s
; RUN: llc --amdgpu-disable-structurizer -stop-after=dead-mi-elimination -mtriple=amdgcn-- -mcpu=gfx900 -verify-machineinstrs -o - %s | FileCheck --check-prefixes=CHECK,DEADMI %s
; RUN: llc --amdgpu-disable-structurizer -global-isel -stop-after=irtranslator -mtriple=amdgcn-- -mcpu=gfx900 -verify-machineinstrs -o - %s | FileCheck %s --check-prefixes=CHECK,GISEL
; CHECK-LABEL: name: basic_call
; CHECK: [[TOKEN:%[0-9]+]]{{[^ ]*}} = CONVERGENCECTRL_ENTRY
; ISEL: {{.*}} SI_CALL_ISEL {{.*}}, @foo, csr_amdgpu, {{.*}}, implicit [[TOKEN]]
; DEADMI: {{.*}} SI_CALL {{.*}}, @foo, csr_amdgpu, {{.*}}, implicit [[TOKEN]]
; GISEL: {{.*}} G_SI_CALL {{.*}}, @foo, csr_amdgpu, {{.*}}, implicit [[TOKEN]]
define i32 @basic_call(i32 %src) #0 {
%t = call token @llvm.experimental.convergence.entry()
%r = call i32 @foo(i32 %src) [ "convergencectrl"(token %t) ]
ret i32 %r
}
; CHECK-LABEL: name: basic_intrinsic
; CHECK: [[TOKEN:%[0-9]+]]{{[^ ]*}} = CONVERGENCECTRL_ANCHOR
; ISEL: CONVERGENCECTRL_GLUE [[TOKEN]]
; DEADMI-NOT: CONVERGENCECTRL_GLUE
; ISEL: {{.*}} = V_READFIRSTLANE_B32 {{.*}}, implicit [[TOKEN]]
; GISEL: {{.*}} = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane){{.*}}, implicit [[TOKEN]]
define i32 @basic_intrinsic(i32 %src) #0 {
%t = call token @llvm.experimental.convergence.anchor()
%r = call i32 @llvm.amdgcn.readfirstlane(i32 %src) [ "convergencectrl"(token %t) ]
ret i32 %r
}
; There's nothing to check here. The test is just meant to catch any crashes
; when a convergent call has no token.
define i32 @uncontrolled_call(i32 %src) #0 {
%r = call i32 @foo(i32 %src)
ret i32 %r
}
; CHECK-LABEL: name: basic_branch
; CHECK: bb.[[#]].entry:
; CHECK: [[TOKEN:%[0-9]+]]{{[^ ]*}} = CONVERGENCECTRL_ANCHOR
; CHECK: bb.[[#]].then:
; ISEL: CONVERGENCECTRL_GLUE [[TOKEN]]
; DEADMI-NOT: CONVERGENCECTRL_GLUE
; ISEL: {{.*}} = V_READFIRSTLANE_B32 {{.*}}, implicit [[TOKEN]]
; GISEL: {{.*}} = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane){{.*}}, implicit [[TOKEN]]
define i32 @basic_branch(i32 %src, i1 %cond) #0 {
entry:
%t = call token @llvm.experimental.convergence.anchor()
%x = add i32 %src, 1
br i1 %cond, label %then, label %else
then:
%r = call i32 @llvm.amdgcn.readfirstlane(i32 %x) [ "convergencectrl"(token %t) ]
br label %else
else:
%p = phi i32 [%r, %then], [%x, %entry]
ret i32 %p
}
; CHECK-LABEL: name: basic_loop
; CHECK: [[TOKEN:%[0-9]+]]{{[^ ]*}} = CONVERGENCECTRL_ANCHOR
; CHECK: bb.[[#]].loop:
; CHECK: [[LOOP:%[0-9]+]]{{[^ ]*}} = CONVERGENCECTRL_LOOP [[TOKEN]]
; ISEL: CONVERGENCECTRL_GLUE [[LOOP]]
; DEADMI-NOT: CONVERGENCECTRL_GLUE
; ISEL: {{.*}} = V_READFIRSTLANE_B32 {{.*}}, implicit [[LOOP]]
; GISEL: {{.*}} = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane){{.*}}, implicit [[LOOP]]
define i32 @basic_loop(i32 %src, i1 %cond) #0 {
%t1 = call token @llvm.experimental.convergence.anchor()
br label %loop
loop:
%t2 = call token @llvm.experimental.convergence.loop() [ "convergencectrl"(token %t1) ]
%r = call i32 @llvm.amdgcn.readfirstlane(i32 %src) [ "convergencectrl"(token %t2) ]
br i1 %cond, label %loop, label %end
end:
ret i32 %r
}
; CHECK-LABEL: name: nested
; CHECK: [[ENTRY:%[0-9]+]]{{[^ ]*}} = CONVERGENCECTRL_ENTRY
; CHECK: [[ANCHOR:%[0-9]+]]{{[^ ]*}} = CONVERGENCECTRL_ANCHOR
; ISEL: {{.*}} = V_READFIRSTLANE_B32 {{.*}}, implicit [[ANCHOR]]
; GISEL: {{.*}} = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane){{.*}}, implicit [[ANCHOR]]
; ISEL: {{.*}} = V_READFIRSTLANE_B32 {{.*}}, implicit [[ENTRY]]
; GISEL: {{.*}} = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane){{.*}}, implicit [[ENTRY]]
define i32 @nested(i32 %src) #0 {
%t1 = call token @llvm.experimental.convergence.entry()
%t2 = call token @llvm.experimental.convergence.anchor()
%r2 = call i32 @llvm.amdgcn.readfirstlane(i32 %src) [ "convergencectrl"(token %t2) ]
%r1 = call i32 @llvm.amdgcn.readfirstlane(i32 %src) [ "convergencectrl"(token %t1) ]
%sum = add i32 %r1, %r2
ret i32 %sum
}
; CHECK-LABEL: name: tail_call_void_func_void
; CHECK: [[TOKEN:%[0-9]+]]{{[^ ]*}} = CONVERGENCECTRL_ENTRY
; CHECK: {{.*}} SI_TCRETURN {{.*}}, @external_void_func_void, 0, csr_amdgpu, {{.*}}implicit [[TOKEN]]
define void @tail_call_void_func_void() #0 {
%t1 = call token @llvm.experimental.convergence.entry()
tail call void @external_void_func_void() [ "convergencectrl"(token %t1) ]
ret void
}
declare hidden void @external_void_func_void() #0
declare i32 @foo(i32 %x) #0
declare i32 @llvm.amdgcn.readfirstlane(i32) #0
declare token @llvm.experimental.convergence.entry()
declare token @llvm.experimental.convergence.anchor()
declare token @llvm.experimental.convergence.loop()
attributes #0 = { nounwind readnone convergent }
attributes #1 = { nounwind }