clang-p2996/llvm/test/CodeGen/AMDGPU/convergence-laneops.ll
Vikram Hegde 5feb32ba92 [AMDGPU] Extend readlane, writelane and readfirstlane intrinsic lowering for generic types (#89217)
This patch is intended to be the first in a series whose end goal is to adapt
the atomic optimizer pass to support i64 and f64 operations (along with
removing all unnecessary bitcasts). It legalizes 64-bit readlane, writelane
and readfirstlane ops pre-ISel.

---------

Co-authored-by: vikramRH <vikhegde@amd.com>
2024-06-25 14:35:19 +05:30

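For context, the FileCheck patterns in the test below expect each 64-bit lane op to select into a pair of 32-bit machine instructions (e.g. two V_READFIRSTLANE_B32). The following is a minimal IR-level sketch of that kind of split, not the literal output of the lowering; the function and value names are illustrative, and the .i32-suffixed intrinsic name assumes the overloaded form introduced by this patch.

define i64 @split_readfirstlane_i64(i64 %x) {
  ; Split the 64-bit source into its low and high 32-bit halves.
  %lo32 = trunc i64 %x to i32
  %hi64 = lshr i64 %x, 32
  %hi32 = trunc i64 %hi64 to i32
  ; Read the first active lane of each half with the 32-bit intrinsic.
  %lo.rfl = call i32 @llvm.amdgcn.readfirstlane.i32(i32 %lo32)
  %hi.rfl = call i32 @llvm.amdgcn.readfirstlane.i32(i32 %hi32)
  ; Reassemble the two halves into an i64 result.
  %lo.ext = zext i32 %lo.rfl to i64
  %hi.ext = zext i32 %hi.rfl to i64
  %hi.shl = shl i64 %hi.ext, 32
  %res = or i64 %hi.shl, %lo.ext
  ret i64 %res
}

declare i32 @llvm.amdgcn.readfirstlane.i32(i32)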

; RUN: llc -stop-after=amdgpu-isel -mtriple=amdgcn-- -mcpu=gfx1100 -verify-machineinstrs -o - %s | FileCheck --check-prefixes=CHECK,ISEL %s
; RUN: not --crash llc -mtriple=amdgcn--amdhsa -mcpu=gfx1100 -verify-machineinstrs < %s 2>&1 | FileCheck --check-prefix=CHECK-ERROR %s

; FIXME: Merge these tests with existing lane op tests (llvm.amdgcn.readlane.ll, llvm.amdgcn.writelane.ll ...) once the crash is fixed.

; CHECK-LABEL: name: basic_readfirstlane_i64
; CHECK: [[TOKEN:%[0-9]+]]{{[^ ]*}} = CONVERGENCECTRL_ANCHOR
; ISEL: CONVERGENCECTRL_GLUE [[TOKEN]]
; ISEL: {{.*}} = V_READFIRSTLANE_B32 {{.*}}, implicit [[TOKEN]]
; ISEL: CONVERGENCECTRL_GLUE [[TOKEN]]
; ISEL: {{.*}} = V_READFIRSTLANE_B32 {{.*}}, implicit [[TOKEN]]
define i64 @basic_readfirstlane_i64(i64 %src, i1 %cond) #0 {
entry:
%t = call token @llvm.experimental.convergence.anchor()
%x = add i64 %src, 1
br i1 %cond, label %then, label %else
then:
; CHECK-ERROR: Cannot mix controlled and uncontrolled convergence in the same function.
; CHECK-ERROR: V_READFIRSTLANE_B32
%r = call i64 @llvm.amdgcn.readfirstlane.i64(i64 %x) [ "convergencectrl"(token %t) ]
br label %else
else:
%p = phi i64 [%r, %then], [%x, %entry]
ret i64 %p
}

; CHECK-LABEL: name: basic_readlane_i64
; CHECK: [[TOKEN:%[0-9]+]]{{[^ ]*}} = CONVERGENCECTRL_ANCHOR
; ISEL: CONVERGENCECTRL_GLUE [[TOKEN]]
; ISEL: {{.*}} = V_READLANE_B32 {{.*}}, implicit [[TOKEN]]
; ISEL: CONVERGENCECTRL_GLUE [[TOKEN]]
; ISEL: {{.*}} = V_READLANE_B32 {{.*}}, implicit [[TOKEN]]
define i64 @basic_readlane_i64(i64 %src, i32 %lane, i1 %cond) #0 {
entry:
%t = call token @llvm.experimental.convergence.anchor()
%x = add i64 %src, 1
br i1 %cond, label %then, label %else
then:
%r = call i64 @llvm.amdgcn.readlane.i64(i64 %x, i32 %lane) [ "convergencectrl"(token %t) ]
br label %else
else:
%p = phi i64 [%r, %then], [%x, %entry]
ret i64 %p
}

; CHECK-LABEL: name: basic_writelane_i64
; CHECK: [[TOKEN:%[0-9]+]]{{[^ ]*}} = CONVERGENCECTRL_ANCHOR
; ISEL: CONVERGENCECTRL_GLUE [[TOKEN]]
; ISEL: {{.*}} = V_WRITELANE_B32 {{.*}}, implicit [[TOKEN]]
; ISEL: CONVERGENCECTRL_GLUE [[TOKEN]]
; ISEL: {{.*}} = V_WRITELANE_B32 {{.*}}, implicit [[TOKEN]]
define i64 @basic_writelane_i64(i64 %src, i1 %cond, i32 %lane, ptr addrspace(1) %out) #0 {
entry:
%old = load i64, ptr addrspace(1) %out
%t = call token @llvm.experimental.convergence.anchor()
%x = add i64 %src, 1
br i1 %cond, label %then, label %else
then:
%r = call i64 @llvm.amdgcn.writelane.i64(i64 %x, i32 %lane, i64 %old) [ "convergencectrl"(token %t) ]
br label %else
else:
%p = phi i64 [%r, %then], [%x, %entry]
ret i64 %p
}

; The listing above is truncated by the file viewer; the declarations below are
; reconstructed from the call sites, and the exact contents of attribute group
; #0 are assumed.
declare token @llvm.experimental.convergence.anchor()
declare i64 @llvm.amdgcn.readfirstlane.i64(i64)
declare i64 @llvm.amdgcn.readlane.i64(i64, i32)
declare i64 @llvm.amdgcn.writelane.i64(i64, i32, i64)

attributes #0 = { convergent }