Files
clang-p2996/llvm/test/CodeGen/AMDGPU/diverge-switch-default.ll
Fangrui Song 9e9907f1cf [AMDGPU,test] Change llc -march= to -mtriple= (#75982)
Similar to 806761a762.

For IR files without a target triple, -mtriple= specifies the full
target triple while -march= merely sets the architecture part of the
default target triple, leaving a target triple which may not make sense,
e.g. amdgpu-apple-darwin.

Therefore, -march= is error-prone and not recommended for tests without
a target triple. The issue has been benign as we recognize
$unknown-apple-darwin as ELF instead of rejecting it outright.

This patch changes AMDGPU tests to not rely on the default
OS/environment components. Tests that need fixes are not changed:

```
  LLVM :: CodeGen/AMDGPU/fabs.f64.ll
  LLVM :: CodeGen/AMDGPU/fabs.ll
  LLVM :: CodeGen/AMDGPU/floor.ll
  LLVM :: CodeGen/AMDGPU/fneg-fabs.f64.ll
  LLVM :: CodeGen/AMDGPU/fneg-fabs.ll
  LLVM :: CodeGen/AMDGPU/r600-infinite-loop-bug-while-reorganizing-vector.ll
  LLVM :: CodeGen/AMDGPU/schedule-if-2.ll
```
2024-01-16 21:54:58 -08:00

66 lines
2.2 KiB
LLVM

; RUN: llc -mtriple=amdgcn -mcpu=gfx900 -print-after=si-annotate-control-flow %s -o /dev/null 2>&1 | FileCheck %s
; The command above discards the generated code (-o /dev/null) and pipes the
; IR dump requested by -print-after=si-annotate-control-flow, merged in via
; 2>&1, into FileCheck, so the patterns in this file are matched against the
; IR as it looks right after that pass.

; "n32" declares 32 bits as the target's native integer width.
target datalayout = "n32"
; CHECK-LABEL: @switch_unreachable_default
;
; Kernel that switches on the workitem id with an unreachable default, so
; only the two case blocks reach %sw.epilog. The pointer selected there
; (%in0 or %in1) feeds a single-block loop that reads bytes until it finds a
; zero byte; the number of bytes preceding that zero is then stored to
; %out[workitem id].
;
; The kernel loads through %p and stores to %out, so it is annotated with
; `nounwind` only. Attribute group #0 additionally carries `readnone`, which
; would contradict those memory accesses on this definition.
define amdgpu_kernel void @switch_unreachable_default(ptr addrspace(1) %out, ptr addrspace(1) %in0, ptr addrspace(1) %in1) nounwind {
centry:
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  switch i32 %tid, label %sw.default [
    i32 0, label %sw.bb0
    i32 1, label %sw.bb1
  ]

sw.bb0:
  br label %sw.epilog

sw.bb1:
  br label %sw.epilog

sw.default:
  unreachable

sw.epilog:
  ; Picks the input pointer according to which case block was taken.
  %ptr = phi ptr addrspace(1) [ %in0, %sw.bb0 ], [ %in1, %sw.bb1 ]
  br label %sw.while

; The loop below is necessary to preserve the effect of the
; unreachable default on divergence analysis in the presence of other
; optimizations. The loop consists of a single block where the loop
; exit is divergent because it depends on the divergent phi at the
; start of the block. The checks below ensure that the loop exit is
; handled correctly as divergent. But the data-flow within the block
; is sensitive to optimizations; so we just ensure that the relevant
; operations in the block body are indeed in the same block.
; CHECK: [[PHI:%[a-zA-Z0-9._]+]] = phi i64
; CHECK-NOT: {{ br }}
; CHECK: load i8
; CHECK-NOT: {{ br }}
; CHECK: [[ICMP:%[a-zA-Z0-9._]+]] = icmp eq
; CHECK: [[IF:%[a-zA-Z0-9._]+]] = call i64 @llvm.amdgcn.if.break.i64(i1 [[ICMP]], i64 [[PHI]])
; CHECK: [[LOOP:%[a-zA-Z0-9._]+]] = call i1 @llvm.amdgcn.loop.i64(i64 [[IF]])
; CHECK: br i1 [[LOOP]]

sw.while:
  ; %p walks forward one byte per iteration; %count is the number of
  ; iterations completed before the current one.
  %p = phi ptr addrspace(1) [ %ptr, %sw.epilog ], [ %incdec.ptr, %sw.while ]
  %count = phi i32 [ 0, %sw.epilog ], [ %count.inc, %sw.while ]
  %char = load i8, ptr addrspace(1) %p, align 1
  %tobool = icmp eq i8 %char, 0
  %incdec.ptr = getelementptr inbounds i8, ptr addrspace(1) %p, i64 1
  %count.inc = add i32 %count, 1
  ; Leave the loop once a zero byte has been read.
  br i1 %tobool, label %sw.exit, label %sw.while

sw.exit:
  ; Store the pre-increment count at index %tid of %out.
  %tid64 = zext i32 %tid to i64
  %gep_out = getelementptr inbounds i32, ptr addrspace(1) %out, i64 %tid64
  store i32 %count, ptr addrspace(1) %gep_out, align 4
  ret void
}
; Intrinsic used by the kernel to obtain the workitem id in the x dimension.
declare i32 @llvm.amdgcn.workitem.id.x() #0

; Attribute group referenced as #0 in this file. The former group #1
; (convergent noinline optnone) was not referenced anywhere and has been
; removed.
attributes #0 = { nounwind readnone }