clang-p2996/llvm/test/Analysis/DivergenceAnalysis/AMDGPU/atomics.ll

commit 475ce4c200
Author: Sameer Sahasrabuddhe
Date:   2022-12-20 07:22:24 +05:30

RFC: Uniformity Analysis for Irreducible Control Flow
Uniformity analysis is a generalization of divergence analysis to
include irreducible control flow:

  1. The proposed spec presents a notion of "maximal convergence" that
     captures the existing convention of converging threads at the
     headers of natural loops.

  2. Maximal convergence is then extended to irreducible cycles. The
     identity of irreducible cycles is determined by the choices made
     in a depth-first traversal of the control-flow graph. Uniformity
     analysis determines maximal convergence using criteria that
     depend only on closed paths, not on cycles. This makes it a
     conservative analysis whose results are independent of the effect
     of DFS on CycleInfo (see the example after this list).

  3. The analysis is implemented as a template that can be
     instantiated for both LLVM IR and Machine IR.
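
As an illustration of point 2, consider a minimal irreducible cycle
in LLVM IR (a hypothetical kernel written for this description, not
one of the committed tests): blocks %P and %Q branch into each other
and are both entered directly from the entry block, so either one may
be chosen as the cycle header depending on DFS order. Because the
entry into the cycle is divergent, the phi nodes inside the cycle are
divergent no matter which header is chosen, and reasoning over closed
paths through %P and %Q captures this without fixing a header:

  define amdgpu_kernel void @irreducible(i32 %n, ptr %out) {
  entry:
    %tid = call i32 @llvm.amdgcn.workitem.id.x()
    %div = icmp slt i32 %tid, 16     ; divergent: depends on the lane id
    br i1 %div, label %P, label %Q   ; divergent branch; two entries into the cycle

  P:                                 ; predecessors: %entry, %Q
    %pv = phi i32 [ 0, %entry ], [ %qv, %Q ]   ; divergent phi
    br label %Q

  Q:                                 ; predecessors: %entry, %P
    %qv = phi i32 [ 1, %entry ], [ %pv, %P ]   ; divergent phi
    %c = icmp slt i32 %qv, %n
    br i1 %c, label %P, label %exit

  exit:
    store i32 %qv, ptr %out
    ret void
  }

  declare i32 @llvm.amdgcn.workitem.id.x()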

Validation:
  - passes the existing tests for divergence analysis
  - passes new tests with irreducible control flow
  - passes equivalent tests in MIR and GMIR (a representative MIR
    invocation is sketched below)
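
The MIR variants of these tests are driven through llc's -run-pass
mechanism. A representative RUN line, assuming the machine printer
pass added by this change is named print-machine-uniformity (treat
the exact name as an assumption), would look like:

  # RUN: llc -mtriple=amdgcn-- -run-pass=print-machine-uniformity %s -o - 2>&1 | FileCheck %s

The GMIR tests apply the same printer to generic MachineIR produced
before instruction selection.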

Based on concepts originally outlined by
Nicolai Haehnle <nicolai.haehnle@amd.com>

With contributions from Ruiling Song <ruiling.song@amd.com> and
Jay Foad <jay.foad@amd.com>.

Support for GMIR and lit tests for GMIR/MIR added by
Yashwant Singh <yashwant.singh@amd.com>.

Differential Revision: https://reviews.llvm.org/D130746

; RUN: opt -mtriple amdgcn-- -passes='print<divergence>' -disable-output %s 2>&1 | FileCheck %s
; RUN: opt -mtriple amdgcn-- -passes='print<uniformity>' -disable-output %s 2>&1 | FileCheck %s
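; All of the atomic operations below return the value observed in memory by
; each lane. Different lanes can observe different values, so each atomic
; result is expected to be reported as DIVERGENT by both analyses.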
; CHECK: DIVERGENT: %orig = atomicrmw xchg ptr %ptr, i32 %val seq_cst
define amdgpu_kernel void @test1(ptr %ptr, i32 %val) #0 {
  %orig = atomicrmw xchg ptr %ptr, i32 %val seq_cst
  store i32 %orig, ptr %ptr
  ret void
}

; CHECK: DIVERGENT: %orig = cmpxchg ptr %ptr, i32 %cmp, i32 %new seq_cst seq_cst
define amdgpu_kernel void @test2(ptr %ptr, i32 %cmp, i32 %new) {
  %orig = cmpxchg ptr %ptr, i32 %cmp, i32 %new seq_cst seq_cst
  %val = extractvalue { i32, i1 } %orig, 0
  store i32 %val, ptr %ptr
  ret void
}

; CHECK: DIVERGENT: %ret = call i32 @llvm.amdgcn.atomic.inc.i32.p1(ptr addrspace(1) %ptr, i32 %val, i32 0, i32 0, i1 false)
define i32 @test_atomic_inc_i32(ptr addrspace(1) %ptr, i32 %val) #0 {
  %ret = call i32 @llvm.amdgcn.atomic.inc.i32.p1(ptr addrspace(1) %ptr, i32 %val, i32 0, i32 0, i1 false)
  ret i32 %ret
}

; CHECK: DIVERGENT: %ret = call i64 @llvm.amdgcn.atomic.inc.i64.p1(ptr addrspace(1) %ptr, i64 %val, i32 0, i32 0, i1 false)
define i64 @test_atomic_inc_i64(ptr addrspace(1) %ptr, i64 %val) #0 {
  %ret = call i64 @llvm.amdgcn.atomic.inc.i64.p1(ptr addrspace(1) %ptr, i64 %val, i32 0, i32 0, i1 false)
  ret i64 %ret
}

; CHECK: DIVERGENT: %ret = call i32 @llvm.amdgcn.atomic.dec.i32.p1(ptr addrspace(1) %ptr, i32 %val, i32 0, i32 0, i1 false)
define i32 @test_atomic_dec_i32(ptr addrspace(1) %ptr, i32 %val) #0 {
  %ret = call i32 @llvm.amdgcn.atomic.dec.i32.p1(ptr addrspace(1) %ptr, i32 %val, i32 0, i32 0, i1 false)
  ret i32 %ret
}

; CHECK: DIVERGENT: %ret = call i64 @llvm.amdgcn.atomic.dec.i64.p1(ptr addrspace(1) %ptr, i64 %val, i32 0, i32 0, i1 false)
define i64 @test_atomic_dec_i64(ptr addrspace(1) %ptr, i64 %val) #0 {
  %ret = call i64 @llvm.amdgcn.atomic.dec.i64.p1(ptr addrspace(1) %ptr, i64 %val, i32 0, i32 0, i1 false)
  ret i64 %ret
}

declare i32 @llvm.amdgcn.atomic.inc.i32.p1(ptr addrspace(1) nocapture, i32, i32, i32, i1) #1
declare i64 @llvm.amdgcn.atomic.inc.i64.p1(ptr addrspace(1) nocapture, i64, i32, i32, i1) #1
declare i32 @llvm.amdgcn.atomic.dec.i32.p1(ptr addrspace(1) nocapture, i32, i32, i32, i1) #1
declare i64 @llvm.amdgcn.atomic.dec.i64.p1(ptr addrspace(1) nocapture, i64, i32, i32, i1) #1

; CHECK: DIVERGENT: %ret = call i32 @llvm.amdgcn.global.atomic.csub.p1(ptr addrspace(1) %ptr, i32 %val)
define amdgpu_kernel void @test_atomic_csub_i32(ptr addrspace(1) %ptr, i32 %val) #0 {
  %ret = call i32 @llvm.amdgcn.global.atomic.csub.p1(ptr addrspace(1) %ptr, i32 %val)
  store i32 %ret, ptr addrspace(1) %ptr, align 4
  ret void
}

declare i32 @llvm.amdgcn.global.atomic.csub.p1(ptr addrspace(1) nocapture, i32) #1

attributes #0 = { nounwind }
attributes #1 = { argmemonly nounwind willreturn }