Files
clang-p2996/llvm/test/CodeGen/NVPTX/intr-range.ll
Alex MacLean 435addbf50 [NVPTX] Revamp NVVMIntrRange pass (#94422)
Revamp the NVVMIntrRange pass making the following updates:
- Use range attributes over range metadata. This is what instcombine has
moved to for ranges on intrinsics in
https://github.com/llvm/llvm-project/pull/88776 and it seems a bit
cleaner.
- Consider the `!"maxntid{x,y,z}"` and `!"reqntid{x,y,z}"` function
metadata when adding ranges for `tid` sreg intrinsics. This can allow
for smaller ranges and more optimization.
- When range attributes are already present, use the intersection of the
old and new range. This complements the metadata change by allowing
ranges to be shrunk when an intrinsic is in a function which is inlined
into a kernel with metadata. While we don't call this more than once
yet, we should consider adding a second call after inlining, once this
has had a chance to soak for a while and no issues have arisen.

I've also re-enabled this pass in the TM; it was disabled years ago due
to "numerical discrepancies" https://reviews.llvm.org/D96166. In our
testing we haven't seen any issues with adding ranges to intrinsics, and
I cannot find any further info about what issues were encountered.
2024-06-06 06:42:46 -07:00

89 lines
3.9 KiB
LLVM

; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-attributes --version 5
; RUN: opt < %s -S -mtriple=nvptx-nvidia-cuda -mcpu=sm_20 -passes=nvvm-intr-range | FileCheck %s
;; Kernel annotated with maxntidx = 32 and maxntidz = 3 (node !0 of
;; !nvvm.annotations). The body reads each of the tid.{x,y,z} and ntid.{x,y,z}
;; sreg intrinsics once and returns the sum of all six values, so every call
;; feeds the result.
;; Expected output: every call gains a range attribute. The tid.x/tid.y upper
;; bound of 96 matches 32 * 3, the product of the two annotated values; the
;; tid.z bound is 64.
;; NOTE(review): 64 is presumably a fixed per-dimension limit for z applied by
;; the pass -- confirm against the pass implementation.
;; The ntid ranges are the tid ranges shifted up by one: [1, N+1) rather than
;; [0, N).
define i32 @test_maxntid() {
; CHECK-LABEL: define i32 @test_maxntid(
; CHECK-SAME: ) #[[ATTR0:[0-9]+]] {
; CHECK-NEXT: [[TMP1:%.*]] = call range(i32 0, 96) i32 @llvm.nvvm.read.ptx.sreg.tid.x()
; CHECK-NEXT: [[TMP3:%.*]] = call range(i32 0, 96) i32 @llvm.nvvm.read.ptx.sreg.tid.y()
; CHECK-NEXT: [[TMP2:%.*]] = call range(i32 0, 64) i32 @llvm.nvvm.read.ptx.sreg.tid.z()
; CHECK-NEXT: [[TMP11:%.*]] = call range(i32 1, 97) i32 @llvm.nvvm.read.ptx.sreg.ntid.x()
; CHECK-NEXT: [[TMP4:%.*]] = call range(i32 1, 97) i32 @llvm.nvvm.read.ptx.sreg.ntid.y()
; CHECK-NEXT: [[TMP6:%.*]] = call range(i32 1, 65) i32 @llvm.nvvm.read.ptx.sreg.ntid.z()
; CHECK-NEXT: [[TMP7:%.*]] = add i32 [[TMP1]], [[TMP3]]
; CHECK-NEXT: [[TMP8:%.*]] = add i32 [[TMP7]], [[TMP2]]
; CHECK-NEXT: [[TMP9:%.*]] = add i32 [[TMP8]], [[TMP11]]
; CHECK-NEXT: [[TMP10:%.*]] = add i32 [[TMP9]], [[TMP4]]
; CHECK-NEXT: [[TMP5:%.*]] = add i32 [[TMP10]], [[TMP6]]
; CHECK-NEXT: ret i32 [[TMP5]]
;
%1 = call i32 @llvm.nvvm.read.ptx.sreg.tid.x()
%2 = call i32 @llvm.nvvm.read.ptx.sreg.tid.y()
%3 = call i32 @llvm.nvvm.read.ptx.sreg.tid.z()
%4 = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x()
%5 = call i32 @llvm.nvvm.read.ptx.sreg.ntid.y()
%6 = call i32 @llvm.nvvm.read.ptx.sreg.ntid.z()
%7 = add i32 %1, %2
%8 = add i32 %7, %3
%9 = add i32 %8, %4
%10 = add i32 %9, %5
%11 = add i32 %10, %6
ret i32 %11
}
;; Kernel annotated with reqntidx = 20 only (node !1 of !nvvm.annotations).
;; The body reads each of the tid.{x,y,z} and ntid.{x,y,z} sreg intrinsics once
;; and returns the sum of all six values, mirroring @test_maxntid, so that no
;; call or add is dead.
;; Expected output: every tid call gains range(i32 0, 20) and every ntid call
;; gains range(i32 1, 21).
define i32 @test_reqntid() {
; CHECK-LABEL: define i32 @test_reqntid(
; CHECK-SAME: ) #[[ATTR0]] {
; CHECK-NEXT: [[TMP1:%.*]] = call range(i32 0, 20) i32 @llvm.nvvm.read.ptx.sreg.tid.x()
; CHECK-NEXT: [[TMP5:%.*]] = call range(i32 0, 20) i32 @llvm.nvvm.read.ptx.sreg.tid.y()
; CHECK-NEXT: [[TMP2:%.*]] = call range(i32 0, 20) i32 @llvm.nvvm.read.ptx.sreg.tid.z()
; CHECK-NEXT: [[TMP4:%.*]] = call range(i32 1, 21) i32 @llvm.nvvm.read.ptx.sreg.ntid.x()
; CHECK-NEXT: [[TMP3:%.*]] = call range(i32 1, 21) i32 @llvm.nvvm.read.ptx.sreg.ntid.y()
; CHECK-NEXT: [[TMP6:%.*]] = call range(i32 1, 21) i32 @llvm.nvvm.read.ptx.sreg.ntid.z()
; CHECK-NEXT: [[TMP7:%.*]] = add i32 [[TMP1]], [[TMP5]]
; CHECK-NEXT: [[TMP8:%.*]] = add i32 [[TMP7]], [[TMP2]]
; CHECK-NEXT: [[TMP9:%.*]] = add i32 [[TMP8]], [[TMP4]]
; CHECK-NEXT: [[TMP10:%.*]] = add i32 [[TMP9]], [[TMP3]]
; CHECK-NEXT: [[TMP11:%.*]] = add i32 [[TMP10]], [[TMP6]]
; CHECK-NEXT: ret i32 [[TMP11]]
;
%1 = call i32 @llvm.nvvm.read.ptx.sreg.tid.x()
%2 = call i32 @llvm.nvvm.read.ptx.sreg.tid.y()
%3 = call i32 @llvm.nvvm.read.ptx.sreg.tid.z()
%4 = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x()
%5 = call i32 @llvm.nvvm.read.ptx.sreg.ntid.y()
%6 = call i32 @llvm.nvvm.read.ptx.sreg.ntid.z()
%7 = add i32 %1, %2
%8 = add i32 %7, %3
%9 = add i32 %8, %4
%10 = add i32 %9, %5
%11 = add i32 %10, %6
;; Return the accumulated sum (previously %5, which left %7..%11 unused).
ret i32 %11
}
;; A case like this could occur if a function with the sreg intrinsic was
;; inlined into a kernel where the tid metadata is present. Ensure the range is
;; updated: the call below already carries range(i32 0, 1024), and this kernel
;; is annotated with maxntidx = 4 (node !2 of !nvvm.annotations), so the
;; expected output narrows the attribute to range(i32 0, 4).
define i32 @test_inlined() {
; CHECK-LABEL: define i32 @test_inlined(
; CHECK-SAME: ) #[[ATTR0]] {
; CHECK-NEXT: [[TMP1:%.*]] = call range(i32 0, 4) i32 @llvm.nvvm.read.ptx.sreg.tid.x()
; CHECK-NEXT: ret i32 [[TMP1]]
;
%1 = call range(i32 0, 1024) i32 @llvm.nvvm.read.ptx.sreg.tid.x()
ret i32 %1
}
;; Intrinsics exercised by the tests above: the tid.{x,y,z} and ntid.{x,y,z}
;; sreg readers. The declarations carry no range information; the expected
;; range attributes appear on the individual call sites.
declare i32 @llvm.nvvm.read.ptx.sreg.tid.x()
declare i32 @llvm.nvvm.read.ptx.sreg.tid.y()
declare i32 @llvm.nvvm.read.ptx.sreg.tid.z()
declare i32 @llvm.nvvm.read.ptx.sreg.ntid.x()
declare i32 @llvm.nvvm.read.ptx.sreg.ntid.y()
declare i32 @llvm.nvvm.read.ptx.sreg.ntid.z()
;; Per-function annotations, one node per test. Each node names the function,
;; tags it with "kernel" = 1, and supplies the maxntid*/reqntid* values from
;; which the expected ranges in that test are derived:
;;   !0 -> @test_maxntid: maxntidx = 32, maxntidz = 3
;;   !1 -> @test_reqntid: reqntidx = 20
;;   !2 -> @test_inlined: maxntidx = 4
!nvvm.annotations = !{!0, !1, !2}
!0 = !{ptr @test_maxntid, !"kernel", i32 1, !"maxntidx", i32 32, !"maxntidz", i32 3}
!1 = !{ptr @test_reqntid, !"kernel", i32 1, !"reqntidx", i32 20}
!2 = !{ptr @test_inlined, !"kernel", i32 1, !"maxntidx", i32 4}