Files
clang-p2996/llvm/test/CodeGen/AMDGPU/attr-amdgpu-num-workgroups.ll
Jun Wang c4e517f59c [AMDGPU] Adding the amdgpu_num_work_groups function attribute (#79035)
A new function attribute named amdgpu_num_work_groups is added. This
attribute, which consists of three integers, allows programmers to let
the compiler know the number of workgroups to be launched in each of the
three dimensions and do optimizations based on that information.

---------

Co-authored-by: Jun Wang <jun.wang7@amd.com>
2024-03-12 10:30:39 -07:00

85 lines
2.6 KiB
LLVM

; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck %s
; Attribute not specified.
; CHECK-LABEL: {{^}}empty_no_attribute:
define amdgpu_kernel void @empty_no_attribute() {
entry:
ret void
}
; Ignore if number of work groups for x dimension is 0.
; CHECK-LABEL: {{^}}empty_max_num_workgroups_x0:
define amdgpu_kernel void @empty_max_num_workgroups_x0() #0 {
entry:
ret void
}
attributes #0 = {"amdgpu-max-num-workgroups"="0,2,3"}
; Ignore if number of work groups for y dimension is 0.
; CHECK-LABEL: {{^}}empty_max_num_workgroups_y0:
define amdgpu_kernel void @empty_max_num_workgroups_y0() #1 {
entry:
ret void
}
attributes #1 = {"amdgpu-max-num-workgroups"="1,0,3"}
; Ignore if number of work groups for z dimension is 0.
; CHECK-LABEL: {{^}}empty_max_num_workgroups_z0:
define amdgpu_kernel void @empty_max_num_workgroups_z0() #2 {
entry:
ret void
}
attributes #2 = {"amdgpu-max-num-workgroups"="1,2,0"}
; CHECK-LABEL: {{^}}empty_max_num_workgroups_1_2_3:
define amdgpu_kernel void @empty_max_num_workgroups_1_2_3() #3 {
entry:
ret void
}
attributes #3 = {"amdgpu-max-num-workgroups"="1,2,3"}
; CHECK-LABEL: {{^}}empty_max_num_workgroups_1024_1024_1024:
define amdgpu_kernel void @empty_max_num_workgroups_1024_1024_1024() #4 {
entry:
ret void
}
attributes #4 = {"amdgpu-max-num-workgroups"="1024,1024,1024"}
; CHECK: .amdgpu_metadata
; CHECK: - .args:
; CHECK: .max_flat_workgroup_size: 1024
; CHECK-NEXT: .name: empty_no_attribute
; CHECK-NEXT: .private_segment_fixed_size: 0
; CHECK: - .args:
; CHECK: .max_flat_workgroup_size: 1024
; CHECK-NEXT: .name: empty_max_num_workgroups_x0
; CHECK-NEXT: .private_segment_fixed_size: 0
; CHECK: - .args:
; CHECK: .max_flat_workgroup_size: 1024
; CHECK-NEXT: .name: empty_max_num_workgroups_y0
; CHECK-NEXT: .private_segment_fixed_size: 0
; CHECK: - .args:
; CHECK: .max_flat_workgroup_size: 1024
; CHECK-NEXT: .name: empty_max_num_workgroups_z0
; CHECK-NEXT: .private_segment_fixed_size: 0
; CHECK: - .args:
; CHECK: .max_flat_workgroup_size: 1024
; CHECK-NEXT: .max_num_workgroups_x: 1
; CHECK-NEXT: .max_num_workgroups_y: 2
; CHECK-NEXT: .max_num_workgroups_z: 3
; CHECK-NEXT: .name: empty_max_num_workgroups_1_2_3
; CHECK-NEXT: .private_segment_fixed_size: 0
; CHECK: - .args:
; CHECK: .max_flat_workgroup_size: 1024
; CHECK-NEXT: .max_num_workgroups_x: 1024
; CHECK-NEXT: .max_num_workgroups_y: 1024
; CHECK-NEXT: .max_num_workgroups_z: 1024
; CHECK-NEXT: .name: empty_max_num_workgroups_1024_1024_1024
; CHECK-NEXT: .private_segment_fixed_size: 0