[AMDGPU] Fix GCNSubtarget::getMinNumVGPRs, add unit test to check consistency between GCNSubtarget's getMinNumVGPRs, getMaxNumVGPRs and getOccupancyWithNumVGPRs.

```
  /// \returns Minimum number of VGPRs that meets given number of waves per
  /// execution unit requirement supported by the subtarget.
  unsigned getMinNumVGPRs(unsigned WavesPerEU) const;

  /// \returns Maximum number of VGPRs that meets given number of waves per
  /// execution unit requirement supported by the subtarget.
  unsigned getMaxNumVGPRs(unsigned WavesPerEU) const;

  /// Return the maximum number of waves per SIMD for kernels using \p VGPRs
  /// VGPRs
  unsigned getOccupancyWithNumVGPRs(unsigned VGPRs) const;
```

While working on RP tracking issues I noticed that getMinNumVGPRs return incorrect
values: the problem is large VGPR granule sizes on GFX10+ architectures. Some of the
occupancies aren't reachable because require the same amount of VGPR granules as others.
For example 19 waves occupancy on gfx1010 require the same amount of granules as 20 waves
so the resultng occupancy would be 20.

SGPRs have the same issue and even have inconsistency between getMaxNumSGPRs and getOccupancyWithNumSGPRs.
It will be addressed in the next patch.

Legend:
  # MinVGPR and MaxVGPR are values returned by getMinNumVGPRs and getMaxNumVGPRs for a given Occ.
  # (ONumber) is the value returned by getOccupancyWithNumVGPRs for a given MinVGPR or MaxVGPR.
  # R means range problem: MinVGPR should be less than MaxVGPR and both should refer to the same occupancy.

Unit test output without the fix:
```
./build/unittests/Target/AMDGPU/AMDGPUTests --gtest_filter=AMDGPU.TestVGPRLimitsPerOccupancy --print-cpu-reg-limits

 gfx90a gfx940:
Occ    MinVGPR        MaxVGPR
  8        0 (O8)     64  (O8)
  7       65 (O7)     72  (O7)
  6       73 (O6)     80  (O6)
  5       81 (O5)     96  (O5)
  4       97 (O4)     128 (O4)
  3      129 (O3)     168 (O3)
  2      169 (O2)     256 (O2)
  1      257 (O1)     512 (O1)

 gfx600 gfx600 gfx601 gfx601 gfx601 gfx602 gfx602 gfx602 gfx700 gfx700 gfx701 gfx701 gfx702 gfx703 gfx703 gfx703 gfx704 gfx704 gfx705 gfx801 gfx801 gfx802 gfx802 gfx802 gfx803 gfx803 gfx803 gfx803 gfx805 gfx805 gfx810 gfx810 gfx900 gfx902 gfx904 gfx906 gfx908 gfx909 gfx90c:
Occ    MinVGPR        MaxVGPR
 10        0 (O10)    24  (O10)
  9       25 (O9)     28  (O9)
  8       29 (O8)     32  (O8)
  7       33 (O7)     36  (O7)
  6       37 (O6)     40  (O6)
  5       41 (O5)     48  (O5)
  4       49 (O4)     64  (O4)
  3       65 (O3)     84  (O3)
  2       85 (O2)     128 (O2)
  1      129 (O1)     256 (O1)

 gfx1030w64 gfx1031w64 gfx1032w64 gfx1033w64 gfx1034w64 gfx1035w64 gfx1036w64 gfx1102w64 gfx1103w64:
Occ    MinVGPR        MaxVGPR
 16        0 (O16)    32  (O16)
 15       33 (O12) R  32  (O16)
 14       33 (O12) R  32  (O16)
 13       33 (O12) R  32  (O16)
 12       33 (O12)    40  (O12)
 11       41 (O10) R  40  (O12)
 10       41 (O10)    48  (O10)
  9       49 (O9)     56  (O9)
  8       57 (O8)     64  (O8)
  7       65 (O7)     72  (O7)
  6       73 (O6)     80  (O6)
  5       81 (O5)     96  (O5)
  4       97 (O4)     128 (O4)
  3      129 (O3)     168 (O3)
  2      169 (O2)     256 (O2)
  1      256 (O2) R   256 (O2)

 gfx1100w64 gfx1101w64:
Occ    MinVGPR        MaxVGPR
 16        0 (O16)    48  (O16)
 15       49 (O12) R  48  (O16)
 14       49 (O12) R  48  (O16)
 13       49 (O12) R  48  (O16)
 12       49 (O12)    60  (O12)
 11       61 (O10) R  60  (O12)
 10       61 (O10)    72  (O10)
  9       73 (O9)     84  (O9)
  8       85 (O8)     96  (O8)
  7       97 (O7)     108 (O7)
  6      109 (O6)     120 (O6)
  5      121 (O5)     144 (O5)
  4      145 (O4)     192 (O4)
  3      193 (O3)     252 (O3)
  2      253 (O2)     256 (O2)
  1      256 (O2) R   256 (O2)

 gfx1030w32 gfx1031w32 gfx1032w32 gfx1033w32 gfx1034w32 gfx1035w32 gfx1036w32 gfx1102w32 gfx1103w32:
Occ    MinVGPR        MaxVGPR
 16        0 (O16)    64  (O16)
 15       65 (O12) R  64  (O16)
 14       65 (O12) R  64  (O16)
 13       65 (O12) R  64  (O16)
 12       65 (O12)    80  (O12)
 11       81 (O10) R  80  (O12)
 10       81 (O10)    96  (O10)
  9       97 (O9)     112 (O9)
  8      113 (O8)     128 (O8)
  7      129 (O7)     144 (O7)
  6      145 (O6)     160 (O6)
  5      161 (O5)     192 (O5)
  4      193 (O4)     256 (O4)
  3      256 (O4) R   256 (O4)
  2      256 (O4) R   256 (O4)
  1      256 (O4) R   256 (O4)

 gfx1100w32 gfx1101w32:
Occ    MinVGPR        MaxVGPR
 16        0 (O16)    96  (O16)
 15       97 (O12) R  96  (O16)
 14       97 (O12) R  96  (O16)
 13       97 (O12) R  96  (O16)
 12       97 (O12)    120 (O12)
 11      121 (O10) R  120 (O12)
 10      121 (O10)    144 (O10)
  9      145 (O9)     168 (O9)
  8      169 (O8)     192 (O8)
  7      193 (O7)     216 (O7)
  6      217 (O6)     240 (O6)
  5      241 (O5)     256 (O5)
  4      256 (O5) R   256 (O5)
  3      256 (O5) R   256 (O5)
  2      256 (O5) R   256 (O5)
  1      256 (O5) R   256 (O5)

 gfx1010w64 gfx1011w64 gfx1012w64 gfx1013w64:
Occ    MinVGPR        MaxVGPR
 20        0 (O20)    24  (O20)
 19       25 (O18) R  24  (O20)
 18       25 (O18)    28  (O18)
 17       29 (O16) R  28  (O18)
 16       29 (O16)    32  (O16)
 15       33 (O14) R  32  (O16)
 14       33 (O14)    36  (O14)
 13       37 (O12) R  36  (O14)
 12       37 (O12)    40  (O12)
 11       41 (O11)    44  (O11)
 10       45 (O10)    48  (O10)
  9       49 (O9)     56  (O9)
  8       57 (O8)     64  (O8)
  7       65 (O7)     72  (O7)
  6       73 (O6)     84  (O6)
  5       85 (O5)     100 (O5)
  4      101 (O4)     128 (O4)
  3      129 (O3)     168 (O3)
  2      169 (O2)     256 (O2)
  1      256 (O2) R   256 (O2)

 gfx1010w32 gfx1011w32 gfx1012w32 gfx1013w32:
Occ    MinVGPR        MaxVGPR
 20        0 (O20)    48  (O20)
 19       49 (O18) R  48  (O20)
 18       49 (O18)    56  (O18)
 17       57 (O16) R  56  (O18)
 16       57 (O16)    64  (O16)
 15       65 (O14) R  64  (O16)
 14       65 (O14)    72  (O14)
 13       73 (O12) R  72  (O14)
 12       73 (O12)    80  (O12)
 11       81 (O11)    88  (O11)
 10       89 (O10)    96  (O10)
  9       97 (O9)     112 (O9)
  8      113 (O8)     128 (O8)
  7      129 (O7)     144 (O7)
  6      145 (O6)     168 (O6)
  5      169 (O5)     200 (O5)
  4      201 (O4)     256 (O4)
  3      256 (O4) R   256 (O4)
  2      256 (O4) R   256 (O4)
  1      256 (O4) R   256 (O4)
```

After the fix:
```
 gfx90a gfx940:
Occ    MinVGPR        MaxVGPR
  8        0 (O8)     64  (O8)
  7       65 (O7)     72  (O7)
  6       73 (O6)     80  (O6)
  5       81 (O5)     96  (O5)
  4       97 (O4)     128 (O4)
  3      129 (O3)     168 (O3)
  2      169 (O2)     256 (O2)
  1      257 (O1)     512 (O1)

 gfx600 gfx600 gfx601 gfx601 gfx601 gfx602 gfx602 gfx602 gfx700 gfx700 gfx701 gfx701 gfx702 gfx703 gfx703 gfx703 gfx704 gfx704 gfx705 gfx801 gfx801 gfx802 gfx802 gfx802 gfx803 gfx803 gfx803 gfx803 gfx805 gfx805 gfx810 gfx810 gfx900 gfx902 gfx904 gfx906 gfx908 gfx909 gfx90c:
Occ    MinVGPR        MaxVGPR
 10        0 (O10)    24  (O10)
  9       25 (O9)     28  (O9)
  8       29 (O8)     32  (O8)
  7       33 (O7)     36  (O7)
  6       37 (O6)     40  (O6)
  5       41 (O5)     48  (O5)
  4       49 (O4)     64  (O4)
  3       65 (O3)     84  (O3)
  2       85 (O2)     128 (O2)
  1      129 (O1)     256 (O1)

 gfx1030w64 gfx1031w64 gfx1032w64 gfx1033w64 gfx1034w64 gfx1035w64 gfx1036w64 gfx1102w64 gfx1103w64:
Occ    MinVGPR        MaxVGPR
 16        0 (O16)    32  (O16)
 15        0 (O16)    32  (O16)
 14        0 (O16)    32  (O16)
 13        0 (O16)    32  (O16)
 12       33 (O12)    40  (O12)
 11       33 (O12)    40  (O12)
 10       41 (O10)    48  (O10)
  9       49 (O9)     56  (O9)
  8       57 (O8)     64  (O8)
  7       65 (O7)     72  (O7)
  6       73 (O6)     80  (O6)
  5       81 (O5)     96  (O5)
  4       97 (O4)     128 (O4)
  3      129 (O3)     168 (O3)
  2      169 (O2)     256 (O2)
  1      169 (O2)     256 (O2)

 gfx1100w64 gfx1101w64:
Occ    MinVGPR        MaxVGPR
 16        0 (O16)    48  (O16)
 15        0 (O16)    48  (O16)
 14        0 (O16)    48  (O16)
 13        0 (O16)    48  (O16)
 12       49 (O12)    60  (O12)
 11       49 (O12)    60  (O12)
 10       61 (O10)    72  (O10)
  9       73 (O9)     84  (O9)
  8       85 (O8)     96  (O8)
  7       97 (O7)     108 (O7)
  6      109 (O6)     120 (O6)
  5      121 (O5)     144 (O5)
  4      145 (O4)     192 (O4)
  3      193 (O3)     252 (O3)
  2      253 (O2)     256 (O2)
  1      253 (O2)     256 (O2)

 gfx1030w32 gfx1031w32 gfx1032w32 gfx1033w32 gfx1034w32 gfx1035w32 gfx1036w32 gfx1102w32 gfx1103w32:
Occ    MinVGPR        MaxVGPR
 16        0 (O16)    64  (O16)
 15        0 (O16)    64  (O16)
 14        0 (O16)    64  (O16)
 13        0 (O16)    64  (O16)
 12       65 (O12)    80  (O12)
 11       65 (O12)    80  (O12)
 10       81 (O10)    96  (O10)
  9       97 (O9)     112 (O9)
  8      113 (O8)     128 (O8)
  7      129 (O7)     144 (O7)
  6      145 (O6)     160 (O6)
  5      161 (O5)     192 (O5)
  4      193 (O4)     256 (O4)
  3      193 (O4)     256 (O4)
  2      193 (O4)     256 (O4)
  1      193 (O4)     256 (O4)

 gfx1100w32 gfx1101w32:
Occ    MinVGPR        MaxVGPR
 16        0 (O16)    96  (O16)
 15        0 (O16)    96  (O16)
 14        0 (O16)    96  (O16)
 13        0 (O16)    96  (O16)
 12       97 (O12)    120 (O12)
 11       97 (O12)    120 (O12)
 10      121 (O10)    144 (O10)
  9      145 (O9)     168 (O9)
  8      169 (O8)     192 (O8)
  7      193 (O7)     216 (O7)
  6      217 (O6)     240 (O6)
  5      241 (O5)     256 (O5)
  4      241 (O5)     256 (O5)
  3      241 (O5)     256 (O5)
  2      241 (O5)     256 (O5)
  1      241 (O5)     256 (O5)

 gfx1010w64 gfx1011w64 gfx1012w64 gfx1013w64:
Occ    MinVGPR        MaxVGPR
 20        0 (O20)    24  (O20)
 19        0 (O20)    24  (O20)
 18       25 (O18)    28  (O18)
 17       25 (O18)    28  (O18)
 16       29 (O16)    32  (O16)
 15       29 (O16)    32  (O16)
 14       33 (O14)    36  (O14)
 13       33 (O14)    36  (O14)
 12       37 (O12)    40  (O12)
 11       41 (O11)    44  (O11)
 10       45 (O10)    48  (O10)
  9       49 (O9)     56  (O9)
  8       57 (O8)     64  (O8)
  7       65 (O7)     72  (O7)
  6       73 (O6)     84  (O6)
  5       85 (O5)     100 (O5)
  4      101 (O4)     128 (O4)
  3      129 (O3)     168 (O3)
  2      169 (O2)     256 (O2)
  1      169 (O2)     256 (O2)

 gfx1010w32 gfx1011w32 gfx1012w32 gfx1013w32:
Occ    MinVGPR        MaxVGPR
 20        0 (O20)    48  (O20)
 19        0 (O20)    48  (O20)
 18       49 (O18)    56  (O18)
 17       49 (O18)    56  (O18)
 16       57 (O16)    64  (O16)
 15       57 (O16)    64  (O16)
 14       65 (O14)    72  (O14)
 13       65 (O14)    72  (O14)
 12       73 (O12)    80  (O12)
 11       81 (O11)    88  (O11)
 10       89 (O10)    96  (O10)
  9       97 (O9)     112 (O9)
  8      113 (O8)     128 (O8)
  7      129 (O7)     144 (O7)
  6      145 (O6)     168 (O6)
  5      169 (O5)     200 (O5)
  4      201 (O4)     256 (O4)
  3      201 (O4)     256 (O4)
  2      201 (O4)     256 (O4)
  1      201 (O4)     256 (O4)
```

Reviewed By: #amdgpu, arsenm

Differential Revision: https://reviews.llvm.org/D138443
This commit is contained in:
Valery Pykhtin
2022-11-21 17:15:32 +01:00
parent e823abab48
commit d09d834bb9
10 changed files with 232 additions and 67 deletions

View File

@@ -625,13 +625,8 @@ unsigned GCNSubtarget::getOccupancyWithNumSGPRs(unsigned SGPRs) const {
return 5;
}
unsigned GCNSubtarget::getOccupancyWithNumVGPRs(unsigned VGPRs) const {
unsigned MaxWaves = getMaxWavesPerEU();
unsigned Granule = getVGPRAllocGranule();
if (VGPRs < Granule)
return MaxWaves;
unsigned RoundedRegs = ((VGPRs + Granule - 1) / Granule) * Granule;
return std::min(std::max(getTotalNumVGPRs() / RoundedRegs, 1u), MaxWaves);
unsigned GCNSubtarget::getOccupancyWithNumVGPRs(unsigned NumVGPRs) const {
return AMDGPU::IsaInfo::getNumWavesPerEUWithNumVGPRs(this, NumVGPRs);
}
unsigned

View File

@@ -1218,14 +1218,14 @@ public:
return AMDGPU::IsaInfo::getAddressableNumVGPRs(this);
}
/// \returns Minimum number of VGPRs that meets given number of waves per
/// execution unit requirement supported by the subtarget.
/// \returns the minimum number of VGPRs that will prevent achieving more than
/// the specified number of waves \p WavesPerEU.
unsigned getMinNumVGPRs(unsigned WavesPerEU) const {
return AMDGPU::IsaInfo::getMinNumVGPRs(this, WavesPerEU);
}
/// \returns Maximum number of VGPRs that meets given number of waves per
/// execution unit requirement supported by the subtarget.
/// \returns the maximum number of VGPRs that can be used and still achieved
/// at least the specified number of waves \p WavesPerEU.
unsigned getMaxNumVGPRs(unsigned WavesPerEU) const {
return AMDGPU::IsaInfo::getMaxNumVGPRs(this, WavesPerEU);
}

View File

@@ -1018,15 +1018,38 @@ unsigned getAddressableNumVGPRs(const MCSubtargetInfo *STI) {
return 256;
}
unsigned getNumWavesPerEUWithNumVGPRs(const MCSubtargetInfo *STI,
unsigned NumVGPRs) {
unsigned MaxWaves = getMaxWavesPerEU(STI);
unsigned Granule = getVGPRAllocGranule(STI);
if (NumVGPRs < Granule)
return MaxWaves;
unsigned RoundedRegs = alignTo(NumVGPRs, Granule);
return std::min(std::max(getTotalNumVGPRs(STI) / RoundedRegs, 1u), MaxWaves);
}
unsigned getMinNumVGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU) {
assert(WavesPerEU != 0);
if (WavesPerEU >= getMaxWavesPerEU(STI))
unsigned MaxWavesPerEU = getMaxWavesPerEU(STI);
if (WavesPerEU >= MaxWavesPerEU)
return 0;
unsigned MinNumVGPRs =
alignDown(getTotalNumVGPRs(STI) / (WavesPerEU + 1),
getVGPRAllocGranule(STI)) + 1;
return std::min(MinNumVGPRs, getAddressableNumVGPRs(STI));
unsigned TotNumVGPRs = getTotalNumVGPRs(STI);
unsigned AddrsableNumVGPRs = getAddressableNumVGPRs(STI);
unsigned Granule = getVGPRAllocGranule(STI);
unsigned MaxNumVGPRs = alignDown(TotNumVGPRs / WavesPerEU, Granule);
if (MaxNumVGPRs == alignDown(TotNumVGPRs / MaxWavesPerEU, Granule))
return 0;
unsigned MinWavesPerEU = getNumWavesPerEUWithNumVGPRs(STI, AddrsableNumVGPRs);
if (WavesPerEU < MinWavesPerEU)
return getMinNumVGPRs(STI, MinWavesPerEU);
unsigned MaxNumVGPRsNext = alignDown(TotNumVGPRs / (WavesPerEU + 1), Granule);
unsigned MinNumVGPRs = 1 + std::min(MaxNumVGPRs - Granule, MaxNumVGPRsNext);
return std::min(MinNumVGPRs, AddrsableNumVGPRs);
}
unsigned getMaxNumVGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU) {

View File

@@ -288,6 +288,11 @@ unsigned getMinNumVGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU);
/// execution unit requirement for given subtarget \p STI.
unsigned getMaxNumVGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU);
/// \returns Number of waves reachable for a given \p NumVGPRs usage for given
/// subtarget \p STI.
unsigned getNumWavesPerEUWithNumVGPRs(const MCSubtargetInfo *STI,
unsigned NumVGPRs);
/// \returns Number of VGPR blocks needed for given subtarget \p STI when
/// \p NumVGPRs are used.
///

View File

@@ -40,7 +40,7 @@ define amdgpu_kernel void @limited_occupancy_18() #1 {
; GCN-LABEL: {{^}}limited_occupancy_19:
; GFX9: ; Occupancy: 10
; GFX1010: ; Occupancy: 18
; GFX1010: ; Occupancy: 20
; GFX1030: ; Occupancy: 16
; GFX1100: ; Occupancy: 16
define amdgpu_kernel void @limited_occupancy_19() #2 {

View File

@@ -0,0 +1,157 @@
//===--------- llvm/unittests/Target/AMDGPU/AMDGPUUnitTests.cpp -----------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#include "AMDGPUUnitTests.h"
#include "AMDGPUTargetMachine.h"
#include "GCNSubtarget.h"
#include "llvm/MC/TargetRegistry.h"
#include "llvm/Support/TargetParser.h"
#include "llvm/Support/TargetSelect.h"
#include "gtest/gtest.h"
#include "AMDGPUGenSubtargetInfo.inc"
using namespace llvm;
std::once_flag flag;
void InitializeAMDGPUTarget() {
std::call_once(flag, []() {
LLVMInitializeAMDGPUTargetInfo();
LLVMInitializeAMDGPUTarget();
LLVMInitializeAMDGPUTargetMC();
});
}
std::unique_ptr<const GCNTargetMachine>
llvm::createAMDGPUTargetMachine(std::string TStr, StringRef CPU, StringRef FS) {
InitializeAMDGPUTarget();
std::string Error;
const Target *T = TargetRegistry::lookupTarget(TStr, Error);
if (!T)
return nullptr;
TargetOptions Options;
return std::unique_ptr<GCNTargetMachine>(
static_cast<GCNTargetMachine *>(T->createTargetMachine(
TStr, CPU, FS, Options, std::nullopt, std::nullopt)));
}
static cl::opt<bool> PrintCpuRegLimits(
"print-cpu-reg-limits", cl::NotHidden, cl::init(false),
cl::desc("force printing per AMDGPU CPU register limits"));
static bool checkMinMax(std::stringstream &OS, unsigned Occ, unsigned MinOcc,
unsigned MaxOcc,
std::function<unsigned(unsigned)> GetOcc,
std::function<unsigned(unsigned)> GetMinGPRs,
std::function<unsigned(unsigned)> GetMaxGPRs) {
bool MinValid = true, MaxValid = true, RangeValid = true;
unsigned MinGPRs = GetMinGPRs(Occ);
unsigned MaxGPRs = GetMaxGPRs(Occ);
unsigned RealOcc;
if (MinGPRs >= MaxGPRs)
RangeValid = false;
else {
RealOcc = GetOcc(MinGPRs);
for (unsigned NumRegs = MinGPRs + 1; NumRegs <= MaxGPRs; ++NumRegs) {
if (RealOcc != GetOcc(NumRegs)) {
RangeValid = false;
break;
}
}
}
if (RangeValid && RealOcc > MinOcc && RealOcc <= MaxOcc) {
if (MinGPRs > 0 && GetOcc(MinGPRs - 1) <= RealOcc)
MinValid = false;
if (GetOcc(MaxGPRs + 1) >= RealOcc)
MaxValid = false;
}
std::stringstream MinStr;
MinStr << (MinValid ? ' ' : '<') << ' ' << std::setw(3) << MinGPRs << " (O"
<< GetOcc(MinGPRs) << ") " << (RangeValid ? ' ' : 'R');
OS << std::left << std::setw(15) << MinStr.str() << std::setw(3) << MaxGPRs
<< " (O" << GetOcc(MaxGPRs) << ')' << (MaxValid ? "" : " >");
return MinValid && MaxValid && RangeValid;
}
static const std::pair<StringRef, StringRef>
EmptyFS = {"", ""},
W32FS = {"+wavefrontsize32", "w32"},
W64FS = {"+wavefrontsize64", "w64"};
static void testGPRLimits(
const char *RegName, bool TestW32W64,
std::function<bool(std::stringstream &, unsigned, GCNSubtarget &)> test) {
SmallVector<StringRef> CPUs;
AMDGPU::fillValidArchListAMDGCN(CPUs);
std::map<std::string, SmallVector<std::string>> TablePerCPUs;
for (auto CPUName : CPUs) {
auto CanonCPUName =
AMDGPU::getArchNameAMDGCN(AMDGPU::parseArchAMDGCN(CPUName));
auto *FS = &EmptyFS;
while (true) {
auto TM = createAMDGPUTargetMachine("amdgcn-amd-", CPUName, FS->first);
if (!TM)
break;
GCNSubtarget ST(TM->getTargetTriple(), std::string(TM->getTargetCPU()),
std::string(TM->getTargetFeatureString()), *TM);
if (TestW32W64 &&
ST.getFeatureBits().test(AMDGPU::FeatureWavefrontSize32))
FS = &W32FS;
std::stringstream Table;
bool Success = true;
unsigned MaxOcc = ST.getMaxWavesPerEU();
for (unsigned Occ = MaxOcc; Occ > 0; --Occ) {
Table << std::right << std::setw(3) << Occ << " ";
Success = test(Table, Occ, ST) && Success;
Table << '\n';
}
if (!Success || PrintCpuRegLimits)
TablePerCPUs[Table.str()].push_back((CanonCPUName + FS->second).str());
if (FS != &W32FS)
break;
FS = &W64FS;
}
}
std::stringstream OS;
for (auto &P : TablePerCPUs) {
for (auto &CPUName : P.second)
OS << ' ' << CPUName;
OS << ":\nOcc Min" << RegName << " Max" << RegName << '\n'
<< P.first << '\n';
}
auto ErrStr = OS.str();
EXPECT_TRUE(ErrStr.empty()) << ErrStr;
}
TEST(AMDGPU, TestVGPRLimitsPerOccupancy) {
testGPRLimits("VGPR", true, [](std::stringstream &OS, unsigned Occ,
GCNSubtarget &ST) {
unsigned MaxVGPRNum = ST.getAddressableNumVGPRs();
return checkMinMax(
OS, Occ, ST.getOccupancyWithNumVGPRs(MaxVGPRNum), ST.getMaxWavesPerEU(),
[&](unsigned NumGPRs) { return ST.getOccupancyWithNumVGPRs(NumGPRs); },
[&](unsigned Occ) { return ST.getMinNumVGPRs(Occ); },
[&](unsigned Occ) { return ST.getMaxNumVGPRs(Occ); });
});
}

View File

@@ -0,0 +1,25 @@
//===---------- llvm/unittests/Target/AMDGPU/AMDGPUUnitTests.h ------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#ifndef LLVM_UNITTESTS_TARGET_AMDGPU_AMDGPUUNITTESTS_H
#define LLVM_UNITTESTS_TARGET_AMDGPU_AMDGPUUNITTESTS_H
#include <memory>
#include <string>
namespace llvm {
class GCNTargetMachine;
class StringRef;
std::unique_ptr<const GCNTargetMachine>
createAMDGPUTargetMachine(std::string TStr, StringRef CPU, StringRef FS);
} // end namespace llvm
#endif // LLVM_UNITTESTS_TARGET_AMDGPU_AMDGPUUNITTESTS_H

View File

@@ -7,6 +7,7 @@ set(LLVM_LINK_COMPONENTS
AMDGPUCodeGen
AMDGPUDesc
AMDGPUInfo
AMDGPUUtils
CodeGen
Core
MC
@@ -14,6 +15,7 @@ set(LLVM_LINK_COMPONENTS
)
add_llvm_target_unittest(AMDGPUTests
AMDGPUUnitTests.cpp
DwarfRegMappings.cpp
ExecMayBeModifiedBeforeAnyUse.cpp
)

View File

@@ -6,47 +6,16 @@
//
//===----------------------------------------------------------------------===//
#include "AMDGPUSubtarget.h"
#include "AMDGPUTargetMachine.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/MC/MCTargetOptions.h"
#include "llvm/MC/TargetRegistry.h"
#include "llvm/Support/TargetSelect.h"
#include "llvm/Target/TargetMachine.h"
#include "AMDGPUUnitTests.h"
#include "gtest/gtest.h"
#include <thread>
using namespace llvm;
std::once_flag flag;
void InitializeAMDGPUTarget() {
std::call_once(flag, []() {
LLVMInitializeAMDGPUTargetInfo();
LLVMInitializeAMDGPUTarget();
LLVMInitializeAMDGPUTargetMC();
});
}
std::unique_ptr<const GCNTargetMachine>
createTargetMachine(std::string TStr, StringRef CPU, StringRef FS) {
InitializeAMDGPUTarget();
std::string Error;
const Target *T = TargetRegistry::lookupTarget(TStr, Error);
if (!T)
return nullptr;
TargetOptions Options;
return std::unique_ptr<GCNTargetMachine>(
static_cast<GCNTargetMachine *>(T->createTargetMachine(
TStr, CPU, FS, Options, std::nullopt, std::nullopt)));
}
TEST(AMDGPUDwarfRegMappingTests, TestWave64DwarfRegMapping) {
TEST(AMDGPU, TestWave64DwarfRegMapping) {
for (auto Triple :
{"amdgcn-amd-", "amdgcn-amd-amdhsa", "amdgcn-amd-amdpal"}) {
auto TM = createTargetMachine(Triple, "gfx1010", "+wavefrontsize64");
auto TM = createAMDGPUTargetMachine(Triple, "gfx1010", "+wavefrontsize64");
if (TM) {
GCNSubtarget ST(TM->getTargetTriple(), std::string(TM->getTargetCPU()),
std::string(TM->getTargetFeatureString()), *TM);
@@ -66,10 +35,10 @@ TEST(AMDGPUDwarfRegMappingTests, TestWave64DwarfRegMapping) {
}
}
TEST(AMDGPUDwarfRegMappingTests, TestWave32DwarfRegMapping) {
TEST(AMDGPU, TestWave32DwarfRegMapping) {
for (auto Triple :
{"amdgcn-amd-", "amdgcn-amd-amdhsa", "amdgcn-amd-amdpal"}) {
auto TM = createTargetMachine(Triple, "gfx1010", "+wavefrontsize32");
auto TM = createAMDGPUTargetMachine(Triple, "gfx1010", "+wavefrontsize32");
if (TM) {
GCNSubtarget ST(TM->getTargetTriple(), std::string(TM->getTargetCPU()),
std::string(TM->getTargetFeatureString()), *TM);

View File

@@ -7,25 +7,14 @@
//===----------------------------------------------------------------------===//
#include "AMDGPUTargetMachine.h"
#include "GCNSubtarget.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "AMDGPUUnitTests.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/MC/MCTargetOptions.h"
#include "llvm/MC/TargetRegistry.h"
#include "llvm/Support/TargetSelect.h"
#include "llvm/Target/TargetMachine.h"
#include "gtest/gtest.h"
#include <thread>
using namespace llvm;
// implementation is in the llvm/unittests/Target/AMDGPU/DwarfRegMappings.cpp
std::unique_ptr<const GCNTargetMachine>
createTargetMachine(std::string TStr, StringRef CPU, StringRef FS);
TEST(AMDGPUExecMayBeModifiedBeforeAnyUse, TheTest) {
auto TM = createTargetMachine("amdgcn-amd-", "gfx906", "");
TEST(AMDGPU, ExecMayBeModifiedBeforeAnyUse) {
auto TM = createAMDGPUTargetMachine("amdgcn-amd-", "gfx906", "");
if (!TM)
return;