AMDGPU/GlobalISel: Add skeletons for new register bank select passes (#112862)

New register bank select for AMDGPU will be split in two passes:
- AMDGPURegBankSelect: select banks based on machine uniformity analysis
- AMDGPURegBankLegalize: lower instructions that can't be inst-selected
  with register banks assigned by AMDGPURegBankSelect.
AMDGPURegBankLegalize is similar to the legalizer, but with the context of
uniformity analysis. It does not change already assigned banks.
Main goal of AMDGPURegBankLegalize is to provide high level table-like
overview of how to lower generic instructions based on available target
features and uniformity info (uniform vs divergent).
See RegBankLegalizeRules.

Summary of new features:
At the moment, register bank select assigns a register bank to the output
register using a simple algorithm:
- if one of the inputs is vgpr, the output is vgpr
- if all inputs are sgpr, the output is sgpr.
When a function does not contain divergent control flow, propagating
register banks like this works. In general, the first point is still correct,
but the second is not when the function contains divergent control flow.
Examples:
- Phi with uniform inputs that go through divergent branch
- Instruction with temporal divergent use.
To fix this AMDGPURegBankSelect will use machine uniformity analysis
to assign vgpr to each divergent and sgpr to each uniform instruction.
But some instructions are only available on VALU (for example floating
point instructions before gfx1150) and we need to assign vgpr to them.
Since we are no longer propagating register banks we need to ensure that
uniform instructions get their inputs in sgpr in some way.
In AMDGPURegBankLegalize, uniform instructions that are only available on
VALU will be reassigned to vgpr on all operands, and the vgpr output will be
copied back to the original sgpr output using read-any-lane.
This commit is contained in:
Petar Avramovic
2024-12-03 22:02:00 +01:00
committed by GitHub
parent 9bf6365237
commit fef54d0393
11 changed files with 4048 additions and 2 deletions

View File

@@ -29,6 +29,9 @@ void initializeAMDGPUPostLegalizerCombinerPass(PassRegistry &);
// Combiner pass factories; the flag is named IsOptNone.
FunctionPass *createAMDGPUPostLegalizeCombiner(bool IsOptNone);
FunctionPass *createAMDGPURegBankCombiner(bool IsOptNone);
void initializeAMDGPURegBankCombinerPass(PassRegistry &);
// Argument-less factories for the divergence lowering pass and for the two
// new register bank passes (bank selection and bank legalization).
FunctionPass *createAMDGPUGlobalISelDivergenceLoweringPass();
FunctionPass *createAMDGPURegBankSelectPass();
FunctionPass *createAMDGPURegBankLegalizePass();
// SI Passes
FunctionPass *createGCNDPPCombinePass();
@@ -36,7 +39,6 @@ FunctionPass *createSIAnnotateControlFlowLegacyPass();
// Factory declarations for SI passes; each returns a FunctionPass pointer.
FunctionPass *createSIFoldOperandsLegacyPass();
FunctionPass *createSIPeepholeSDWALegacyPass();
FunctionPass *createSILowerI1CopiesLegacyPass();
// NOTE(review): createAMDGPUGlobalISelDivergenceLoweringPass() is also
// declared earlier in this listing, next to the register bank pass factories;
// this repeated declaration is redundant.
FunctionPass *createAMDGPUGlobalISelDivergenceLoweringPass();
FunctionPass *createSIShrinkInstructionsLegacyPass();
FunctionPass *createSILoadStoreOptimizerLegacyPass();
FunctionPass *createSIWholeQuadModePass();
@@ -186,6 +188,12 @@ extern char &SILowerI1CopiesLegacyID;
// Each pass below exposes an initialize function taking a PassRegistry and an
// externally visible reference to its pass ID.
void initializeAMDGPUGlobalISelDivergenceLoweringPass(PassRegistry &);
extern char &AMDGPUGlobalISelDivergenceLoweringID;
// New register bank selection pass.
void initializeAMDGPURegBankSelectPass(PassRegistry &);
extern char &AMDGPURegBankSelectID;
// New register bank legalization pass.
void initializeAMDGPURegBankLegalizePass(PassRegistry &);
extern char &AMDGPURegBankLegalizeID;
void initializeAMDGPUMarkLastScratchLoadPass(PassRegistry &);
extern char &AMDGPUMarkLastScratchLoadID;

View File

@@ -0,0 +1,79 @@
//===-- AMDGPURegBankLegalize.cpp -----------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// Lower G_ instructions that can't be inst-selected with register bank
/// assignment from AMDGPURegBankSelect based on machine uniformity info.
/// Given types on all operands, some register bank assignments require lowering
/// while others do not.
/// Note: cases where all register bank assignments would require lowering are
/// lowered in legalizer.
/// For example vgpr S64 G_AND requires lowering to S32 while sgpr S64 does not.
/// Eliminate sgpr S1 by lowering to sgpr S32.
//
//===----------------------------------------------------------------------===//
#include "AMDGPU.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/InitializePasses.h"
#define DEBUG_TYPE "amdgpu-regbanklegalize"
using namespace llvm;
namespace {
class AMDGPURegBankLegalize : public MachineFunctionPass {
public:
static char ID;
public:
AMDGPURegBankLegalize() : MachineFunctionPass(ID) {
initializeAMDGPURegBankLegalizePass(*PassRegistry::getPassRegistry());
}
bool runOnMachineFunction(MachineFunction &MF) override;
StringRef getPassName() const override {
return "AMDGPU Register Bank Legalize";
}
void getAnalysisUsage(AnalysisUsage &AU) const override {
MachineFunctionPass::getAnalysisUsage(AU);
}
// If there were no phis and we do waterfall expansion machine verifier would
// fail.
MachineFunctionProperties getClearedProperties() const override {
return MachineFunctionProperties().set(
MachineFunctionProperties::Property::NoPHIs);
}
};
} // End anonymous namespace.
// Pass initialization boilerplate for AMDGPURegBankLegalize, keyed by
// DEBUG_TYPE ("amdgpu-regbanklegalize"). No INITIALIZE_PASS_DEPENDENCY
// entries appear between BEGIN and END.
INITIALIZE_PASS_BEGIN(AMDGPURegBankLegalize, DEBUG_TYPE,
"AMDGPU Register Bank Legalize", false, false)
INITIALIZE_PASS_END(AMDGPURegBankLegalize, DEBUG_TYPE,
"AMDGPU Register Bank Legalize", false, false)
// Storage for the pass ID, and the llvm-namespace reference that aliases it.
char AMDGPURegBankLegalize::ID = 0;
char &llvm::AMDGPURegBankLegalizeID = AMDGPURegBankLegalize::ID;
/// Allocates a new AMDGPURegBankLegalize and returns it as a FunctionPass
/// pointer.
FunctionPass *llvm::createAMDGPURegBankLegalizePass() {
  FunctionPass *NewPass = new AMDGPURegBankLegalize();
  return NewPass;
}
using namespace AMDGPU;
/// Pass entry point. Returns false when \p MF already carries the FailedISel
/// property and true otherwise; no instructions are rewritten here.
bool AMDGPURegBankLegalize::runOnMachineFunction(MachineFunction &MF) {
  const bool ISelFailed = MF.getProperties().hasProperty(
      MachineFunctionProperties::Property::FailedISel);
  return !ISelFailed;
}

View File

@@ -0,0 +1,74 @@
//===-- AMDGPURegBankSelect.cpp -------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// Assign register banks to all register operands of G_ instructions using
/// machine uniformity analysis.
/// Sgpr - uniform values and some lane masks
/// Vgpr - divergent, non S1, values
/// Vcc - divergent S1 values (lane masks)
/// However in some cases G_ instructions with this register bank assignment
/// can't be inst-selected. This is solved in AMDGPURegBankLegalize.
//===----------------------------------------------------------------------===//
#include "AMDGPU.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/InitializePasses.h"
#define DEBUG_TYPE "amdgpu-regbankselect"
using namespace llvm;
namespace {
class AMDGPURegBankSelect : public MachineFunctionPass {
public:
static char ID;
AMDGPURegBankSelect() : MachineFunctionPass(ID) {
initializeAMDGPURegBankSelectPass(*PassRegistry::getPassRegistry());
}
bool runOnMachineFunction(MachineFunction &MF) override;
StringRef getPassName() const override {
return "AMDGPU Register Bank Select";
}
void getAnalysisUsage(AnalysisUsage &AU) const override {
MachineFunctionPass::getAnalysisUsage(AU);
}
// This pass assigns register banks to all virtual registers, and we maintain
// this property in subsequent passes
MachineFunctionProperties getSetProperties() const override {
return MachineFunctionProperties().set(
MachineFunctionProperties::Property::RegBankSelected);
}
};
} // End anonymous namespace.
// Pass initialization boilerplate for AMDGPURegBankSelect, keyed by
// DEBUG_TYPE ("amdgpu-regbankselect"). No INITIALIZE_PASS_DEPENDENCY entries
// appear between BEGIN and END.
INITIALIZE_PASS_BEGIN(AMDGPURegBankSelect, DEBUG_TYPE,
"AMDGPU Register Bank Select", false, false)
INITIALIZE_PASS_END(AMDGPURegBankSelect, DEBUG_TYPE,
"AMDGPU Register Bank Select", false, false)
// Storage for the pass ID, and the llvm-namespace reference that aliases it.
char AMDGPURegBankSelect::ID = 0;
char &llvm::AMDGPURegBankSelectID = AMDGPURegBankSelect::ID;
/// Allocates a new AMDGPURegBankSelect and returns it as a FunctionPass
/// pointer.
FunctionPass *llvm::createAMDGPURegBankSelectPass() {
  FunctionPass *NewPass = new AMDGPURegBankSelect();
  return NewPass;
}
/// Pass entry point. Returns false when \p MF already carries the FailedISel
/// property and true otherwise; no register banks are assigned here.
bool AMDGPURegBankSelect::runOnMachineFunction(MachineFunction &MF) {
  const bool ISelFailed = MF.getProperties().hasProperty(
      MachineFunctionProperties::Property::FailedISel);
  return !ISelFailed;
}

View File

@@ -448,6 +448,12 @@ static cl::opt<bool>
cl::desc("Enable AMDGPUAttributorPass"),
cl::init(true), cl::Hidden);
// Hidden boolean option "new-reg-bank-select", initialised to false. It is
// read in GCNPassConfig::addRegBankSelect() to choose between the new
// amdgpu-regbankselect + amdgpu-regbanklegalize passes and regbankselect.
static cl::opt<bool> NewRegBankSelect(
"new-reg-bank-select",
cl::desc("Run amdgpu-regbankselect and amdgpu-regbanklegalize instead of "
"regbankselect"),
cl::init(false), cl::Hidden);
extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUTarget() {
// Register the target
RegisterTargetMachine<R600TargetMachine> X(getTheR600Target());
@@ -464,6 +470,8 @@ extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUTarget() {
initializeGCNDPPCombineLegacyPass(*PR);
initializeSILowerI1CopiesLegacyPass(*PR);
initializeAMDGPUGlobalISelDivergenceLoweringPass(*PR);
initializeAMDGPURegBankSelectPass(*PR);
initializeAMDGPURegBankLegalizePass(*PR);
initializeSILowerWWMCopiesPass(*PR);
initializeAMDGPUMarkLastScratchLoadPass(*PR);
initializeSILowerSGPRSpillsLegacyPass(*PR);
@@ -1385,7 +1393,12 @@ void GCNPassConfig::addPreRegBankSelect() {
}
/// Adds the register bank selection stage to the pass pipeline.
///
/// Exactly one variant is added:
///  - when NewRegBankSelect is set, AMDGPURegBankSelect followed by
///    AMDGPURegBankLegalize;
///  - otherwise, the existing RegBankSelect pass.
///
/// RegBankSelect must not be added unconditionally ahead of this choice: it
/// would then run twice in the default configuration and would run before
/// the new passes when the option is enabled, contradicting the option's
/// "instead of regbankselect" contract.
///
/// \returns false.
bool GCNPassConfig::addRegBankSelect() {
  if (NewRegBankSelect) {
    addPass(createAMDGPURegBankSelectPass());
    addPass(createAMDGPURegBankLegalizePass());
  } else {
    addPass(new RegBankSelect());
  }
  return false;
}

View File

@@ -92,6 +92,8 @@ add_llvm_target(AMDGPUCodeGen
AMDGPUPromoteAlloca.cpp
AMDGPUPromoteKernelArguments.cpp
AMDGPURegBankCombiner.cpp
AMDGPURegBankLegalize.cpp
AMDGPURegBankSelect.cpp
AMDGPURegisterBankInfo.cpp
AMDGPURemoveIncompatibleFunctions.cpp
AMDGPUReserveWWMRegs.cpp

View File

@@ -0,0 +1,858 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5
# RUN: llc -mtriple=amdgcn -mcpu=gfx1010 -run-pass=none %s -verify-machineinstrs -o - | FileCheck %s
---
name: uniform_in_vgpr
legalized: true
body: |
bb.0:
liveins: $sgpr0, $sgpr1, $vgpr0, $vgpr1
; CHECK-LABEL: name: uniform_in_vgpr
; CHECK: liveins: $sgpr0, $sgpr1, $vgpr0, $vgpr1
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr0
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr1
; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr0
; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr1
; CHECK-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32)
; CHECK-NEXT: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[COPY]](s32)
; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[COPY1]]
; CHECK-NEXT: G_STORE [[ADD]](s32), [[MV]](p1) :: (store (s32), addrspace 1)
; CHECK-NEXT: S_ENDPGM 0
%0:_(s32) = COPY $sgpr0
%1:_(s32) = COPY $sgpr1
%2:_(s32) = COPY $vgpr0
%3:_(s32) = COPY $vgpr1
%4:_(p1) = G_MERGE_VALUES %2(s32), %3(s32)
%5:_(s32) = G_FPTOUI %0(s32)
%6:_(s32) = G_ADD %5, %1
G_STORE %6(s32), %4(p1) :: (store (s32), addrspace 1)
S_ENDPGM 0
...
---
name: back_to_back_uniform_in_vgpr
legalized: true
body: |
bb.0:
liveins: $sgpr0, $sgpr1, $sgpr2, $vgpr0, $vgpr1
; CHECK-LABEL: name: back_to_back_uniform_in_vgpr
; CHECK: liveins: $sgpr0, $sgpr1, $sgpr2, $vgpr0, $vgpr1
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr0
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr1
; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr2
; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr0
; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr1
; CHECK-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY3]](s32), [[COPY4]](s32)
; CHECK-NEXT: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[COPY]], [[COPY1]]
; CHECK-NEXT: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FADD]](s32)
; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[COPY2]]
; CHECK-NEXT: G_STORE [[ADD]](s32), [[MV]](p1) :: (store (s32), addrspace 1)
; CHECK-NEXT: S_ENDPGM 0
%0:_(s32) = COPY $sgpr0
%1:_(s32) = COPY $sgpr1
%2:_(s32) = COPY $sgpr2
%3:_(s32) = COPY $vgpr0
%4:_(s32) = COPY $vgpr1
%5:_(p1) = G_MERGE_VALUES %3(s32), %4(s32)
%6:_(s32) = G_FADD %0, %1
%7:_(s32) = G_FPTOUI %6(s32)
%8:_(s32) = G_ADD %7, %2
G_STORE %8(s32), %5(p1) :: (store (s32), addrspace 1)
S_ENDPGM 0
...
---
name: buffer_load_uniform
legalized: true
body: |
bb.0:
liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4, $vgpr0, $vgpr1
; CHECK-LABEL: name: buffer_load_uniform
; CHECK: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4, $vgpr0, $vgpr1
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr0
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr1
; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr2
; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr3
; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32)
; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr4
; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr0
; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr1
; CHECK-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY5]](s32), [[COPY6]](s32)
; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C]](s32), [[COPY4]], [[C]], 0, 0, 0 :: (dereferenceable load (<4 x s32>), align 1, addrspace 8)
; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_BUFFER_LOAD]](<4 x s32>)
; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[UV1]], [[C1]]
; CHECK-NEXT: G_STORE [[ADD]](s32), [[MV]](p1) :: (store (s32), addrspace 1)
; CHECK-NEXT: S_ENDPGM 0
%0:_(s32) = COPY $sgpr0
%1:_(s32) = COPY $sgpr1
%2:_(s32) = COPY $sgpr2
%3:_(s32) = COPY $sgpr3
%4:_(<4 x s32>) = G_BUILD_VECTOR %0(s32), %1(s32), %2(s32), %3(s32)
%5:_(s32) = COPY $sgpr4
%6:_(s32) = COPY $vgpr0
%7:_(s32) = COPY $vgpr1
%8:_(p1) = G_MERGE_VALUES %6(s32), %7(s32)
%9:_(s32) = G_CONSTANT i32 0
%10:_(<4 x s32>) = G_AMDGPU_BUFFER_LOAD %4(<4 x s32>), %9(s32), %5, %9, 0, 0, 0 :: (dereferenceable load (<4 x s32>), align 1, addrspace 8)
%11:_(s32) = G_CONSTANT i32 1
%12:_(s32), %13:_(s32), %14:_(s32), %15:_(s32) = G_UNMERGE_VALUES %10(<4 x s32>)
%16:_(s32) = G_ADD %13, %11
G_STORE %16(s32), %8(p1) :: (store (s32), addrspace 1)
S_ENDPGM 0
...
---
name: buffer_load_divergent
legalized: true
body: |
bb.0:
liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr0, $vgpr1, $vgpr2
; CHECK-LABEL: name: buffer_load_divergent
; CHECK: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr0, $vgpr1, $vgpr2
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr0
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr1
; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr2
; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr3
; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32)
; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr0
; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr1
; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr2
; CHECK-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY5]](s32), [[COPY6]](s32)
; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C]](s32), [[COPY4]], [[C]], 0, 0, 0 :: (dereferenceable load (<4 x s32>), align 1, addrspace 8)
; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_BUFFER_LOAD]](<4 x s32>)
; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[UV1]], [[C1]]
; CHECK-NEXT: G_STORE [[ADD]](s32), [[MV]](p1) :: (store (s32), addrspace 1)
; CHECK-NEXT: S_ENDPGM 0
%0:_(s32) = COPY $sgpr0
%1:_(s32) = COPY $sgpr1
%2:_(s32) = COPY $sgpr2
%3:_(s32) = COPY $sgpr3
%4:_(<4 x s32>) = G_BUILD_VECTOR %0(s32), %1(s32), %2(s32), %3(s32)
%5:_(s32) = COPY $vgpr0
%6:_(s32) = COPY $vgpr1
%7:_(s32) = COPY $vgpr2
%8:_(p1) = G_MERGE_VALUES %6(s32), %7(s32)
%9:_(s32) = G_CONSTANT i32 0
%10:_(<4 x s32>) = G_AMDGPU_BUFFER_LOAD %4(<4 x s32>), %9(s32), %5, %9, 0, 0, 0 :: (dereferenceable load (<4 x s32>), align 1, addrspace 8)
%11:_(s32) = G_CONSTANT i32 1
%12:_(s32), %13:_(s32), %14:_(s32), %15:_(s32) = G_UNMERGE_VALUES %10(<4 x s32>)
%16:_(s32) = G_ADD %13, %11
G_STORE %16(s32), %8(p1) :: (store (s32), addrspace 1)
S_ENDPGM 0
...
---
name: vgpr_and_i64
legalized: true
body: |
bb.0:
liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5
; CHECK-LABEL: name: vgpr_and_i64
; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
; CHECK-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32)
; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
; CHECK-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32)
; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4
; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5
; CHECK-NEXT: [[MV2:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32)
; CHECK-NEXT: [[AND:%[0-9]+]]:_(s64) = G_AND [[MV]], [[MV1]]
; CHECK-NEXT: G_STORE [[AND]](s64), [[MV2]](p1) :: (store (s64), addrspace 1)
; CHECK-NEXT: S_ENDPGM 0
%0:_(s32) = COPY $vgpr0
%1:_(s32) = COPY $vgpr1
%2:_(s64) = G_MERGE_VALUES %0(s32), %1(s32)
%3:_(s32) = COPY $vgpr2
%4:_(s32) = COPY $vgpr3
%5:_(s64) = G_MERGE_VALUES %3(s32), %4(s32)
%6:_(s32) = COPY $vgpr4
%7:_(s32) = COPY $vgpr5
%8:_(p1) = G_MERGE_VALUES %6(s32), %7(s32)
%9:_(s64) = G_AND %2, %5
G_STORE %9(s64), %8(p1) :: (store (s64), addrspace 1)
S_ENDPGM 0
...
---
name: abs_sgpr_i16
legalized: true
body: |
bb.0:
liveins: $sgpr0, $vgpr0, $vgpr1
; CHECK-LABEL: name: abs_sgpr_i16
; CHECK: liveins: $sgpr0, $vgpr0, $vgpr1
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr0
; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32)
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr0
; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr1
; CHECK-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY1]](s32), [[COPY2]](s32)
; CHECK-NEXT: [[ABS:%[0-9]+]]:_(s16) = G_ABS [[TRUNC]]
; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[ABS]](s16)
; CHECK-NEXT: G_STORE [[ANYEXT]](s32), [[MV]](p1) :: (store (s16), addrspace 1)
; CHECK-NEXT: S_ENDPGM 0
%0:_(s32) = COPY $sgpr0
%1:_(s16) = G_TRUNC %0(s32)
%2:_(s32) = COPY $vgpr0
%3:_(s32) = COPY $vgpr1
%4:_(p1) = G_MERGE_VALUES %2(s32), %3(s32)
%5:_(s16) = G_ABS %1
%6:_(s32) = G_ANYEXT %5(s16)
G_STORE %6(s32), %4(p1) :: (store (s16), addrspace 1)
S_ENDPGM 0
...
---
name: uniform_i1_phi
legalized: true
tracksRegLiveness: true
body: |
; CHECK-LABEL: name: uniform_i1_phi
; CHECK: bb.0:
; CHECK-NEXT: successors: %bb.1(0x30000000), %bb.2(0x50000000)
; CHECK-NEXT: liveins: $sgpr0, $sgpr1, $vgpr0, $vgpr1
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
; CHECK-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32)
; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr0
; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr1
; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 6
; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[COPY2]](s32), [[C]]
; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
; CHECK-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[COPY3]](s32), [[C1]]
; CHECK-NEXT: G_BRCOND [[ICMP1]](s1), %bb.2
; CHECK-NEXT: G_BR %bb.1
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.1:
; CHECK-NEXT: successors: %bb.2(0x80000000)
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
; CHECK-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[COPY2]](s32), [[C2]]
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.2:
; CHECK-NEXT: [[PHI:%[0-9]+]]:_(s1) = G_PHI [[ICMP]](s1), %bb.0, [[ICMP2]](s1), %bb.1
; CHECK-NEXT: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[PHI]](s1)
; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[SEXT]], [[C3]]
; CHECK-NEXT: G_STORE [[ADD]](s32), [[MV]](p1) :: (store (s32), addrspace 1)
; CHECK-NEXT: S_ENDPGM 0
bb.0:
successors: %bb.1(0x30000000), %bb.2(0x50000000)
liveins: $sgpr0, $sgpr1, $vgpr0, $vgpr1
%0:_(s32) = COPY $vgpr0
%1:_(s32) = COPY $vgpr1
%2:_(p1) = G_MERGE_VALUES %0(s32), %1(s32)
%3:_(s32) = COPY $sgpr0
%4:_(s32) = COPY $sgpr1
%5:_(s32) = G_CONSTANT i32 6
%6:_(s1) = G_ICMP intpred(uge), %3(s32), %5
%7:_(s32) = G_CONSTANT i32 0
%8:_(s1) = G_ICMP intpred(ne), %4(s32), %7
G_BRCOND %8(s1), %bb.2
G_BR %bb.1
bb.1:
successors: %bb.2(0x80000000)
%9:_(s32) = G_CONSTANT i32 1
%10:_(s1) = G_ICMP intpred(ult), %3(s32), %9
bb.2:
%11:_(s1) = G_PHI %6(s1), %bb.0, %10(s1), %bb.1
%12:_(s32) = G_SEXT %11(s1)
%13:_(s32) = G_CONSTANT i32 2
%14:_(s32) = G_ADD %12, %13
G_STORE %14(s32), %2(p1) :: (store (s32), addrspace 1)
S_ENDPGM 0
...
---
name: vcc_to_scc
legalized: true
body: |
bb.0:
liveins: $sgpr0, $sgpr1, $sgpr2, $vgpr0, $vgpr1
; CHECK-LABEL: name: vcc_to_scc
; CHECK: liveins: $sgpr0, $sgpr1, $sgpr2, $vgpr0, $vgpr1
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr0
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr1
; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr2
; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr0
; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr1
; CHECK-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY3]](s32), [[COPY4]](s32)
; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0.000000e+00
; CHECK-NEXT: [[FCMP:%[0-9]+]]:_(s1) = G_FCMP floatpred(oeq), [[COPY]](s32), [[C]]
; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[FCMP]](s1), [[COPY1]], [[COPY2]]
; CHECK-NEXT: G_STORE [[SELECT]](s32), [[MV]](p1) :: (store (s32), addrspace 1)
; CHECK-NEXT: S_ENDPGM 0
%0:_(s32) = COPY $sgpr0
%1:_(s32) = COPY $sgpr1
%2:_(s32) = COPY $sgpr2
%3:_(s32) = COPY $vgpr0
%4:_(s32) = COPY $vgpr1
%5:_(p1) = G_MERGE_VALUES %3(s32), %4(s32)
%6:_(s32) = G_FCONSTANT float 0.000000e+00
%7:_(s1) = G_FCMP floatpred(oeq), %0(s32), %6
%8:_(s32) = G_SELECT %7(s1), %1, %2
G_STORE %8(s32), %5(p1) :: (store (s32), addrspace 1)
S_ENDPGM 0
...
---
name: scc_to_vcc
legalized: true
body: |
bb.0:
liveins: $sgpr0, $vgpr0, $vgpr1, $vgpr2, $vgpr3
; CHECK-LABEL: name: scc_to_vcc
; CHECK: liveins: $sgpr0, $vgpr0, $vgpr1, $vgpr2, $vgpr3
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr0
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr0
; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr1
; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr2
; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr3
; CHECK-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY3]](s32), [[COPY4]](s32)
; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY]](s32), [[C]]
; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[COPY1]], [[COPY2]]
; CHECK-NEXT: G_STORE [[SELECT]](s32), [[MV]](p1) :: (store (s32), addrspace 1)
; CHECK-NEXT: S_ENDPGM 0
%0:_(s32) = COPY $sgpr0
%1:_(s32) = COPY $vgpr0
%2:_(s32) = COPY $vgpr1
%3:_(s32) = COPY $vgpr2
%4:_(s32) = COPY $vgpr3
%5:_(p1) = G_MERGE_VALUES %3(s32), %4(s32)
%6:_(s32) = G_CONSTANT i32 0
%7:_(s1) = G_ICMP intpred(eq), %0(s32), %6
%8:_(s32) = G_SELECT %7(s1), %1, %2
G_STORE %8(s32), %5(p1) :: (store (s32), addrspace 1)
S_ENDPGM 0
...
---
name: vgpr_to_vcc_trunc
legalized: true
body: |
bb.0:
liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4
; CHECK-LABEL: name: vgpr_to_vcc_trunc
; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4
; CHECK-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY3]](s32), [[COPY4]](s32)
; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s1) = G_TRUNC [[COPY]](s32)
; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[TRUNC]](s1), [[COPY1]], [[COPY2]]
; CHECK-NEXT: G_STORE [[SELECT]](s32), [[MV]](p1) :: (store (s32), addrspace 1)
; CHECK-NEXT: S_ENDPGM 0
%0:_(s32) = COPY $vgpr0
%1:_(s32) = COPY $vgpr1
%2:_(s32) = COPY $vgpr2
%3:_(s32) = COPY $vgpr3
%4:_(s32) = COPY $vgpr4
%5:_(p1) = G_MERGE_VALUES %3(s32), %4(s32)
%6:_(s1) = G_TRUNC %0(s32)
%7:_(s32) = G_SELECT %6(s1), %1, %2
G_STORE %7(s32), %5(p1) :: (store (s32), addrspace 1)
S_ENDPGM 0
...
---
name: zext
legalized: true
body: |
bb.0:
liveins: $sgpr0, $vgpr0, $vgpr1
; CHECK-LABEL: name: zext
; CHECK: liveins: $sgpr0, $vgpr0, $vgpr1
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr0
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr0
; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr1
; CHECK-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY1]](s32), [[COPY2]](s32)
; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY]](s32), [[C]]
; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[ICMP]](s1)
; CHECK-NEXT: G_STORE [[ZEXT]](s32), [[MV]](p1) :: (store (s32), addrspace 1)
; CHECK-NEXT: S_ENDPGM 0
%0:_(s32) = COPY $sgpr0
%1:_(s32) = COPY $vgpr0
%2:_(s32) = COPY $vgpr1
%3:_(p1) = G_MERGE_VALUES %1(s32), %2(s32)
%4:_(s32) = G_CONSTANT i32 10
%5:_(s1) = G_ICMP intpred(eq), %0(s32), %4
%6:_(s32) = G_ZEXT %5(s1)
G_STORE %6(s32), %3(p1) :: (store (s32), addrspace 1)
S_ENDPGM 0
...
---
name: sext
legalized: true
body: |
bb.0:
liveins: $sgpr0, $vgpr0, $vgpr1
; CHECK-LABEL: name: sext
; CHECK: liveins: $sgpr0, $vgpr0, $vgpr1
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr0
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr0
; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr1
; CHECK-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY1]](s32), [[COPY2]](s32)
; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY]](s32), [[C]]
; CHECK-NEXT: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[ICMP]](s1)
; CHECK-NEXT: G_STORE [[SEXT]](s32), [[MV]](p1) :: (store (s32), addrspace 1)
; CHECK-NEXT: S_ENDPGM 0
%0:_(s32) = COPY $sgpr0
%1:_(s32) = COPY $vgpr0
%2:_(s32) = COPY $vgpr1
%3:_(p1) = G_MERGE_VALUES %1(s32), %2(s32)
%4:_(s32) = G_CONSTANT i32 10
%5:_(s1) = G_ICMP intpred(eq), %0(s32), %4
%6:_(s32) = G_SEXT %5(s1)
G_STORE %6(s32), %3(p1) :: (store (s32), addrspace 1)
S_ENDPGM 0
...
---
name: and_i1_vcc
legalized: true
body: |
bb.0:
liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3
; CHECK-LABEL: name: and_i1_vcc
; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
; CHECK-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32)
; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[COPY]](s32), [[C]]
; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
; CHECK-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[COPY1]](s32), [[C1]]
; CHECK-NEXT: [[AND:%[0-9]+]]:_(s1) = G_AND [[ICMP]], [[ICMP1]]
; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[AND]](s1), [[COPY]], [[COPY1]]
; CHECK-NEXT: G_STORE [[SELECT]](s32), [[MV]](p1) :: (store (s32), addrspace 1)
; CHECK-NEXT: S_ENDPGM 0
%0:_(s32) = COPY $vgpr0
%1:_(s32) = COPY $vgpr1
%2:_(s32) = COPY $vgpr2
%3:_(s32) = COPY $vgpr3
%4:_(p1) = G_MERGE_VALUES %2(s32), %3(s32)
%5:_(s32) = G_CONSTANT i32 10
%6:_(s1) = G_ICMP intpred(uge), %0(s32), %5
%7:_(s32) = G_CONSTANT i32 20
%8:_(s1) = G_ICMP intpred(uge), %1(s32), %7
%9:_(s1) = G_AND %6, %8
%10:_(s32) = G_SELECT %9(s1), %0, %1
G_STORE %10(s32), %4(p1) :: (store (s32), addrspace 1)
S_ENDPGM 0
...
---
name: and_i1_scc
legalized: true
body: |
bb.0:
liveins: $sgpr0, $sgpr1, $vgpr0, $vgpr1
; CHECK-LABEL: name: and_i1_scc
; CHECK: liveins: $sgpr0, $sgpr1, $vgpr0, $vgpr1
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr0
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr1
; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr0
; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr1
; CHECK-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32)
; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[COPY]](s32), [[C]]
; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
; CHECK-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[COPY1]](s32), [[C1]]
; CHECK-NEXT: [[AND:%[0-9]+]]:_(s1) = G_AND [[ICMP]], [[ICMP1]]
; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[AND]](s1), [[COPY]], [[COPY1]]
; CHECK-NEXT: G_STORE [[SELECT]](s32), [[MV]](p1) :: (store (s32), addrspace 1)
; CHECK-NEXT: S_ENDPGM 0
%0:_(s32) = COPY $sgpr0
%1:_(s32) = COPY $sgpr1
%2:_(s32) = COPY $vgpr0
%3:_(s32) = COPY $vgpr1
%4:_(p1) = G_MERGE_VALUES %2(s32), %3(s32)
%5:_(s32) = G_CONSTANT i32 10
%6:_(s1) = G_ICMP intpred(uge), %0(s32), %5
%7:_(s32) = G_CONSTANT i32 20
%8:_(s1) = G_ICMP intpred(uge), %1(s32), %7
%9:_(s1) = G_AND %6, %8
%10:_(s32) = G_SELECT %9(s1), %0, %1
G_STORE %10(s32), %4(p1) :: (store (s32), addrspace 1)
S_ENDPGM 0
...
---
name: divergent_phi_with_uniform_inputs
legalized: true
tracksRegLiveness: true
body: |
; CHECK-LABEL: name: divergent_phi_with_uniform_inputs
; CHECK: bb.0:
; CHECK-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000)
; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
; CHECK-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY1]](s32), [[COPY2]](s32)
; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
; CHECK-NEXT: [[ICMP:%[0-9]+]]:sreg_32_xm0_xexec(s1) = G_ICMP intpred(eq), [[COPY]](s32), [[C]]
; CHECK-NEXT: [[SI_IF:%[0-9]+]]:sreg_32_xm0_xexec(s32) = SI_IF [[ICMP]](s1), %bb.2, implicit-def $exec, implicit-def $scc, implicit $exec
; CHECK-NEXT: G_BR %bb.1
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.1:
; CHECK-NEXT: successors: %bb.2(0x80000000)
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.2:
; CHECK-NEXT: [[PHI:%[0-9]+]]:_(s32) = G_PHI [[C]](s32), %bb.0, [[C1]](s32), %bb.1
; CHECK-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), [[SI_IF]](s32)
; CHECK-NEXT: G_STORE [[PHI]](s32), [[MV]](p1) :: (store (s32), addrspace 1)
; CHECK-NEXT: S_ENDPGM 0
bb.0:
successors: %bb.1(0x40000000), %bb.2(0x40000000)
liveins: $vgpr0, $vgpr1, $vgpr2
%0:_(s32) = COPY $vgpr0
%1:_(s32) = COPY $vgpr1
%2:_(s32) = COPY $vgpr2
%3:_(p1) = G_MERGE_VALUES %1(s32), %2(s32)
%4:_(s32) = G_CONSTANT i32 0
%5:sreg_32_xm0_xexec(s1) = G_ICMP intpred(eq), %0(s32), %4
%6:sreg_32_xm0_xexec(s32) = SI_IF %5(s1), %bb.2, implicit-def $exec, implicit-def $scc, implicit $exec
G_BR %bb.1
bb.1:
successors: %bb.2(0x80000000)
%7:_(s32) = G_CONSTANT i32 1
bb.2:
%8:_(s32) = G_PHI %4(s32), %bb.0, %7(s32), %bb.1
G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), %6(s32)
G_STORE %8(s32), %3(p1) :: (store (s32), addrspace 1)
S_ENDPGM 0
...
---
# Single-block loop (bb.1) whose counter %8 starts at -1 and is incremented by
# the constant 1 each iteration (%9). The exit condition converts %9 to float
# and compares it (ogt) against %0, which is copied from $vgpr0; the result
# feeds llvm.amdgcn.if.break and SI_LOOP.
# After the loop, bb.2 reads %9 through a G_PHI, multiplies it by 10 and stores
# it to the global pointer built from $vgpr1/$vgpr2.
# NOTE(review): per the function name, the out-of-loop use of %9 is presumably
# the "temporal divergent use" under test - confirm against the uniformity
# analysis.
# The CHECK lines repeat the input MIR instruction-for-instruction; no register
# banks are assigned in the expected output.
name: divergent_because_of_temporal_divergent_use
legalized: true
tracksRegLiveness: true
body: |
; CHECK-LABEL: name: divergent_because_of_temporal_divergent_use
; CHECK: bb.0:
; CHECK-NEXT: successors: %bb.1(0x80000000)
; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
; CHECK-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY1]](s32), [[COPY2]](s32)
; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1
; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.1:
; CHECK-NEXT: successors: %bb.2(0x04000000), %bb.1(0x7c000000)
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[PHI:%[0-9]+]]:_(s32) = G_PHI %7(s32), %bb.1, [[C1]](s32), %bb.0
; CHECK-NEXT: [[PHI1:%[0-9]+]]:_(s32) = G_PHI [[C]](s32), %bb.0, %9(s32), %bb.1
; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[PHI1]], [[C2]]
; CHECK-NEXT: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[ADD]](s32)
; CHECK-NEXT: [[FCMP:%[0-9]+]]:_(s1) = G_FCMP floatpred(ogt), [[UITOFP]](s32), [[COPY]]
; CHECK-NEXT: [[INT:%[0-9]+]]:sreg_32_xm0_xexec(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.if.break), [[FCMP]](s1), [[PHI]](s32)
; CHECK-NEXT: SI_LOOP [[INT]](s32), %bb.1, implicit-def $exec, implicit-def $scc, implicit $exec
; CHECK-NEXT: G_BR %bb.2
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.2:
; CHECK-NEXT: [[PHI2:%[0-9]+]]:_(s32) = G_PHI [[ADD]](s32), %bb.1
; CHECK-NEXT: [[PHI3:%[0-9]+]]:_(s32) = G_PHI [[INT]](s32), %bb.1
; CHECK-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), [[PHI3]](s32)
; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
; CHECK-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[PHI2]], [[C3]]
; CHECK-NEXT: G_STORE [[MUL]](s32), [[MV]](p1) :: (store (s32), addrspace 1)
; CHECK-NEXT: S_ENDPGM 0
bb.0:
successors: %bb.1(0x80000000)
liveins: $vgpr0, $vgpr1, $vgpr2
%0:_(s32) = COPY $vgpr0
%1:_(s32) = COPY $vgpr1
%2:_(s32) = COPY $vgpr2
%3:_(p1) = G_MERGE_VALUES %1(s32), %2(s32)
%4:_(s32) = G_CONSTANT i32 -1
%5:_(s32) = G_CONSTANT i32 0
bb.1:
successors: %bb.2(0x04000000), %bb.1(0x7c000000)
%6:_(s32) = G_PHI %7(s32), %bb.1, %5(s32), %bb.0
%8:_(s32) = G_PHI %4(s32), %bb.0, %9(s32), %bb.1
%10:_(s32) = G_CONSTANT i32 1
%9:_(s32) = G_ADD %8, %10
%11:_(s32) = G_UITOFP %9(s32)
%12:_(s1) = G_FCMP floatpred(ogt), %11(s32), %0
%7:sreg_32_xm0_xexec(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.if.break), %12(s1), %6(s32)
SI_LOOP %7(s32), %bb.1, implicit-def $exec, implicit-def $scc, implicit $exec
G_BR %bb.2
bb.2:
%13:_(s32) = G_PHI %9(s32), %bb.1
%14:_(s32) = G_PHI %7(s32), %bb.1
G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), %14(s32)
%15:_(s32) = G_CONSTANT i32 10
%16:_(s32) = G_MUL %13, %15
G_STORE %16(s32), %3(p1) :: (store (s32), addrspace 1)
S_ENDPGM 0
...
---
# Loop with header bb.1, latch bb.3 and exit bb.6, containing two nested
# conditional regions opened by SI_IF (in bb.1 and bb.2) and closed by
# llvm.amdgcn.end.cf (in bb.3 and bb.5).
#   bb.0: builds three global pointers: %2 ($vgpr0/1), %5 ($vgpr2/3),
#         %8 ($vgpr4/5).
#   bb.1: loads from %5 indexed by the counter %16 (sign-extended, shifted
#         left by 2); SI_IF on "load != 0" with bb.3 as its branch target.
#   bb.2: same indexed load from %8; SI_IF on "load != 0" targeting bb.5.
#   bb.4: load/add 1/store through %2, increments the counter (%52) and
#         compares the old counter against 100 (ult).
#   bb.3: llvm.amdgcn.if.break + SI_LOOP back to bb.1, otherwise bb.6.
# The s1 value that reaches if.break is carried in sreg_32 registers and is
# merged across blocks with S_ANDN2_B32 / S_AND_B32 / S_OR_B32 against
# $exec_lo.
# The CHECK lines repeat the input MIR instruction-for-instruction; no register
# banks are assigned in the expected output.
name: loop_with_2breaks
legalized: true
tracksRegLiveness: true
body: |
; CHECK-LABEL: name: loop_with_2breaks
; CHECK: bb.0:
; CHECK-NEXT: successors: %bb.1(0x80000000)
; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
; CHECK-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32)
; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
; CHECK-NEXT: [[MV1:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32)
; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4
; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5
; CHECK-NEXT: [[MV2:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32)
; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
; CHECK-NEXT: [[DEF1:%[0-9]+]]:sreg_32(s1) = IMPLICIT_DEF
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.1:
; CHECK-NEXT: successors: %bb.2(0x40000000), %bb.3(0x40000000)
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[PHI:%[0-9]+]]:sreg_32(s1) = PHI [[DEF1]](s1), %bb.0, %13(s1), %bb.3
; CHECK-NEXT: [[PHI1:%[0-9]+]]:_(s32) = G_PHI %15(s32), %bb.3, [[C]](s32), %bb.0
; CHECK-NEXT: [[PHI2:%[0-9]+]]:_(s32) = G_PHI [[C]](s32), %bb.0, %17(s32), %bb.3
; CHECK-NEXT: [[COPY6:%[0-9]+]]:sreg_32(s1) = COPY [[PHI]](s1)
; CHECK-NEXT: [[SEXT:%[0-9]+]]:_(s64) = G_SEXT [[PHI2]](s32)
; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[SEXT]], [[C1]](s32)
; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[MV1]], [[SHL]](s64)
; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s32), addrspace 1)
; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
; CHECK-NEXT: [[ICMP:%[0-9]+]]:sreg_32_xm0_xexec(s1) = G_ICMP intpred(ne), [[LOAD]](s32), [[C2]]
; CHECK-NEXT: [[C3:%[0-9]+]]:_(s1) = G_CONSTANT i1 true
; CHECK-NEXT: [[COPY7:%[0-9]+]]:sreg_32(s1) = COPY [[C3]](s1)
; CHECK-NEXT: [[S_ANDN2_B32_:%[0-9]+]]:sreg_32(s1) = S_ANDN2_B32 [[COPY6]](s1), $exec_lo, implicit-def $scc
; CHECK-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32(s1) = S_AND_B32 $exec_lo, [[COPY7]](s1), implicit-def $scc
; CHECK-NEXT: [[S_OR_B32_:%[0-9]+]]:sreg_32(s1) = S_OR_B32 [[S_ANDN2_B32_]](s1), [[S_AND_B32_]](s1), implicit-def $scc
; CHECK-NEXT: [[COPY8:%[0-9]+]]:sreg_32(s1) = COPY [[S_OR_B32_]](s1)
; CHECK-NEXT: [[SI_IF:%[0-9]+]]:sreg_32_xm0_xexec(s32) = SI_IF [[ICMP]](s1), %bb.3, implicit-def $exec, implicit-def $scc, implicit $exec
; CHECK-NEXT: G_BR %bb.2
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.2:
; CHECK-NEXT: successors: %bb.4(0x40000000), %bb.5(0x40000000)
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s64) = G_SHL [[SEXT]], [[C4]](s32)
; CHECK-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[MV2]], [[SHL1]](s64)
; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s32), addrspace 1)
; CHECK-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
; CHECK-NEXT: [[ICMP1:%[0-9]+]]:sreg_32_xm0_xexec(s1) = G_ICMP intpred(ne), [[LOAD1]](s32), [[C5]]
; CHECK-NEXT: [[C6:%[0-9]+]]:_(s1) = G_CONSTANT i1 true
; CHECK-NEXT: [[COPY9:%[0-9]+]]:sreg_32(s1) = COPY [[C6]](s1)
; CHECK-NEXT: [[COPY10:%[0-9]+]]:sreg_32(s1) = COPY [[COPY9]](s1)
; CHECK-NEXT: [[SI_IF1:%[0-9]+]]:sreg_32_xm0_xexec(s32) = SI_IF [[ICMP1]](s1), %bb.5, implicit-def $exec, implicit-def $scc, implicit $exec
; CHECK-NEXT: G_BR %bb.4
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.3:
; CHECK-NEXT: successors: %bb.6(0x04000000), %bb.1(0x7c000000)
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[PHI3:%[0-9]+]]:sreg_32(s1) = PHI [[S_OR_B32_]](s1), %bb.1, %43(s1), %bb.5
; CHECK-NEXT: [[PHI4:%[0-9]+]]:_(s32) = G_PHI %44(s32), %bb.5, [[DEF]](s32), %bb.1
; CHECK-NEXT: [[COPY11:%[0-9]+]]:sreg_32(s1) = COPY [[PHI3]](s1)
; CHECK-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), [[SI_IF]](s32)
; CHECK-NEXT: [[INT:%[0-9]+]]:sreg_32_xm0_xexec(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.if.break), [[COPY11]](s1), [[PHI1]](s32)
; CHECK-NEXT: SI_LOOP [[INT]](s32), %bb.1, implicit-def $exec, implicit-def $scc, implicit $exec
; CHECK-NEXT: G_BR %bb.6
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.4:
; CHECK-NEXT: successors: %bb.5(0x80000000)
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(s64) = G_SHL [[SEXT]], [[C7]](s32)
; CHECK-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[MV]], [[SHL2]](s64)
; CHECK-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s32), addrspace 1)
; CHECK-NEXT: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[LOAD2]], [[C8]]
; CHECK-NEXT: G_STORE [[ADD]](s32), [[PTR_ADD2]](p1) :: (store (s32), addrspace 1)
; CHECK-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[PHI2]], [[C8]]
; CHECK-NEXT: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 100
; CHECK-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[PHI2]](s32), [[C9]]
; CHECK-NEXT: [[COPY12:%[0-9]+]]:sreg_32(s1) = COPY [[ICMP2]](s1)
; CHECK-NEXT: [[S_ANDN2_B32_1:%[0-9]+]]:sreg_32(s1) = S_ANDN2_B32 [[COPY10]](s1), $exec_lo, implicit-def $scc
; CHECK-NEXT: [[S_AND_B32_1:%[0-9]+]]:sreg_32(s1) = S_AND_B32 $exec_lo, [[COPY12]](s1), implicit-def $scc
; CHECK-NEXT: [[S_OR_B32_1:%[0-9]+]]:sreg_32(s1) = S_OR_B32 [[S_ANDN2_B32_1]](s1), [[S_AND_B32_1]](s1), implicit-def $scc
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.5:
; CHECK-NEXT: successors: %bb.3(0x80000000)
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[PHI5:%[0-9]+]]:sreg_32(s1) = PHI [[COPY9]](s1), %bb.2, [[S_OR_B32_1]](s1), %bb.4
; CHECK-NEXT: [[PHI6:%[0-9]+]]:_(s32) = G_PHI [[ADD1]](s32), %bb.4, [[DEF]](s32), %bb.2
; CHECK-NEXT: [[COPY13:%[0-9]+]]:sreg_32(s1) = COPY [[PHI5]](s1)
; CHECK-NEXT: [[COPY14:%[0-9]+]]:sreg_32(s1) = COPY [[COPY13]](s1)
; CHECK-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), [[SI_IF1]](s32)
; CHECK-NEXT: [[S_ANDN2_B32_2:%[0-9]+]]:sreg_32(s1) = S_ANDN2_B32 [[COPY8]](s1), $exec_lo, implicit-def $scc
; CHECK-NEXT: [[S_AND_B32_2:%[0-9]+]]:sreg_32(s1) = S_AND_B32 $exec_lo, [[COPY14]](s1), implicit-def $scc
; CHECK-NEXT: [[S_OR_B32_2:%[0-9]+]]:sreg_32(s1) = S_OR_B32 [[S_ANDN2_B32_2]](s1), [[S_AND_B32_2]](s1), implicit-def $scc
; CHECK-NEXT: G_BR %bb.3
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.6:
; CHECK-NEXT: [[PHI7:%[0-9]+]]:_(s32) = G_PHI [[INT]](s32), %bb.3
; CHECK-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), [[PHI7]](s32)
; CHECK-NEXT: S_ENDPGM 0
bb.0:
successors: %bb.1(0x80000000)
liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5
%0:_(s32) = COPY $vgpr0
%1:_(s32) = COPY $vgpr1
%2:_(p1) = G_MERGE_VALUES %0(s32), %1(s32)
%3:_(s32) = COPY $vgpr2
%4:_(s32) = COPY $vgpr3
%5:_(p1) = G_MERGE_VALUES %3(s32), %4(s32)
%6:_(s32) = COPY $vgpr4
%7:_(s32) = COPY $vgpr5
%8:_(p1) = G_MERGE_VALUES %6(s32), %7(s32)
%9:_(s32) = G_IMPLICIT_DEF
%10:_(s32) = G_CONSTANT i32 0
%11:sreg_32(s1) = IMPLICIT_DEF
bb.1:
successors: %bb.2(0x40000000), %bb.3(0x40000000)
%12:sreg_32(s1) = PHI %11(s1), %bb.0, %13(s1), %bb.3
%14:_(s32) = G_PHI %15(s32), %bb.3, %10(s32), %bb.0
%16:_(s32) = G_PHI %10(s32), %bb.0, %17(s32), %bb.3
%18:sreg_32(s1) = COPY %12(s1)
%19:_(s64) = G_SEXT %16(s32)
%20:_(s32) = G_CONSTANT i32 2
%21:_(s64) = G_SHL %19, %20(s32)
%22:_(p1) = G_PTR_ADD %5, %21(s64)
%23:_(s32) = G_LOAD %22(p1) :: (load (s32), addrspace 1)
%24:_(s32) = G_CONSTANT i32 0
%25:sreg_32_xm0_xexec(s1) = G_ICMP intpred(ne), %23(s32), %24
%26:_(s1) = G_CONSTANT i1 true
%27:sreg_32(s1) = COPY %26(s1)
%28:sreg_32(s1) = S_ANDN2_B32 %18(s1), $exec_lo, implicit-def $scc
%29:sreg_32(s1) = S_AND_B32 $exec_lo, %27(s1), implicit-def $scc
%30:sreg_32(s1) = S_OR_B32 %28(s1), %29(s1), implicit-def $scc
%31:sreg_32(s1) = COPY %30(s1)
%32:sreg_32_xm0_xexec(s32) = SI_IF %25(s1), %bb.3, implicit-def $exec, implicit-def $scc, implicit $exec
G_BR %bb.2
bb.2:
successors: %bb.4(0x40000000), %bb.5(0x40000000)
%33:_(s32) = G_CONSTANT i32 2
%34:_(s64) = G_SHL %19, %33(s32)
%35:_(p1) = G_PTR_ADD %8, %34(s64)
%36:_(s32) = G_LOAD %35(p1) :: (load (s32), addrspace 1)
%37:_(s32) = G_CONSTANT i32 0
%38:sreg_32_xm0_xexec(s1) = G_ICMP intpred(ne), %36(s32), %37
%39:_(s1) = G_CONSTANT i1 true
%40:sreg_32(s1) = COPY %39(s1)
%41:sreg_32(s1) = COPY %40(s1)
%42:sreg_32_xm0_xexec(s32) = SI_IF %38(s1), %bb.5, implicit-def $exec, implicit-def $scc, implicit $exec
G_BR %bb.4
bb.3:
successors: %bb.6(0x04000000), %bb.1(0x7c000000)
%13:sreg_32(s1) = PHI %30(s1), %bb.1, %43(s1), %bb.5
%17:_(s32) = G_PHI %44(s32), %bb.5, %9(s32), %bb.1
%45:sreg_32(s1) = COPY %13(s1)
G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), %32(s32)
%15:sreg_32_xm0_xexec(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.if.break), %45(s1), %14(s32)
SI_LOOP %15(s32), %bb.1, implicit-def $exec, implicit-def $scc, implicit $exec
G_BR %bb.6
bb.4:
successors: %bb.5(0x80000000)
%46:_(s32) = G_CONSTANT i32 2
%47:_(s64) = G_SHL %19, %46(s32)
%48:_(p1) = G_PTR_ADD %2, %47(s64)
%49:_(s32) = G_LOAD %48(p1) :: (load (s32), addrspace 1)
%50:_(s32) = G_CONSTANT i32 1
%51:_(s32) = G_ADD %49, %50
G_STORE %51(s32), %48(p1) :: (store (s32), addrspace 1)
%52:_(s32) = G_ADD %16, %50
%53:_(s32) = G_CONSTANT i32 100
%54:_(s1) = G_ICMP intpred(ult), %16(s32), %53
%55:sreg_32(s1) = COPY %54(s1)
%56:sreg_32(s1) = S_ANDN2_B32 %41(s1), $exec_lo, implicit-def $scc
%57:sreg_32(s1) = S_AND_B32 $exec_lo, %55(s1), implicit-def $scc
%58:sreg_32(s1) = S_OR_B32 %56(s1), %57(s1), implicit-def $scc
bb.5:
successors: %bb.3(0x80000000)
%59:sreg_32(s1) = PHI %40(s1), %bb.2, %58(s1), %bb.4
%44:_(s32) = G_PHI %52(s32), %bb.4, %9(s32), %bb.2
%60:sreg_32(s1) = COPY %59(s1)
%61:sreg_32(s1) = COPY %60(s1)
G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), %42(s32)
%62:sreg_32(s1) = S_ANDN2_B32 %31(s1), $exec_lo, implicit-def $scc
%63:sreg_32(s1) = S_AND_B32 $exec_lo, %61(s1), implicit-def $scc
%43:sreg_32(s1) = S_OR_B32 %62(s1), %63(s1), implicit-def $scc
G_BR %bb.3
bb.6:
%64:_(s32) = G_PHI %15(s32), %bb.3
G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), %64(s32)
S_ENDPGM 0
...

View File

@@ -0,0 +1,858 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5
# RUN: llc -mtriple=amdgcn -mcpu=gfx1010 -run-pass=none %s -verify-machineinstrs -o - | FileCheck %s
---
# G_FPTOUI of a value copied from $sgpr0, added (G_ADD) to a value copied from
# $sgpr1, then stored to the global pointer built from $vgpr0/$vgpr1.
# NOTE(review): per the function name, the all-SGPR G_FPTOUI is presumably a
# uniform operation that has to be computed in VGPRs on this target - confirm.
# The file's RUN line uses -run-pass=none, so the CHECK lines mirror the input.
name: uniform_in_vgpr
legalized: true
body: |
bb.0:
liveins: $sgpr0, $sgpr1, $vgpr0, $vgpr1
; CHECK-LABEL: name: uniform_in_vgpr
; CHECK: liveins: $sgpr0, $sgpr1, $vgpr0, $vgpr1
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr0
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr1
; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr0
; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr1
; CHECK-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32)
; CHECK-NEXT: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[COPY]](s32)
; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[COPY1]]
; CHECK-NEXT: G_STORE [[ADD]](s32), [[MV]](p1) :: (store (s32), addrspace 1)
; CHECK-NEXT: S_ENDPGM 0
%0:_(s32) = COPY $sgpr0
%1:_(s32) = COPY $sgpr1
%2:_(s32) = COPY $vgpr0
%3:_(s32) = COPY $vgpr1
%4:_(p1) = G_MERGE_VALUES %2(s32), %3(s32)
%5:_(s32) = G_FPTOUI %0(s32)
%6:_(s32) = G_ADD %5, %1
G_STORE %6(s32), %4(p1) :: (store (s32), addrspace 1)
S_ENDPGM 0
...
---
# Chain of two floating-point operations on SGPR inputs: G_FADD of the values
# from $sgpr0 and $sgpr1 feeds G_FPTOUI, whose result is added (G_ADD) to the
# value from $sgpr2 and stored to the global pointer built from $vgpr0/$vgpr1.
# NOTE(review): per the function name, this presumably covers two consecutive
# uniform operations that both need VGPRs - confirm.
# The file's RUN line uses -run-pass=none, so the CHECK lines mirror the input.
name: back_to_back_uniform_in_vgpr
legalized: true
body: |
bb.0:
liveins: $sgpr0, $sgpr1, $sgpr2, $vgpr0, $vgpr1
; CHECK-LABEL: name: back_to_back_uniform_in_vgpr
; CHECK: liveins: $sgpr0, $sgpr1, $sgpr2, $vgpr0, $vgpr1
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr0
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr1
; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr2
; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr0
; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr1
; CHECK-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY3]](s32), [[COPY4]](s32)
; CHECK-NEXT: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[COPY]], [[COPY1]]
; CHECK-NEXT: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FADD]](s32)
; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[COPY2]]
; CHECK-NEXT: G_STORE [[ADD]](s32), [[MV]](p1) :: (store (s32), addrspace 1)
; CHECK-NEXT: S_ENDPGM 0
%0:_(s32) = COPY $sgpr0
%1:_(s32) = COPY $sgpr1
%2:_(s32) = COPY $sgpr2
%3:_(s32) = COPY $vgpr0
%4:_(s32) = COPY $vgpr1
%5:_(p1) = G_MERGE_VALUES %3(s32), %4(s32)
%6:_(s32) = G_FADD %0, %1
%7:_(s32) = G_FPTOUI %6(s32)
%8:_(s32) = G_ADD %7, %2
G_STORE %8(s32), %5(p1) :: (store (s32), addrspace 1)
S_ENDPGM 0
...
---
# G_AMDGPU_BUFFER_LOAD of <4 x s32> where every register input comes from
# SGPRs: the <4 x s32> first operand is built from $sgpr0..$sgpr3 and the third
# operand (%5) is copied from $sgpr4; the remaining register operands are the
# constant 0 (%9).
# The loaded vector is unmerged, element 1 (%13) is incremented by 1 and stored
# to the global pointer built from $vgpr0/$vgpr1.
# The file's RUN line uses -run-pass=none, so the CHECK lines mirror the input.
name: buffer_load_uniform
legalized: true
body: |
bb.0:
liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4, $vgpr0, $vgpr1
; CHECK-LABEL: name: buffer_load_uniform
; CHECK: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4, $vgpr0, $vgpr1
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr0
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr1
; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr2
; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr3
; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32)
; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr4
; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr0
; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr1
; CHECK-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY5]](s32), [[COPY6]](s32)
; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C]](s32), [[COPY4]], [[C]], 0, 0, 0 :: (dereferenceable load (<4 x s32>), align 1, addrspace 8)
; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_BUFFER_LOAD]](<4 x s32>)
; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[UV1]], [[C1]]
; CHECK-NEXT: G_STORE [[ADD]](s32), [[MV]](p1) :: (store (s32), addrspace 1)
; CHECK-NEXT: S_ENDPGM 0
%0:_(s32) = COPY $sgpr0
%1:_(s32) = COPY $sgpr1
%2:_(s32) = COPY $sgpr2
%3:_(s32) = COPY $sgpr3
%4:_(<4 x s32>) = G_BUILD_VECTOR %0(s32), %1(s32), %2(s32), %3(s32)
%5:_(s32) = COPY $sgpr4
%6:_(s32) = COPY $vgpr0
%7:_(s32) = COPY $vgpr1
%8:_(p1) = G_MERGE_VALUES %6(s32), %7(s32)
%9:_(s32) = G_CONSTANT i32 0
%10:_(<4 x s32>) = G_AMDGPU_BUFFER_LOAD %4(<4 x s32>), %9(s32), %5, %9, 0, 0, 0 :: (dereferenceable load (<4 x s32>), align 1, addrspace 8)
%11:_(s32) = G_CONSTANT i32 1
%12:_(s32), %13:_(s32), %14:_(s32), %15:_(s32) = G_UNMERGE_VALUES %10(<4 x s32>)
%16:_(s32) = G_ADD %13, %11
G_STORE %16(s32), %8(p1) :: (store (s32), addrspace 1)
S_ENDPGM 0
...
---
# Same shape as the buffer_load_uniform function in this file, except that the
# third operand of G_AMDGPU_BUFFER_LOAD (%5) is copied from $vgpr0 instead of
# an SGPR; the <4 x s32> first operand is still built from $sgpr0..$sgpr3.
# The loaded vector is unmerged, element 1 (%13) is incremented by 1 and stored
# to the global pointer built from $vgpr1/$vgpr2.
# The file's RUN line uses -run-pass=none, so the CHECK lines mirror the input.
name: buffer_load_divergent
legalized: true
body: |
bb.0:
liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr0, $vgpr1, $vgpr2
; CHECK-LABEL: name: buffer_load_divergent
; CHECK: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr0, $vgpr1, $vgpr2
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr0
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr1
; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr2
; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr3
; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32)
; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr0
; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr1
; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr2
; CHECK-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY5]](s32), [[COPY6]](s32)
; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C]](s32), [[COPY4]], [[C]], 0, 0, 0 :: (dereferenceable load (<4 x s32>), align 1, addrspace 8)
; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_BUFFER_LOAD]](<4 x s32>)
; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[UV1]], [[C1]]
; CHECK-NEXT: G_STORE [[ADD]](s32), [[MV]](p1) :: (store (s32), addrspace 1)
; CHECK-NEXT: S_ENDPGM 0
%0:_(s32) = COPY $sgpr0
%1:_(s32) = COPY $sgpr1
%2:_(s32) = COPY $sgpr2
%3:_(s32) = COPY $sgpr3
%4:_(<4 x s32>) = G_BUILD_VECTOR %0(s32), %1(s32), %2(s32), %3(s32)
%5:_(s32) = COPY $vgpr0
%6:_(s32) = COPY $vgpr1
%7:_(s32) = COPY $vgpr2
%8:_(p1) = G_MERGE_VALUES %6(s32), %7(s32)
%9:_(s32) = G_CONSTANT i32 0
%10:_(<4 x s32>) = G_AMDGPU_BUFFER_LOAD %4(<4 x s32>), %9(s32), %5, %9, 0, 0, 0 :: (dereferenceable load (<4 x s32>), align 1, addrspace 8)
%11:_(s32) = G_CONSTANT i32 1
%12:_(s32), %13:_(s32), %14:_(s32), %15:_(s32) = G_UNMERGE_VALUES %10(<4 x s32>)
%16:_(s32) = G_ADD %13, %11
G_STORE %16(s32), %8(p1) :: (store (s32), addrspace 1)
S_ENDPGM 0
...
---
# 64-bit G_AND whose two s64 operands are each merged from a pair of VGPR
# copies ($vgpr0/$vgpr1 and $vgpr2/$vgpr3); the s64 result is stored to the
# global pointer built from $vgpr4/$vgpr5.
# The file's RUN line uses -run-pass=none, so the CHECK lines mirror the input.
name: vgpr_and_i64
legalized: true
body: |
bb.0:
liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5
; CHECK-LABEL: name: vgpr_and_i64
; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
; CHECK-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32)
; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
; CHECK-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32)
; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4
; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5
; CHECK-NEXT: [[MV2:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32)
; CHECK-NEXT: [[AND:%[0-9]+]]:_(s64) = G_AND [[MV]], [[MV1]]
; CHECK-NEXT: G_STORE [[AND]](s64), [[MV2]](p1) :: (store (s64), addrspace 1)
; CHECK-NEXT: S_ENDPGM 0
%0:_(s32) = COPY $vgpr0
%1:_(s32) = COPY $vgpr1
%2:_(s64) = G_MERGE_VALUES %0(s32), %1(s32)
%3:_(s32) = COPY $vgpr2
%4:_(s32) = COPY $vgpr3
%5:_(s64) = G_MERGE_VALUES %3(s32), %4(s32)
%6:_(s32) = COPY $vgpr4
%7:_(s32) = COPY $vgpr5
%8:_(p1) = G_MERGE_VALUES %6(s32), %7(s32)
%9:_(s64) = G_AND %2, %5
G_STORE %9(s64), %8(p1) :: (store (s64), addrspace 1)
S_ENDPGM 0
...
---
# 16-bit G_ABS on an SGPR input: the value from $sgpr0 is truncated to s16,
# passed through G_ABS, any-extended back to s32 and written with a 16-bit
# store to the global pointer built from $vgpr0/$vgpr1.
# The file's RUN line uses -run-pass=none, so the CHECK lines mirror the input.
name: abs_sgpr_i16
legalized: true
body: |
bb.0:
liveins: $sgpr0, $vgpr0, $vgpr1
; CHECK-LABEL: name: abs_sgpr_i16
; CHECK: liveins: $sgpr0, $vgpr0, $vgpr1
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr0
; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32)
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr0
; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr1
; CHECK-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY1]](s32), [[COPY2]](s32)
; CHECK-NEXT: [[ABS:%[0-9]+]]:_(s16) = G_ABS [[TRUNC]]
; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[ABS]](s16)
; CHECK-NEXT: G_STORE [[ANYEXT]](s32), [[MV]](p1) :: (store (s16), addrspace 1)
; CHECK-NEXT: S_ENDPGM 0
%0:_(s32) = COPY $sgpr0
%1:_(s16) = G_TRUNC %0(s32)
%2:_(s32) = COPY $vgpr0
%3:_(s32) = COPY $vgpr1
%4:_(p1) = G_MERGE_VALUES %2(s32), %3(s32)
%5:_(s16) = G_ABS %1
%6:_(s32) = G_ANYEXT %5(s16)
G_STORE %6(s32), %4(p1) :: (store (s16), addrspace 1)
S_ENDPGM 0
...
---
# s1 G_PHI whose incoming values and controlling branch all derive from SGPRs.
#   bb.0: %6 = ($sgpr0 uge 6); %8 = ($sgpr1 ne 0); G_BRCOND on %8 to bb.2,
#         otherwise G_BR to bb.1.
#   bb.1: %10 = ($sgpr0 ult 1), then falls through to bb.2.
#   bb.2: %11 = G_PHI of %6 (from bb.0) and %10 (from bb.1); the s1 is
#         sign-extended to s32, 2 is added, and the result is stored to the
#         global pointer built from $vgpr0/$vgpr1.
# The file's RUN line uses -run-pass=none, so the CHECK lines mirror the input.
name: uniform_i1_phi
legalized: true
tracksRegLiveness: true
body: |
; CHECK-LABEL: name: uniform_i1_phi
; CHECK: bb.0:
; CHECK-NEXT: successors: %bb.1(0x30000000), %bb.2(0x50000000)
; CHECK-NEXT: liveins: $sgpr0, $sgpr1, $vgpr0, $vgpr1
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
; CHECK-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32)
; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr0
; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr1
; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 6
; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[COPY2]](s32), [[C]]
; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
; CHECK-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[COPY3]](s32), [[C1]]
; CHECK-NEXT: G_BRCOND [[ICMP1]](s1), %bb.2
; CHECK-NEXT: G_BR %bb.1
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.1:
; CHECK-NEXT: successors: %bb.2(0x80000000)
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
; CHECK-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[COPY2]](s32), [[C2]]
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.2:
; CHECK-NEXT: [[PHI:%[0-9]+]]:_(s1) = G_PHI [[ICMP]](s1), %bb.0, [[ICMP2]](s1), %bb.1
; CHECK-NEXT: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[PHI]](s1)
; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[SEXT]], [[C3]]
; CHECK-NEXT: G_STORE [[ADD]](s32), [[MV]](p1) :: (store (s32), addrspace 1)
; CHECK-NEXT: S_ENDPGM 0
bb.0:
successors: %bb.1(0x30000000), %bb.2(0x50000000)
liveins: $sgpr0, $sgpr1, $vgpr0, $vgpr1
%0:_(s32) = COPY $vgpr0
%1:_(s32) = COPY $vgpr1
%2:_(p1) = G_MERGE_VALUES %0(s32), %1(s32)
%3:_(s32) = COPY $sgpr0
%4:_(s32) = COPY $sgpr1
%5:_(s32) = G_CONSTANT i32 6
%6:_(s1) = G_ICMP intpred(uge), %3(s32), %5
%7:_(s32) = G_CONSTANT i32 0
%8:_(s1) = G_ICMP intpred(ne), %4(s32), %7
G_BRCOND %8(s1), %bb.2
G_BR %bb.1
bb.1:
successors: %bb.2(0x80000000)
%9:_(s32) = G_CONSTANT i32 1
%10:_(s1) = G_ICMP intpred(ult), %3(s32), %9
bb.2:
%11:_(s1) = G_PHI %6(s1), %bb.0, %10(s1), %bb.1
%12:_(s32) = G_SEXT %11(s1)
%13:_(s32) = G_CONSTANT i32 2
%14:_(s32) = G_ADD %12, %13
G_STORE %14(s32), %2(p1) :: (store (s32), addrspace 1)
S_ENDPGM 0
...
---
# G_FCMP (oeq against 0.0) on the value from $sgpr0 produces the s1 condition
# of a G_SELECT that picks between the values from $sgpr1 and $sgpr2; the
# result is stored to the global pointer built from $vgpr0/$vgpr1.
# NOTE(review): per the function name, the floating-point compare presumably
# yields a VCC-style condition that an all-SGPR select needs in SCC form -
# confirm.
# The file's RUN line uses -run-pass=none, so the CHECK lines mirror the input.
name: vcc_to_scc
legalized: true
body: |
bb.0:
liveins: $sgpr0, $sgpr1, $sgpr2, $vgpr0, $vgpr1
; CHECK-LABEL: name: vcc_to_scc
; CHECK: liveins: $sgpr0, $sgpr1, $sgpr2, $vgpr0, $vgpr1
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr0
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr1
; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr2
; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr0
; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr1
; CHECK-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY3]](s32), [[COPY4]](s32)
; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0.000000e+00
; CHECK-NEXT: [[FCMP:%[0-9]+]]:_(s1) = G_FCMP floatpred(oeq), [[COPY]](s32), [[C]]
; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[FCMP]](s1), [[COPY1]], [[COPY2]]
; CHECK-NEXT: G_STORE [[SELECT]](s32), [[MV]](p1) :: (store (s32), addrspace 1)
; CHECK-NEXT: S_ENDPGM 0
%0:_(s32) = COPY $sgpr0
%1:_(s32) = COPY $sgpr1
%2:_(s32) = COPY $sgpr2
%3:_(s32) = COPY $vgpr0
%4:_(s32) = COPY $vgpr1
%5:_(p1) = G_MERGE_VALUES %3(s32), %4(s32)
%6:_(s32) = G_FCONSTANT float 0.000000e+00
%7:_(s1) = G_FCMP floatpred(oeq), %0(s32), %6
%8:_(s32) = G_SELECT %7(s1), %1, %2
G_STORE %8(s32), %5(p1) :: (store (s32), addrspace 1)
S_ENDPGM 0
...
---
# G_ICMP (eq against 0) on the value from $sgpr0 produces the s1 condition of
# a G_SELECT that picks between the values from $vgpr0 and $vgpr1; the result
# is stored to the global pointer built from $vgpr2/$vgpr3.
# NOTE(review): per the function name, the SGPR compare presumably yields an
# SCC-style condition that a VGPR select needs in VCC form - confirm.
# The file's RUN line uses -run-pass=none, so the CHECK lines mirror the input.
name: scc_to_vcc
legalized: true
body: |
bb.0:
liveins: $sgpr0, $vgpr0, $vgpr1, $vgpr2, $vgpr3
; CHECK-LABEL: name: scc_to_vcc
; CHECK: liveins: $sgpr0, $vgpr0, $vgpr1, $vgpr2, $vgpr3
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr0
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr0
; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr1
; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr2
; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr3
; CHECK-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY3]](s32), [[COPY4]](s32)
; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY]](s32), [[C]]
; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[COPY1]], [[COPY2]]
; CHECK-NEXT: G_STORE [[SELECT]](s32), [[MV]](p1) :: (store (s32), addrspace 1)
; CHECK-NEXT: S_ENDPGM 0
%0:_(s32) = COPY $sgpr0
%1:_(s32) = COPY $vgpr0
%2:_(s32) = COPY $vgpr1
%3:_(s32) = COPY $vgpr2
%4:_(s32) = COPY $vgpr3
%5:_(p1) = G_MERGE_VALUES %3(s32), %4(s32)
%6:_(s32) = G_CONSTANT i32 0
%7:_(s1) = G_ICMP intpred(eq), %0(s32), %6
%8:_(s32) = G_SELECT %7(s1), %1, %2
G_STORE %8(s32), %5(p1) :: (store (s32), addrspace 1)
S_ENDPGM 0
...
---
# The s1 condition of a G_SELECT is produced by G_TRUNC of the s32 value from
# $vgpr0; the select picks between the values from $vgpr1 and $vgpr2 and the
# result is stored to the global pointer built from $vgpr3/$vgpr4.
# NOTE(review): per the function name, the truncated VGPR value presumably has
# to be turned into a VCC condition - confirm.
# The file's RUN line uses -run-pass=none, so the CHECK lines mirror the input.
name: vgpr_to_vcc_trunc
legalized: true
body: |
bb.0:
liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4
; CHECK-LABEL: name: vgpr_to_vcc_trunc
; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4
; CHECK-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY3]](s32), [[COPY4]](s32)
; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s1) = G_TRUNC [[COPY]](s32)
; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[TRUNC]](s1), [[COPY1]], [[COPY2]]
; CHECK-NEXT: G_STORE [[SELECT]](s32), [[MV]](p1) :: (store (s32), addrspace 1)
; CHECK-NEXT: S_ENDPGM 0
%0:_(s32) = COPY $vgpr0
%1:_(s32) = COPY $vgpr1
%2:_(s32) = COPY $vgpr2
%3:_(s32) = COPY $vgpr3
%4:_(s32) = COPY $vgpr4
%5:_(p1) = G_MERGE_VALUES %3(s32), %4(s32)
%6:_(s1) = G_TRUNC %0(s32)
%7:_(s32) = G_SELECT %6(s1), %1, %2
G_STORE %7(s32), %5(p1) :: (store (s32), addrspace 1)
S_ENDPGM 0
...
---
# G_ZEXT of an s1 to s32, where the s1 is the result of G_ICMP (eq against 10)
# on the value from $sgpr0; the extended value is stored to the global pointer
# built from $vgpr0/$vgpr1.
# The file's RUN line uses -run-pass=none, so the CHECK lines mirror the input.
name: zext
legalized: true
body: |
bb.0:
liveins: $sgpr0, $vgpr0, $vgpr1
; CHECK-LABEL: name: zext
; CHECK: liveins: $sgpr0, $vgpr0, $vgpr1
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr0
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr0
; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr1
; CHECK-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY1]](s32), [[COPY2]](s32)
; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY]](s32), [[C]]
; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[ICMP]](s1)
; CHECK-NEXT: G_STORE [[ZEXT]](s32), [[MV]](p1) :: (store (s32), addrspace 1)
; CHECK-NEXT: S_ENDPGM 0
%0:_(s32) = COPY $sgpr0
%1:_(s32) = COPY $vgpr0
%2:_(s32) = COPY $vgpr1
%3:_(p1) = G_MERGE_VALUES %1(s32), %2(s32)
%4:_(s32) = G_CONSTANT i32 10
%5:_(s1) = G_ICMP intpred(eq), %0(s32), %4
%6:_(s32) = G_ZEXT %5(s1)
G_STORE %6(s32), %3(p1) :: (store (s32), addrspace 1)
S_ENDPGM 0
...
---
# G_SEXT of an s1 to s32, where the s1 is the result of G_ICMP (eq against 10)
# on the value from $sgpr0; the extended value is stored to the global pointer
# built from $vgpr0/$vgpr1.
# The file's RUN line uses -run-pass=none, so the CHECK lines mirror the input.
name: sext
legalized: true
body: |
bb.0:
liveins: $sgpr0, $vgpr0, $vgpr1
; CHECK-LABEL: name: sext
; CHECK: liveins: $sgpr0, $vgpr0, $vgpr1
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr0
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr0
; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr1
; CHECK-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY1]](s32), [[COPY2]](s32)
; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY]](s32), [[C]]
; CHECK-NEXT: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[ICMP]](s1)
; CHECK-NEXT: G_STORE [[SEXT]](s32), [[MV]](p1) :: (store (s32), addrspace 1)
; CHECK-NEXT: S_ENDPGM 0
%0:_(s32) = COPY $sgpr0
%1:_(s32) = COPY $vgpr0
%2:_(s32) = COPY $vgpr1
%3:_(p1) = G_MERGE_VALUES %1(s32), %2(s32)
%4:_(s32) = G_CONSTANT i32 10
%5:_(s1) = G_ICMP intpred(eq), %0(s32), %4
%6:_(s32) = G_SEXT %5(s1)
G_STORE %6(s32), %3(p1) :: (store (s32), addrspace 1)
S_ENDPGM 0
...
---
# s1 G_AND of two compares on VGPR inputs: ($vgpr0 uge 10) and ($vgpr1 uge 20).
# The combined condition drives a G_SELECT between the same two VGPR values,
# and the result is stored to the global pointer built from $vgpr2/$vgpr3.
# The file's RUN line uses -run-pass=none, so the CHECK lines mirror the input.
name: and_i1_vcc
legalized: true
body: |
bb.0:
liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3
; CHECK-LABEL: name: and_i1_vcc
; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
; CHECK-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32)
; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[COPY]](s32), [[C]]
; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
; CHECK-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[COPY1]](s32), [[C1]]
; CHECK-NEXT: [[AND:%[0-9]+]]:_(s1) = G_AND [[ICMP]], [[ICMP1]]
; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[AND]](s1), [[COPY]], [[COPY1]]
; CHECK-NEXT: G_STORE [[SELECT]](s32), [[MV]](p1) :: (store (s32), addrspace 1)
; CHECK-NEXT: S_ENDPGM 0
%0:_(s32) = COPY $vgpr0
%1:_(s32) = COPY $vgpr1
%2:_(s32) = COPY $vgpr2
%3:_(s32) = COPY $vgpr3
%4:_(p1) = G_MERGE_VALUES %2(s32), %3(s32)
%5:_(s32) = G_CONSTANT i32 10
%6:_(s1) = G_ICMP intpred(uge), %0(s32), %5
%7:_(s32) = G_CONSTANT i32 20
%8:_(s1) = G_ICMP intpred(uge), %1(s32), %7
%9:_(s1) = G_AND %6, %8
%10:_(s32) = G_SELECT %9(s1), %0, %1
G_STORE %10(s32), %4(p1) :: (store (s32), addrspace 1)
S_ENDPGM 0
...
---
# s1 G_AND of two compares on SGPR inputs: ($sgpr0 uge 10) and ($sgpr1 uge 20).
# The combined condition drives a G_SELECT between the same two SGPR values,
# and the result is stored to the global pointer built from $vgpr0/$vgpr1.
# The file's RUN line uses -run-pass=none, so the CHECK lines mirror the input.
name: and_i1_scc
legalized: true
body: |
bb.0:
liveins: $sgpr0, $sgpr1, $vgpr0, $vgpr1
; CHECK-LABEL: name: and_i1_scc
; CHECK: liveins: $sgpr0, $sgpr1, $vgpr0, $vgpr1
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr0
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr1
; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr0
; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr1
; CHECK-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32)
; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[COPY]](s32), [[C]]
; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
; CHECK-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[COPY1]](s32), [[C1]]
; CHECK-NEXT: [[AND:%[0-9]+]]:_(s1) = G_AND [[ICMP]], [[ICMP1]]
; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[AND]](s1), [[COPY]], [[COPY1]]
; CHECK-NEXT: G_STORE [[SELECT]](s32), [[MV]](p1) :: (store (s32), addrspace 1)
; CHECK-NEXT: S_ENDPGM 0
%0:_(s32) = COPY $sgpr0
%1:_(s32) = COPY $sgpr1
%2:_(s32) = COPY $vgpr0
%3:_(s32) = COPY $vgpr1
%4:_(p1) = G_MERGE_VALUES %2(s32), %3(s32)
%5:_(s32) = G_CONSTANT i32 10
%6:_(s1) = G_ICMP intpred(uge), %0(s32), %5
%7:_(s32) = G_CONSTANT i32 20
%8:_(s1) = G_ICMP intpred(uge), %1(s32), %7
%9:_(s1) = G_AND %6, %8
%10:_(s32) = G_SELECT %9(s1), %0, %1
G_STORE %10(s32), %4(p1) :: (store (s32), addrspace 1)
S_ENDPGM 0
...
# divergent_phi_with_uniform_inputs: bb.0 compares a value copied from $vgpr0
# against 0 and branches with SI_IF. The G_PHI in bb.2 merges two G_CONSTANTs
# (0 from bb.0, 1 from bb.1), so its inputs are constants while the incoming
# edge is chosen by the SI_IF on a VGPR-derived condition.
# The CHECK lines currently mirror the input: generic virtual registers that
# have no bank in the input still have none (`_`).
---
name: divergent_phi_with_uniform_inputs
legalized: true
tracksRegLiveness: true
body: |
; CHECK-LABEL: name: divergent_phi_with_uniform_inputs
; CHECK: bb.0:
; CHECK-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000)
; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
; CHECK-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY1]](s32), [[COPY2]](s32)
; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
; CHECK-NEXT: [[ICMP:%[0-9]+]]:sreg_32_xm0_xexec(s1) = G_ICMP intpred(eq), [[COPY]](s32), [[C]]
; CHECK-NEXT: [[SI_IF:%[0-9]+]]:sreg_32_xm0_xexec(s32) = SI_IF [[ICMP]](s1), %bb.2, implicit-def $exec, implicit-def $scc, implicit $exec
; CHECK-NEXT: G_BR %bb.1
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.1:
; CHECK-NEXT: successors: %bb.2(0x80000000)
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.2:
; CHECK-NEXT: [[PHI:%[0-9]+]]:_(s32) = G_PHI [[C]](s32), %bb.0, [[C1]](s32), %bb.1
; CHECK-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), [[SI_IF]](s32)
; CHECK-NEXT: G_STORE [[PHI]](s32), [[MV]](p1) :: (store (s32), addrspace 1)
; CHECK-NEXT: S_ENDPGM 0
bb.0:
successors: %bb.1(0x40000000), %bb.2(0x40000000)
liveins: $vgpr0, $vgpr1, $vgpr2
%0:_(s32) = COPY $vgpr0
%1:_(s32) = COPY $vgpr1
%2:_(s32) = COPY $vgpr2
%3:_(p1) = G_MERGE_VALUES %1(s32), %2(s32)
%4:_(s32) = G_CONSTANT i32 0
%5:sreg_32_xm0_xexec(s1) = G_ICMP intpred(eq), %0(s32), %4
%6:sreg_32_xm0_xexec(s32) = SI_IF %5(s1), %bb.2, implicit-def $exec, implicit-def $scc, implicit $exec
G_BR %bb.1
bb.1:
successors: %bb.2(0x80000000)
%7:_(s32) = G_CONSTANT i32 1
bb.2:
%8:_(s32) = G_PHI %4(s32), %bb.0, %7(s32), %bb.1
G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), %6(s32)
G_STORE %8(s32), %3(p1) :: (store (s32), addrspace 1)
S_ENDPGM 0
...
# divergent_because_of_temporal_divergent_use: bb.1 is a single-block loop. The
# counter %9 is built only from constants (G_PHI of -1 / G_ADD 1), but the loop
# exit is controlled by llvm.amdgcn.if.break + SI_LOOP on a G_FCMP against a
# value copied from $vgpr0. The counter is then used outside the loop in bb.2
# (G_PHI %9, multiplied by 10 and stored).
# The CHECK lines currently mirror the input: generic virtual registers that
# have no bank in the input still have none (`_`).
---
name: divergent_because_of_temporal_divergent_use
legalized: true
tracksRegLiveness: true
body: |
; CHECK-LABEL: name: divergent_because_of_temporal_divergent_use
; CHECK: bb.0:
; CHECK-NEXT: successors: %bb.1(0x80000000)
; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
; CHECK-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY1]](s32), [[COPY2]](s32)
; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1
; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.1:
; CHECK-NEXT: successors: %bb.2(0x04000000), %bb.1(0x7c000000)
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[PHI:%[0-9]+]]:_(s32) = G_PHI %7(s32), %bb.1, [[C1]](s32), %bb.0
; CHECK-NEXT: [[PHI1:%[0-9]+]]:_(s32) = G_PHI [[C]](s32), %bb.0, %9(s32), %bb.1
; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[PHI1]], [[C2]]
; CHECK-NEXT: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[ADD]](s32)
; CHECK-NEXT: [[FCMP:%[0-9]+]]:_(s1) = G_FCMP floatpred(ogt), [[UITOFP]](s32), [[COPY]]
; CHECK-NEXT: [[INT:%[0-9]+]]:sreg_32_xm0_xexec(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.if.break), [[FCMP]](s1), [[PHI]](s32)
; CHECK-NEXT: SI_LOOP [[INT]](s32), %bb.1, implicit-def $exec, implicit-def $scc, implicit $exec
; CHECK-NEXT: G_BR %bb.2
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.2:
; CHECK-NEXT: [[PHI2:%[0-9]+]]:_(s32) = G_PHI [[ADD]](s32), %bb.1
; CHECK-NEXT: [[PHI3:%[0-9]+]]:_(s32) = G_PHI [[INT]](s32), %bb.1
; CHECK-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), [[PHI3]](s32)
; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
; CHECK-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[PHI2]], [[C3]]
; CHECK-NEXT: G_STORE [[MUL]](s32), [[MV]](p1) :: (store (s32), addrspace 1)
; CHECK-NEXT: S_ENDPGM 0
bb.0:
successors: %bb.1(0x80000000)
liveins: $vgpr0, $vgpr1, $vgpr2
%0:_(s32) = COPY $vgpr0
%1:_(s32) = COPY $vgpr1
%2:_(s32) = COPY $vgpr2
%3:_(p1) = G_MERGE_VALUES %1(s32), %2(s32)
%4:_(s32) = G_CONSTANT i32 -1
%5:_(s32) = G_CONSTANT i32 0
bb.1:
successors: %bb.2(0x04000000), %bb.1(0x7c000000)
%6:_(s32) = G_PHI %7(s32), %bb.1, %5(s32), %bb.0
%8:_(s32) = G_PHI %4(s32), %bb.0, %9(s32), %bb.1
%10:_(s32) = G_CONSTANT i32 1
%9:_(s32) = G_ADD %8, %10
%11:_(s32) = G_UITOFP %9(s32)
%12:_(s1) = G_FCMP floatpred(ogt), %11(s32), %0
%7:sreg_32_xm0_xexec(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.if.break), %12(s1), %6(s32)
SI_LOOP %7(s32), %bb.1, implicit-def $exec, implicit-def $scc, implicit $exec
G_BR %bb.2
bb.2:
%13:_(s32) = G_PHI %9(s32), %bb.1
%14:_(s32) = G_PHI %7(s32), %bb.1
G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), %14(s32)
%15:_(s32) = G_CONSTANT i32 10
%16:_(s32) = G_MUL %13, %15
G_STORE %16(s32), %3(p1) :: (store (s32), addrspace 1)
S_ENDPGM 0
...
# loop_with_2breaks: a loop with header bb.1 and latch bb.3 (SI_LOOP back to
# bb.1) that contains two nested SI_IF regions (bb.1 -> bb.2 and bb.2 -> bb.4).
# The s1 value consumed by llvm.amdgcn.if.break in bb.3 is carried through
# sreg_32 PHIs and merged with S_ANDN2_B32 / S_AND_B32 / S_OR_B32 against
# $exec_lo; those instructions are already present in the input.
# bb.4 is the only block that stores to memory (load, add 1, store back) and it
# also advances the loop counter and compares it against 100.
# The CHECK lines currently mirror the input: generic virtual registers that
# have no bank in the input still have none (`_`).
---
name: loop_with_2breaks
legalized: true
tracksRegLiveness: true
body: |
; CHECK-LABEL: name: loop_with_2breaks
; CHECK: bb.0:
; CHECK-NEXT: successors: %bb.1(0x80000000)
; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
; CHECK-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32)
; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
; CHECK-NEXT: [[MV1:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32)
; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4
; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5
; CHECK-NEXT: [[MV2:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32)
; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
; CHECK-NEXT: [[DEF1:%[0-9]+]]:sreg_32(s1) = IMPLICIT_DEF
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.1:
; CHECK-NEXT: successors: %bb.2(0x40000000), %bb.3(0x40000000)
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[PHI:%[0-9]+]]:sreg_32(s1) = PHI [[DEF1]](s1), %bb.0, %13(s1), %bb.3
; CHECK-NEXT: [[PHI1:%[0-9]+]]:_(s32) = G_PHI %15(s32), %bb.3, [[C]](s32), %bb.0
; CHECK-NEXT: [[PHI2:%[0-9]+]]:_(s32) = G_PHI [[C]](s32), %bb.0, %17(s32), %bb.3
; CHECK-NEXT: [[COPY6:%[0-9]+]]:sreg_32(s1) = COPY [[PHI]](s1)
; CHECK-NEXT: [[SEXT:%[0-9]+]]:_(s64) = G_SEXT [[PHI2]](s32)
; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[SEXT]], [[C1]](s32)
; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[MV1]], [[SHL]](s64)
; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s32), addrspace 1)
; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
; CHECK-NEXT: [[ICMP:%[0-9]+]]:sreg_32_xm0_xexec(s1) = G_ICMP intpred(ne), [[LOAD]](s32), [[C2]]
; CHECK-NEXT: [[C3:%[0-9]+]]:_(s1) = G_CONSTANT i1 true
; CHECK-NEXT: [[COPY7:%[0-9]+]]:sreg_32(s1) = COPY [[C3]](s1)
; CHECK-NEXT: [[S_ANDN2_B32_:%[0-9]+]]:sreg_32(s1) = S_ANDN2_B32 [[COPY6]](s1), $exec_lo, implicit-def $scc
; CHECK-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32(s1) = S_AND_B32 $exec_lo, [[COPY7]](s1), implicit-def $scc
; CHECK-NEXT: [[S_OR_B32_:%[0-9]+]]:sreg_32(s1) = S_OR_B32 [[S_ANDN2_B32_]](s1), [[S_AND_B32_]](s1), implicit-def $scc
; CHECK-NEXT: [[COPY8:%[0-9]+]]:sreg_32(s1) = COPY [[S_OR_B32_]](s1)
; CHECK-NEXT: [[SI_IF:%[0-9]+]]:sreg_32_xm0_xexec(s32) = SI_IF [[ICMP]](s1), %bb.3, implicit-def $exec, implicit-def $scc, implicit $exec
; CHECK-NEXT: G_BR %bb.2
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.2:
; CHECK-NEXT: successors: %bb.4(0x40000000), %bb.5(0x40000000)
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s64) = G_SHL [[SEXT]], [[C4]](s32)
; CHECK-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[MV2]], [[SHL1]](s64)
; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s32), addrspace 1)
; CHECK-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
; CHECK-NEXT: [[ICMP1:%[0-9]+]]:sreg_32_xm0_xexec(s1) = G_ICMP intpred(ne), [[LOAD1]](s32), [[C5]]
; CHECK-NEXT: [[C6:%[0-9]+]]:_(s1) = G_CONSTANT i1 true
; CHECK-NEXT: [[COPY9:%[0-9]+]]:sreg_32(s1) = COPY [[C6]](s1)
; CHECK-NEXT: [[COPY10:%[0-9]+]]:sreg_32(s1) = COPY [[COPY9]](s1)
; CHECK-NEXT: [[SI_IF1:%[0-9]+]]:sreg_32_xm0_xexec(s32) = SI_IF [[ICMP1]](s1), %bb.5, implicit-def $exec, implicit-def $scc, implicit $exec
; CHECK-NEXT: G_BR %bb.4
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.3:
; CHECK-NEXT: successors: %bb.6(0x04000000), %bb.1(0x7c000000)
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[PHI3:%[0-9]+]]:sreg_32(s1) = PHI [[S_OR_B32_]](s1), %bb.1, %43(s1), %bb.5
; CHECK-NEXT: [[PHI4:%[0-9]+]]:_(s32) = G_PHI %44(s32), %bb.5, [[DEF]](s32), %bb.1
; CHECK-NEXT: [[COPY11:%[0-9]+]]:sreg_32(s1) = COPY [[PHI3]](s1)
; CHECK-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), [[SI_IF]](s32)
; CHECK-NEXT: [[INT:%[0-9]+]]:sreg_32_xm0_xexec(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.if.break), [[COPY11]](s1), [[PHI1]](s32)
; CHECK-NEXT: SI_LOOP [[INT]](s32), %bb.1, implicit-def $exec, implicit-def $scc, implicit $exec
; CHECK-NEXT: G_BR %bb.6
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.4:
; CHECK-NEXT: successors: %bb.5(0x80000000)
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(s64) = G_SHL [[SEXT]], [[C7]](s32)
; CHECK-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[MV]], [[SHL2]](s64)
; CHECK-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s32), addrspace 1)
; CHECK-NEXT: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[LOAD2]], [[C8]]
; CHECK-NEXT: G_STORE [[ADD]](s32), [[PTR_ADD2]](p1) :: (store (s32), addrspace 1)
; CHECK-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[PHI2]], [[C8]]
; CHECK-NEXT: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 100
; CHECK-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[PHI2]](s32), [[C9]]
; CHECK-NEXT: [[COPY12:%[0-9]+]]:sreg_32(s1) = COPY [[ICMP2]](s1)
; CHECK-NEXT: [[S_ANDN2_B32_1:%[0-9]+]]:sreg_32(s1) = S_ANDN2_B32 [[COPY10]](s1), $exec_lo, implicit-def $scc
; CHECK-NEXT: [[S_AND_B32_1:%[0-9]+]]:sreg_32(s1) = S_AND_B32 $exec_lo, [[COPY12]](s1), implicit-def $scc
; CHECK-NEXT: [[S_OR_B32_1:%[0-9]+]]:sreg_32(s1) = S_OR_B32 [[S_ANDN2_B32_1]](s1), [[S_AND_B32_1]](s1), implicit-def $scc
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.5:
; CHECK-NEXT: successors: %bb.3(0x80000000)
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[PHI5:%[0-9]+]]:sreg_32(s1) = PHI [[COPY9]](s1), %bb.2, [[S_OR_B32_1]](s1), %bb.4
; CHECK-NEXT: [[PHI6:%[0-9]+]]:_(s32) = G_PHI [[ADD1]](s32), %bb.4, [[DEF]](s32), %bb.2
; CHECK-NEXT: [[COPY13:%[0-9]+]]:sreg_32(s1) = COPY [[PHI5]](s1)
; CHECK-NEXT: [[COPY14:%[0-9]+]]:sreg_32(s1) = COPY [[COPY13]](s1)
; CHECK-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), [[SI_IF1]](s32)
; CHECK-NEXT: [[S_ANDN2_B32_2:%[0-9]+]]:sreg_32(s1) = S_ANDN2_B32 [[COPY8]](s1), $exec_lo, implicit-def $scc
; CHECK-NEXT: [[S_AND_B32_2:%[0-9]+]]:sreg_32(s1) = S_AND_B32 $exec_lo, [[COPY14]](s1), implicit-def $scc
; CHECK-NEXT: [[S_OR_B32_2:%[0-9]+]]:sreg_32(s1) = S_OR_B32 [[S_ANDN2_B32_2]](s1), [[S_AND_B32_2]](s1), implicit-def $scc
; CHECK-NEXT: G_BR %bb.3
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.6:
; CHECK-NEXT: [[PHI7:%[0-9]+]]:_(s32) = G_PHI [[INT]](s32), %bb.3
; CHECK-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), [[PHI7]](s32)
; CHECK-NEXT: S_ENDPGM 0
bb.0:
successors: %bb.1(0x80000000)
liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5
%0:_(s32) = COPY $vgpr0
%1:_(s32) = COPY $vgpr1
%2:_(p1) = G_MERGE_VALUES %0(s32), %1(s32)
%3:_(s32) = COPY $vgpr2
%4:_(s32) = COPY $vgpr3
%5:_(p1) = G_MERGE_VALUES %3(s32), %4(s32)
%6:_(s32) = COPY $vgpr4
%7:_(s32) = COPY $vgpr5
%8:_(p1) = G_MERGE_VALUES %6(s32), %7(s32)
%9:_(s32) = G_IMPLICIT_DEF
%10:_(s32) = G_CONSTANT i32 0
%11:sreg_32(s1) = IMPLICIT_DEF
bb.1:
successors: %bb.2(0x40000000), %bb.3(0x40000000)
%12:sreg_32(s1) = PHI %11(s1), %bb.0, %13(s1), %bb.3
%14:_(s32) = G_PHI %15(s32), %bb.3, %10(s32), %bb.0
%16:_(s32) = G_PHI %10(s32), %bb.0, %17(s32), %bb.3
%18:sreg_32(s1) = COPY %12(s1)
%19:_(s64) = G_SEXT %16(s32)
%20:_(s32) = G_CONSTANT i32 2
%21:_(s64) = G_SHL %19, %20(s32)
%22:_(p1) = G_PTR_ADD %5, %21(s64)
%23:_(s32) = G_LOAD %22(p1) :: (load (s32), addrspace 1)
%24:_(s32) = G_CONSTANT i32 0
%25:sreg_32_xm0_xexec(s1) = G_ICMP intpred(ne), %23(s32), %24
%26:_(s1) = G_CONSTANT i1 true
%27:sreg_32(s1) = COPY %26(s1)
%28:sreg_32(s1) = S_ANDN2_B32 %18(s1), $exec_lo, implicit-def $scc
%29:sreg_32(s1) = S_AND_B32 $exec_lo, %27(s1), implicit-def $scc
%30:sreg_32(s1) = S_OR_B32 %28(s1), %29(s1), implicit-def $scc
%31:sreg_32(s1) = COPY %30(s1)
%32:sreg_32_xm0_xexec(s32) = SI_IF %25(s1), %bb.3, implicit-def $exec, implicit-def $scc, implicit $exec
G_BR %bb.2
bb.2:
successors: %bb.4(0x40000000), %bb.5(0x40000000)
%33:_(s32) = G_CONSTANT i32 2
%34:_(s64) = G_SHL %19, %33(s32)
%35:_(p1) = G_PTR_ADD %8, %34(s64)
%36:_(s32) = G_LOAD %35(p1) :: (load (s32), addrspace 1)
%37:_(s32) = G_CONSTANT i32 0
%38:sreg_32_xm0_xexec(s1) = G_ICMP intpred(ne), %36(s32), %37
%39:_(s1) = G_CONSTANT i1 true
%40:sreg_32(s1) = COPY %39(s1)
%41:sreg_32(s1) = COPY %40(s1)
%42:sreg_32_xm0_xexec(s32) = SI_IF %38(s1), %bb.5, implicit-def $exec, implicit-def $scc, implicit $exec
G_BR %bb.4
bb.3:
successors: %bb.6(0x04000000), %bb.1(0x7c000000)
%13:sreg_32(s1) = PHI %30(s1), %bb.1, %43(s1), %bb.5
%17:_(s32) = G_PHI %44(s32), %bb.5, %9(s32), %bb.1
%45:sreg_32(s1) = COPY %13(s1)
G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), %32(s32)
%15:sreg_32_xm0_xexec(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.if.break), %45(s1), %14(s32)
SI_LOOP %15(s32), %bb.1, implicit-def $exec, implicit-def $scc, implicit $exec
G_BR %bb.6
bb.4:
successors: %bb.5(0x80000000)
%46:_(s32) = G_CONSTANT i32 2
%47:_(s64) = G_SHL %19, %46(s32)
%48:_(p1) = G_PTR_ADD %2, %47(s64)
%49:_(s32) = G_LOAD %48(p1) :: (load (s32), addrspace 1)
%50:_(s32) = G_CONSTANT i32 1
%51:_(s32) = G_ADD %49, %50
G_STORE %51(s32), %48(p1) :: (store (s32), addrspace 1)
%52:_(s32) = G_ADD %16, %50
%53:_(s32) = G_CONSTANT i32 100
%54:_(s1) = G_ICMP intpred(ult), %16(s32), %53
%55:sreg_32(s1) = COPY %54(s1)
%56:sreg_32(s1) = S_ANDN2_B32 %41(s1), $exec_lo, implicit-def $scc
%57:sreg_32(s1) = S_AND_B32 $exec_lo, %55(s1), implicit-def $scc
%58:sreg_32(s1) = S_OR_B32 %56(s1), %57(s1), implicit-def $scc
bb.5:
successors: %bb.3(0x80000000)
%59:sreg_32(s1) = PHI %40(s1), %bb.2, %58(s1), %bb.4
%44:_(s32) = G_PHI %52(s32), %bb.4, %9(s32), %bb.2
%60:sreg_32(s1) = COPY %59(s1)
%61:sreg_32(s1) = COPY %60(s1)
G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), %42(s32)
%62:sreg_32(s1) = S_ANDN2_B32 %31(s1), $exec_lo, implicit-def $scc
%63:sreg_32(s1) = S_AND_B32 $exec_lo, %61(s1), implicit-def $scc
%43:sreg_32(s1) = S_OR_B32 %62(s1), %63(s1), implicit-def $scc
G_BR %bb.3
bb.6:
%64:_(s32) = G_PHI %15(s32), %bb.3
G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), %64(s32)
S_ENDPGM 0
...

View File

@@ -0,0 +1,50 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=gfx1010 < %s | FileCheck -check-prefix=OLD_RBS_GFX10 %s
; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=gfx1200 < %s | FileCheck -check-prefix=OLD_RBS_GFX12 %s
; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=gfx1010 < %s | FileCheck -check-prefix=NEW_RBS_GFX10 %s
; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=gfx1200 < %s | FileCheck -check-prefix=NEW_RBS_GFX12 %s
; salu_float: fadd of two inreg floats, fptoui of the sum, then an integer add
; with a third inreg value; the result is stored to %ptr.
; The gfx1010 checks expect VALU instructions (v_add_f32, v_cvt_u32_f32,
; v_add_nc_u32). The gfx1200 checks expect SALU instructions (s_add_f32,
; s_cvt_u32_f32, s_add_co_i32) followed by a v_mov_b32 that feeds the store.
; The OLD_RBS_* and NEW_RBS_* check blocks are currently identical per target.
define amdgpu_ps void @salu_float(float inreg %a, float inreg %b, i32 inreg %c, ptr addrspace(1) %ptr) {
; OLD_RBS_GFX10-LABEL: salu_float:
; OLD_RBS_GFX10: ; %bb.0:
; OLD_RBS_GFX10-NEXT: v_add_f32_e64 v2, s0, s1
; OLD_RBS_GFX10-NEXT: v_cvt_u32_f32_e32 v2, v2
; OLD_RBS_GFX10-NEXT: v_add_nc_u32_e32 v2, s2, v2
; OLD_RBS_GFX10-NEXT: global_store_dword v[0:1], v2, off
; OLD_RBS_GFX10-NEXT: s_endpgm
;
; OLD_RBS_GFX12-LABEL: salu_float:
; OLD_RBS_GFX12: ; %bb.0:
; OLD_RBS_GFX12-NEXT: s_add_f32 s0, s0, s1
; OLD_RBS_GFX12-NEXT: s_delay_alu instid0(SALU_CYCLE_3) | instskip(NEXT) | instid1(SALU_CYCLE_3)
; OLD_RBS_GFX12-NEXT: s_cvt_u32_f32 s0, s0
; OLD_RBS_GFX12-NEXT: s_add_co_i32 s0, s0, s2
; OLD_RBS_GFX12-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; OLD_RBS_GFX12-NEXT: v_mov_b32_e32 v2, s0
; OLD_RBS_GFX12-NEXT: global_store_b32 v[0:1], v2, off
; OLD_RBS_GFX12-NEXT: s_endpgm
;
; NEW_RBS_GFX10-LABEL: salu_float:
; NEW_RBS_GFX10: ; %bb.0:
; NEW_RBS_GFX10-NEXT: v_add_f32_e64 v2, s0, s1
; NEW_RBS_GFX10-NEXT: v_cvt_u32_f32_e32 v2, v2
; NEW_RBS_GFX10-NEXT: v_add_nc_u32_e32 v2, s2, v2
; NEW_RBS_GFX10-NEXT: global_store_dword v[0:1], v2, off
; NEW_RBS_GFX10-NEXT: s_endpgm
;
; NEW_RBS_GFX12-LABEL: salu_float:
; NEW_RBS_GFX12: ; %bb.0:
; NEW_RBS_GFX12-NEXT: s_add_f32 s0, s0, s1
; NEW_RBS_GFX12-NEXT: s_delay_alu instid0(SALU_CYCLE_3) | instskip(NEXT) | instid1(SALU_CYCLE_3)
; NEW_RBS_GFX12-NEXT: s_cvt_u32_f32 s0, s0
; NEW_RBS_GFX12-NEXT: s_add_co_i32 s0, s0, s2
; NEW_RBS_GFX12-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; NEW_RBS_GFX12-NEXT: v_mov_b32_e32 v2, s0
; NEW_RBS_GFX12-NEXT: global_store_b32 v[0:1], v2, off
; NEW_RBS_GFX12-NEXT: s_endpgm
%add = fadd float %a, %b
%add.i32 = fptoui float %add to i32
%res = add i32 %add.i32, %c
store i32 %res, ptr addrspace(1) %ptr
ret void
}

View File

@@ -0,0 +1,92 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5
# RUN: llc -mtriple=amdgcn -mcpu=gfx1010 -run-pass=regbankselect %s -o - | FileCheck %s -check-prefixes=OLD_RBS_GFX10
# RUN: llc -mtriple=amdgcn -mcpu=gfx1200 -run-pass=regbankselect %s -o - | FileCheck %s -check-prefixes=OLD_RBS_GFX12
# RUN: llc -mtriple=amdgcn -mcpu=gfx1010 -run-pass=regbankselect %s -o - | FileCheck %s -check-prefixes=NEW_RBS_GFX10
# RUN: llc -mtriple=amdgcn -mcpu=gfx1200 -run-pass=regbankselect %s -o - | FileCheck %s -check-prefixes=NEW_RBS_GFX12
# salu_float: G_FADD of two values copied from SGPRs, G_FPTOUI of the sum, and
# a G_ADD with a third SGPR value; the result is stored through a p1 pointer
# merged from $vgpr0/$vgpr1.
# The gfx1010 checks expect the SGPR inputs to be copied to vgpr and G_FADD,
# G_FPTOUI and G_ADD to be assigned vgpr. The gfx1200 checks expect those three
# instructions on sgpr, with a single sgpr-to-vgpr COPY before the G_STORE.
# The OLD_RBS_* and NEW_RBS_* check blocks are currently identical per target.
---
name: salu_float
legalized: true
body: |
bb.0:
liveins: $sgpr0, $sgpr1, $sgpr2, $vgpr0, $vgpr1
; OLD_RBS_GFX10-LABEL: name: salu_float
; OLD_RBS_GFX10: liveins: $sgpr0, $sgpr1, $sgpr2, $vgpr0, $vgpr1
; OLD_RBS_GFX10-NEXT: {{ $}}
; OLD_RBS_GFX10-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
; OLD_RBS_GFX10-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1
; OLD_RBS_GFX10-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr2
; OLD_RBS_GFX10-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
; OLD_RBS_GFX10-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr1
; OLD_RBS_GFX10-NEXT: [[MV:%[0-9]+]]:vgpr(p1) = G_MERGE_VALUES [[COPY3]](s32), [[COPY4]](s32)
; OLD_RBS_GFX10-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32)
; OLD_RBS_GFX10-NEXT: [[COPY6:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32)
; OLD_RBS_GFX10-NEXT: [[FADD:%[0-9]+]]:vgpr(s32) = G_FADD [[COPY5]], [[COPY6]]
; OLD_RBS_GFX10-NEXT: [[FPTOUI:%[0-9]+]]:vgpr(s32) = G_FPTOUI [[FADD]](s32)
; OLD_RBS_GFX10-NEXT: [[COPY7:%[0-9]+]]:vgpr(s32) = COPY [[COPY2]](s32)
; OLD_RBS_GFX10-NEXT: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[FPTOUI]], [[COPY7]]
; OLD_RBS_GFX10-NEXT: G_STORE [[ADD]](s32), [[MV]](p1) :: (store (s32), addrspace 1)
; OLD_RBS_GFX10-NEXT: S_ENDPGM 0
;
; OLD_RBS_GFX12-LABEL: name: salu_float
; OLD_RBS_GFX12: liveins: $sgpr0, $sgpr1, $sgpr2, $vgpr0, $vgpr1
; OLD_RBS_GFX12-NEXT: {{ $}}
; OLD_RBS_GFX12-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
; OLD_RBS_GFX12-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1
; OLD_RBS_GFX12-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr2
; OLD_RBS_GFX12-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
; OLD_RBS_GFX12-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr1
; OLD_RBS_GFX12-NEXT: [[MV:%[0-9]+]]:vgpr(p1) = G_MERGE_VALUES [[COPY3]](s32), [[COPY4]](s32)
; OLD_RBS_GFX12-NEXT: [[FADD:%[0-9]+]]:sgpr(s32) = G_FADD [[COPY]], [[COPY1]]
; OLD_RBS_GFX12-NEXT: [[FPTOUI:%[0-9]+]]:sgpr(s32) = G_FPTOUI [[FADD]](s32)
; OLD_RBS_GFX12-NEXT: [[ADD:%[0-9]+]]:sgpr(s32) = G_ADD [[FPTOUI]], [[COPY2]]
; OLD_RBS_GFX12-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[ADD]](s32)
; OLD_RBS_GFX12-NEXT: G_STORE [[COPY5]](s32), [[MV]](p1) :: (store (s32), addrspace 1)
; OLD_RBS_GFX12-NEXT: S_ENDPGM 0
;
; NEW_RBS_GFX10-LABEL: name: salu_float
; NEW_RBS_GFX10: liveins: $sgpr0, $sgpr1, $sgpr2, $vgpr0, $vgpr1
; NEW_RBS_GFX10-NEXT: {{ $}}
; NEW_RBS_GFX10-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
; NEW_RBS_GFX10-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1
; NEW_RBS_GFX10-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr2
; NEW_RBS_GFX10-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
; NEW_RBS_GFX10-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr1
; NEW_RBS_GFX10-NEXT: [[MV:%[0-9]+]]:vgpr(p1) = G_MERGE_VALUES [[COPY3]](s32), [[COPY4]](s32)
; NEW_RBS_GFX10-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32)
; NEW_RBS_GFX10-NEXT: [[COPY6:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32)
; NEW_RBS_GFX10-NEXT: [[FADD:%[0-9]+]]:vgpr(s32) = G_FADD [[COPY5]], [[COPY6]]
; NEW_RBS_GFX10-NEXT: [[FPTOUI:%[0-9]+]]:vgpr(s32) = G_FPTOUI [[FADD]](s32)
; NEW_RBS_GFX10-NEXT: [[COPY7:%[0-9]+]]:vgpr(s32) = COPY [[COPY2]](s32)
; NEW_RBS_GFX10-NEXT: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[FPTOUI]], [[COPY7]]
; NEW_RBS_GFX10-NEXT: G_STORE [[ADD]](s32), [[MV]](p1) :: (store (s32), addrspace 1)
; NEW_RBS_GFX10-NEXT: S_ENDPGM 0
;
; NEW_RBS_GFX12-LABEL: name: salu_float
; NEW_RBS_GFX12: liveins: $sgpr0, $sgpr1, $sgpr2, $vgpr0, $vgpr1
; NEW_RBS_GFX12-NEXT: {{ $}}
; NEW_RBS_GFX12-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
; NEW_RBS_GFX12-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1
; NEW_RBS_GFX12-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr2
; NEW_RBS_GFX12-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
; NEW_RBS_GFX12-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr1
; NEW_RBS_GFX12-NEXT: [[MV:%[0-9]+]]:vgpr(p1) = G_MERGE_VALUES [[COPY3]](s32), [[COPY4]](s32)
; NEW_RBS_GFX12-NEXT: [[FADD:%[0-9]+]]:sgpr(s32) = G_FADD [[COPY]], [[COPY1]]
; NEW_RBS_GFX12-NEXT: [[FPTOUI:%[0-9]+]]:sgpr(s32) = G_FPTOUI [[FADD]](s32)
; NEW_RBS_GFX12-NEXT: [[ADD:%[0-9]+]]:sgpr(s32) = G_ADD [[FPTOUI]], [[COPY2]]
; NEW_RBS_GFX12-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[ADD]](s32)
; NEW_RBS_GFX12-NEXT: G_STORE [[COPY5]](s32), [[MV]](p1) :: (store (s32), addrspace 1)
; NEW_RBS_GFX12-NEXT: S_ENDPGM 0
%0:_(s32) = COPY $sgpr0
%1:_(s32) = COPY $sgpr1
%2:_(s32) = COPY $sgpr2
%3:_(s32) = COPY $vgpr0
%4:_(s32) = COPY $vgpr1
%5:_(p1) = G_MERGE_VALUES %3(s32), %4(s32)
%6:_(s32) = G_FADD %0, %1
%7:_(s32) = G_FPTOUI %6(s32)
%8:_(s32) = G_ADD %7, %2
G_STORE %8(s32), %5(p1) :: (store (s32), addrspace 1)
S_ENDPGM 0
...

View File

@@ -0,0 +1,635 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=gfx1010 < %s | FileCheck -check-prefix=OLD_RBS %s
; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=gfx1010 < %s | FileCheck -check-prefix=NEW_RBS %s
; If an instruction is uniform and a SALU instruction is available for it,
; the SALU instruction should be selected.
; Here fptoui takes an inreg float and its result is added to an inreg i32.
; The gfx1010 checks expect VALU instructions for both (v_cvt_u32_f32,
; v_add_nc_u32). The OLD_RBS and NEW_RBS check blocks are currently identical.
define amdgpu_ps void @uniform_in_vgpr(float inreg %a, i32 inreg %b, ptr addrspace(1) %ptr) {
; OLD_RBS-LABEL: uniform_in_vgpr:
; OLD_RBS: ; %bb.0:
; OLD_RBS-NEXT: v_cvt_u32_f32_e32 v2, s0
; OLD_RBS-NEXT: v_add_nc_u32_e32 v2, s1, v2
; OLD_RBS-NEXT: global_store_dword v[0:1], v2, off
; OLD_RBS-NEXT: s_endpgm
;
; NEW_RBS-LABEL: uniform_in_vgpr:
; NEW_RBS: ; %bb.0:
; NEW_RBS-NEXT: v_cvt_u32_f32_e32 v2, s0
; NEW_RBS-NEXT: v_add_nc_u32_e32 v2, s1, v2
; NEW_RBS-NEXT: global_store_dword v[0:1], v2, off
; NEW_RBS-NEXT: s_endpgm
%a.i32 = fptoui float %a to i32
%res = add i32 %a.i32, %b
store i32 %res, ptr addrspace(1) %ptr
ret void
}
; Covers the rb-legalize combine of "copy sgpr to vgpr" followed by
; "readfirstlane vgpr to sgpr".
; Two float operations on inreg inputs run back to back (fadd, then fptoui)
; before an integer add with another inreg value. The gfx1010 checks expect
; VALU instructions throughout (v_add_f32, v_cvt_u32_f32, v_add_nc_u32).
define amdgpu_ps void @back_to_back_uniform_in_vgpr(float inreg %a, float inreg %b, i32 inreg %c, ptr addrspace(1) %ptr) {
; OLD_RBS-LABEL: back_to_back_uniform_in_vgpr:
; OLD_RBS: ; %bb.0:
; OLD_RBS-NEXT: v_add_f32_e64 v2, s0, s1
; OLD_RBS-NEXT: v_cvt_u32_f32_e32 v2, v2
; OLD_RBS-NEXT: v_add_nc_u32_e32 v2, s2, v2
; OLD_RBS-NEXT: global_store_dword v[0:1], v2, off
; OLD_RBS-NEXT: s_endpgm
;
; NEW_RBS-LABEL: back_to_back_uniform_in_vgpr:
; NEW_RBS: ; %bb.0:
; NEW_RBS-NEXT: v_add_f32_e64 v2, s0, s1
; NEW_RBS-NEXT: v_cvt_u32_f32_e32 v2, v2
; NEW_RBS-NEXT: v_add_nc_u32_e32 v2, s2, v2
; NEW_RBS-NEXT: global_store_dword v[0:1], v2, off
; NEW_RBS-NEXT: s_endpgm
%add = fadd float %a, %b
%add.i32 = fptoui float %add to i32
%res = add i32 %add.i32, %c
store i32 %res, ptr addrspace(1) %ptr
ret void
}
; Fast rules for vector instructions.
; Buffer load where both %rsrc and %voffset are inreg. The checks expect
; %voffset to be moved into a VGPR (v_mov_b32 v2, s4) for the offen buffer load,
; and the add on the extracted element to be a VALU add.
; NOTE(review): the intrinsic is spelled with a `.v4f32` suffix while the call
; returns <4 x i32> -- confirm the mangling is intended.
define amdgpu_cs void @buffer_load_uniform(<4 x i32> inreg %rsrc, i32 inreg %voffset, ptr addrspace(1) %ptr) {
; OLD_RBS-LABEL: buffer_load_uniform:
; OLD_RBS: ; %bb.0: ; %.entry
; OLD_RBS-NEXT: v_mov_b32_e32 v2, s4
; OLD_RBS-NEXT: buffer_load_dwordx4 v[2:5], v2, s[0:3], 0 offen
; OLD_RBS-NEXT: s_waitcnt vmcnt(0)
; OLD_RBS-NEXT: v_add_nc_u32_e32 v2, 1, v3
; OLD_RBS-NEXT: global_store_dword v[0:1], v2, off
; OLD_RBS-NEXT: s_endpgm
;
; NEW_RBS-LABEL: buffer_load_uniform:
; NEW_RBS: ; %bb.0: ; %.entry
; NEW_RBS-NEXT: v_mov_b32_e32 v2, s4
; NEW_RBS-NEXT: buffer_load_dwordx4 v[2:5], v2, s[0:3], 0 offen
; NEW_RBS-NEXT: s_waitcnt vmcnt(0)
; NEW_RBS-NEXT: v_add_nc_u32_e32 v2, 1, v3
; NEW_RBS-NEXT: global_store_dword v[0:1], v2, off
; NEW_RBS-NEXT: s_endpgm
.entry:
%vec = call <4 x i32> @llvm.amdgcn.raw.buffer.load.v4f32(<4 x i32> %rsrc, i32 %voffset, i32 0, i32 0)
%el1 = extractelement <4 x i32> %vec, i64 1
%res = add i32 %el1, 1
store i32 %res, ptr addrspace(1) %ptr
ret void
}
; Same body as buffer_load_uniform, but %voffset is not inreg. The checks
; expect it to be used directly from v0 as the buffer load offset, with no
; preceding v_mov_b32.
define amdgpu_cs void @buffer_load_divergent(<4 x i32> inreg %rsrc, i32 %voffset, ptr addrspace(1) %ptr) {
; OLD_RBS-LABEL: buffer_load_divergent:
; OLD_RBS: ; %bb.0: ; %.entry
; OLD_RBS-NEXT: buffer_load_dwordx4 v[3:6], v0, s[0:3], 0 offen
; OLD_RBS-NEXT: s_waitcnt vmcnt(0)
; OLD_RBS-NEXT: v_add_nc_u32_e32 v0, 1, v4
; OLD_RBS-NEXT: global_store_dword v[1:2], v0, off
; OLD_RBS-NEXT: s_endpgm
;
; NEW_RBS-LABEL: buffer_load_divergent:
; NEW_RBS: ; %bb.0: ; %.entry
; NEW_RBS-NEXT: buffer_load_dwordx4 v[3:6], v0, s[0:3], 0 offen
; NEW_RBS-NEXT: s_waitcnt vmcnt(0)
; NEW_RBS-NEXT: v_add_nc_u32_e32 v0, 1, v4
; NEW_RBS-NEXT: global_store_dword v[1:2], v0, off
; NEW_RBS-NEXT: s_endpgm
.entry:
%vec = call <4 x i32> @llvm.amdgcn.raw.buffer.load.v4f32(<4 x i32> %rsrc, i32 %voffset, i32 0, i32 0)
%el1 = extractelement <4 x i32> %vec, i64 1
%res = add i32 %el1, 1
store i32 %res, ptr addrspace(1) %ptr
ret void
}
; Lowering in rb-legalize: an S64 operation is legal on sgpr, but on vgpr it
; has to be split into S32 pieces.
; Both i64 operands are non-inreg; the checks expect two v_and_b32, one per
; 32-bit half.
define amdgpu_ps void @vgpr_and_i64(i64 %a, i64 %b, ptr addrspace(1) %ptr) {
; OLD_RBS-LABEL: vgpr_and_i64:
; OLD_RBS: ; %bb.0:
; OLD_RBS-NEXT: v_and_b32_e32 v0, v0, v2
; OLD_RBS-NEXT: v_and_b32_e32 v1, v1, v3
; OLD_RBS-NEXT: global_store_dwordx2 v[4:5], v[0:1], off
; OLD_RBS-NEXT: s_endpgm
;
; NEW_RBS-LABEL: vgpr_and_i64:
; NEW_RBS: ; %bb.0:
; NEW_RBS-NEXT: v_and_b32_e32 v0, v0, v2
; NEW_RBS-NEXT: v_and_b32_e32 v1, v1, v3
; NEW_RBS-NEXT: global_store_dwordx2 v[4:5], v[0:1], off
; NEW_RBS-NEXT: s_endpgm
%res = and i64 %a, %b
store i64 %res, ptr addrspace(1) %ptr
ret void
}
; It is up to the user instruction to deal with potentially truncated bits in
; the register. Here G_ABS needs to sign-extend the S16 value held in the
; register to S32 and then perform an S32 G_ABS.
; The checks expect s_sext_i32_i16 followed by s_abs_i32.
define amdgpu_ps void @abs_sgpr_i16(i16 inreg %arg, ptr addrspace(1) %ptr) {
; OLD_RBS-LABEL: abs_sgpr_i16:
; OLD_RBS: ; %bb.0:
; OLD_RBS-NEXT: s_sext_i32_i16 s0, s0
; OLD_RBS-NEXT: s_abs_i32 s0, s0
; OLD_RBS-NEXT: v_mov_b32_e32 v2, s0
; OLD_RBS-NEXT: global_store_short v[0:1], v2, off
; OLD_RBS-NEXT: s_endpgm
;
; NEW_RBS-LABEL: abs_sgpr_i16:
; NEW_RBS: ; %bb.0:
; NEW_RBS-NEXT: s_sext_i32_i16 s0, s0
; NEW_RBS-NEXT: s_abs_i32 s0, s0
; NEW_RBS-NEXT: v_mov_b32_e32 v2, s0
; NEW_RBS-NEXT: global_store_short v[0:1], v2, off
; NEW_RBS-NEXT: s_endpgm
%res = call i16 @llvm.abs.i16(i16 %arg, i1 false)
store i16 %res, ptr addrspace(1) %ptr
ret void
}
; i1 phi whose incoming values are icmp results on the inreg %tid, reached
; through a branch on the inreg %cond. The phi result drives a select between
; the constants 1 and 2.
; The checks expect the i1 to be kept as a 0/1 value in s2 (s_cmp + s_cselect_b32)
; across an s_cbranch_scc1, then expanded with s_bfe_i32 and s_add_i32.
define amdgpu_ps void @uniform_i1_phi(ptr addrspace(1) %out, i32 inreg %tid, i32 inreg %cond) {
; OLD_RBS-LABEL: uniform_i1_phi:
; OLD_RBS: ; %bb.0: ; %A
; OLD_RBS-NEXT: s_cmp_ge_u32 s0, 6
; OLD_RBS-NEXT: s_cselect_b32 s2, 1, 0
; OLD_RBS-NEXT: s_cmp_lg_u32 s1, 0
; OLD_RBS-NEXT: s_cbranch_scc1 .LBB6_2
; OLD_RBS-NEXT: ; %bb.1: ; %B
; OLD_RBS-NEXT: s_cmp_lt_u32 s0, 1
; OLD_RBS-NEXT: s_cselect_b32 s2, 1, 0
; OLD_RBS-NEXT: .LBB6_2: ; %exit
; OLD_RBS-NEXT: s_bfe_i32 s0, s2, 0x10000
; OLD_RBS-NEXT: s_add_i32 s0, s0, 2
; OLD_RBS-NEXT: v_mov_b32_e32 v2, s0
; OLD_RBS-NEXT: global_store_dword v[0:1], v2, off
; OLD_RBS-NEXT: s_endpgm
;
; NEW_RBS-LABEL: uniform_i1_phi:
; NEW_RBS: ; %bb.0: ; %A
; NEW_RBS-NEXT: s_cmp_ge_u32 s0, 6
; NEW_RBS-NEXT: s_cselect_b32 s2, 1, 0
; NEW_RBS-NEXT: s_cmp_lg_u32 s1, 0
; NEW_RBS-NEXT: s_cbranch_scc1 .LBB6_2
; NEW_RBS-NEXT: ; %bb.1: ; %B
; NEW_RBS-NEXT: s_cmp_lt_u32 s0, 1
; NEW_RBS-NEXT: s_cselect_b32 s2, 1, 0
; NEW_RBS-NEXT: .LBB6_2: ; %exit
; NEW_RBS-NEXT: s_bfe_i32 s0, s2, 0x10000
; NEW_RBS-NEXT: s_add_i32 s0, s0, 2
; NEW_RBS-NEXT: v_mov_b32_e32 v2, s0
; NEW_RBS-NEXT: global_store_dword v[0:1], v2, off
; NEW_RBS-NEXT: s_endpgm
A:
%val_A = icmp uge i32 %tid, 6
%cmp = icmp eq i32 %cond, 0
br i1 %cmp, label %B, label %exit
B:
%val_B = icmp ult i32 %tid, 1
br label %exit
exit:
%phi = phi i1 [ %val_A, %A ], [ %val_B, %B ]
%sel = select i1 %phi, i32 1, i32 2
store i32 %sel, ptr addrspace(1) %out
ret void
}
; This is a kind of i1 readfirstlane: a uniform i1 result produced by an
; instruction that is only available on VALU.
; fcmp on an inreg float feeds a select between two inreg i32 values. The
; checks expect v_cmp_eq_f32_e64 writing s0 and a v_cndmask_b32 that uses s0 as
; its condition.
define amdgpu_ps void @vcc_to_scc(float inreg %a, i32 inreg %b, i32 inreg %c, ptr addrspace(1) %ptr) {
; OLD_RBS-LABEL: vcc_to_scc:
; OLD_RBS: ; %bb.0:
; OLD_RBS-NEXT: v_mov_b32_e32 v2, s2
; OLD_RBS-NEXT: v_cmp_eq_f32_e64 s0, s0, 0
; OLD_RBS-NEXT: v_cndmask_b32_e64 v2, v2, s1, s0
; OLD_RBS-NEXT: global_store_dword v[0:1], v2, off
; OLD_RBS-NEXT: s_endpgm
;
; NEW_RBS-LABEL: vcc_to_scc:
; NEW_RBS: ; %bb.0:
; NEW_RBS-NEXT: v_mov_b32_e32 v2, s2
; NEW_RBS-NEXT: v_cmp_eq_f32_e64 s0, s0, 0
; NEW_RBS-NEXT: v_cndmask_b32_e64 v2, v2, s1, s0
; NEW_RBS-NEXT: global_store_dword v[0:1], v2, off
; NEW_RBS-NEXT: s_endpgm
%vcc_to_scc = fcmp oeq float %a, 0.0
%select = select i1 %vcc_to_scc, i32 %b, i32 %c
store i32 %select, ptr addrspace(1) %ptr
ret void
}
; The combiner in rb-legalize recognizes a copy from an sgpr S1 to vcc.
; icmp on an inreg i32 feeds a select between two non-inreg values. The checks
; expect the compare result to be materialized as 0/1 in s0 (s_cmp_eq_u32,
; s_cselect_b32), masked with s_and_b32, and converted to vcc_lo with
; v_cmp_ne_u32 before the v_cndmask_b32.
define amdgpu_ps void @scc_to_vcc(i32 inreg %a, i32 %b, i32 %c, ptr addrspace(1) %ptr) {
; OLD_RBS-LABEL: scc_to_vcc:
; OLD_RBS: ; %bb.0:
; OLD_RBS-NEXT: s_cmp_eq_u32 s0, 0
; OLD_RBS-NEXT: s_cselect_b32 s0, 1, 0
; OLD_RBS-NEXT: s_and_b32 s0, 1, s0
; OLD_RBS-NEXT: v_cmp_ne_u32_e64 vcc_lo, 0, s0
; OLD_RBS-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc_lo
; OLD_RBS-NEXT: global_store_dword v[2:3], v0, off
; OLD_RBS-NEXT: s_endpgm
;
; NEW_RBS-LABEL: scc_to_vcc:
; NEW_RBS: ; %bb.0:
; NEW_RBS-NEXT: s_cmp_eq_u32 s0, 0
; NEW_RBS-NEXT: s_cselect_b32 s0, 1, 0
; NEW_RBS-NEXT: s_and_b32 s0, 1, s0
; NEW_RBS-NEXT: v_cmp_ne_u32_e64 vcc_lo, 0, s0
; NEW_RBS-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc_lo
; NEW_RBS-NEXT: global_store_dword v[2:3], v0, off
; NEW_RBS-NEXT: s_endpgm
%scc_to_vcc = icmp eq i32 %a, 0
%select = select i1 %scc_to_vcc, i32 %b, i32 %c
store i32 %select, ptr addrspace(1) %ptr
ret void
}
; This is the only G_TRUNC that is not a no-op in global-isel for AMDGPU.
; A non-inreg i32 is truncated to i1 and used as a select condition. The checks
; expect v_and_b32 with 1 followed by v_cmp_ne_u32 against 0 to produce vcc_lo.
define amdgpu_ps void @vgpr_to_vcc_trunc(i32 %a, i32 %b, i32 %c, ptr addrspace(1) %ptr) {
; OLD_RBS-LABEL: vgpr_to_vcc_trunc:
; OLD_RBS: ; %bb.0:
; OLD_RBS-NEXT: v_and_b32_e32 v0, 1, v0
; OLD_RBS-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v0
; OLD_RBS-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc_lo
; OLD_RBS-NEXT: global_store_dword v[3:4], v0, off
; OLD_RBS-NEXT: s_endpgm
;
; NEW_RBS-LABEL: vgpr_to_vcc_trunc:
; NEW_RBS: ; %bb.0:
; NEW_RBS-NEXT: v_and_b32_e32 v0, 1, v0
; NEW_RBS-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v0
; NEW_RBS-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc_lo
; NEW_RBS-NEXT: global_store_dword v[3:4], v0, off
; NEW_RBS-NEXT: s_endpgm
%vcc = trunc i32 %a to i1
%select = select i1 %vcc, i32 %b, i32 %c
store i32 %select, ptr addrspace(1) %ptr
ret void
}
; An i1 input to zext and sext is something that survived the legalizer (it is
; not a trunc); it is lowered to a select.
; Here the i1 comes from an icmp on the inreg argument %a. The checks show
; s_cmp_eq_u32 followed by s_cselect_b32 s0, 1, 0, whose result is copied to a
; vgpr for the store.
define amdgpu_ps void @zext(i32 inreg %a, ptr addrspace(1) %ptr) {
; OLD_RBS-LABEL: zext:
; OLD_RBS: ; %bb.0:
; OLD_RBS-NEXT: s_cmp_eq_u32 s0, 10
; OLD_RBS-NEXT: s_cselect_b32 s0, 1, 0
; OLD_RBS-NEXT: v_mov_b32_e32 v2, s0
; OLD_RBS-NEXT: global_store_dword v[0:1], v2, off
; OLD_RBS-NEXT: s_endpgm
;
; NEW_RBS-LABEL: zext:
; NEW_RBS: ; %bb.0:
; NEW_RBS-NEXT: s_cmp_eq_u32 s0, 10
; NEW_RBS-NEXT: s_cselect_b32 s0, 1, 0
; NEW_RBS-NEXT: v_mov_b32_e32 v2, s0
; NEW_RBS-NEXT: global_store_dword v[0:1], v2, off
; NEW_RBS-NEXT: s_endpgm
%bool = icmp eq i32 %a, 10
%zext = zext i1 %bool to i32
store i32 %zext, ptr addrspace(1) %ptr
ret void
}
; sext of an i1 produced by an icmp on the inreg argument %a. Compared with
; the zext test in this file, the checks show one extra instruction,
; s_bfe_i32, applied to the s_cselect_b32 result before it is copied to a vgpr
; and stored.
define amdgpu_ps void @sext(i32 inreg %a, ptr addrspace(1) %ptr) {
; OLD_RBS-LABEL: sext:
; OLD_RBS: ; %bb.0:
; OLD_RBS-NEXT: s_cmp_eq_u32 s0, 10
; OLD_RBS-NEXT: s_cselect_b32 s0, 1, 0
; OLD_RBS-NEXT: s_bfe_i32 s0, s0, 0x10000
; OLD_RBS-NEXT: v_mov_b32_e32 v2, s0
; OLD_RBS-NEXT: global_store_dword v[0:1], v2, off
; OLD_RBS-NEXT: s_endpgm
;
; NEW_RBS-LABEL: sext:
; NEW_RBS: ; %bb.0:
; NEW_RBS-NEXT: s_cmp_eq_u32 s0, 10
; NEW_RBS-NEXT: s_cselect_b32 s0, 1, 0
; NEW_RBS-NEXT: s_bfe_i32 s0, s0, 0x10000
; NEW_RBS-NEXT: v_mov_b32_e32 v2, s0
; NEW_RBS-NEXT: global_store_dword v[0:1], v2, off
; NEW_RBS-NEXT: s_endpgm
%bool = icmp eq i32 %a, 10
%sext = sext i1 %bool to i32
store i32 %sext, ptr addrspace(1) %ptr
ret void
}
; divergent i1 bitwise, i1 vcc.
; inst selected into s_and_b32 on wave32 or s_and_b64 on wave64.
; Both compares take non-inreg arguments. The checks show the two v_cmp
; results (in vcc_lo and s0) combined with s_and_b32 into vcc_lo, which is
; then consumed by v_cndmask_b32.
define amdgpu_ps void @and_i1_vcc(i32 %a, i32 %b, ptr addrspace(1) %ptr) {
; OLD_RBS-LABEL: and_i1_vcc:
; OLD_RBS: ; %bb.0:
; OLD_RBS-NEXT: v_cmp_le_u32_e32 vcc_lo, 10, v0
; OLD_RBS-NEXT: v_cmp_le_u32_e64 s0, 20, v1
; OLD_RBS-NEXT: s_and_b32 vcc_lo, vcc_lo, s0
; OLD_RBS-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc_lo
; OLD_RBS-NEXT: global_store_dword v[2:3], v0, off
; OLD_RBS-NEXT: s_endpgm
;
; NEW_RBS-LABEL: and_i1_vcc:
; NEW_RBS: ; %bb.0:
; NEW_RBS-NEXT: v_cmp_le_u32_e32 vcc_lo, 10, v0
; NEW_RBS-NEXT: v_cmp_le_u32_e64 s0, 20, v1
; NEW_RBS-NEXT: s_and_b32 vcc_lo, vcc_lo, s0
; NEW_RBS-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc_lo
; NEW_RBS-NEXT: global_store_dword v[2:3], v0, off
; NEW_RBS-NEXT: s_endpgm
%cmp_a = icmp uge i32 %a, 10
%cmp_b = icmp uge i32 %b, 20
%cc = and i1 %cmp_a, %cmp_b
%res = select i1 %cc, i32 %a, i32 %b
store i32 %res, ptr addrspace(1) %ptr
ret void
}
; uniform i1 bitwise, i32 sgpr. inst selected into s_and_b32.
; Both compares take inreg arguments. The checks show each compare result
; materialized with s_cselect_b32, the two values combined with s_and_b32,
; the result masked with 1 and re-compared (s_cmp_lg_u32) to drive the final
; s_cselect_b32 between s0 and s1.
define amdgpu_ps void @and_i1_scc(i32 inreg %a, i32 inreg %b, ptr addrspace(1) %ptr) {
; OLD_RBS-LABEL: and_i1_scc:
; OLD_RBS: ; %bb.0:
; OLD_RBS-NEXT: s_cmp_ge_u32 s0, 10
; OLD_RBS-NEXT: s_cselect_b32 s2, 1, 0
; OLD_RBS-NEXT: s_cmp_ge_u32 s1, 20
; OLD_RBS-NEXT: s_cselect_b32 s3, 1, 0
; OLD_RBS-NEXT: s_and_b32 s2, s2, s3
; OLD_RBS-NEXT: s_and_b32 s2, s2, 1
; OLD_RBS-NEXT: s_cmp_lg_u32 s2, 0
; OLD_RBS-NEXT: s_cselect_b32 s0, s0, s1
; OLD_RBS-NEXT: v_mov_b32_e32 v2, s0
; OLD_RBS-NEXT: global_store_dword v[0:1], v2, off
; OLD_RBS-NEXT: s_endpgm
;
; NEW_RBS-LABEL: and_i1_scc:
; NEW_RBS: ; %bb.0:
; NEW_RBS-NEXT: s_cmp_ge_u32 s0, 10
; NEW_RBS-NEXT: s_cselect_b32 s2, 1, 0
; NEW_RBS-NEXT: s_cmp_ge_u32 s1, 20
; NEW_RBS-NEXT: s_cselect_b32 s3, 1, 0
; NEW_RBS-NEXT: s_and_b32 s2, s2, s3
; NEW_RBS-NEXT: s_and_b32 s2, s2, 1
; NEW_RBS-NEXT: s_cmp_lg_u32 s2, 0
; NEW_RBS-NEXT: s_cselect_b32 s0, s0, s1
; NEW_RBS-NEXT: v_mov_b32_e32 v2, s0
; NEW_RBS-NEXT: global_store_dword v[0:1], v2, off
; NEW_RBS-NEXT: s_endpgm
%cmp_a = icmp uge i32 %a, 10
%cmp_b = icmp uge i32 %b, 20
%cc = and i1 %cmp_a, %cmp_b
%res = select i1 %cc, i32 %a, i32 %b
store i32 %res, ptr addrspace(1) %ptr
ret void
}
; The phi in %exit merges the constants 0 and 1 across a branch whose
; condition is a compare of the non-inreg argument %a, so the phi inputs are
; uniform while the control flow selecting between them is not.
; old RBS selects sgpr phi because it had sgpr inputs.
define amdgpu_ps void @divergent_phi_with_uniform_inputs(i32 %a, ptr addrspace(1) %out) {
; OLD_RBS-LABEL: divergent_phi_with_uniform_inputs:
; OLD_RBS: ; %bb.0: ; %A
; OLD_RBS-NEXT: s_mov_b32 s0, 0
; OLD_RBS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
; OLD_RBS-NEXT: s_and_saveexec_b32 s1, vcc_lo
; OLD_RBS-NEXT: ; %bb.1: ; %B
; OLD_RBS-NEXT: s_mov_b32 s0, 1
; OLD_RBS-NEXT: ; %bb.2: ; %exit
; OLD_RBS-NEXT: s_or_b32 exec_lo, exec_lo, s1
; OLD_RBS-NEXT: v_mov_b32_e32 v0, s0
; OLD_RBS-NEXT: global_store_dword v[1:2], v0, off
; OLD_RBS-NEXT: s_endpgm
;
; NEW_RBS-LABEL: divergent_phi_with_uniform_inputs:
; NEW_RBS: ; %bb.0: ; %A
; NEW_RBS-NEXT: s_mov_b32 s0, 0
; NEW_RBS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
; NEW_RBS-NEXT: s_and_saveexec_b32 s1, vcc_lo
; NEW_RBS-NEXT: ; %bb.1: ; %B
; NEW_RBS-NEXT: s_mov_b32 s0, 1
; NEW_RBS-NEXT: ; %bb.2: ; %exit
; NEW_RBS-NEXT: s_or_b32 exec_lo, exec_lo, s1
; NEW_RBS-NEXT: v_mov_b32_e32 v0, s0
; NEW_RBS-NEXT: global_store_dword v[1:2], v0, off
; NEW_RBS-NEXT: s_endpgm
A:
%cmp = icmp eq i32 %a, 0
br i1 %cmp, label %B, label %exit
B:
br label %exit
exit:
%phi = phi i32 [ 0, %A ], [ 1, %B ]
store i32 %phi, ptr addrspace(1) %out
ret void
}
; %counter is defined inside the loop and used after it (%ceilx10 in %exit).
; The loop exit condition compares the counter, converted to float, against
; the non-inreg argument %val.
; old RBS assigned vgpr to uniform phi (because one input had undetermined bank)
; and it propagated to mul, which was not wrong.
; new RBS assigns vgpr to destination of mul even though both inputs are sgpr.
; TODO: implement temporal divergence lowering
define amdgpu_ps void @divergent_because_of_temporal_divergent_use(float %val, ptr addrspace(1) %addr) {
; OLD_RBS-LABEL: divergent_because_of_temporal_divergent_use:
; OLD_RBS: ; %bb.0: ; %entry
; OLD_RBS-NEXT: s_mov_b32 s0, -1
; OLD_RBS-NEXT: v_mov_b32_e32 v3, s0
; OLD_RBS-NEXT: s_mov_b32 s0, 0
; OLD_RBS-NEXT: .LBB15_1: ; %loop
; OLD_RBS-NEXT: ; =>This Inner Loop Header: Depth=1
; OLD_RBS-NEXT: v_add_nc_u32_e32 v3, 1, v3
; OLD_RBS-NEXT: v_cvt_f32_u32_e32 v4, v3
; OLD_RBS-NEXT: v_cmp_gt_f32_e32 vcc_lo, v4, v0
; OLD_RBS-NEXT: s_or_b32 s0, vcc_lo, s0
; OLD_RBS-NEXT: s_andn2_b32 exec_lo, exec_lo, s0
; OLD_RBS-NEXT: s_cbranch_execnz .LBB15_1
; OLD_RBS-NEXT: ; %bb.2: ; %exit
; OLD_RBS-NEXT: s_or_b32 exec_lo, exec_lo, s0
; OLD_RBS-NEXT: v_mul_lo_u32 v0, v3, 10
; OLD_RBS-NEXT: global_store_dword v[1:2], v0, off
; OLD_RBS-NEXT: s_endpgm
;
; NEW_RBS-LABEL: divergent_because_of_temporal_divergent_use:
; NEW_RBS: ; %bb.0: ; %entry
; NEW_RBS-NEXT: s_mov_b32 s0, -1
; NEW_RBS-NEXT: v_mov_b32_e32 v3, s0
; NEW_RBS-NEXT: s_mov_b32 s0, 0
; NEW_RBS-NEXT: .LBB15_1: ; %loop
; NEW_RBS-NEXT: ; =>This Inner Loop Header: Depth=1
; NEW_RBS-NEXT: v_add_nc_u32_e32 v3, 1, v3
; NEW_RBS-NEXT: v_cvt_f32_u32_e32 v4, v3
; NEW_RBS-NEXT: v_cmp_gt_f32_e32 vcc_lo, v4, v0
; NEW_RBS-NEXT: s_or_b32 s0, vcc_lo, s0
; NEW_RBS-NEXT: s_andn2_b32 exec_lo, exec_lo, s0
; NEW_RBS-NEXT: s_cbranch_execnz .LBB15_1
; NEW_RBS-NEXT: ; %bb.2: ; %exit
; NEW_RBS-NEXT: s_or_b32 exec_lo, exec_lo, s0
; NEW_RBS-NEXT: v_mul_lo_u32 v0, v3, 10
; NEW_RBS-NEXT: global_store_dword v[1:2], v0, off
; NEW_RBS-NEXT: s_endpgm
entry:
br label %loop
loop:
%counter = phi i32 [ 0, %entry ], [ %counter.plus.1, %loop ]
%f.counter = uitofp i32 %counter to float
%cond = fcmp ogt float %f.counter, %val
%counter.plus.1 = add i32 %counter, 1
br i1 %cond, label %exit, label %loop
exit:
%ceilx10 = mul i32 %counter, 10
store i32 %ceilx10, ptr addrspace(1) %addr
ret void
}
; Loop indexed by %counter with three edges to %exit: from %A when
; a[counter] == 0, from %B when b[counter] == 0, and from %loop.body (after
; incrementing x[counter]) when %counter < 100.
; Variables that handle the counter can be allocated to sgprs.
; Machine uniformity analysis claims some of those registers are divergent while
; LLVM-IR uniformity analysis claims corresponding values are uniform.
; TODO: fix this in Machine uniformity analysis.
define amdgpu_cs void @loop_with_2breaks(ptr addrspace(1) %x, ptr addrspace(1) %a, ptr addrspace(1) %b) {
; OLD_RBS-LABEL: loop_with_2breaks:
; OLD_RBS: ; %bb.0: ; %entry
; OLD_RBS-NEXT: s_mov_b32 s0, 0
; OLD_RBS-NEXT: ; implicit-def: $sgpr1
; OLD_RBS-NEXT: v_mov_b32_e32 v6, s0
; OLD_RBS-NEXT: s_branch .LBB16_3
; OLD_RBS-NEXT: .LBB16_1: ; %Flow3
; OLD_RBS-NEXT: ; in Loop: Header=BB16_3 Depth=1
; OLD_RBS-NEXT: s_waitcnt_depctr 0xffe3
; OLD_RBS-NEXT: s_or_b32 exec_lo, exec_lo, s3
; OLD_RBS-NEXT: s_andn2_b32 s1, s1, exec_lo
; OLD_RBS-NEXT: s_and_b32 s3, exec_lo, s4
; OLD_RBS-NEXT: s_or_b32 s1, s1, s3
; OLD_RBS-NEXT: .LBB16_2: ; %Flow
; OLD_RBS-NEXT: ; in Loop: Header=BB16_3 Depth=1
; OLD_RBS-NEXT: s_or_b32 exec_lo, exec_lo, s2
; OLD_RBS-NEXT: s_and_b32 s2, exec_lo, s1
; OLD_RBS-NEXT: s_or_b32 s0, s2, s0
; OLD_RBS-NEXT: s_andn2_b32 exec_lo, exec_lo, s0
; OLD_RBS-NEXT: s_cbranch_execz .LBB16_6
; OLD_RBS-NEXT: .LBB16_3: ; %A
; OLD_RBS-NEXT: ; =>This Inner Loop Header: Depth=1
; OLD_RBS-NEXT: v_ashrrev_i32_e32 v7, 31, v6
; OLD_RBS-NEXT: s_andn2_b32 s1, s1, exec_lo
; OLD_RBS-NEXT: s_and_b32 s2, exec_lo, -1
; OLD_RBS-NEXT: s_or_b32 s1, s1, s2
; OLD_RBS-NEXT: v_lshlrev_b64 v[7:8], 2, v[6:7]
; OLD_RBS-NEXT: v_add_co_u32 v9, vcc_lo, v2, v7
; OLD_RBS-NEXT: v_add_co_ci_u32_e32 v10, vcc_lo, v3, v8, vcc_lo
; OLD_RBS-NEXT: global_load_dword v9, v[9:10], off
; OLD_RBS-NEXT: s_waitcnt vmcnt(0)
; OLD_RBS-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v9
; OLD_RBS-NEXT: s_and_saveexec_b32 s2, vcc_lo
; OLD_RBS-NEXT: s_cbranch_execz .LBB16_2
; OLD_RBS-NEXT: ; %bb.4: ; %B
; OLD_RBS-NEXT: ; in Loop: Header=BB16_3 Depth=1
; OLD_RBS-NEXT: v_add_co_u32 v9, vcc_lo, v4, v7
; OLD_RBS-NEXT: v_add_co_ci_u32_e32 v10, vcc_lo, v5, v8, vcc_lo
; OLD_RBS-NEXT: s_mov_b32 s4, -1
; OLD_RBS-NEXT: global_load_dword v9, v[9:10], off
; OLD_RBS-NEXT: s_waitcnt vmcnt(0)
; OLD_RBS-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v9
; OLD_RBS-NEXT: s_and_saveexec_b32 s3, vcc_lo
; OLD_RBS-NEXT: s_cbranch_execz .LBB16_1
; OLD_RBS-NEXT: ; %bb.5: ; %loop.body
; OLD_RBS-NEXT: ; in Loop: Header=BB16_3 Depth=1
; OLD_RBS-NEXT: v_add_co_u32 v7, vcc_lo, v0, v7
; OLD_RBS-NEXT: v_add_co_ci_u32_e32 v8, vcc_lo, v1, v8, vcc_lo
; OLD_RBS-NEXT: v_add_nc_u32_e32 v10, 1, v6
; OLD_RBS-NEXT: v_cmp_gt_u32_e32 vcc_lo, 0x64, v6
; OLD_RBS-NEXT: s_andn2_b32 s4, -1, exec_lo
; OLD_RBS-NEXT: global_load_dword v9, v[7:8], off
; OLD_RBS-NEXT: v_mov_b32_e32 v6, v10
; OLD_RBS-NEXT: s_and_b32 s5, exec_lo, vcc_lo
; OLD_RBS-NEXT: s_or_b32 s4, s4, s5
; OLD_RBS-NEXT: s_waitcnt vmcnt(0)
; OLD_RBS-NEXT: v_add_nc_u32_e32 v9, 1, v9
; OLD_RBS-NEXT: global_store_dword v[7:8], v9, off
; OLD_RBS-NEXT: s_branch .LBB16_1
; OLD_RBS-NEXT: .LBB16_6: ; %exit
; OLD_RBS-NEXT: s_endpgm
;
; NEW_RBS-LABEL: loop_with_2breaks:
; NEW_RBS: ; %bb.0: ; %entry
; NEW_RBS-NEXT: s_mov_b32 s0, 0
; NEW_RBS-NEXT: ; implicit-def: $sgpr1
; NEW_RBS-NEXT: v_mov_b32_e32 v6, s0
; NEW_RBS-NEXT: s_branch .LBB16_3
; NEW_RBS-NEXT: .LBB16_1: ; %Flow3
; NEW_RBS-NEXT: ; in Loop: Header=BB16_3 Depth=1
; NEW_RBS-NEXT: s_waitcnt_depctr 0xffe3
; NEW_RBS-NEXT: s_or_b32 exec_lo, exec_lo, s3
; NEW_RBS-NEXT: s_andn2_b32 s1, s1, exec_lo
; NEW_RBS-NEXT: s_and_b32 s3, exec_lo, s4
; NEW_RBS-NEXT: s_or_b32 s1, s1, s3
; NEW_RBS-NEXT: .LBB16_2: ; %Flow
; NEW_RBS-NEXT: ; in Loop: Header=BB16_3 Depth=1
; NEW_RBS-NEXT: s_or_b32 exec_lo, exec_lo, s2
; NEW_RBS-NEXT: s_and_b32 s2, exec_lo, s1
; NEW_RBS-NEXT: s_or_b32 s0, s2, s0
; NEW_RBS-NEXT: s_andn2_b32 exec_lo, exec_lo, s0
; NEW_RBS-NEXT: s_cbranch_execz .LBB16_6
; NEW_RBS-NEXT: .LBB16_3: ; %A
; NEW_RBS-NEXT: ; =>This Inner Loop Header: Depth=1
; NEW_RBS-NEXT: v_ashrrev_i32_e32 v7, 31, v6
; NEW_RBS-NEXT: s_andn2_b32 s1, s1, exec_lo
; NEW_RBS-NEXT: s_and_b32 s2, exec_lo, -1
; NEW_RBS-NEXT: s_or_b32 s1, s1, s2
; NEW_RBS-NEXT: v_lshlrev_b64 v[7:8], 2, v[6:7]
; NEW_RBS-NEXT: v_add_co_u32 v9, vcc_lo, v2, v7
; NEW_RBS-NEXT: v_add_co_ci_u32_e32 v10, vcc_lo, v3, v8, vcc_lo
; NEW_RBS-NEXT: global_load_dword v9, v[9:10], off
; NEW_RBS-NEXT: s_waitcnt vmcnt(0)
; NEW_RBS-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v9
; NEW_RBS-NEXT: s_and_saveexec_b32 s2, vcc_lo
; NEW_RBS-NEXT: s_cbranch_execz .LBB16_2
; NEW_RBS-NEXT: ; %bb.4: ; %B
; NEW_RBS-NEXT: ; in Loop: Header=BB16_3 Depth=1
; NEW_RBS-NEXT: v_add_co_u32 v9, vcc_lo, v4, v7
; NEW_RBS-NEXT: v_add_co_ci_u32_e32 v10, vcc_lo, v5, v8, vcc_lo
; NEW_RBS-NEXT: s_mov_b32 s4, -1
; NEW_RBS-NEXT: global_load_dword v9, v[9:10], off
; NEW_RBS-NEXT: s_waitcnt vmcnt(0)
; NEW_RBS-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v9
; NEW_RBS-NEXT: s_and_saveexec_b32 s3, vcc_lo
; NEW_RBS-NEXT: s_cbranch_execz .LBB16_1
; NEW_RBS-NEXT: ; %bb.5: ; %loop.body
; NEW_RBS-NEXT: ; in Loop: Header=BB16_3 Depth=1
; NEW_RBS-NEXT: v_add_co_u32 v7, vcc_lo, v0, v7
; NEW_RBS-NEXT: v_add_co_ci_u32_e32 v8, vcc_lo, v1, v8, vcc_lo
; NEW_RBS-NEXT: v_add_nc_u32_e32 v10, 1, v6
; NEW_RBS-NEXT: v_cmp_gt_u32_e32 vcc_lo, 0x64, v6
; NEW_RBS-NEXT: s_andn2_b32 s4, -1, exec_lo
; NEW_RBS-NEXT: global_load_dword v9, v[7:8], off
; NEW_RBS-NEXT: v_mov_b32_e32 v6, v10
; NEW_RBS-NEXT: s_and_b32 s5, exec_lo, vcc_lo
; NEW_RBS-NEXT: s_or_b32 s4, s4, s5
; NEW_RBS-NEXT: s_waitcnt vmcnt(0)
; NEW_RBS-NEXT: v_add_nc_u32_e32 v9, 1, v9
; NEW_RBS-NEXT: global_store_dword v[7:8], v9, off
; NEW_RBS-NEXT: s_branch .LBB16_1
; NEW_RBS-NEXT: .LBB16_6: ; %exit
; NEW_RBS-NEXT: s_endpgm
entry:
br label %A
A:
%counter = phi i32 [ %counter.plus.1, %loop.body ], [ 0, %entry ]
%a.plus.counter = getelementptr inbounds i32, ptr addrspace(1) %a, i32 %counter
%a.val = load i32, ptr addrspace(1) %a.plus.counter
%a.cond = icmp eq i32 %a.val, 0
br i1 %a.cond, label %exit, label %B
B:
%b.plus.counter = getelementptr inbounds i32, ptr addrspace(1) %b, i32 %counter
%b.val = load i32, ptr addrspace(1) %b.plus.counter
%b.cond = icmp eq i32 %b.val, 0
br i1 %b.cond, label %exit, label %loop.body
loop.body:
%x.plus.counter = getelementptr inbounds i32, ptr addrspace(1) %x, i32 %counter
%x.val = load i32, ptr addrspace(1) %x.plus.counter
%x.val.plus.1 = add i32 %x.val, 1
store i32 %x.val.plus.1, ptr addrspace(1) %x.plus.counter
%counter.plus.1 = add i32 %counter, 1
%x.cond = icmp ult i32 %counter, 100
br i1 %x.cond, label %exit, label %A
exit:
ret void
}
declare i16 @llvm.abs.i16(i16, i1)
declare <4 x i32> @llvm.amdgcn.raw.buffer.load.v4i32(<4 x i32>, i32, i32, i32 immarg)

File diff suppressed because it is too large Load Diff