clang-p2996/llvm/lib/Target/ARM/Thumb1InstrInfo.cpp
AtariDreams c5d000b1a8 [Thumb] Resolve FIXME: Use 'mov hi, $src; mov $dst, hi' (#81908)
Consider the following:

        ldr     r0, [r4]
        ldr     r7, [r0, #4]
        cmp     r7, r3
        bhi     .LBB0_6
        cmp     r0, r2
        push    {r0}
        pop     {r4}
        bne     .LBB0_3
        movs    r0, r6
        pop     {r4, r5, r6, r7}
        pop     {r1}
        bx      r1

This is a snippet of the Thumb1 code that clang currently generates
for the K&R malloc function.

The push {r0} / pop {r4} pair is how the compiler copies r0 into r4.

movs r4, r0 would destroy the flags set by the cmp right above.

The compiler has only one alternative in this case: transfer the value
through a high register.

However, LLVM does not consider this a valid approach, even though
clobbering a high register is free here.

This patch addresses the FIXME so that the compiler can route the copy
through r10, r11, or r12 when one of them is available.
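
For illustration, assuming r12 is free (the patch prefers it, since r12
is not preserved across calls anyway), the copy above becomes:

        cmp     r0, r2
        mov     r12, r0
        mov     r4, r12
        bne     .LBB0_3

The flags set by cmp survive, and no push/pop pair is needed.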
2024-04-05 10:18:22 +01:00


//===-- Thumb1InstrInfo.cpp - Thumb-1 Instruction Information -------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file contains the Thumb-1 implementation of the TargetInstrInfo class.
//
//===----------------------------------------------------------------------===//
#include "Thumb1InstrInfo.h"
#include "ARMSubtarget.h"
#include "llvm/ADT/BitVector.h"
#include "llvm/CodeGen/LiveRegUnits.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCInstBuilder.h"
using namespace llvm;
Thumb1InstrInfo::Thumb1InstrInfo(const ARMSubtarget &STI)
: ARMBaseInstrInfo(STI) {}
/// Return the noop instruction to use for a noop.
MCInst Thumb1InstrInfo::getNop() const {
return MCInstBuilder(ARM::tMOVr)
.addReg(ARM::R8)
.addReg(ARM::R8)
.addImm(ARMCC::AL)
.addReg(0);
}
unsigned Thumb1InstrInfo::getUnindexedOpcode(unsigned Opc) const {
return 0;
}

void Thumb1InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
                                  MachineBasicBlock::iterator I,
                                  const DebugLoc &DL, MCRegister DestReg,
                                  MCRegister SrcReg, bool KillSrc) const {
  // Need to check the arch.
  MachineFunction &MF = *MBB.getParent();
  const ARMSubtarget &st = MF.getSubtarget<ARMSubtarget>();

  assert(ARM::GPRRegClass.contains(DestReg, SrcReg) &&
         "Thumb1 can only copy GPR registers");

  if (st.hasV6Ops() || ARM::hGPRRegClass.contains(SrcReg) ||
      !ARM::tGPRRegClass.contains(DestReg))
    BuildMI(MBB, I, DL, get(ARM::tMOVr), DestReg)
        .addReg(SrcReg, getKillRegState(KillSrc))
        .add(predOps(ARMCC::AL));
  else {
    const TargetRegisterInfo *RegInfo = st.getRegisterInfo();
    LiveRegUnits UsedRegs(*RegInfo);
    UsedRegs.addLiveOuts(MBB);

    auto InstUpToI = MBB.end();
    while (InstUpToI != I)
      // The pre-decrement is on purpose here.
      // We want to have the liveness right before I.
      UsedRegs.stepBackward(*--InstUpToI);

    if (UsedRegs.available(ARM::CPSR)) {
      BuildMI(MBB, I, DL, get(ARM::tMOVSr), DestReg)
          .addReg(SrcReg, getKillRegState(KillSrc))
          ->addRegisterDead(ARM::CPSR, RegInfo);
      return;
    }

    // Use high register to move source to destination
    // if movs is not an option.
    BitVector Allocatable = RegInfo->getAllocatableSet(
        MF, RegInfo->getRegClass(ARM::hGPRRegClassID));

    Register TmpReg = ARM::NoRegister;
    // Prefer R12 as it is known to not be preserved anyway
    if (UsedRegs.available(ARM::R12) && Allocatable.test(ARM::R12)) {
      TmpReg = ARM::R12;
    } else {
      for (Register Reg : Allocatable.set_bits()) {
        if (UsedRegs.available(Reg)) {
          TmpReg = Reg;
          break;
        }
      }
    }

    if (TmpReg) {
      BuildMI(MBB, I, DL, get(ARM::tMOVr), TmpReg)
          .addReg(SrcReg, getKillRegState(KillSrc))
          .add(predOps(ARMCC::AL));
      BuildMI(MBB, I, DL, get(ARM::tMOVr), DestReg)
          .addReg(TmpReg, getKillRegState(true))
          .add(predOps(ARMCC::AL));
      return;
    }

    // 'MOV lo, lo' is unpredictable on < v6, so use the stack to do it
    BuildMI(MBB, I, DL, get(ARM::tPUSH))
        .add(predOps(ARMCC::AL))
        .addReg(SrcReg, getKillRegState(KillSrc));
    BuildMI(MBB, I, DL, get(ARM::tPOP))
        .add(predOps(ARMCC::AL))
        .addReg(DestReg, getDefRegState(true));
  }
}
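
// Store SrcReg into the stack slot FI. Thumb1 has only an SP-relative
// immediate-offset store (tSTRspi), and it can only store low registers.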
void Thumb1InstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
                                          MachineBasicBlock::iterator I,
                                          Register SrcReg, bool isKill, int FI,
                                          const TargetRegisterClass *RC,
                                          const TargetRegisterInfo *TRI,
                                          Register VReg) const {
  assert((RC == &ARM::tGPRRegClass ||
          (SrcReg.isPhysical() && isARMLowRegister(SrcReg))) &&
         "Unknown regclass!");

  if (RC == &ARM::tGPRRegClass ||
      (SrcReg.isPhysical() && isARMLowRegister(SrcReg))) {
    DebugLoc DL;
    if (I != MBB.end())
      DL = I->getDebugLoc();

    MachineFunction &MF = *MBB.getParent();
    MachineFrameInfo &MFI = MF.getFrameInfo();
    MachineMemOperand *MMO = MF.getMachineMemOperand(
        MachinePointerInfo::getFixedStack(MF, FI), MachineMemOperand::MOStore,
        MFI.getObjectSize(FI), MFI.getObjectAlign(FI));
    BuildMI(MBB, I, DL, get(ARM::tSTRspi))
        .addReg(SrcReg, getKillRegState(isKill))
        .addFrameIndex(FI)
        .addImm(0)
        .addMemOperand(MMO)
        .add(predOps(ARMCC::AL));
  }
}
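
// Reload DestReg from the stack slot FI via the SP-relative immediate-offset
// load tLDRspi; as with stores, only low registers can be handled directly.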
void Thumb1InstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
                                           MachineBasicBlock::iterator I,
                                           Register DestReg, int FI,
                                           const TargetRegisterClass *RC,
                                           const TargetRegisterInfo *TRI,
                                           Register VReg) const {
  assert((RC->hasSuperClassEq(&ARM::tGPRRegClass) ||
          (DestReg.isPhysical() && isARMLowRegister(DestReg))) &&
         "Unknown regclass!");

  if (RC->hasSuperClassEq(&ARM::tGPRRegClass) ||
      (DestReg.isPhysical() && isARMLowRegister(DestReg))) {
    DebugLoc DL;
    if (I != MBB.end())
      DL = I->getDebugLoc();

    MachineFunction &MF = *MBB.getParent();
    MachineFrameInfo &MFI = MF.getFrameInfo();
    MachineMemOperand *MMO = MF.getMachineMemOperand(
        MachinePointerInfo::getFixedStack(MF, FI), MachineMemOperand::MOLoad,
        MFI.getObjectSize(FI), MFI.getObjectAlign(FI));
    BuildMI(MBB, I, DL, get(ARM::tLDRspi), DestReg)
        .addFrameIndex(FI)
        .addImm(0)
        .addMemOperand(MMO)
        .add(predOps(ARMCC::AL));
  }
}
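
// Expand the LOAD_STACK_GUARD pseudo: choose how to materialize the address
// of the guard variable (PC-relative or absolute literal-pool load, or a
// MOVi32imm sequence on execute-only targets), then load the guard value
// from it with tLDRi.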
void Thumb1InstrInfo::expandLoadStackGuard(
    MachineBasicBlock::iterator MI) const {
  MachineFunction &MF = *MI->getParent()->getParent();
  const ARMSubtarget &ST = MF.getSubtarget<ARMSubtarget>();
  const auto *GV = cast<GlobalValue>((*MI->memoperands_begin())->getValue());

  assert(MF.getFunction().getParent()->getStackProtectorGuard() != "tls" &&
         "TLS stack protector not supported for Thumb1 targets");

  unsigned Instr;
  if (!GV->isDSOLocal())
    Instr = ARM::tLDRLIT_ga_pcrel;
  else if (ST.genExecuteOnly() && ST.hasV8MBaselineOps())
    Instr = ARM::t2MOVi32imm;
  else if (ST.genExecuteOnly())
    Instr = ARM::tMOVi32imm;
  else
    Instr = ARM::tLDRLIT_ga_abs;
  expandLoadStackGuardBase(MI, Instr, ARM::tLDRi);
}

bool Thumb1InstrInfo::canCopyGluedNodeDuringSchedule(SDNode *N) const {
  // In Thumb1 the scheduler may need to schedule a cross-copy between GPRs
  // and CPSR, but that is not always possible there, so allow the scheduler
  // to clone tADCS and tSBCS even if they have glue.
  // FIXME: Actually implement the cross-copy where it is possible (post v6),
  // because these copies entail more spilling.
  unsigned Opcode = N->getMachineOpcode();
  if (Opcode == ARM::tADCS || Opcode == ARM::tSBCS)
    return true;
  return false;
}