[PowerPC][AIX] Support ByVals with greater alignment than pointer size (#93341)
Implementation is NOT compatible with IBM XL C 16.1 and earlier, but is compatible with GCC. It handles all ByVals with alignment greater than the pointer width the same way IBM XL C handles ByVals that have vector members. For over-aligned objects that do not contain vectors, IBM XL C does not align them properly if they are passed in the GPR argument registers. This patch was originally written by Sean Fertile @mandlebug. Previously on Phabricator: https://reviews.llvm.org/D105659
This commit is contained in:
@@ -6855,7 +6855,9 @@ static bool CC_AIX(unsigned ValNo, MVT ValVT, MVT LocVT,
|
||||
const PPCSubtarget &Subtarget = static_cast<const PPCSubtarget &>(
|
||||
State.getMachineFunction().getSubtarget());
|
||||
const bool IsPPC64 = Subtarget.isPPC64();
|
||||
const Align PtrAlign = IsPPC64 ? Align(8) : Align(4);
|
||||
const unsigned PtrSize = IsPPC64 ? 8 : 4;
|
||||
const Align PtrAlign(PtrSize);
|
||||
const Align StackAlign(16);
|
||||
const MVT RegVT = IsPPC64 ? MVT::i64 : MVT::i32;
|
||||
|
||||
if (ValVT == MVT::f128)
|
||||
@@ -6876,12 +6878,16 @@ static bool CC_AIX(unsigned ValNo, MVT ValVT, MVT LocVT,
|
||||
PPC::V6, PPC::V7, PPC::V8, PPC::V9,
|
||||
PPC::V10, PPC::V11, PPC::V12, PPC::V13};
|
||||
|
||||
const ArrayRef<MCPhysReg> GPRs = IsPPC64 ? GPR_64 : GPR_32;
|
||||
|
||||
if (ArgFlags.isByVal()) {
|
||||
if (ArgFlags.getNonZeroByValAlign() > PtrAlign)
|
||||
const Align ByValAlign(ArgFlags.getNonZeroByValAlign());
|
||||
if (ByValAlign > StackAlign)
|
||||
report_fatal_error("Pass-by-value arguments with alignment greater than "
|
||||
"register width are not supported.");
|
||||
"16 are not supported.");
|
||||
|
||||
const unsigned ByValSize = ArgFlags.getByValSize();
|
||||
const Align ObjAlign = ByValAlign > PtrAlign ? ByValAlign : PtrAlign;
|
||||
|
||||
// An empty aggregate parameter takes up no storage and no registers,
|
||||
// but needs a MemLoc for a stack slot for the formal arguments side.
|
||||
@@ -6891,11 +6897,23 @@ static bool CC_AIX(unsigned ValNo, MVT ValVT, MVT LocVT,
|
||||
return false;
|
||||
}
|
||||
|
||||
const unsigned StackSize = alignTo(ByValSize, PtrAlign);
|
||||
unsigned Offset = State.AllocateStack(StackSize, PtrAlign);
|
||||
for (const unsigned E = Offset + StackSize; Offset < E;
|
||||
Offset += PtrAlign.value()) {
|
||||
if (unsigned Reg = State.AllocateReg(IsPPC64 ? GPR_64 : GPR_32))
|
||||
// Shadow allocate any registers that are not properly aligned.
|
||||
unsigned NextReg = State.getFirstUnallocated(GPRs);
|
||||
while (NextReg != GPRs.size() &&
|
||||
!isGPRShadowAligned(GPRs[NextReg], ObjAlign)) {
|
||||
// Shadow allocate next registers since its aligment is not strict enough.
|
||||
unsigned Reg = State.AllocateReg(GPRs);
|
||||
// Allocate the stack space shadowed by said register.
|
||||
State.AllocateStack(PtrSize, PtrAlign);
|
||||
assert(Reg && "Alocating register unexpectedly failed.");
|
||||
(void)Reg;
|
||||
NextReg = State.getFirstUnallocated(GPRs);
|
||||
}
|
||||
|
||||
const unsigned StackSize = alignTo(ByValSize, ObjAlign);
|
||||
unsigned Offset = State.AllocateStack(StackSize, ObjAlign);
|
||||
for (const unsigned E = Offset + StackSize; Offset < E; Offset += PtrSize) {
|
||||
if (unsigned Reg = State.AllocateReg(GPRs))
|
||||
State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, RegVT, LocInfo));
|
||||
else {
|
||||
State.addLoc(CCValAssign::getMem(ValNo, MVT::INVALID_SIMPLE_VALUE_TYPE,
|
||||
@@ -6917,12 +6935,12 @@ static bool CC_AIX(unsigned ValNo, MVT ValVT, MVT LocVT,
|
||||
[[fallthrough]];
|
||||
case MVT::i1:
|
||||
case MVT::i32: {
|
||||
const unsigned Offset = State.AllocateStack(PtrAlign.value(), PtrAlign);
|
||||
const unsigned Offset = State.AllocateStack(PtrSize, PtrAlign);
|
||||
// AIX integer arguments are always passed in register width.
|
||||
if (ValVT.getFixedSizeInBits() < RegVT.getFixedSizeInBits())
|
||||
LocInfo = ArgFlags.isSExt() ? CCValAssign::LocInfo::SExt
|
||||
: CCValAssign::LocInfo::ZExt;
|
||||
if (unsigned Reg = State.AllocateReg(IsPPC64 ? GPR_64 : GPR_32))
|
||||
if (unsigned Reg = State.AllocateReg(GPRs))
|
||||
State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, RegVT, LocInfo));
|
||||
else
|
||||
State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, RegVT, LocInfo));
|
||||
@@ -6942,8 +6960,8 @@ static bool CC_AIX(unsigned ValNo, MVT ValVT, MVT LocVT,
|
||||
State.addLoc(CCValAssign::getReg(ValNo, ValVT, FReg, LocVT, LocInfo));
|
||||
|
||||
// Reserve and initialize GPRs or initialize the PSA as required.
|
||||
for (unsigned I = 0; I < StoreSize; I += PtrAlign.value()) {
|
||||
if (unsigned Reg = State.AllocateReg(IsPPC64 ? GPR_64 : GPR_32)) {
|
||||
for (unsigned I = 0; I < StoreSize; I += PtrSize) {
|
||||
if (unsigned Reg = State.AllocateReg(GPRs)) {
|
||||
assert(FReg && "An FPR should be available when a GPR is reserved.");
|
||||
if (State.isVarArg()) {
|
||||
// Successfully reserved GPRs are only initialized for vararg calls.
|
||||
@@ -6995,9 +7013,6 @@ static bool CC_AIX(unsigned ValNo, MVT ValVT, MVT LocVT,
|
||||
return false;
|
||||
}
|
||||
|
||||
const unsigned PtrSize = IsPPC64 ? 8 : 4;
|
||||
ArrayRef<MCPhysReg> GPRs = IsPPC64 ? GPR_64 : GPR_32;
|
||||
|
||||
unsigned NextRegIndex = State.getFirstUnallocated(GPRs);
|
||||
// Burn any underaligned registers and their shadowed stack space until
|
||||
// we reach the required alignment.
|
||||
@@ -7347,9 +7362,6 @@ SDValue PPCTargetLowering::LowerFormalArguments_AIX(
|
||||
const MCPhysReg ArgReg = VA.getLocReg();
|
||||
const PPCFrameLowering *FL = Subtarget.getFrameLowering();
|
||||
|
||||
if (Flags.getNonZeroByValAlign() > PtrByteSize)
|
||||
report_fatal_error("Over aligned byvals not supported yet.");
|
||||
|
||||
const unsigned StackSize = alignTo(Flags.getByValSize(), PtrByteSize);
|
||||
const int FI = MF.getFrameInfo().CreateFixedObject(
|
||||
StackSize, mapArgRegToOffsetAIX(ArgReg, FL), /* IsImmutable */ false,
|
||||
|
||||
@@ -13,4 +13,4 @@ entry:
|
||||
|
||||
declare void @foo(i32, i32, i32, i32, i32, i32, i32, i32, ptr byval(%struct.S) align 32)
|
||||
|
||||
; CHECK: LLVM ERROR: Pass-by-value arguments with alignment greater than register width are not supported.
|
||||
; CHECK: LLVM ERROR: Pass-by-value arguments with alignment greater than 16 are not supported.
|
||||
|
||||
40
llvm/test/CodeGen/PowerPC/aix-vector-byval-callee.ll
Normal file
40
llvm/test/CodeGen/PowerPC/aix-vector-byval-callee.ll
Normal file
@@ -0,0 +1,40 @@
|
||||
; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5
|
||||
; RUN: llc -mtriple powerpc-ibm-aix-xcoff -stop-after=machine-cp -mcpu=pwr7 \
|
||||
; RUN: -mattr=-altivec -verify-machineinstrs < %s | \
|
||||
; RUN: FileCheck --check-prefix=32BIT %s
|
||||
|
||||
; RUN: llc -mtriple powerpc64-ibm-aix-xcoff -stop-after=machine-cp -mcpu=pwr7 \
|
||||
; RUN: -mattr=-altivec -verify-machineinstrs < %s | \
|
||||
; RUN: FileCheck --check-prefix=64BIT %s
|
||||
|
||||
%struct.vec_struct = type { <4 x i32> }
|
||||
|
||||
; Function Attrs: norecurse nounwind readonly
|
||||
define i32 @vec_struct_test(i32 %i, ptr nocapture readonly byval(%struct.vec_struct) align 16 %vs) {
|
||||
; 32BIT-LABEL: name: vec_struct_test
|
||||
; 32BIT: bb.0.entry:
|
||||
; 32BIT-NEXT: liveins: $r3, $r5, $r6, $r7, $r8
|
||||
; 32BIT-NEXT: {{ $}}
|
||||
; 32BIT-NEXT: STW killed renamable $r7, 8, %fixed-stack.0 :: (store (s32) into %fixed-stack.0 + 8, align 8)
|
||||
; 32BIT-NEXT: STW killed renamable $r6, 4, %fixed-stack.0 :: (store (s32) into %fixed-stack.0 + 4)
|
||||
; 32BIT-NEXT: STW renamable $r5, 0, %fixed-stack.0 :: (store (s32) into %fixed-stack.0, align 16)
|
||||
; 32BIT-NEXT: STW killed renamable $r8, 12, %fixed-stack.0 :: (store (s32) into %fixed-stack.0 + 12)
|
||||
; 32BIT-NEXT: renamable $r3 = nsw ADD4 killed renamable $r5, killed renamable $r3
|
||||
; 32BIT-NEXT: BLR implicit $lr, implicit $rm, implicit $r3
|
||||
;
|
||||
; 64BIT-LABEL: name: vec_struct_test
|
||||
; 64BIT: bb.0.entry:
|
||||
; 64BIT-NEXT: liveins: $x3, $x5, $x6
|
||||
; 64BIT-NEXT: {{ $}}
|
||||
; 64BIT-NEXT: STD renamable $x5, 0, %fixed-stack.0 :: (store (s64) into %fixed-stack.0, align 16)
|
||||
; 64BIT-NEXT: STD killed renamable $x6, 8, %fixed-stack.0 :: (store (s64) into %fixed-stack.0 + 8)
|
||||
; 64BIT-NEXT: renamable $x4 = RLDICL killed renamable $x5, 32, 32
|
||||
; 64BIT-NEXT: renamable $r3 = nsw ADD4 renamable $r4, renamable $r3, implicit killed $x3, implicit killed $x4, implicit-def $x3
|
||||
; 64BIT-NEXT: BLR8 implicit $lr8, implicit $rm, implicit $x3
|
||||
entry:
|
||||
%vsi = getelementptr inbounds i8, ptr %vs, i32 0
|
||||
%0 = load <4 x i32>, ptr %vsi, align 16
|
||||
%vecext = extractelement <4 x i32> %0, i32 0
|
||||
%add = add nsw i32 %vecext, %i
|
||||
ret i32 %add
|
||||
}
|
||||
49
llvm/test/CodeGen/PowerPC/aix-vector-byval.ll
Normal file
49
llvm/test/CodeGen/PowerPC/aix-vector-byval.ll
Normal file
@@ -0,0 +1,49 @@
|
||||
; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
|
||||
; RUN: llc -mtriple powerpc-ibm-aix-xcoff -stop-after=machine-cp -mcpu=pwr7 \
|
||||
; RUN: -mattr=-altivec -verify-machineinstrs < %s | \
|
||||
; RUN: FileCheck --check-prefix=32BIT %s
|
||||
|
||||
; RUN: llc -mtriple powerpc64-ibm-aix-xcoff -stop-after=machine-cp -mcpu=pwr7 \
|
||||
; RUN: -mattr=-altivec -verify-machineinstrs < %s | \
|
||||
; RUN: FileCheck --check-prefix=64BIT %s
|
||||
|
||||
%struct.B = type { <8 x i16>, i32, i32, [8 x i8] }
|
||||
|
||||
; Function Attrs: nounwind
|
||||
define i32 @caller() {
|
||||
; 32BIT-LABEL: name: caller
|
||||
; 32BIT: bb.0.entry:
|
||||
; 32BIT-NEXT: renamable $r3 = LWZ 28, %stack.0.vs :: (load (s32) from unknown-address + 4)
|
||||
; 32BIT-NEXT: STW killed renamable $r3, 60, $r1 :: (store (s32) into unknown-address + 4, basealign 16)
|
||||
; 32BIT-NEXT: renamable $r3 = LWZ 24, %stack.0.vs :: (load (s32) from %stack.0.vs + 24, align 8, basealign 16)
|
||||
; 32BIT-NEXT: STW killed renamable $r3, 56, $r1 :: (store (s32), align 16)
|
||||
; 32BIT-NEXT: ADJCALLSTACKDOWN 64, 0, implicit-def dead $r1, implicit $r1
|
||||
; 32BIT-NEXT: renamable $r10 = LWZ 20, %stack.0.vs :: (load (s32) from %stack.0.vs + 20)
|
||||
; 32BIT-NEXT: renamable $r9 = LWZ 16, %stack.0.vs :: (load (s32) from %stack.0.vs + 16, align 16)
|
||||
; 32BIT-NEXT: renamable $r8 = LWZ 12, %stack.0.vs :: (load (s32) from %stack.0.vs + 12)
|
||||
; 32BIT-NEXT: renamable $r7 = LWZ 8, %stack.0.vs :: (load (s32) from %stack.0.vs + 8, align 8)
|
||||
; 32BIT-NEXT: renamable $r6 = LWZ 4, %stack.0.vs :: (load (s32) from %stack.0.vs + 4)
|
||||
; 32BIT-NEXT: renamable $r5 = LWZ 0, %stack.0.vs :: (load (s32) from %stack.0.vs, align 16)
|
||||
; 32BIT-NEXT: $r3 = LI 0
|
||||
; 32BIT-NEXT: BL_NOP <mcsymbol .vec_struct_test[PR]>, csr_aix32, implicit-def dead $lr, implicit $rm, implicit $r3, implicit $r5, implicit $r6, implicit $r7, implicit $r8, implicit $r9, implicit $r10, implicit $r2, implicit-def $r1, implicit-def $r3
|
||||
; 32BIT-NEXT: ADJCALLSTACKUP 64, 0, implicit-def dead $r1, implicit $r1
|
||||
; 32BIT-NEXT: BLR implicit $lr, implicit $rm, implicit $r3
|
||||
;
|
||||
; 64BIT-LABEL: name: caller
|
||||
; 64BIT: bb.0.entry:
|
||||
; 64BIT-NEXT: ADJCALLSTACKDOWN 112, 0, implicit-def dead $r1, implicit $r1
|
||||
; 64BIT-NEXT: renamable $x8 = LD 24, %stack.0.vs :: (load (s64) from %stack.0.vs + 24)
|
||||
; 64BIT-NEXT: renamable $x7 = LD 16, %stack.0.vs :: (load (s64) from %stack.0.vs + 16, align 16)
|
||||
; 64BIT-NEXT: renamable $x6 = LD 8, %stack.0.vs :: (load (s64) from %stack.0.vs + 8)
|
||||
; 64BIT-NEXT: renamable $x5 = LD 0, %stack.0.vs :: (load (s64) from %stack.0.vs, align 16)
|
||||
; 64BIT-NEXT: $x3 = LI8 0
|
||||
; 64BIT-NEXT: BL8_NOP <mcsymbol .vec_struct_test[PR]>, csr_ppc64, implicit-def dead $lr8, implicit $rm, implicit $x3, implicit $x5, implicit $x6, implicit $x7, implicit $x8, implicit $x2, implicit-def $r1, implicit-def $x3
|
||||
; 64BIT-NEXT: ADJCALLSTACKUP 112, 0, implicit-def dead $r1, implicit $r1
|
||||
; 64BIT-NEXT: BLR8 implicit $lr8, implicit $rm, implicit $x3
|
||||
entry:
|
||||
%vs = alloca %struct.B, align 16
|
||||
%call = tail call i32 @vec_struct_test(i32 0, ptr nonnull byval(%struct.B) align 16 %vs)
|
||||
ret i32 %call
|
||||
}
|
||||
|
||||
declare i32 @vec_struct_test(i32, ptr byval(%struct.B) align 16)
|
||||
Reference in New Issue
Block a user