BPF: remove intrindics @llvm.stacksave() and @llvm.stackrestore()

Paul Chaignon reported a bpf verifier failure ([1]) due to using
non-ABI register R11. For the test case, llvm11 is okay while
llvm12 and later generates verifier unfriendly code.

The failure is related to variable length array size.
The following mimics the variable length array definition
in the test case:

struct t { char a[20]; };
void foo(void *);
int test() {
   const int a = 8;
   char tmp[AA + sizeof(struct t) + a];
   foo(tmp);
   ...
}

Paul helped bisect that the following llvm commit is
responsible:

552c6c2328 ("PR44406: Follow behavior of array bound constant
              folding in more recent versions of GCC.")

Basically, before the above commit, clang frontend did constant
folding for array size "AA + sizeof(struct t) + a" to be 68,
so used alloca for stack allocation. After the above commit,
clang frontend didn't do constant folding for array size
any more, which results in a VLA and llvm.stacksave/llvm.stackrestore
is generated.

BPF architecture API does not support stack pointer (sp) register.
The LLVM internally used R11 to indicate sp register but it should
not be in the final code. Otherwise, kernel verifier will reject it.

The early patch ([2]) tried to fix the issue in clang frontend.
But the upstream discussion considered frontend fix is really a
hack and the backend should properly undo llvm.stacksave/llvm.stackrestore.
This patch implemented a bpf IR phase to remove these intrinsics
unconditionally. If eventually the alloca can be resolved with
constant size, r11 will not be generated. If alloca cannot be
resolved with constant size, SelectionDag will complain, the same
as without this patch.

 [1] https://lore.kernel.org/bpf/20210809151202.GB1012999@Mem/
 [2] https://reviews.llvm.org/D107882

Differential Revision: https://reviews.llvm.org/D111897
This commit is contained in:
Yonghong Song
2021-10-14 21:16:19 -07:00
parent 63f8099e23
commit 009f3a89d8
5 changed files with 243 additions and 0 deletions

View File

@@ -21,6 +21,7 @@ ModulePass *createBPFCheckAndAdjustIR();
FunctionPass *createBPFAbstractMemberAccess(BPFTargetMachine *TM);
FunctionPass *createBPFPreserveDIType();
FunctionPass *createBPFIRPeephole();
FunctionPass *createBPFISelDag(BPFTargetMachine &TM);
FunctionPass *createBPFMISimplifyPatchablePass();
FunctionPass *createBPFMIPeepholePass();
@@ -33,6 +34,7 @@ void initializeBPFCheckAndAdjustIRPass(PassRegistry&);
void initializeBPFAbstractMemberAccessLegacyPassPass(PassRegistry &);
void initializeBPFPreserveDITypePass(PassRegistry&);
void initializeBPFIRPeepholePass(PassRegistry&);
void initializeBPFMISimplifyPatchablePass(PassRegistry&);
void initializeBPFMIPeepholePass(PassRegistry&);
void initializeBPFMIPeepholeTruncElimPass(PassRegistry&);
@@ -57,6 +59,13 @@ public:
static bool isRequired() { return true; }
};
class BPFIRPeepholePass : public PassInfoMixin<BPFIRPeepholePass> {
public:
PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
static bool isRequired() { return true; }
};
class BPFAdjustOptPass : public PassInfoMixin<BPFAdjustOptPass> {
public:
PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM);

View File

@@ -0,0 +1,115 @@
//===------------ BPFIRPeephole.cpp - IR Peephole Transformation ----------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// IR level peephole optimization, specifically removing @llvm.stacksave() and
// @llvm.stackrestore().
//
//===----------------------------------------------------------------------===//
#include "BPF.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/PassManager.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/User.h"
#include "llvm/IR/Value.h"
#include "llvm/Pass.h"
#define DEBUG_TYPE "bpf-ir-peephole"
using namespace llvm;
namespace {
static bool BPFIRPeepholeImpl(Function &F) {
LLVM_DEBUG(dbgs() << "******** BPF IR Peephole ********\n");
Instruction *ToErase = nullptr;
for (auto &BB : F) {
for (auto &I : BB) {
// The following code pattern is handled:
// %3 = call i8* @llvm.stacksave()
// store i8* %3, i8** %saved_stack, align 8
// ...
// %4 = load i8*, i8** %saved_stack, align 8
// call void @llvm.stackrestore(i8* %4)
// ...
// The goal is to remove the above four instructions,
// so we won't have instructions with r11 (stack pointer)
// if eventually there is no variable length stack allocation.
// InstrCombine also tries to remove the above instructions,
// if it is proven safe (constant alloca etc.), but depending
// on code pattern, it may still miss some.
//
// With unconditionally removing these instructions, if alloca is
// constant, we are okay then. Otherwise, SelectionDag will complain
// since BPF does not support dynamic allocation yet.
if (ToErase) {
ToErase->eraseFromParent();
ToErase = nullptr;
}
if (auto *Call = dyn_cast<CallInst>(&I)) {
if (auto *GV = dyn_cast<GlobalValue>(Call->getCalledOperand())) {
if (!GV->getName().equals("llvm.stacksave"))
continue;
if (!Call->hasOneUser())
continue;
auto *Inst = cast<Instruction>(*Call->user_begin());
LLVM_DEBUG(dbgs() << "Remove:"; I.dump());
LLVM_DEBUG(dbgs() << "Remove:"; Inst->dump(); dbgs() << '\n');
Inst->eraseFromParent();
ToErase = &I;
}
continue;
}
if (auto *LD = dyn_cast<LoadInst>(&I)) {
if (!LD->hasOneUser())
continue;
auto *Call = dyn_cast<CallInst>(*LD->user_begin());
if (!Call)
continue;
auto *GV = dyn_cast<GlobalValue>(Call->getCalledOperand());
if (!GV)
continue;
if (!GV->getName().equals("llvm.stackrestore"))
continue;
LLVM_DEBUG(dbgs() << "Remove:"; I.dump());
LLVM_DEBUG(dbgs() << "Remove:"; Call->dump(); dbgs() << '\n');
Call->eraseFromParent();
ToErase = &I;
}
}
}
return false;
}
class BPFIRPeephole final : public FunctionPass {
bool runOnFunction(Function &F) override;
public:
static char ID;
BPFIRPeephole() : FunctionPass(ID) {}
};
} // End anonymous namespace
char BPFIRPeephole::ID = 0;
INITIALIZE_PASS(BPFIRPeephole, DEBUG_TYPE, "BPF IR Peephole", false, false)
FunctionPass *llvm::createBPFIRPeephole() { return new BPFIRPeephole(); }
bool BPFIRPeephole::runOnFunction(Function &F) { return BPFIRPeepholeImpl(F); }
PreservedAnalyses BPFIRPeepholePass::run(Function &F,
FunctionAnalysisManager &AM) {
return BPFIRPeepholeImpl(F) ? PreservedAnalyses::none()
: PreservedAnalyses::all();
}

View File

@@ -43,6 +43,7 @@ extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeBPFTarget() {
PassRegistry &PR = *PassRegistry::getPassRegistry();
initializeBPFAbstractMemberAccessLegacyPassPass(PR);
initializeBPFPreserveDITypePass(PR);
initializeBPFIRPeepholePass(PR);
initializeBPFAdjustOptPass(PR);
initializeBPFCheckAndAdjustIRPass(PR);
initializeBPFMIPeepholePass(PR);
@@ -107,6 +108,7 @@ void BPFTargetMachine::adjustPassManager(PassManagerBuilder &Builder) {
[&](const PassManagerBuilder &, legacy::PassManagerBase &PM) {
PM.add(createBPFAbstractMemberAccess(this));
PM.add(createBPFPreserveDIType());
PM.add(createBPFIRPeephole());
});
Builder.addExtension(
@@ -128,6 +130,7 @@ void BPFTargetMachine::registerPassBuilderCallbacks(PassBuilder &PB) {
FunctionPassManager FPM;
FPM.addPass(BPFAbstractMemberAccessPass(this));
FPM.addPass(BPFPreserveDITypePass());
FPM.addPass(BPFIRPeepholePass());
MPM.addPass(createModuleToFunctionPassAdaptor(std::move(FPM)));
});
PB.registerPeepholeEPCallback([=](FunctionPassManager &FPM,

View File

@@ -21,6 +21,7 @@ add_llvm_target(BPFCodeGen
BPFCheckAndAdjustIR.cpp
BPFFrameLowering.cpp
BPFInstrInfo.cpp
BPFIRPeephole.cpp
BPFISelDAGToDAG.cpp
BPFISelLowering.cpp
BPFMCInstLower.cpp

View File

@@ -0,0 +1,115 @@
; RUN: opt --bpf-ir-peephole -mtriple=bpf-pc-linux -S %s | FileCheck %s
; Source:
; #define AA 40
; struct t {
; char a[20];
; };
; void foo(void *);
;
; int test1() {
; const int a = 8;
; char tmp[AA + sizeof(struct t) + a];
; foo(tmp);
; return 0;
; }
;
; int test2(int b) {
; const int a = 8;
; char tmp[a + b];
; foo(tmp);
; return 0;
; }
; Compilation flag:
; clang -target bpf -O2 -S -emit-llvm t.c -Xclang -disable-llvm-passes
source_filename = "t.c"
target datalayout = "e-m:e-p:64:64-i64:64-i128:128-n32:64-S128"
target triple = "bpf"
; Function Attrs: nounwind
define dso_local i32 @test1() #0 {
entry:
%a = alloca i32, align 4
%saved_stack = alloca i8*, align 8
%0 = bitcast i32* %a to i8*
call void @llvm.lifetime.start.p0i8(i64 4, i8* %0) #4
store i32 8, i32* %a, align 4, !tbaa !3
%1 = call i8* @llvm.stacksave()
store i8* %1, i8** %saved_stack, align 8
%vla = alloca i8, i64 68, align 1
call void @foo(i8* %vla)
%2 = load i8*, i8** %saved_stack, align 8
call void @llvm.stackrestore(i8* %2)
%3 = bitcast i32* %a to i8*
call void @llvm.lifetime.end.p0i8(i64 4, i8* %3) #4
ret i32 0
}
; CHECK: define dso_local i32 @test1
; CHECK-NOT: %[[#]] = call i8* @llvm.stacksave()
; CHECK-NOT: store i8* %[[#]], i8** %saved_stack, align 8
; CHECK-NOT: %[[#]] = load i8*, i8** %saved_stack, align 8
; CHECK-NOT: call void @llvm.stackrestore(i8* %[[#]])
; Function Attrs: argmemonly nofree nosync nounwind willreturn
declare void @llvm.lifetime.start.p0i8(i64 immarg, i8* nocapture) #1
; Function Attrs: nofree nosync nounwind willreturn
declare i8* @llvm.stacksave() #2
declare dso_local void @foo(i8*) #3
; Function Attrs: nofree nosync nounwind willreturn
declare void @llvm.stackrestore(i8*) #2
; Function Attrs: argmemonly nofree nosync nounwind willreturn
declare void @llvm.lifetime.end.p0i8(i64 immarg, i8* nocapture) #1
; Function Attrs: nounwind
define dso_local i32 @test2(i32 %b) #0 {
entry:
%b.addr = alloca i32, align 4
%a = alloca i32, align 4
%saved_stack = alloca i8*, align 8
%__vla_expr0 = alloca i64, align 8
store i32 %b, i32* %b.addr, align 4, !tbaa !3
%0 = bitcast i32* %a to i8*
call void @llvm.lifetime.start.p0i8(i64 4, i8* %0) #4
store i32 8, i32* %a, align 4, !tbaa !3
%1 = load i32, i32* %b.addr, align 4, !tbaa !3
%add = add nsw i32 8, %1
%2 = zext i32 %add to i64
%3 = call i8* @llvm.stacksave()
store i8* %3, i8** %saved_stack, align 8
%vla = alloca i8, i64 %2, align 1
store i64 %2, i64* %__vla_expr0, align 8
call void @foo(i8* %vla)
%4 = load i8*, i8** %saved_stack, align 8
call void @llvm.stackrestore(i8* %4)
%5 = bitcast i32* %a to i8*
call void @llvm.lifetime.end.p0i8(i64 4, i8* %5) #4
ret i32 0
}
; CHECK: define dso_local i32 @test2
; CHECK-NOT: %[[#]] = call i8* @llvm.stacksave()
; CHECK-NOT: store i8* %[[#]], i8** %saved_stack, align 8
; CHECK-NOT: %[[#]] = load i8*, i8** %saved_stack, align 8
; CHECK-NOT: call void @llvm.stackrestore(i8* %[[#]])
attributes #0 = { nounwind "frame-pointer"="all" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" }
attributes #1 = { argmemonly nofree nosync nounwind willreturn }
attributes #2 = { nofree nosync nounwind willreturn }
attributes #3 = { "frame-pointer"="all" "no-trapping-math"="true" "stack-protector-buffer-size"="8" }
attributes #4 = { nounwind }
!llvm.module.flags = !{!0, !1}
!llvm.ident = !{!2}
!0 = !{i32 1, !"wchar_size", i32 4}
!1 = !{i32 7, !"frame-pointer", i32 2}
!2 = !{!"clang version 14.0.0 (https://github.com/llvm/llvm-project.git 64c5d5c671fb5b5f25c464652a4eec2cf743af0d)"}
!3 = !{!4, !4, i64 0}
!4 = !{!"int", !5, i64 0}
!5 = !{!"omnipotent char", !6, i64 0}
!6 = !{!"Simple C/C++ TBAA"}