Bitcode: Introduce BitcodeWriter interface.

This interface allows clients to write multiple modules to a single
bitcode file. Also introduce the llvm-cat utility which can be used
to create a bitcode file containing multiple modules.

Differential Revision: https://reviews.llvm.org/D26179

llvm-svn: 288195
This commit is contained in:
Peter Collingbourne
2016-11-29 20:43:47 +00:00
parent d34c26eb08
commit 5a0a2e648c
11 changed files with 254 additions and 94 deletions

View File

@@ -65,36 +65,20 @@ enum {
};
/// Abstract class to manage the bitcode writing, subclassed for each bitcode
/// file type. Owns the BitstreamWriter, and includes the main entry point for
/// writing.
class BitcodeWriter {
/// file type.
class BitcodeWriterBase {
protected:
/// Pointer to the buffer allocated by caller for bitcode writing.
const SmallVectorImpl<char> &Buffer;
/// The stream created and owned by the BitcodeWriter.
BitstreamWriter Stream;
/// The stream created and owned by the client.
BitstreamWriter &Stream;
/// Saves the offset of the VSTOffset record that must eventually be
/// backpatched with the offset of the actual VST.
uint64_t VSTOffsetPlaceholder = 0;
public:
/// Constructs a BitcodeWriter object, and initializes a BitstreamRecord,
/// writing to the provided \p Buffer.
BitcodeWriter(SmallVectorImpl<char> &Buffer)
: Buffer(Buffer), Stream(Buffer) {}
virtual ~BitcodeWriter() = default;
/// Main entry point to write the bitcode file, which writes the bitcode
/// header and will then invoke the virtual writeBlocks() method.
void write();
private:
/// Derived classes must implement this to write the corresponding blocks for
/// that bitcode file type.
virtual void writeBlocks() = 0;
/// Constructs a BitcodeWriterBase object that writes to the provided
/// \p Stream.
BitcodeWriterBase(BitstreamWriter &Stream) : Stream(Stream) {}
protected:
bool hasVSTOffsetPlaceholder() { return VSTOffsetPlaceholder != 0; }
@@ -103,7 +87,10 @@ protected:
};
/// Class to manage the bitcode writing for a module.
class ModuleBitcodeWriter : public BitcodeWriter {
class ModuleBitcodeWriter : public BitcodeWriterBase {
/// Pointer to the buffer allocated by caller for bitcode writing.
const SmallVectorImpl<char> &Buffer;
/// The Module to write to bitcode.
const Module &M;
@@ -116,8 +103,8 @@ class ModuleBitcodeWriter : public BitcodeWriter {
/// True if a module hash record should be written.
bool GenerateHash;
/// The start bit of the module block, for use in generating a module hash
uint64_t BitcodeStartBit = 0;
/// The start bit of the identification block.
uint64_t BitcodeStartBit;
/// Map that holds the correspondence between GUIDs in the summary index,
/// that came from indirect call profiles, and a value id generated by this
@@ -131,16 +118,11 @@ public:
/// Constructs a ModuleBitcodeWriter object for the given Module,
/// writing to the provided \p Buffer.
ModuleBitcodeWriter(const Module *M, SmallVectorImpl<char> &Buffer,
bool ShouldPreserveUseListOrder,
BitstreamWriter &Stream, bool ShouldPreserveUseListOrder,
const ModuleSummaryIndex *Index, bool GenerateHash)
: BitcodeWriter(Buffer), M(*M), VE(*M, ShouldPreserveUseListOrder),
Index(Index), GenerateHash(GenerateHash) {
// Save the start bit of the actual bitcode, in case there is space
// saved at the start for the darwin header above. The reader stream
// will start at the bitcode, and we need the offset of the VST
// to line up.
BitcodeStartBit = Stream.GetCurrentBitNo();
: BitcodeWriterBase(Stream), Buffer(Buffer), M(*M),
VE(*M, ShouldPreserveUseListOrder), Index(Index),
GenerateHash(GenerateHash), BitcodeStartBit(Stream.GetCurrentBitNo()) {
// Assign ValueIds to any callee values in the index that came from
// indirect call profiles and were recorded as a GUID not a Value*
// (which would have been assigned an ID by the ValueEnumerator).
@@ -162,21 +144,12 @@ public:
assignValueId(CallEdge.first.getGUID());
}
private:
/// Main entry point for writing a module to bitcode, invoked by
/// BitcodeWriter::write() after it writes the header.
void writeBlocks() override;
/// Create the "IDENTIFICATION_BLOCK_ID" containing a single string with the
/// current llvm version, and a record for the epoch number.
void writeIdentificationBlock();
/// Emit the current module to the bitstream.
void writeModule();
void write();
private:
uint64_t bitcodeStartBit() { return BitcodeStartBit; }
void writeStringRecord(unsigned Code, StringRef Str, unsigned AbbrevToUse);
void writeAttributeGroupTable();
void writeAttributeTable();
void writeTypeTable();
@@ -310,7 +283,7 @@ private:
};
/// Class to manage the bitcode writing for a combined index.
class IndexBitcodeWriter : public BitcodeWriter {
class IndexBitcodeWriter : public BitcodeWriterBase {
/// The combined index to write to bitcode.
const ModuleSummaryIndex &Index;
@@ -329,11 +302,10 @@ public:
/// Constructs an IndexBitcodeWriter object for the given combined index,
/// writing to the provided \p Stream. When writing a subset of the index
/// for a distributed backend, provide a \p ModuleToSummariesForIndex map.
IndexBitcodeWriter(SmallVectorImpl<char> &Buffer,
const ModuleSummaryIndex &Index,
IndexBitcodeWriter(BitstreamWriter &Stream, const ModuleSummaryIndex &Index,
const std::map<std::string, GVSummaryMapTy>
*ModuleToSummariesForIndex = nullptr)
: BitcodeWriter(Buffer), Index(Index),
: BitcodeWriterBase(Stream), Index(Index),
ModuleToSummariesForIndex(ModuleToSummariesForIndex) {
// Assign unique value ids to all summaries to be written, for use
// in writing out the call graph edges. Save the mapping from GUID
@@ -480,11 +452,10 @@ public:
/// Obtain the end iterator over the summaries to be written.
iterator end() { return iterator(*this, /*IsAtEnd=*/true); }
private:
/// Main entry point for writing a combined index to bitcode, invoked by
/// BitcodeWriter::write() after it writes the header.
void writeBlocks() override;
/// Main entry point for writing a combined index to bitcode.
void write();
private:
void writeIndex();
void writeModStrings();
void writeCombinedValueSymbolTable();
@@ -597,8 +568,8 @@ static unsigned getEncodedSynchScope(SynchronizationScope SynchScope) {
llvm_unreachable("Invalid synch scope");
}
void ModuleBitcodeWriter::writeStringRecord(unsigned Code, StringRef Str,
unsigned AbbrevToUse) {
static void writeStringRecord(BitstreamWriter &Stream, unsigned Code,
StringRef Str, unsigned AbbrevToUse) {
SmallVector<unsigned, 64> Vals;
// Code: [strchar x N]
@@ -922,7 +893,7 @@ void ModuleBitcodeWriter::writeTypeTable() {
// Emit the name if it is present.
if (!ST->getName().empty())
writeStringRecord(bitc::TYPE_CODE_STRUCT_NAME, ST->getName(),
writeStringRecord(Stream, bitc::TYPE_CODE_STRUCT_NAME, ST->getName(),
StructNameAbbrev);
}
break;
@@ -1073,7 +1044,7 @@ void ModuleBitcodeWriter::writeComdats() {
/// Write a record that will eventually hold the word offset of the
/// module-level VST. For now the offset is 0, which will be backpatched
/// after the real VST is written. Saves the bit offset to backpatch.
void BitcodeWriter::writeValueSymbolTableForwardDecl() {
void BitcodeWriterBase::writeValueSymbolTableForwardDecl() {
// Write a placeholder value in for the offset of the real VST,
// which is written after the function blocks so that it can include
// the offset of each function. The placeholder offset will be
@@ -1120,13 +1091,13 @@ static StringEncoding getStringEncoding(const char *Str, unsigned StrLen) {
void ModuleBitcodeWriter::writeModuleInfo() {
// Emit various pieces of data attached to a module.
if (!M.getTargetTriple().empty())
writeStringRecord(bitc::MODULE_CODE_TRIPLE, M.getTargetTriple(),
writeStringRecord(Stream, bitc::MODULE_CODE_TRIPLE, M.getTargetTriple(),
0 /*TODO*/);
const std::string &DL = M.getDataLayoutStr();
if (!DL.empty())
writeStringRecord(bitc::MODULE_CODE_DATALAYOUT, DL, 0 /*TODO*/);
writeStringRecord(Stream, bitc::MODULE_CODE_DATALAYOUT, DL, 0 /*TODO*/);
if (!M.getModuleInlineAsm().empty())
writeStringRecord(bitc::MODULE_CODE_ASM, M.getModuleInlineAsm(),
writeStringRecord(Stream, bitc::MODULE_CODE_ASM, M.getModuleInlineAsm(),
0 /*TODO*/);
// Emit information about sections and GC, computing how many there are. Also
@@ -1142,7 +1113,7 @@ void ModuleBitcodeWriter::writeModuleInfo() {
// Give section names unique ID's.
unsigned &Entry = SectionMap[GV.getSection()];
if (!Entry) {
writeStringRecord(bitc::MODULE_CODE_SECTIONNAME, GV.getSection(),
writeStringRecord(Stream, bitc::MODULE_CODE_SECTIONNAME, GV.getSection(),
0 /*TODO*/);
Entry = SectionMap.size();
}
@@ -1154,7 +1125,7 @@ void ModuleBitcodeWriter::writeModuleInfo() {
// Give section names unique ID's.
unsigned &Entry = SectionMap[F.getSection()];
if (!Entry) {
writeStringRecord(bitc::MODULE_CODE_SECTIONNAME, F.getSection(),
writeStringRecord(Stream, bitc::MODULE_CODE_SECTIONNAME, F.getSection(),
0 /*TODO*/);
Entry = SectionMap.size();
}
@@ -1163,7 +1134,8 @@ void ModuleBitcodeWriter::writeModuleInfo() {
// Same for GC names.
unsigned &Entry = GCMap[F.getGC()];
if (!Entry) {
writeStringRecord(bitc::MODULE_CODE_GCNAME, F.getGC(), 0 /*TODO*/);
writeStringRecord(Stream, bitc::MODULE_CODE_GCNAME, F.getGC(),
0 /*TODO*/);
Entry = GCMap.size();
}
}
@@ -2761,11 +2733,13 @@ void ModuleBitcodeWriter::writeValueSymbolTable(
// Get the offset of the VST we are writing, and backpatch it into
// the VST forward declaration record.
uint64_t VSTOffset = Stream.GetCurrentBitNo();
// The BitcodeStartBit was the stream offset of the actual bitcode
// (e.g. excluding any initial darwin header).
// The BitcodeStartBit was the stream offset of the identification block.
VSTOffset -= bitcodeStartBit();
assert((VSTOffset & 31) == 0 && "VST block not 32-bit aligned");
Stream.BackpatchWord(VSTOffsetPlaceholder, VSTOffset / 32);
// Note that we add 1 here because the offset is relative to one word
// before the start of the identification block, which was historically
// always the start of the regular bitcode header.
Stream.BackpatchWord(VSTOffsetPlaceholder, VSTOffset / 32 + 1);
}
Stream.EnterSubblock(bitc::VALUE_SYMTAB_BLOCK_ID, 4);
@@ -2853,7 +2827,10 @@ void ModuleBitcodeWriter::writeValueSymbolTable(
// actual bitcode written to the stream).
uint64_t BitcodeIndex = (*FunctionToBitcodeIndex)[F] - bitcodeStartBit();
assert((BitcodeIndex & 31) == 0 && "function block not 32-bit aligned");
NameVals.push_back(BitcodeIndex / 32);
// Note that we add 1 here because the offset is relative to one word
// before the start of the identification block, which was historically
// always the start of the regular bitcode header.
NameVals.push_back(BitcodeIndex / 32 + 1);
Code = bitc::VST_CODE_FNENTRY;
AbbrevToUse = FnEntry8BitAbbrev;
@@ -3617,7 +3594,9 @@ void IndexBitcodeWriter::writeCombinedGlobalValueSummary() {
Stream.ExitBlock();
}
void ModuleBitcodeWriter::writeIdentificationBlock() {
/// Create the "IDENTIFICATION_BLOCK_ID" containing a single string with the
/// current llvm version, and a record for the epoch number.
void writeIdentificationBlock(BitstreamWriter &Stream) {
Stream.EnterSubblock(bitc::IDENTIFICATION_BLOCK_ID, 5);
// Write the "user readable" string identifying the bitcode producer
@@ -3626,7 +3605,7 @@ void ModuleBitcodeWriter::writeIdentificationBlock() {
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array));
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Char6));
auto StringAbbrev = Stream.EmitAbbrev(Abbv);
writeStringRecord(bitc::IDENTIFICATION_CODE_STRING,
writeStringRecord(Stream, bitc::IDENTIFICATION_CODE_STRING,
"LLVM" LLVM_VERSION_STRING, StringAbbrev);
// Write the epoch version
@@ -3655,24 +3634,9 @@ void ModuleBitcodeWriter::writeModuleHash(size_t BlockStartPos) {
Stream.EmitRecord(bitc::MODULE_CODE_HASH, Vals);
}
void BitcodeWriter::write() {
// Emit the file header first.
writeBitcodeHeader();
void ModuleBitcodeWriter::write() {
writeIdentificationBlock(Stream);
writeBlocks();
}
void ModuleBitcodeWriter::writeBlocks() {
writeIdentificationBlock();
writeModule();
}
void IndexBitcodeWriter::writeBlocks() {
// Index contains only a single outer (module) block.
writeIndex();
}
void ModuleBitcodeWriter::writeModule() {
Stream.EnterSubblock(bitc::MODULE_BLOCK_ID, 3);
size_t BlockStartPos = Buffer.size();
@@ -3801,7 +3765,7 @@ static void emitDarwinBCHeaderAndTrailer(SmallVectorImpl<char> &Buffer,
}
/// Helper to write the header common to all bitcode files.
void BitcodeWriter::writeBitcodeHeader() {
static void writeBitcodeHeader(BitstreamWriter &Stream) {
// Emit the file header.
Stream.Emit((unsigned)'B', 8);
Stream.Emit((unsigned)'C', 8);
@@ -3811,6 +3775,22 @@ void BitcodeWriter::writeBitcodeHeader() {
Stream.Emit(0xD, 4);
}
/// Constructs a BitcodeWriter that writes to \p Buffer.
///
/// A BitstreamWriter over \p Buffer is allocated here and stored in Stream,
/// and the bitcode file header is emitted right away. Because the header is
/// written in the constructor rather than in writeModule(), it is emitted
/// once per writer regardless of how many modules are written afterwards.
///
/// NOTE(review): Stream is initialized from a raw `new`; presumably the member
/// is an owning smart pointer declared in the header -- confirm there.
BitcodeWriter::BitcodeWriter(SmallVectorImpl<char> &Buffer)
: Buffer(Buffer), Stream(new BitstreamWriter(Buffer)) {
writeBitcodeHeader(*Stream);
}
/// Defaulted destructor, defined out of line in this file.
/// NOTE(review): presumably kept out of the header so that the Stream member
/// is destroyed where BitstreamWriter is a complete type -- confirm against
/// the class declaration.
BitcodeWriter::~BitcodeWriter() = default;
/// Writes the module \p M to this writer's stream.
///
/// Builds a temporary ModuleBitcodeWriter over this writer's Buffer and
/// Stream and calls its write() method. The bitcode file header is not
/// emitted here; the BitcodeWriter constructor has already written it.
///
/// \param M the module to write.
/// \param ShouldPreserveUseListOrder forwarded unchanged to
///        ModuleBitcodeWriter.
/// \param Index summary index forwarded unchanged to ModuleBitcodeWriter.
/// \param GenerateHash whether a module hash record should be written.
void BitcodeWriter::writeModule(const Module *M,
bool ShouldPreserveUseListOrder,
const ModuleSummaryIndex *Index,
bool GenerateHash) {
ModuleBitcodeWriter ModuleWriter(
M, Buffer, *Stream, ShouldPreserveUseListOrder, Index, GenerateHash);
ModuleWriter.write();
}
/// WriteBitcodeToFile - Write the specified module to the specified output
/// stream.
void llvm::WriteBitcodeToFile(const Module *M, raw_ostream &Out,
@@ -3826,10 +3806,8 @@ void llvm::WriteBitcodeToFile(const Module *M, raw_ostream &Out,
if (TT.isOSDarwin() || TT.isOSBinFormatMachO())
Buffer.insert(Buffer.begin(), BWH_HeaderSize, 0);
// Emit the module into the buffer.
ModuleBitcodeWriter ModuleWriter(M, Buffer, ShouldPreserveUseListOrder, Index,
GenerateHash);
ModuleWriter.write();
BitcodeWriter Writer(Buffer);
Writer.writeModule(M, ShouldPreserveUseListOrder, Index, GenerateHash);
if (TT.isOSDarwin() || TT.isOSBinFormatMachO())
emitDarwinBCHeaderAndTrailer(Buffer, TT);
@@ -3838,7 +3816,7 @@ void llvm::WriteBitcodeToFile(const Module *M, raw_ostream &Out,
Out.write((char*)&Buffer.front(), Buffer.size());
}
void IndexBitcodeWriter::writeIndex() {
void IndexBitcodeWriter::write() {
Stream.EnterSubblock(bitc::MODULE_BLOCK_ID, 3);
SmallVector<unsigned, 1> Vals;
@@ -3872,7 +3850,10 @@ void llvm::WriteIndexToFile(
SmallVector<char, 0> Buffer;
Buffer.reserve(256 * 1024);
IndexBitcodeWriter IndexWriter(Buffer, Index, ModuleToSummariesForIndex);
BitstreamWriter Stream(Buffer);
writeBitcodeHeader(Stream);
IndexBitcodeWriter IndexWriter(Stream, Index, ModuleToSummariesForIndex);
IndexWriter.write();
Out.write((char *)&Buffer.front(), Buffer.size());