`llvm-cov convert-for-testing` only functions properly when the input binary contains a single source file. When the binary has multiple source files, a `Malformed coverage data` error will occur when the generated .covmapping is read back. This is because the testing format lacks support for indicating the size of its file records, and current implementation just assumes there's only one record in it. This patch fixes this problem by introducing a new testing format version. Changes to the code: - Add a new format version. The version number is stored in the the last 8 bytes of the orignial magic number field to be backward-compatible. - Output a LEB128 number before the file records section to indicate its size in the new version. - Change the format parsing code correspondingly. - Update the document to formalize the testing format. - Additionally, fix the bug when converting COFF binaries. Reviewed By: phosek, gulfem Differential Revision: https://reviews.llvm.org/D156611
286 lines
10 KiB
C++
286 lines
10 KiB
C++
//===- CoverageMappingWriter.cpp - Code coverage mapping writer -----------===//
|
|
//
|
|
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
|
// See https://llvm.org/LICENSE.txt for license information.
|
|
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
|
//
|
|
//===----------------------------------------------------------------------===//
|
|
//
|
|
// This file contains support for writing coverage mapping data for
|
|
// instrumentation based coverage.
|
|
//
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
#include "llvm/ProfileData/Coverage/CoverageMappingWriter.h"
|
|
#include "llvm/ADT/ArrayRef.h"
|
|
#include "llvm/ADT/SmallVector.h"
|
|
#include "llvm/ADT/StringExtras.h"
|
|
#include "llvm/ProfileData/InstrProf.h"
|
|
#include "llvm/Support/Compression.h"
|
|
#include "llvm/Support/LEB128.h"
|
|
#include "llvm/Support/raw_ostream.h"
|
|
#include <algorithm>
|
|
#include <cassert>
|
|
#include <limits>
|
|
#include <vector>
|
|
|
|
using namespace llvm;
|
|
using namespace coverage;
|
|
|
|
CoverageFilenamesSectionWriter::CoverageFilenamesSectionWriter(
|
|
ArrayRef<std::string> Filenames)
|
|
: Filenames(Filenames) {
|
|
#ifndef NDEBUG
|
|
StringSet<> NameSet;
|
|
for (StringRef Name : Filenames)
|
|
assert(NameSet.insert(Name).second && "Duplicate filename");
|
|
#endif
|
|
}
|
|
|
|
void CoverageFilenamesSectionWriter::write(raw_ostream &OS, bool Compress) {
|
|
std::string FilenamesStr;
|
|
{
|
|
raw_string_ostream FilenamesOS{FilenamesStr};
|
|
for (const auto &Filename : Filenames) {
|
|
encodeULEB128(Filename.size(), FilenamesOS);
|
|
FilenamesOS << Filename;
|
|
}
|
|
}
|
|
|
|
SmallVector<uint8_t, 128> CompressedStr;
|
|
bool doCompression = Compress && compression::zlib::isAvailable() &&
|
|
DoInstrProfNameCompression;
|
|
if (doCompression)
|
|
compression::zlib::compress(arrayRefFromStringRef(FilenamesStr),
|
|
CompressedStr,
|
|
compression::zlib::BestSizeCompression);
|
|
|
|
// ::= <num-filenames>
|
|
// <uncompressed-len>
|
|
// <compressed-len-or-zero>
|
|
// (<compressed-filenames> | <uncompressed-filenames>)
|
|
encodeULEB128(Filenames.size(), OS);
|
|
encodeULEB128(FilenamesStr.size(), OS);
|
|
encodeULEB128(doCompression ? CompressedStr.size() : 0U, OS);
|
|
OS << (doCompression ? toStringRef(CompressedStr) : StringRef(FilenamesStr));
|
|
}
|
|
|
|
namespace {
|
|
|
|
/// Gather only the expressions that are used by the mapping
|
|
/// regions in this function.
|
|
class CounterExpressionsMinimizer {
|
|
ArrayRef<CounterExpression> Expressions;
|
|
SmallVector<CounterExpression, 16> UsedExpressions;
|
|
std::vector<unsigned> AdjustedExpressionIDs;
|
|
|
|
public:
|
|
CounterExpressionsMinimizer(ArrayRef<CounterExpression> Expressions,
|
|
ArrayRef<CounterMappingRegion> MappingRegions)
|
|
: Expressions(Expressions) {
|
|
AdjustedExpressionIDs.resize(Expressions.size(), 0);
|
|
for (const auto &I : MappingRegions) {
|
|
mark(I.Count);
|
|
mark(I.FalseCount);
|
|
}
|
|
for (const auto &I : MappingRegions) {
|
|
gatherUsed(I.Count);
|
|
gatherUsed(I.FalseCount);
|
|
}
|
|
}
|
|
|
|
void mark(Counter C) {
|
|
if (!C.isExpression())
|
|
return;
|
|
unsigned ID = C.getExpressionID();
|
|
AdjustedExpressionIDs[ID] = 1;
|
|
mark(Expressions[ID].LHS);
|
|
mark(Expressions[ID].RHS);
|
|
}
|
|
|
|
void gatherUsed(Counter C) {
|
|
if (!C.isExpression() || !AdjustedExpressionIDs[C.getExpressionID()])
|
|
return;
|
|
AdjustedExpressionIDs[C.getExpressionID()] = UsedExpressions.size();
|
|
const auto &E = Expressions[C.getExpressionID()];
|
|
UsedExpressions.push_back(E);
|
|
gatherUsed(E.LHS);
|
|
gatherUsed(E.RHS);
|
|
}
|
|
|
|
ArrayRef<CounterExpression> getExpressions() const { return UsedExpressions; }
|
|
|
|
/// Adjust the given counter to correctly transition from the old
|
|
/// expression ids to the new expression ids.
|
|
Counter adjust(Counter C) const {
|
|
if (C.isExpression())
|
|
C = Counter::getExpression(AdjustedExpressionIDs[C.getExpressionID()]);
|
|
return C;
|
|
}
|
|
};
|
|
|
|
} // end anonymous namespace
|
|
|
|
/// Encode the counter.
|
|
///
|
|
/// The encoding uses the following format:
|
|
/// Low 2 bits - Tag:
|
|
/// Counter::Zero(0) - A Counter with kind Counter::Zero
|
|
/// Counter::CounterValueReference(1) - A counter with kind
|
|
/// Counter::CounterValueReference
|
|
/// Counter::Expression(2) + CounterExpression::Subtract(0) -
|
|
/// A counter with kind Counter::Expression and an expression
|
|
/// with kind CounterExpression::Subtract
|
|
/// Counter::Expression(2) + CounterExpression::Add(1) -
|
|
/// A counter with kind Counter::Expression and an expression
|
|
/// with kind CounterExpression::Add
|
|
/// Remaining bits - Counter/Expression ID.
|
|
static unsigned encodeCounter(ArrayRef<CounterExpression> Expressions,
|
|
Counter C) {
|
|
unsigned Tag = unsigned(C.getKind());
|
|
if (C.isExpression())
|
|
Tag += Expressions[C.getExpressionID()].Kind;
|
|
unsigned ID = C.getCounterID();
|
|
assert(ID <=
|
|
(std::numeric_limits<unsigned>::max() >> Counter::EncodingTagBits));
|
|
return Tag | (ID << Counter::EncodingTagBits);
|
|
}
|
|
|
|
static void writeCounter(ArrayRef<CounterExpression> Expressions, Counter C,
|
|
raw_ostream &OS) {
|
|
encodeULEB128(encodeCounter(Expressions, C), OS);
|
|
}
|
|
|
|
void CoverageMappingWriter::write(raw_ostream &OS) {
|
|
// Check that we don't have any bogus regions.
|
|
assert(all_of(MappingRegions,
|
|
[](const CounterMappingRegion &CMR) {
|
|
return CMR.startLoc() <= CMR.endLoc();
|
|
}) &&
|
|
"Source region does not begin before it ends");
|
|
|
|
// Sort the regions in an ascending order by the file id and the starting
|
|
// location. Sort by region kinds to ensure stable order for tests.
|
|
llvm::stable_sort(MappingRegions, [](const CounterMappingRegion &LHS,
|
|
const CounterMappingRegion &RHS) {
|
|
if (LHS.FileID != RHS.FileID)
|
|
return LHS.FileID < RHS.FileID;
|
|
if (LHS.startLoc() != RHS.startLoc())
|
|
return LHS.startLoc() < RHS.startLoc();
|
|
return LHS.Kind < RHS.Kind;
|
|
});
|
|
|
|
// Write out the fileid -> filename mapping.
|
|
encodeULEB128(VirtualFileMapping.size(), OS);
|
|
for (const auto &FileID : VirtualFileMapping)
|
|
encodeULEB128(FileID, OS);
|
|
|
|
// Write out the expressions.
|
|
CounterExpressionsMinimizer Minimizer(Expressions, MappingRegions);
|
|
auto MinExpressions = Minimizer.getExpressions();
|
|
encodeULEB128(MinExpressions.size(), OS);
|
|
for (const auto &E : MinExpressions) {
|
|
writeCounter(MinExpressions, Minimizer.adjust(E.LHS), OS);
|
|
writeCounter(MinExpressions, Minimizer.adjust(E.RHS), OS);
|
|
}
|
|
|
|
// Write out the mapping regions.
|
|
// Split the regions into subarrays where each region in a
|
|
// subarray has a fileID which is the index of that subarray.
|
|
unsigned PrevLineStart = 0;
|
|
unsigned CurrentFileID = ~0U;
|
|
for (auto I = MappingRegions.begin(), E = MappingRegions.end(); I != E; ++I) {
|
|
if (I->FileID != CurrentFileID) {
|
|
// Ensure that all file ids have at least one mapping region.
|
|
assert(I->FileID == (CurrentFileID + 1));
|
|
// Find the number of regions with this file id.
|
|
unsigned RegionCount = 1;
|
|
for (auto J = I + 1; J != E && I->FileID == J->FileID; ++J)
|
|
++RegionCount;
|
|
// Start a new region sub-array.
|
|
encodeULEB128(RegionCount, OS);
|
|
|
|
CurrentFileID = I->FileID;
|
|
PrevLineStart = 0;
|
|
}
|
|
Counter Count = Minimizer.adjust(I->Count);
|
|
Counter FalseCount = Minimizer.adjust(I->FalseCount);
|
|
switch (I->Kind) {
|
|
case CounterMappingRegion::CodeRegion:
|
|
case CounterMappingRegion::GapRegion:
|
|
writeCounter(MinExpressions, Count, OS);
|
|
break;
|
|
case CounterMappingRegion::ExpansionRegion: {
|
|
assert(Count.isZero());
|
|
assert(I->ExpandedFileID <=
|
|
(std::numeric_limits<unsigned>::max() >>
|
|
Counter::EncodingCounterTagAndExpansionRegionTagBits));
|
|
// Mark an expansion region with a set bit that follows the counter tag,
|
|
// and pack the expanded file id into the remaining bits.
|
|
unsigned EncodedTagExpandedFileID =
|
|
(1 << Counter::EncodingTagBits) |
|
|
(I->ExpandedFileID
|
|
<< Counter::EncodingCounterTagAndExpansionRegionTagBits);
|
|
encodeULEB128(EncodedTagExpandedFileID, OS);
|
|
break;
|
|
}
|
|
case CounterMappingRegion::SkippedRegion:
|
|
assert(Count.isZero());
|
|
encodeULEB128(unsigned(I->Kind)
|
|
<< Counter::EncodingCounterTagAndExpansionRegionTagBits,
|
|
OS);
|
|
break;
|
|
case CounterMappingRegion::BranchRegion:
|
|
encodeULEB128(unsigned(I->Kind)
|
|
<< Counter::EncodingCounterTagAndExpansionRegionTagBits,
|
|
OS);
|
|
writeCounter(MinExpressions, Count, OS);
|
|
writeCounter(MinExpressions, FalseCount, OS);
|
|
break;
|
|
}
|
|
assert(I->LineStart >= PrevLineStart);
|
|
encodeULEB128(I->LineStart - PrevLineStart, OS);
|
|
encodeULEB128(I->ColumnStart, OS);
|
|
assert(I->LineEnd >= I->LineStart);
|
|
encodeULEB128(I->LineEnd - I->LineStart, OS);
|
|
encodeULEB128(I->ColumnEnd, OS);
|
|
PrevLineStart = I->LineStart;
|
|
}
|
|
// Ensure that all file ids have at least one mapping region.
|
|
assert(CurrentFileID == (VirtualFileMapping.size() - 1));
|
|
}
|
|
|
|
void TestingFormatWriter::write(raw_ostream &OS, TestingFormatVersion Version) {
|
|
auto ByteSwap = [](uint64_t N) {
|
|
return support::endian::byte_swap<uint64_t, support::endianness::little>(N);
|
|
};
|
|
|
|
// Output a 64bit magic number.
|
|
auto Magic = ByteSwap(TestingFormatMagic);
|
|
OS.write(reinterpret_cast<char *>(&Magic), sizeof(Magic));
|
|
|
|
// Output a 64bit version field.
|
|
auto VersionLittle = ByteSwap(uint64_t(Version));
|
|
OS.write(reinterpret_cast<char *>(&VersionLittle), sizeof(VersionLittle));
|
|
|
|
// Output the ProfileNames data.
|
|
encodeULEB128(ProfileNamesData.size(), OS);
|
|
encodeULEB128(ProfileNamesAddr, OS);
|
|
OS << ProfileNamesData;
|
|
|
|
// Version2 adds an extra field to indicate the size of the
|
|
// CoverageMappingData.
|
|
if (Version == TestingFormatVersion::Version2)
|
|
encodeULEB128(CoverageMappingData.size(), OS);
|
|
|
|
// Coverage mapping data is expected to have an alignment of 8.
|
|
for (unsigned Pad = offsetToAlignment(OS.tell(), Align(8)); Pad; --Pad)
|
|
OS.write(uint8_t(0));
|
|
OS << CoverageMappingData;
|
|
|
|
// Coverage records data is expected to have an alignment of 8.
|
|
for (unsigned Pad = offsetToAlignment(OS.tell(), Align(8)); Pad; --Pad)
|
|
OS.write(uint8_t(0));
|
|
OS << CoverageRecordsData;
|
|
}
|