Summary: 1. Do not create DFSan labels for the bytes which we do not trace. This is where we run out of labels at the first place. 2. When dumping the traces on the disk, make sure to offset the label identifiers by the number of the first byte in the trace range. 3. For the last label, make sure to write it at the last position of the trace bit string, as that label represents the input size, not any particular byte. Also fixed the bug with division in python which I've introduced when migrated the scripts to Python3 (`//` is required for integral division). Otherwise, the scripts are wasting too much time unsuccessfully trying to collect and process traces from the long inputs. For more context, see https://github.com/google/oss-fuzz/issues/1632#issuecomment-481761789 Reviewers: kcc Reviewed By: kcc Subscribers: delcypher, #sanitizers, llvm-commits Tags: #llvm, #sanitizers Differential Revision: https://reviews.llvm.org/D60538 llvm-svn: 358311
224 lines
7.9 KiB
C++
224 lines
7.9 KiB
C++
/*===- DataFlow.cpp - a standalone DataFlow tracer -------===//
|
|
//
|
|
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
|
// See https://llvm.org/LICENSE.txt for license information.
|
|
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
|
//
|
|
//===----------------------------------------------------------------------===//
|
|
// An experimental data-flow tracer for fuzz targets.
|
|
// It is based on DFSan and SanitizerCoverage.
|
|
// https://clang.llvm.org/docs/DataFlowSanitizer.html
|
|
// https://clang.llvm.org/docs/SanitizerCoverage.html#tracing-data-flow
|
|
//
|
|
// It executes the fuzz target on the given input while monitoring the
|
|
// data flow for every instrumented comparison instruction.
|
|
//
|
|
// The output shows which functions depend on which bytes of the input.
|
|
//
|
|
// Build:
|
|
// 1. Compile this file with -fsanitize=dataflow
|
|
// 2. Build the fuzz target with -g -fsanitize=dataflow
|
|
// -fsanitize-coverage=trace-pc-guard,pc-table,func,trace-cmp
|
|
// 3. Link those together with -fsanitize=dataflow
|
|
//
|
|
// -fsanitize-coverage=trace-cmp inserts callbacks around every comparison
|
|
// instruction, DFSan modifies the calls to pass the data flow labels.
|
|
// The callbacks update the data flow label for the current function.
|
|
// See e.g. __dfsw___sanitizer_cov_trace_cmp1 below.
|
|
//
|
|
// -fsanitize-coverage=trace-pc-guard,pc-table,func instruments function
|
|
// entries so that the comparison callback knows that current function.
|
|
//
|
|
//
|
|
// Run:
|
|
// # Collect data flow for INPUT_FILE, write to OUTPUT_FILE (default: stdout)
|
|
// ./a.out INPUT_FILE [OUTPUT_FILE]
|
|
//
|
|
// # Print all instrumented functions. llvm-symbolizer must be present in PATH
|
|
// ./a.out
|
|
//
|
|
// Example output:
|
|
// ===============
|
|
// F0 11111111111111
|
|
// F1 10000000000000
|
|
// ===============
|
|
// "FN xxxxxxxxxx": tells what bytes of the input does the function N depend on.
|
|
// The byte string is LEN+1 bytes. The last byte is set if the function
|
|
// depends on the input length.
|
|
//===----------------------------------------------------------------------===*/
|
|
|
|
#include <assert.h>
|
|
#include <stdio.h>
|
|
#include <stdlib.h>
|
|
#include <stdint.h>
|
|
#include <string.h>
|
|
|
|
#include <execinfo.h> // backtrace_symbols_fd
|
|
|
|
#include <sanitizer/dfsan_interface.h>
|
|
|
|
extern "C" {
|
|
extern int LLVMFuzzerTestOneInput(const unsigned char *Data, size_t Size);
|
|
__attribute__((weak)) extern int LLVMFuzzerInitialize(int *argc, char ***argv);
|
|
} // extern "C"
|
|
|
|
static size_t InputLen;
|
|
static size_t InputLabelBeg;
|
|
static size_t InputLabelEnd;
|
|
static size_t InputSizeLabel;
|
|
static size_t NumFuncs;
|
|
static const uintptr_t *FuncsBeg;
|
|
static __thread size_t CurrentFunc;
|
|
static dfsan_label *FuncLabels; // Array of NumFuncs elements.
|
|
static char *PrintableStringForLabel; // InputLen + 2 bytes.
|
|
static bool LabelSeen[1 << 8 * sizeof(dfsan_label)];
|
|
|
|
// Prints all instrumented functions.
|
|
static int PrintFunctions() {
|
|
// We don't have the symbolizer integrated with dfsan yet.
|
|
// So use backtrace_symbols_fd and pipe it through llvm-symbolizer.
|
|
// TODO(kcc): this is pretty ugly and may break in lots of ways.
|
|
// We'll need to make a proper in-process symbolizer work with DFSan.
|
|
FILE *Pipe = popen("sed 's/(+/ /g; s/).*//g' "
|
|
"| llvm-symbolizer "
|
|
"| grep 'dfs\\$' "
|
|
"| sed 's/dfs\\$//g'", "w");
|
|
for (size_t I = 0; I < NumFuncs; I++) {
|
|
uintptr_t PC = FuncsBeg[I * 2];
|
|
void *const Buf[1] = {(void*)PC};
|
|
backtrace_symbols_fd(Buf, 1, fileno(Pipe));
|
|
}
|
|
pclose(Pipe);
|
|
return 0;
|
|
}
|
|
|
|
extern "C"
|
|
void SetBytesForLabel(dfsan_label L, char *Bytes) {
|
|
if (LabelSeen[L])
|
|
return;
|
|
LabelSeen[L] = true;
|
|
assert(L);
|
|
if (L < InputSizeLabel) {
|
|
Bytes[L + InputLabelBeg - 1] = '1';
|
|
} else if (L == InputSizeLabel) {
|
|
Bytes[InputLen] = '1';
|
|
} else {
|
|
auto *DLI = dfsan_get_label_info(L);
|
|
SetBytesForLabel(DLI->l1, Bytes);
|
|
SetBytesForLabel(DLI->l2, Bytes);
|
|
}
|
|
}
|
|
|
|
static char *GetPrintableStringForLabel(dfsan_label L) {
|
|
memset(PrintableStringForLabel, '0', InputLen + 1);
|
|
PrintableStringForLabel[InputLen + 1] = 0;
|
|
memset(LabelSeen, 0, sizeof(LabelSeen));
|
|
SetBytesForLabel(L, PrintableStringForLabel);
|
|
return PrintableStringForLabel;
|
|
}
|
|
|
|
static void PrintDataFlow(FILE *Out) {
|
|
for (size_t I = 0; I < NumFuncs; I++)
|
|
if (FuncLabels[I])
|
|
fprintf(Out, "F%zd %s\n", I, GetPrintableStringForLabel(FuncLabels[I]));
|
|
}
|
|
|
|
int main(int argc, char **argv) {
|
|
if (LLVMFuzzerInitialize)
|
|
LLVMFuzzerInitialize(&argc, &argv);
|
|
if (argc == 1)
|
|
return PrintFunctions();
|
|
assert(argc == 4 || argc == 5);
|
|
InputLabelBeg = atoi(argv[1]);
|
|
InputLabelEnd = atoi(argv[2]);
|
|
assert(InputLabelBeg < InputLabelEnd);
|
|
|
|
const char *Input = argv[3];
|
|
fprintf(stderr, "INFO: reading '%s'\n", Input);
|
|
FILE *In = fopen(Input, "r");
|
|
assert(In);
|
|
fseek(In, 0, SEEK_END);
|
|
InputLen = ftell(In);
|
|
fseek(In, 0, SEEK_SET);
|
|
unsigned char *Buf = (unsigned char*)malloc(InputLen);
|
|
size_t NumBytesRead = fread(Buf, 1, InputLen, In);
|
|
assert(NumBytesRead == InputLen);
|
|
PrintableStringForLabel = (char*)malloc(InputLen + 2);
|
|
fclose(In);
|
|
|
|
fprintf(stderr, "INFO: running '%s'\n", Input);
|
|
for (size_t I = 1; I <= InputLen; I++) {
|
|
size_t Idx = I - 1;
|
|
if (Idx >= InputLabelBeg && Idx < InputLabelEnd) {
|
|
dfsan_label L = dfsan_create_label("", nullptr);
|
|
assert(L == I - InputLabelBeg);
|
|
dfsan_set_label(L, Buf + Idx, 1);
|
|
}
|
|
}
|
|
dfsan_label SizeL = dfsan_create_label("", nullptr);
|
|
InputSizeLabel = SizeL;
|
|
assert(InputSizeLabel == InputLabelEnd - InputLabelBeg + 1);
|
|
dfsan_set_label(SizeL, &InputLen, sizeof(InputLen));
|
|
|
|
LLVMFuzzerTestOneInput(Buf, InputLen);
|
|
free(Buf);
|
|
|
|
bool OutIsStdout = argc == 4;
|
|
fprintf(stderr, "INFO: writing dataflow to %s\n",
|
|
OutIsStdout ? "<stdout>" : argv[4]);
|
|
FILE *Out = OutIsStdout ? stdout : fopen(argv[4], "w");
|
|
PrintDataFlow(Out);
|
|
if (!OutIsStdout) fclose(Out);
|
|
}
|
|
|
|
extern "C" {
|
|
|
|
void __sanitizer_cov_trace_pc_guard_init(uint32_t *start,
|
|
uint32_t *stop) {
|
|
assert(NumFuncs == 0 && "This tool does not support DSOs");
|
|
assert(start < stop && "The code is not instrumented for coverage");
|
|
if (start == stop || *start) return; // Initialize only once.
|
|
for (uint32_t *x = start; x < stop; x++)
|
|
*x = ++NumFuncs; // The first index is 1.
|
|
FuncLabels = (dfsan_label*)calloc(NumFuncs, sizeof(dfsan_label));
|
|
fprintf(stderr, "INFO: %zd instrumented function(s) observed\n", NumFuncs);
|
|
}
|
|
|
|
void __sanitizer_cov_pcs_init(const uintptr_t *pcs_beg,
|
|
const uintptr_t *pcs_end) {
|
|
assert(NumFuncs == (pcs_end - pcs_beg) / 2);
|
|
FuncsBeg = pcs_beg;
|
|
}
|
|
|
|
void __sanitizer_cov_trace_pc_indir(uint64_t x){} // unused.
|
|
|
|
void __sanitizer_cov_trace_pc_guard(uint32_t *guard){
|
|
uint32_t FuncNum = *guard - 1; // Guards start from 1.
|
|
assert(FuncNum < NumFuncs);
|
|
CurrentFunc = FuncNum;
|
|
}
|
|
|
|
void __dfsw___sanitizer_cov_trace_switch(uint64_t Val, uint64_t *Cases,
|
|
dfsan_label L1, dfsan_label UnusedL) {
|
|
assert(CurrentFunc < NumFuncs);
|
|
FuncLabels[CurrentFunc] = dfsan_union(FuncLabels[CurrentFunc], L1);
|
|
}
|
|
|
|
#define HOOK(Name, Type) \
|
|
void Name(Type Arg1, Type Arg2, dfsan_label L1, dfsan_label L2) { \
|
|
assert(CurrentFunc < NumFuncs); \
|
|
FuncLabels[CurrentFunc] = \
|
|
dfsan_union(FuncLabels[CurrentFunc], dfsan_union(L1, L2)); \
|
|
}
|
|
|
|
HOOK(__dfsw___sanitizer_cov_trace_const_cmp1, uint8_t)
|
|
HOOK(__dfsw___sanitizer_cov_trace_const_cmp2, uint16_t)
|
|
HOOK(__dfsw___sanitizer_cov_trace_const_cmp4, uint32_t)
|
|
HOOK(__dfsw___sanitizer_cov_trace_const_cmp8, uint64_t)
|
|
HOOK(__dfsw___sanitizer_cov_trace_cmp1, uint8_t)
|
|
HOOK(__dfsw___sanitizer_cov_trace_cmp2, uint16_t)
|
|
HOOK(__dfsw___sanitizer_cov_trace_cmp4, uint32_t)
|
|
HOOK(__dfsw___sanitizer_cov_trace_cmp8, uint64_t)
|
|
|
|
} // extern "C"
|