[clang-fuzzer] Add a tiny tool to generate a fuzzing dictionary for clang

It should be useful clang-fuzzer itself, though my own motivation is
to use this in fuzzing clang-pseudo. (clang-tools-extra/pseudo/fuzzer).

Differential Revision: https://reviews.llvm.org/D125166
This commit is contained in:
Sam McCall
2022-05-07 15:10:51 +02:00
parent 017abbb258
commit a2f2dfde48
5 changed files with 65 additions and 0 deletions

View File

@@ -58,6 +58,7 @@ list(APPEND CLANG_TEST_DEPS
apinotes-test
c-index-test
clang
clang-fuzzer-dictionary
clang-resource-headers
clang-format
clang-tblgen

View File

@@ -0,0 +1,4 @@
RUN: clang-fuzzer-dictionary | FileCheck %s
CHECK-DAG: or="or"
CHECK-DAG: catch="catch"
CHECK-DAG: rawstart="R\x22("

View File

@@ -109,6 +109,7 @@ endif()
add_clang_subdirectory(handle-cxx)
add_clang_subdirectory(handle-llvm)
add_clang_subdirectory(dictionary)
add_clang_executable(clang-fuzzer
EXCLUDE_FROM_ALL

View File

@@ -0,0 +1,2 @@
add_clang_executable(clang-fuzzer-dictionary dictionary.c)

View File

@@ -0,0 +1,57 @@
//===-- dictionary.c - Generate fuzzing dictionary for clang --------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This binary emits a fuzzing dictionary describing strings that are
// significant to the clang parser: keywords and other tokens.
//
// The dictionary can be used by a fuzzer to reach interesting parser states
// much more quickly.
//
// The output is a single-file dictionary supported by libFuzzer and AFL:
// https://llvm.org/docs/LibFuzzer.html#dictionaries
//
//===----------------------------------------------------------------------===//
#include <stdio.h>
static void emit(const char *Name, const char *Spelling) {
static char Hex[] = "0123456789abcdef";
printf("%s=\"", Name);
unsigned char C;
while ((C = *Spelling++)) {
if (C < 32 || C == '"' || C == '\\')
printf("\\x%c%c", Hex[C>>4], Hex[C%16]);
else
printf("%c", C);
}
printf("\"\n");
}
int main(int argc, char **argv) {
#define PUNCTUATOR(Name, Spelling) emit(#Name, Spelling);
#define KEYWORD(Name, Criteria) emit(#Name, #Name);
#define PPKEYWORD(Name) emit(#Name, #Name);
#define CXX_KEYWORD_OPERATOR(Name, Equivalent) emit(#Name, #Name);
#define OBJC_AT_KEYWORD(Name) emit(#Name, #Name);
#define ALIAS(Spelling, Equivalent, Criteria) emit(Spelling, Spelling);
#include "clang/Basic/TokenKinds.def"
// Some other sub-token chunks significant to the lexer.
emit("ucn16", "\\u0000");
emit("ucn32", "\\U00000000");
emit("rawstart", "R\"(");
emit("rawend", ")\"");
emit("quote", "\"");
emit("squote", "'");
emit("u8quote", "u8\"");
emit("u16quote", "u\"");
emit("u32quote", "U\"");
emit("esc_nl", "\\\n");
emit("hex", "0x");
}