This is a commit with the following changes:
* Remove `ExcludedPreprocessorDirectiveSkipMapping` and related functionality
Removes `ExcludedPreprocessorDirectiveSkipMapping`; its intended benefit, fast skipping of excluded directive blocks,
will be superseded by a follow-up patch in the series that uses dependency scanning lexing for the same purpose.
* Refactor dependency scanning to produce pre-lexed preprocessor directive tokens, instead of minimized sources
Replaces the "source minimization" mechanism with one that produces lexed dependency directive tokens (see the usage sketch after the notes below).
* Make the special lexing for dependency scanning a first-class feature of the `Preprocessor` and `Lexer`
This brings the following benefits:
* Full access to the preprocessor state during dependency scanning. E.g. a component can see which includes were taken and where they were located in the actual sources.
* Improved performance for dependency scanning. Measurements with a release+thin-LTO build show a ~11% reduction in wall time.
* Opportunity to use dependency scanning lexing to speed up skipping of excluded conditional blocks during normal preprocessing (as follow-up, not part of this patch).
For normal preprocessing, measurements show differences below the noise level.
Since, after this change, we no longer minimize sources and pass them in place of the real sources, `DependencyScanningFilesystem` is not strictly necessary anymore; however, it still provides valuable performance benefits by caching file `stat`s along with the results of scanning the sources, so dependency scans continue to run through the `DependencyScanningFilesystem`.
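As an illustration of the new interface, here is a minimal sketch that drives the scanner directly via `scanSourceForDependencyDirectives` and prints the result with `printDependencyDirectivesAsSource` (both defined in the file below); the wrapper function is hypothetical:

```cpp
#include "clang/Lex/DependencyDirectivesScanner.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/Support/raw_ostream.h"

using namespace clang;

// Hypothetical helper: scan a source buffer and print only the directives
// that matter for dependency scanning, reconstructed from the lexed
// directive tokens. Returns true on error.
static bool dumpDependencyDirectives(StringRef Source) {
  SmallVector<dependency_directives_scan::Token, 64> Tokens;
  SmallVector<dependency_directives_scan::Directive, 32> Directives;
  // With a null DiagnosticsEngine, errors are reported only through the
  // return value.
  if (scanSourceForDependencyDirectives(Source, Tokens, Directives,
                                        /*Diags=*/nullptr, SourceLocation()))
    return true; // A real caller would fall back to normal preprocessing.
  printDependencyDirectivesAsSource(Source, Directives, llvm::outs());
  return false;
}
```

Note that each `Directive` only references token storage owned by `Tokens`, so the token vector has to outlive the directives.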
Differential Revision: https://reviews.llvm.org/D125486
Differential Revision: https://reviews.llvm.org/D125487
Differential Revision: https://reviews.llvm.org/D125488
//===- DependencyDirectivesScanner.cpp ------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
///
/// \file
/// This is the interface for scanning header and source files to get the
/// minimum necessary preprocessor directives for evaluating includes. It
/// reduces the source down to #define, #include, #import, @import, and any
/// conditional preprocessor logic that contains one of those.
///
//===----------------------------------------------------------------------===//

#include "clang/Lex/DependencyDirectivesScanner.h"
#include "clang/Basic/CharInfo.h"
#include "clang/Basic/Diagnostic.h"
#include "clang/Lex/LexDiagnostic.h"
#include "clang/Lex/Lexer.h"
#include "llvm/ADT/ScopeExit.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/StringMap.h"
#include "llvm/ADT/StringSwitch.h"

using namespace clang;
using namespace clang::dependency_directives_scan;
using namespace llvm;

namespace {

struct DirectiveWithTokens {
  DirectiveKind Kind;
  unsigned NumTokens;

  DirectiveWithTokens(DirectiveKind Kind, unsigned NumTokens)
      : Kind(Kind), NumTokens(NumTokens) {}
};

/// Does an efficient "scan" of the sources to detect the presence of
/// preprocessor (or module import) directives and collects the raw lexed tokens
/// for those directives so that the \p Lexer can "replay" them when the file is
/// included.
///
/// Note that the behavior of the raw lexer is affected by the language mode,
/// while at this point we want to do a scan and collect tokens once,
/// irrespective of the language mode that the file will get included in. To
/// compensate for that the \p Lexer, while "replaying", will adjust a token
/// where appropriate, when it could affect the preprocessor's state.
/// For example in a directive like
///
/// \code
///   #if __has_cpp_attribute(clang::fallthrough)
/// \endcode
///
/// The preprocessor needs to see '::' as 'tok::coloncolon' instead of 2
/// 'tok::colon'. The \p Lexer will adjust if it sees consecutive 'tok::colon'
/// while in C++ mode.
struct Scanner {
  Scanner(StringRef Input,
          SmallVectorImpl<dependency_directives_scan::Token> &Tokens,
          DiagnosticsEngine *Diags, SourceLocation InputSourceLoc)
      : Input(Input), Tokens(Tokens), Diags(Diags),
        InputSourceLoc(InputSourceLoc), LangOpts(getLangOptsForDepScanning()),
        TheLexer(InputSourceLoc, LangOpts, Input.begin(), Input.begin(),
                 Input.end()) {}

  static LangOptions getLangOptsForDepScanning() {
    LangOptions LangOpts;
    // Set the lexer to use 'tok::at' for '@', instead of 'tok::unknown'.
    LangOpts.ObjC = true;
    LangOpts.LineComment = true;
    return LangOpts;
  }

  /// Lex the provided source and emit the directive tokens.
  ///
  /// \returns True on error.
  bool scan(SmallVectorImpl<Directive> &Directives);

private:
  /// Lexes next token and advances \p First and the \p Lexer.
  LLVM_NODISCARD dependency_directives_scan::Token &
  lexToken(const char *&First, const char *const End);

  dependency_directives_scan::Token &lexIncludeFilename(const char *&First,
                                                        const char *const End);

  /// Lexes next token and if it is identifier returns its string, otherwise
  /// it skips the current line and returns \p None.
  ///
  /// In any case (whatever the token kind) \p First and the \p Lexer will
  /// advance beyond the token.
  LLVM_NODISCARD Optional<StringRef>
  tryLexIdentifierOrSkipLine(const char *&First, const char *const End);

  /// Used when it is certain that next token is an identifier.
  LLVM_NODISCARD StringRef lexIdentifier(const char *&First,
                                         const char *const End);

  /// Lexes next token and returns true iff it is an identifier that matches \p
  /// Id, otherwise it skips the current line and returns false.
  ///
  /// In any case (whatever the token kind) \p First and the \p Lexer will
  /// advance beyond the token.
  LLVM_NODISCARD bool isNextIdentifierOrSkipLine(StringRef Id,
                                                 const char *&First,
                                                 const char *const End);

  LLVM_NODISCARD bool scanImpl(const char *First, const char *const End);
  LLVM_NODISCARD bool lexPPLine(const char *&First, const char *const End);
  LLVM_NODISCARD bool lexAt(const char *&First, const char *const End);
  LLVM_NODISCARD bool lexModule(const char *&First, const char *const End);
  LLVM_NODISCARD bool lexDefine(const char *HashLoc, const char *&First,
                                const char *const End);
  LLVM_NODISCARD bool lexPragma(const char *&First, const char *const End);
  LLVM_NODISCARD bool lexEndif(const char *&First, const char *const End);
  LLVM_NODISCARD bool lexDefault(DirectiveKind Kind, const char *&First,
                                 const char *const End);
  LLVM_NODISCARD bool lexModuleDirectiveBody(DirectiveKind Kind,
                                             const char *&First,
                                             const char *const End);
  void lexPPDirectiveBody(const char *&First, const char *const End);

  DirectiveWithTokens &pushDirective(DirectiveKind Kind) {
    Tokens.append(CurDirToks);
    DirsWithToks.emplace_back(Kind, CurDirToks.size());
    CurDirToks.clear();
    return DirsWithToks.back();
  }
  void popDirective() {
    Tokens.pop_back_n(DirsWithToks.pop_back_val().NumTokens);
  }
  DirectiveKind topDirective() const {
    return DirsWithToks.empty() ? pp_none : DirsWithToks.back().Kind;
  }

  unsigned getOffsetAt(const char *CurPtr) const {
    return CurPtr - Input.data();
  }

  /// Reports a diagnostic if the diagnostic engine is provided. Always returns
  /// true at the end.
  bool reportError(const char *CurPtr, unsigned Err);

  StringMap<char> SplitIds;
  StringRef Input;
  SmallVectorImpl<dependency_directives_scan::Token> &Tokens;
  DiagnosticsEngine *Diags;
  SourceLocation InputSourceLoc;

  /// Keeps track of the tokens for the currently lexed directive. Once a
  /// directive is fully lexed and "committed" then the tokens get appended to
  /// \p Tokens and \p CurDirToks is cleared for the next directive.
  SmallVector<dependency_directives_scan::Token, 32> CurDirToks;
  /// The directives that were lexed along with the number of tokens that each
  /// directive contains. The tokens of all the directives are kept in \p Tokens
  /// vector, in the same order as the directives order in \p DirsWithToks.
  SmallVector<DirectiveWithTokens, 64> DirsWithToks;
  LangOptions LangOpts;
  Lexer TheLexer;
};

} // end anonymous namespace

bool Scanner::reportError(const char *CurPtr, unsigned Err) {
  if (!Diags)
    return true;
  assert(CurPtr >= Input.data() && "invalid buffer ptr");
  Diags->Report(InputSourceLoc.getLocWithOffset(getOffsetAt(CurPtr)), Err);
  return true;
}

static void skipOverSpaces(const char *&First, const char *const End) {
  while (First != End && isHorizontalWhitespace(*First))
    ++First;
}

LLVM_NODISCARD static bool isRawStringLiteral(const char *First,
                                              const char *Current) {
  assert(First <= Current);

  // Check if we can even back up.
  if (*Current != '"' || First == Current)
    return false;

  // Check for an "R".
  --Current;
  if (*Current != 'R')
    return false;
  if (First == Current || !isAsciiIdentifierContinue(*--Current))
    return true;

  // Check for a prefix of "u", "U", or "L".
  if (*Current == 'u' || *Current == 'U' || *Current == 'L')
    return First == Current || !isAsciiIdentifierContinue(*--Current);

  // Check for a prefix of "u8".
  if (*Current != '8' || First == Current || *Current-- != 'u')
    return false;
  return First == Current || !isAsciiIdentifierContinue(*--Current);
}

static void skipRawString(const char *&First, const char *const End) {
  assert(First[0] == '"');
  assert(First[-1] == 'R');

  const char *Last = ++First;
  while (Last != End && *Last != '(')
    ++Last;
  if (Last == End) {
    First = Last; // Hit the end... just give up.
    return;
  }

  StringRef Terminator(First, Last - First);
  for (;;) {
    // Move First to just past the next ")".
    First = Last;
    while (First != End && *First != ')')
      ++First;
    if (First == End)
      return;
    ++First;

    // Look ahead for the terminator sequence.
    Last = First;
    while (Last != End && size_t(Last - First) < Terminator.size() &&
           Terminator[Last - First] == *Last)
      ++Last;

    // Check if we hit it (or the end of the file).
    if (Last == End) {
      First = Last;
      return;
    }
    if (size_t(Last - First) < Terminator.size())
      continue;
    if (*Last != '"')
      continue;
    First = Last + 1;
    return;
  }
}

// Returns the length of EOL, either 0 (no end-of-line), 1 (\n) or 2 (\r\n)
static unsigned isEOL(const char *First, const char *const End) {
  if (First == End)
    return 0;
  if (End - First > 1 && isVerticalWhitespace(First[0]) &&
      isVerticalWhitespace(First[1]) && First[0] != First[1])
    return 2;
  return !!isVerticalWhitespace(First[0]);
}

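// Skip a quoted character/string literal or an angled header-name; the
// literal ends at the matching terminator, at the end of the line, or at the
// end of the buffer.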
static void skipString(const char *&First, const char *const End) {
  assert(*First == '\'' || *First == '"' || *First == '<');
  const char Terminator = *First == '<' ? '>' : *First;
  for (++First; First != End && *First != Terminator; ++First) {
    // String and character literals don't extend past the end of the line.
    if (isVerticalWhitespace(*First))
      return;
    if (*First != '\\')
      continue;
    // Skip past backslash to the next character. This ensures that the
    // character right after it is skipped as well, which matters if it's
    // the terminator.
    if (++First == End)
      return;
    if (!isWhitespace(*First))
      continue;
    // Whitespace after the backslash might indicate a line continuation.
    const char *FirstAfterBackslashPastSpace = First;
    skipOverSpaces(FirstAfterBackslashPastSpace, End);
    if (unsigned NLSize = isEOL(FirstAfterBackslashPastSpace, End)) {
      // Advance the character pointer to the next line for the next
      // iteration.
      First = FirstAfterBackslashPastSpace + NLSize - 1;
    }
  }
  if (First != End)
    ++First; // Finish off the string.
}

// Returns the length of the skipped newline
static unsigned skipNewline(const char *&First, const char *End) {
  if (First == End)
    return 0;
  assert(isVerticalWhitespace(*First));
  unsigned Len = isEOL(First, End);
  assert(Len && "expected newline");
  First += Len;
  return Len;
}

static bool wasLineContinuation(const char *First, unsigned EOLLen) {
  return *(First - (int)EOLLen - 1) == '\\';
}

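// Skip to the end of the current line without interpreting string literals or
// comments, continuing past backslash line continuations.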
static void skipToNewlineRaw(const char *&First, const char *const End) {
  for (;;) {
    if (First == End)
      return;

    unsigned Len = isEOL(First, End);
    if (Len)
      return;

    do {
      if (++First == End)
        return;
      Len = isEOL(First, End);
    } while (!Len);

    if (First[-1] != '\\')
      return;

    First += Len;
    // Keep skipping lines...
  }
}

static void skipLineComment(const char *&First, const char *const End) {
  assert(First[0] == '/' && First[1] == '/');
  First += 2;
  skipToNewlineRaw(First, End);
}

static void skipBlockComment(const char *&First, const char *const End) {
  assert(First[0] == '/' && First[1] == '*');
  if (End - First < 4) {
    First = End;
    return;
  }
  for (First += 3; First != End; ++First)
    if (First[-1] == '*' && First[0] == '/') {
      ++First;
      return;
    }
}

/// \returns True if the current single quotation mark character is a C++ 14
/// digit separator.
static bool isQuoteCppDigitSeparator(const char *const Start,
                                     const char *const Cur,
                                     const char *const End) {
  assert(*Cur == '\'' && "expected quotation character");
  // skipLine called in places where we don't expect a valid number
  // body before `start` on the same line, so always return false at the start.
  if (Start == Cur)
    return false;
  // The previous character must be a valid PP number character.
  // Make sure that the L, u, U, u8 prefixes don't get marked as a
  // separator though.
  char Prev = *(Cur - 1);
  if (Prev == 'L' || Prev == 'U' || Prev == 'u')
    return false;
  if (Prev == '8' && (Cur - 1 != Start) && *(Cur - 2) == 'u')
    return false;
  if (!isPreprocessingNumberBody(Prev))
    return false;
  // The next character should be a valid identifier body character.
  return (Cur + 1) < End && isAsciiIdentifierContinue(*(Cur + 1));
}

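// Skip to the beginning of the next line, stepping over strings and comments
// so that newlines inside them do not end the line early, and continuing past
// backslash line continuations.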
static void skipLine(const char *&First, const char *const End) {
  for (;;) {
    assert(First <= End);
    if (First == End)
      return;

    if (isVerticalWhitespace(*First)) {
      skipNewline(First, End);
      return;
    }
    const char *Start = First;
    while (First != End && !isVerticalWhitespace(*First)) {
      // Iterate over strings correctly to avoid comments and newlines.
      if (*First == '"' ||
          (*First == '\'' && !isQuoteCppDigitSeparator(Start, First, End))) {
        if (isRawStringLiteral(Start, First))
          skipRawString(First, End);
        else
          skipString(First, End);
        continue;
      }

      // Iterate over comments correctly.
      if (*First != '/' || End - First < 2) {
        ++First;
        continue;
      }

      if (First[1] == '/') {
        // "//...".
        skipLineComment(First, End);
        continue;
      }

      if (First[1] != '*') {
        ++First;
        continue;
      }

      // "/*...*/".
      skipBlockComment(First, End);
    }
    if (First == End)
      return;

    // Skip over the newline.
    unsigned Len = skipNewline(First, End);
    if (!wasLineContinuation(First, Len)) // Continue past line-continuations.
      break;
  }
}

static void skipDirective(StringRef Name, const char *&First,
                          const char *const End) {
  if (llvm::StringSwitch<bool>(Name)
          .Case("warning", true)
          .Case("error", true)
          .Default(false))
    // Do not process quotes or comments.
    skipToNewlineRaw(First, End);
  else
    skipLine(First, End);
}

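// Skip horizontal whitespace, escaped newlines, and comments, stopping at the
// first character that is none of those.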
static void skipWhitespace(const char *&First, const char *const End) {
  for (;;) {
    assert(First <= End);
    skipOverSpaces(First, End);

    if (End - First < 2)
      return;

    if (First[0] == '\\' && isVerticalWhitespace(First[1])) {
      skipNewline(++First, End);
      continue;
    }

    // Check for a non-comment character.
    if (First[0] != '/')
      return;

    // "// ...".
    if (First[1] == '/') {
      skipLineComment(First, End);
      return;
    }

    // Cannot be a comment.
    if (First[1] != '*')
      return;

    // "/*...*/".
    skipBlockComment(First, End);
  }
}

bool Scanner::lexModuleDirectiveBody(DirectiveKind Kind, const char *&First,
                                     const char *const End) {
  const char *DirectiveLoc = Input.data() + CurDirToks.front().Offset;
  for (;;) {
    const dependency_directives_scan::Token &Tok = lexToken(First, End);
    if (Tok.is(tok::eof))
      return reportError(
          DirectiveLoc,
          diag::err_dep_source_scanner_missing_semi_after_at_import);
    if (Tok.is(tok::semi))
      break;
  }
  pushDirective(Kind);
  skipWhitespace(First, End);
  if (First == End)
    return false;
  if (!isVerticalWhitespace(*First))
    return reportError(
        DirectiveLoc, diag::err_dep_source_scanner_unexpected_tokens_at_import);
  skipNewline(First, End);
  return false;
}

dependency_directives_scan::Token &Scanner::lexToken(const char *&First,
                                                     const char *const End) {
  clang::Token Tok;
  TheLexer.LexFromRawLexer(Tok);
  First = Input.data() + TheLexer.getCurrentBufferOffset();
  assert(First <= End);

  unsigned Offset = TheLexer.getCurrentBufferOffset() - Tok.getLength();
  CurDirToks.emplace_back(Offset, Tok.getLength(), Tok.getKind(),
                          Tok.getFlags());
  return CurDirToks.back();
}

dependency_directives_scan::Token &
Scanner::lexIncludeFilename(const char *&First, const char *const End) {
  clang::Token Tok;
  TheLexer.LexIncludeFilename(Tok);
  First = Input.data() + TheLexer.getCurrentBufferOffset();
  assert(First <= End);

  unsigned Offset = TheLexer.getCurrentBufferOffset() - Tok.getLength();
  CurDirToks.emplace_back(Offset, Tok.getLength(), Tok.getKind(),
                          Tok.getFlags());
  return CurDirToks.back();
}

void Scanner::lexPPDirectiveBody(const char *&First, const char *const End) {
  while (true) {
    const dependency_directives_scan::Token &Tok = lexToken(First, End);
    if (Tok.is(tok::eod))
      break;
  }
}

LLVM_NODISCARD Optional<StringRef>
Scanner::tryLexIdentifierOrSkipLine(const char *&First, const char *const End) {
  const dependency_directives_scan::Token &Tok = lexToken(First, End);
  if (Tok.isNot(tok::raw_identifier)) {
    if (!Tok.is(tok::eod))
      skipLine(First, End);
    return None;
  }

  bool NeedsCleaning = Tok.Flags & clang::Token::NeedsCleaning;
  if (LLVM_LIKELY(!NeedsCleaning))
    return Input.slice(Tok.Offset, Tok.getEnd());

  SmallString<64> Spelling;
  Spelling.resize(Tok.Length);

  unsigned SpellingLength = 0;
  const char *BufPtr = Input.begin() + Tok.Offset;
  const char *AfterIdent = Input.begin() + Tok.getEnd();
  while (BufPtr < AfterIdent) {
    unsigned Size;
    Spelling[SpellingLength++] =
        Lexer::getCharAndSizeNoWarn(BufPtr, Size, LangOpts);
    BufPtr += Size;
  }

  return SplitIds.try_emplace(StringRef(Spelling.begin(), SpellingLength), 0)
      .first->first();
}

StringRef Scanner::lexIdentifier(const char *&First, const char *const End) {
  Optional<StringRef> Id = tryLexIdentifierOrSkipLine(First, End);
  assert(Id.hasValue() && "expected identifier token");
  return Id.getValue();
}

bool Scanner::isNextIdentifierOrSkipLine(StringRef Id, const char *&First,
                                         const char *const End) {
  if (Optional<StringRef> FoundId = tryLexIdentifierOrSkipLine(First, End)) {
    if (*FoundId == Id)
      return true;
    skipLine(First, End);
  }
  return false;
}

bool Scanner::lexAt(const char *&First, const char *const End) {
  // Handle "@import".

  // Lex '@'.
  const dependency_directives_scan::Token &AtTok = lexToken(First, End);
  assert(AtTok.is(tok::at));
  (void)AtTok;

  if (!isNextIdentifierOrSkipLine("import", First, End))
    return false;
  return lexModuleDirectiveBody(decl_at_import, First, End);
}

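// Lex a line that starts with 'import', 'export', or 'module' as a potential
// C++20 module or import declaration; anything else is skipped as an ordinary
// source line.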
bool Scanner::lexModule(const char *&First, const char *const End) {
  StringRef Id = lexIdentifier(First, End);
  bool Export = false;
  if (Id == "export") {
    Export = true;
    Optional<StringRef> NextId = tryLexIdentifierOrSkipLine(First, End);
    if (!NextId)
      return false;
    Id = *NextId;
  }

  if (Id != "module" && Id != "import") {
    skipLine(First, End);
    return false;
  }

  skipWhitespace(First, End);

  // Ignore this as a module directive if the next character can't be part of
  // an import.

  switch (*First) {
  case ':':
  case '<':
  case '"':
    break;
  default:
    if (!isAsciiIdentifierContinue(*First)) {
      skipLine(First, End);
      return false;
    }
  }

  TheLexer.seek(getOffsetAt(First), /*IsAtStartOfLine*/ false);

  DirectiveKind Kind;
  if (Id == "module")
    Kind = Export ? cxx_export_module_decl : cxx_module_decl;
  else
    Kind = Export ? cxx_export_import_decl : cxx_import_decl;

  return lexModuleDirectiveBody(Kind, First, End);
}

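// Lex the tail of a "#pragma" directive, recording only the pragmas that are
// relevant to dependency scanning: once, push_macro, pop_macro,
// include_alias, and "clang module import".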
bool Scanner::lexPragma(const char *&First, const char *const End) {
  Optional<StringRef> FoundId = tryLexIdentifierOrSkipLine(First, End);
  if (!FoundId)
    return false;

  StringRef Id = FoundId.getValue();
  auto Kind = llvm::StringSwitch<DirectiveKind>(Id)
                  .Case("once", pp_pragma_once)
                  .Case("push_macro", pp_pragma_push_macro)
                  .Case("pop_macro", pp_pragma_pop_macro)
                  .Case("include_alias", pp_pragma_include_alias)
                  .Default(pp_none);
  if (Kind != pp_none) {
    lexPPDirectiveBody(First, End);
    pushDirective(Kind);
    return false;
  }

  if (Id != "clang") {
    skipLine(First, End);
    return false;
  }

  // #pragma clang.
  if (!isNextIdentifierOrSkipLine("module", First, End))
    return false;

  // #pragma clang module.
  if (!isNextIdentifierOrSkipLine("import", First, End))
    return false;

  // #pragma clang module import.
  lexPPDirectiveBody(First, End);
  pushDirective(pp_pragma_import);
  return false;
}

bool Scanner::lexEndif(const char *&First, const char *const End) {
  // Strip out "#else" if it's empty.
  if (topDirective() == pp_else)
    popDirective();

  // If "#ifdef" is empty, strip it and skip the "#endif".
  //
  // FIXME: Once/if Clang starts disallowing __has_include in macro expansions,
  // we can skip empty `#if` and `#elif` blocks as well after scanning for a
  // literal __has_include in the condition. Even without that rule we could
  // drop the tokens if we scan for identifiers in the condition and find none.
  if (topDirective() == pp_ifdef || topDirective() == pp_ifndef) {
    popDirective();
    skipLine(First, End);
    return false;
  }

  return lexDefault(pp_endif, First, End);
}

bool Scanner::lexDefault(DirectiveKind Kind, const char *&First,
                         const char *const End) {
  lexPPDirectiveBody(First, End);
  pushDirective(Kind);
  return false;
}

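// Quick filter: a line can only be interesting to the scanner if its first
// non-whitespace character is '#', '@', or the start of "import", "export",
// or "module".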
static bool isStartOfRelevantLine(char First) {
  switch (First) {
  case '#':
  case '@':
  case 'i':
  case 'e':
  case 'm':
    return true;
  }
  return false;
}

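// Lex a single source line, pushing directive tokens for anything relevant to
// dependency scanning and skipping everything else. Returns true on error.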
bool Scanner::lexPPLine(const char *&First, const char *const End) {
  assert(First != End);

  skipWhitespace(First, End);
  assert(First <= End);
  if (First == End)
    return false;

  if (!isStartOfRelevantLine(*First)) {
    skipLine(First, End);
    assert(First <= End);
    return false;
  }

  TheLexer.seek(getOffsetAt(First), /*IsAtStartOfLine*/ true);

  auto ScEx1 = make_scope_exit([&]() {
    /// Clear Scanner's CurDirToks before returning, in case we didn't push a
    /// new directive.
    CurDirToks.clear();
  });

  // Handle "@import".
  if (*First == '@')
    return lexAt(First, End);

  if (*First == 'i' || *First == 'e' || *First == 'm')
    return lexModule(First, End);

  // Handle preprocessing directives.

  TheLexer.setParsingPreprocessorDirective(true);
  auto ScEx2 = make_scope_exit(
      [&]() { TheLexer.setParsingPreprocessorDirective(false); });

  // Lex '#'.
  const dependency_directives_scan::Token &HashTok = lexToken(First, End);
  assert(HashTok.is(tok::hash));
  (void)HashTok;

  Optional<StringRef> FoundId = tryLexIdentifierOrSkipLine(First, End);
  if (!FoundId)
    return false;

  StringRef Id = FoundId.getValue();

  if (Id == "pragma")
    return lexPragma(First, End);

  auto Kind = llvm::StringSwitch<DirectiveKind>(Id)
                  .Case("include", pp_include)
                  .Case("__include_macros", pp___include_macros)
                  .Case("define", pp_define)
                  .Case("undef", pp_undef)
                  .Case("import", pp_import)
                  .Case("include_next", pp_include_next)
                  .Case("if", pp_if)
                  .Case("ifdef", pp_ifdef)
                  .Case("ifndef", pp_ifndef)
                  .Case("elif", pp_elif)
                  .Case("elifdef", pp_elifdef)
                  .Case("elifndef", pp_elifndef)
                  .Case("else", pp_else)
                  .Case("endif", pp_endif)
                  .Default(pp_none);
  if (Kind == pp_none) {
    skipDirective(Id, First, End);
    return false;
  }

  if (Kind == pp_endif)
    return lexEndif(First, End);

  switch (Kind) {
  case pp_include:
  case pp___include_macros:
  case pp_include_next:
  case pp_import:
    lexIncludeFilename(First, End);
    break;
  default:
    break;
  }

  // Everything else.
  return lexDefault(Kind, First, End);
}

static void skipUTF8ByteOrderMark(const char *&First, const char *const End) {
  if ((End - First) >= 3 && First[0] == '\xef' && First[1] == '\xbb' &&
      First[2] == '\xbf')
    First += 3;
}

bool Scanner::scanImpl(const char *First, const char *const End) {
  skipUTF8ByteOrderMark(First, End);
  while (First != End)
    if (lexPPLine(First, End))
      return true;
  return false;
}

bool Scanner::scan(SmallVectorImpl<Directive> &Directives) {
  bool Error = scanImpl(Input.begin(), Input.end());

  if (!Error) {
    // Add an EOF on success.
    pushDirective(pp_eof);
  }

  ArrayRef<dependency_directives_scan::Token> RemainingTokens = Tokens;
  for (const DirectiveWithTokens &DirWithToks : DirsWithToks) {
    assert(RemainingTokens.size() >= DirWithToks.NumTokens);
    Directives.emplace_back(DirWithToks.Kind,
                            RemainingTokens.take_front(DirWithToks.NumTokens));
    RemainingTokens = RemainingTokens.drop_front(DirWithToks.NumTokens);
  }
  assert(RemainingTokens.empty());

  return Error;
}

bool clang::scanSourceForDependencyDirectives(
    StringRef Input, SmallVectorImpl<dependency_directives_scan::Token> &Tokens,
    SmallVectorImpl<Directive> &Directives, DiagnosticsEngine *Diags,
    SourceLocation InputSourceLoc) {
  return Scanner(Input, Tokens, Diags, InputSourceLoc).scan(Directives);
}

void clang::printDependencyDirectivesAsSource(
    StringRef Source,
    ArrayRef<dependency_directives_scan::Directive> Directives,
    llvm::raw_ostream &OS) {
  // Add a space separator where it is convenient for testing purposes.
  auto needsSpaceSeparator =
      [](tok::TokenKind Prev,
         const dependency_directives_scan::Token &Tok) -> bool {
    if (Prev == Tok.Kind)
      return !Tok.isOneOf(tok::l_paren, tok::r_paren, tok::l_square,
                          tok::r_square);
    if (Prev == tok::raw_identifier &&
        Tok.isOneOf(tok::hash, tok::numeric_constant, tok::string_literal,
                    tok::char_constant, tok::header_name))
      return true;
    if (Prev == tok::r_paren &&
        Tok.isOneOf(tok::raw_identifier, tok::hash, tok::string_literal,
                    tok::char_constant, tok::unknown))
      return true;
    if (Prev == tok::comma &&
        Tok.isOneOf(tok::l_paren, tok::string_literal, tok::less))
      return true;
    return false;
  };

  for (const dependency_directives_scan::Directive &Directive : Directives) {
    Optional<tok::TokenKind> PrevTokenKind;
    for (const dependency_directives_scan::Token &Tok : Directive.Tokens) {
      if (PrevTokenKind && needsSpaceSeparator(*PrevTokenKind, Tok))
        OS << ' ';
      PrevTokenKind = Tok.Kind;
      OS << Source.slice(Tok.Offset, Tok.getEnd());
    }
  }
}