Files
clang-p2996/lld/ELF/Strings.cpp
Rui Ueyama a13efc2a73 Introduce StringRefZ class to represent null-terminated strings.
StringRefZ is a class that represents a null-terminated string. Its
length is computed lazily, so it is more efficient than StringRef for
representing strings in a string table.

The motivation for defining this new class is to merge functions
that differ only in their string types; we have many constructors that take
`const char *` or `StringRef`. With StringRefZ, we can merge them.

Differential Revision: https://reviews.llvm.org/D27037

llvm-svn: 288172
2016-11-29 18:05:04 +00:00

229 lines
6.1 KiB
C++

//===- Strings.cpp -------------------------------------------------------===//
//
// The LLVM Linker
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
#include "Strings.h"
#include "Config.h"
#include "Error.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/Twine.h"
#include "llvm/Config/config.h"
#include "llvm/Demangle/Demangle.h"
#include <algorithm>
#include <cstring>
using namespace llvm;
using namespace lld;
using namespace lld::elf;
// This is a scanner for the glob pattern.
// A glob pattern token is one of "*", "?", "[<chars>]", "[^<chars>]"
// (which is a negative form of "[<chars>]"), or a non-meta character.
//
// This function returns the first token in S as a bitmap of the byte
// values the token accepts, and advances S past that token. S must not
// be empty because S[0] is read unconditionally.
//
// If a '[' has no closing ']', an error is reported, a token that matches
// nothing is returned, and S is cleared so that the tokenizing loop in the
// constructor ("while (!S.empty())") terminates.
BitVector GlobPattern::scan(StringRef &S) {
  switch (S[0]) {
  case '*':
    S = S.substr(1);
    // '*' is represented by an empty bitvector.
    // All other bitvectors are 256-bit long.
    return BitVector();
  case '?':
    S = S.substr(1);
    // '?' accepts every byte value.
    return BitVector(256, true);
  case '[': {
    size_t End = S.find(']', 1);
    if (End == StringRef::npos) {
      error("invalid glob pattern: " + Original);
      // Consume the rest of the input. If S were left untouched, the
      // caller would call scan() again on the very same '[' forever.
      S = StringRef();
      return BitVector(256, false);
    }
    StringRef Chars = S.substr(1, End - 1);
    S = S.substr(End + 1);
    if (Chars.startswith("^"))
      return expand(Chars.substr(1)).flip();
    return expand(Chars);
  }
  default:
    BitVector BV(256, false);
    // Index by the unsigned byte value. Plain char may be signed, in which
    // case bytes >= 0x80 would otherwise produce an out-of-range index.
    BV[static_cast<uint8_t>(S[0])] = true;
    S = S.substr(1);
    return BV;
  }
}
// Expands character ranges and returns a bitmap.
// For example, "a-cf-hz" is expanded to "abcfghz".
//
// S is the text between '[' and ']' (without a leading '^'). The result is
// a 256-bit vector with one bit set per accepted byte value. Characters are
// treated as unsigned bytes, so ranges and single characters at or above
// 0x80 are handled the same way regardless of whether char is signed.
//
// A descending range such as "z-a" reports an error and returns whatever
// has been accumulated so far.
BitVector GlobPattern::expand(StringRef S) {
  BitVector BV(256, false);

  // Expand "x-y".
  for (;;) {
    if (S.size() < 3)
      break;

    const uint8_t Start = static_cast<uint8_t>(S[0]);
    const uint8_t End = static_cast<uint8_t>(S[2]);

    // If it doesn't start with something like "x-y",
    // consume the first character and proceed.
    if (S[1] != '-') {
      BV[Start] = true;
      S = S.substr(1);
      continue;
    }

    // It must be in the form of "x-y".
    // Validate it and then interpret the range.
    if (Start > End) {
      error("invalid glob pattern: " + Original);
      return BV;
    }

    // C is an int so that the loop terminates even when End is 255.
    for (int C = Start; C <= End; ++C)
      BV[C] = true;
    S = S.substr(3);
  }

  // Fewer than three characters are left; none of them can form a range.
  for (char C : S)
    BV[static_cast<uint8_t>(C)] = true;
  return BV;
}
// Analyzes pattern S once so that match() can take the cheapest route.
// Exactly one of Exact, Prefix, Suffix or Tokens is populated, depending on
// where (and whether) metacharacters appear in S.
GlobPattern::GlobPattern(StringRef S) : Original(S) {
  // No metacharacter at all: the regular string comparison should work.
  if (!hasWildcard(S)) {
    Exact = S;
    return;
  }

  // Something like "foo*": the only metacharacter is the trailing '*',
  // so startswith() is sufficient.
  if (S.endswith("*") && !hasWildcard(S.drop_back())) {
    Prefix = S.drop_back();
    return;
  }

  // Something like "*foo": the only metacharacter is the leading '*',
  // so endswith() is sufficient.
  if (S.startswith("*") && !hasWildcard(S.drop_front())) {
    Suffix = S.drop_front();
    return;
  }

  // Anything else needs real glob matching. Tokenize the pattern now;
  // each scan() call returns one token and advances S.
  while (!S.empty())
    Tokens.push_back(scan(S));
}
// Returns true if S matches this pattern. The fast paths prepared by the
// constructor (exact string, prefix, suffix) are checked in that order;
// only if none of them is set does the token-based matcher run.
bool GlobPattern::match(StringRef S) const {
  if (Exact)
    return S.equals(*Exact);
  if (Prefix)
    return S.startswith(*Prefix);
  return Suffix ? S.endswith(*Suffix) : matchOne(Tokens, S);
}
// Runs glob pattern Pats against string S.
//
// Pats is a sequence of tokens as produced by scan(): an empty bitvector
// stands for '*', any other bitvector is a 256-bit set of accepted byte
// values that consumes exactly one character. Returns true if the whole of
// S is matched by the whole of Pats.
bool GlobPattern::matchOne(ArrayRef<BitVector> Pats, StringRef S) const {
  for (;;) {
    if (Pats.empty())
      return S.empty();

    // If Pats[0] is '*', try to match Pats[1..] against all possible
    // tail strings of S to see if at least one of them succeeds.
    if (Pats[0].size() == 0) {
      Pats = Pats.slice(1);
      if (Pats.empty())
        // Fast path. If a pattern is '*', it matches anything.
        return true;
      // I == E tries the empty tail as well. '*' may swallow all of S, and
      // the remaining pattern can still match the empty string when it
      // consists only of further '*' tokens (e.g. "a**" against "a").
      for (size_t I = 0, E = S.size(); I <= E; ++I)
        if (matchOne(Pats, S.substr(I)))
          return true;
      return false;
    }

    // If Pats[0] is not '*', it must consume one character. Index by the
    // unsigned byte value; plain char may be signed and a negative index
    // would be out of range for the 256-bit set.
    if (S.empty() || !Pats[0][static_cast<uint8_t>(S[0])])
      return false;
    Pats = Pats.slice(1);
    S = S.substr(1);
  }
}
// Builds one GlobPattern per entry of Pat, preserving the input order.
StringMatcher::StringMatcher(const std::vector<StringRef> &Pat) {
  for (const StringRef &Glob : Pat)
    Patterns.emplace_back(Glob);
}
// Returns true if S matches at least one of the stored patterns.
bool StringMatcher::match(StringRef S) const {
  return std::any_of(Patterns.begin(), Patterns.end(),
                     [&](const GlobPattern &Pat) { return Pat.match(S); });
}
// Extracts the priority from a name of the form "foo.N", where N is a
// decimal number following the last '.' in S, and returns N.
// If there is no '.' or the text after it is not a number, returns 65536,
// which is one greater than the lowest priority.
int elf::getPriority(StringRef S) {
  const int NoPriority = 65536;

  size_t Dot = S.rfind('.');
  if (Dot == StringRef::npos)
    return NoPriority;

  // getAsInteger() returns true on failure.
  int Priority;
  bool Failed = S.substr(Dot + 1).getAsInteger(10, Priority);
  return Failed ? NoPriority : Priority;
}
// Returns true if S contains any glob metacharacter ('?', '*' or '[').
bool elf::hasWildcard(StringRef S) {
  const size_t MetaPos = S.find_first_of("?*[");
  return MetaPos != StringRef::npos;
}
// If S begins with a double quote, returns S without its first and last
// characters; otherwise returns S unchanged.
// NOTE(review): the last character is dropped without checking that it is
// a closing quote -- presumably callers only pass well-formed quoted
// tokens; confirm against the call sites.
StringRef elf::unquote(StringRef S) {
  const bool Quoted = S.startswith("\"");
  return Quoted ? S.substr(1, S.size() - 2) : S;
}
// Converts a hex string (e.g. "deadbeef") to a vector of bytes.
// The input is consumed two characters at a time; if any chunk is not a
// hexadecimal number, an error is reported and an empty vector is returned.
std::vector<uint8_t> elf::parseHex(StringRef S) {
  std::vector<uint8_t> Bytes;
  for (; !S.empty(); S = S.substr(2)) {
    StringRef Digits = S.substr(0, 2);
    uint8_t Value;
    // getAsInteger() returns true on failure.
    if (Digits.getAsInteger(16, Value)) {
      error("not a hexadecimal value: " + Digits);
      return {};
    }
    Bytes.push_back(Value);
  }
  return Bytes;
}
// Returns true if C may start a C identifier: an ASCII letter or '_'.
static bool isAlpha(char C) {
  if (C == '_')
    return true;
  const bool IsLower = 'a' <= C && C <= 'z';
  const bool IsUpper = 'A' <= C && C <= 'Z';
  return IsLower || IsUpper;
}
// Returns true if C may appear after the first character of a C
// identifier: an ASCII digit or anything isAlpha() accepts.
static bool isAlnum(char C) {
  const bool IsDigit = '0' <= C && C <= '9';
  return IsDigit || isAlpha(C);
}
// Returns true if S is valid as a C language identifier: non-empty,
// starting with a letter or '_', and followed only by letters, digits
// or '_'.
bool elf::isValidCIdentifier(StringRef S) {
  if (S.empty() || !isAlpha(S[0]))
    return false;
  return std::all_of(S.begin() + 1, S.end(), isAlnum);
}
// Returns the demangled C++ symbol name for Name, or Name itself if it
// cannot be demangled.
std::string elf::demangle(StringRef Name) {
  // The demangler can also decode strings other than symbol names, which
  // do not necessarily start with "_Z". Name can be either a C or C++
  // symbol, so only names that look like C++ symbols are passed to it;
  // this avoids an unexpected result for a C symbol that happens to match
  // a mangled type name.
  if (Name.startswith("_Z")) {
    // On success the demangler hands back a buffer that must be released
    // with free(); on failure it returns null.
    if (char *Demangled = itaniumDemangle(Name.str().c_str(), nullptr,
                                          nullptr, nullptr)) {
      std::string Result(Demangled);
      free(Demangled);
      return Result;
    }
  }
  return Name.str();
}