#pragma once #include "AST/SourceCode.h" #include "llvm/ADT/StringRef.h" #include "llvm/ADT/StringMap.h" namespace clice::testing { struct AnnotatedSource { std::string content; /// All named offsets llvm::StringMap offsets; llvm::StringMap ranges; std::vector nameless_offsets; /// Point Annotation: /// - $(key): Marks a single point. /// /// Range Annotation: /// - @key[...content...]: Marks a range. /// /// A range annotation for 'key' creates both a `ranges["key"]` and an `offsets["key"]` /// (pointing to the start). static AnnotatedSource from(llvm::StringRef content) { std::string source; source.reserve(content.size()); llvm::StringMap offsets; llvm::StringMap ranges; std::vector nameless_offsets; std::uint32_t offset = 0; std::uint32_t i = 0; // Helper lambda to parse a point annotation $(key). // It captures all necessary variables by reference. // Returns true if a point was successfully parsed, false otherwise. auto try_parse_point_annotation = [&]() -> bool { if(content[i] != '$') { return false; } // Peek ahead to see if it's "$(key)" or just "$" if(i + 1 < content.size() && content[i + 1] == '(') { // It's the full "$(key)" syntax uint32_t key_start = i + 2; size_t key_end = content.find(')', key_start); if(key_end == llvm::StringRef::npos) { return false; } // Malformed llvm::StringRef key = content.slice(key_start, key_end); /// empty key is regarded as a nameless, and `()` is not consumed. if(key.empty()) { // It's the shorthand "$" syntax for an nameless key nameless_offsets.emplace_back(offset); i += 1; // Advance cursor past the single '$' } else { offsets.try_emplace(key, offset); i = key_end + 1; // Advance cursor past the entire "$(key)" } return true; } else { // It's the shorthand "$" syntax for an nameless key nameless_offsets.emplace_back(offset); i += 1; // Advance cursor past the single '$' return true; } }; while(i < content.size()) { // Check for a point annotation first. if(try_parse_point_annotation()) { continue; } char c = content[i]; // Handle Range: @key[...] if(c == '@') { // Skip '@' i += 1; const char open_bracket = '['; const char close_bracket = ']'; llvm::StringRef key = content.substr(i).take_until( [&](char c) { return isspace(c) || c == open_bracket; }); i += key.size(); while(i < content.size() && isspace(content[i])) { i++; } assert(i < content.size() && content[i] == open_bracket && "Expect @key[...] for ranges."); i += 1; // Skip '[' uint32_t begin_offset = offset; int bracket_level = 1; while(i < content.size() && bracket_level > 0) { // Inside a range, we can still have nested point annotations. if(try_parse_point_annotation()) { continue; } char inner_c = content[i]; if(inner_c == open_bracket) bracket_level++; else if(inner_c == close_bracket) bracket_level--; if(bracket_level > 0) { source += inner_c; offset += 1; i += 1; } else { i += 1; // Skip the final ']' } } ranges.try_emplace(key, LocalSourceRange{begin_offset, offset}); continue; } // If nothing else matched, it's a regular character. source += c; offset += 1; i += 1; } return AnnotatedSource{ std::move(source), std::move(offsets), std::move(ranges), std::move(nameless_offsets), }; } }; struct AnnotatedSources { /// All sources file in the compilation. llvm::StringMap all_files; void add_source(llvm::StringRef file, llvm::StringRef content) { all_files.try_emplace(file, AnnotatedSource::from(content)); } /// Add sources to the params, use `#[filename]` to mark /// a new file start. For example /// /// ```cpp /// #[test.h] /// int foo(); /// /// #[main.cpp] /// #include "test.h" /// int x = foo(); /// ``` void add_sources(llvm::StringRef content) { std::string curr_file; std::string curr_content; /// Save previous file to params. auto save_previous_file = [&]() { if(curr_file.empty()) { return; } add_source(curr_file, curr_content); curr_file.clear(); curr_content.clear(); }; while(!content.empty()) { llvm::StringRef line = content.take_front(content.find_first_of("\r\n")); content = content.drop_front(line.size()); if(content.starts_with("\r\n")) { content = content.drop_front(2); } else if(content.starts_with("\n")) { content = content.drop_front(1); } if(line.starts_with("#[") && line.ends_with("]")) { save_previous_file(); curr_file = line.slice(2, line.size() - 1).str(); } else if(!curr_file.empty()) { curr_content += line; curr_content += '\n'; } } save_previous_file(); } }; } // namespace clice::testing