//===--- DirectiveTree.cpp - Find and strip preprocessor directives -------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// #include "clang-pseudo/DirectiveTree.h" #include "clang/Basic/IdentifierTable.h" #include "clang/Basic/TokenKinds.h" #include "llvm/Support/FormatVariadic.h" #include #include namespace clang { namespace pseudo { namespace { class DirectiveParser { public: explicit DirectiveParser(const TokenStream &Code) : Code(Code), Tok(&Code.front()) {} void parse(DirectiveTree *Result) { parse(Result, /*TopLevel=*/true); } private: // Roles that a directive might take within a conditional block. enum class Cond { None, If, Else, End }; static Cond classifyDirective(tok::PPKeywordKind K) { switch (K) { case clang::tok::pp_if: case clang::tok::pp_ifdef: case clang::tok::pp_ifndef: return Cond::If; case clang::tok::pp_elif: case clang::tok::pp_elifdef: case clang::tok::pp_elifndef: case clang::tok::pp_else: return Cond::Else; case clang::tok::pp_endif: return Cond::End; default: return Cond::None; } } // Parses tokens starting at Tok into Tree. // If we reach an End or Else directive that ends Tree, returns it. // If TopLevel is true, then we do not expect End and always return // std::nullopt. std::optional parse(DirectiveTree *Tree, bool TopLevel) { auto StartsDirective = [&, AllowDirectiveAt((const Token *)nullptr)]() mutable { if (Tok->flag(LexFlags::StartsPPLine)) { // If we considered a comment at the start of a PP-line, it doesn't // start a directive but the directive can still start after it. if (Tok->Kind == tok::comment) AllowDirectiveAt = Tok + 1; return Tok->Kind == tok::hash; } return Tok->Kind == tok::hash && AllowDirectiveAt == Tok; }; // Each iteration adds one chunk (or returns, if we see #endif). while (Tok->Kind != tok::eof) { // If there's no directive here, we have a code chunk. if (!StartsDirective()) { const Token *Start = Tok; do ++Tok; while (Tok->Kind != tok::eof && !StartsDirective()); Tree->Chunks.push_back(DirectiveTree::Code{ Token::Range{Code.index(*Start), Code.index(*Tok)}}); continue; } // We have some kind of directive. DirectiveTree::Directive Directive; parseDirective(&Directive); Cond Kind = classifyDirective(Directive.Kind); if (Kind == Cond::If) { // #if or similar, starting a nested conditional block. DirectiveTree::Conditional Conditional; Conditional.Branches.emplace_back(); Conditional.Branches.back().first = std::move(Directive); parseConditional(&Conditional); Tree->Chunks.push_back(std::move(Conditional)); } else if ((Kind == Cond::Else || Kind == Cond::End) && !TopLevel) { // #endif or similar, ending this PStructure scope. // (#endif is unexpected at the top level, treat as simple directive). return std::move(Directive); } else { // #define or similar, a simple directive at the current scope. Tree->Chunks.push_back(std::move(Directive)); } } return std::nullopt; } // Parse the rest of a conditional section, after seeing the If directive. // Returns after consuming the End directive. void parseConditional(DirectiveTree::Conditional *C) { assert(C->Branches.size() == 1 && C->Branches.front().second.Chunks.empty() && "Should be ready to parse first branch body"); while (Tok->Kind != tok::eof) { auto Terminator = parse(&C->Branches.back().second, /*TopLevel=*/false); if (!Terminator) { assert(Tok->Kind == tok::eof && "gave up parsing before eof?"); C->End.Tokens = Token::Range::emptyAt(Code.index(*Tok)); return; } if (classifyDirective(Terminator->Kind) == Cond::End) { C->End = std::move(*Terminator); return; } assert(classifyDirective(Terminator->Kind) == Cond::Else && "ended branch unexpectedly"); C->Branches.emplace_back(); C->Branches.back().first = std::move(*Terminator); } } // Parse a directive. Tok is the hash. void parseDirective(DirectiveTree::Directive *D) { assert(Tok->Kind == tok::hash); // Directive spans from the hash until the end of line or file. const Token *Begin = Tok++; while (Tok->Kind != tok::eof && !Tok->flag(LexFlags::StartsPPLine)) ++Tok; ArrayRef Tokens{Begin, Tok}; D->Tokens = {Code.index(*Tokens.begin()), Code.index(*Tokens.end())}; // Directive name is the first non-comment token after the hash. Tokens = Tokens.drop_front().drop_while( [](const Token &T) { return T.Kind == tok::comment; }); if (!Tokens.empty()) D->Kind = PPKeywords.get(Tokens.front().text()).getPPKeywordID(); } const TokenStream &Code; const Token *Tok; clang::IdentifierTable PPKeywords; }; struct Dumper { llvm::raw_ostream &OS; unsigned Indent = 0; Dumper(llvm::raw_ostream& OS) : OS(OS) {} void operator()(const DirectiveTree& Tree) { for (const auto& Chunk : Tree.Chunks) std::visit(*this, Chunk); } void operator()(const DirectiveTree::Conditional &Conditional) { for (unsigned I = 0; I < Conditional.Branches.size(); ++I) { const auto &Branch = Conditional.Branches[I]; (*this)(Branch.first, Conditional.Taken == I); Indent += 2; (*this)(Branch.second); Indent -= 2; } (*this)(Conditional.End); } void operator()(const DirectiveTree::Directive &Directive, bool Taken = false) { OS.indent(Indent) << llvm::formatv( "#{0} ({1} tokens){2}\n", tok::getPPKeywordSpelling(Directive.Kind), Directive.Tokens.size(), Taken ? " TAKEN" : ""); } void operator()(const DirectiveTree::Code &Code) { OS.indent(Indent) << llvm::formatv("code ({0} tokens)\n", Code.Tokens.size()); } }; } // namespace DirectiveTree DirectiveTree::parse(const TokenStream &Code) { DirectiveTree Result; DirectiveParser(Code).parse(&Result); return Result; } // Define operator<< in terms of dump() functions above. #define OSTREAM_DUMP(Type) \ llvm::raw_ostream &operator<<(llvm::raw_ostream &OS, const Type &T) { \ Dumper{OS}(T); \ return OS; \ } OSTREAM_DUMP(DirectiveTree) OSTREAM_DUMP(DirectiveTree::Directive) OSTREAM_DUMP(DirectiveTree::Conditional) OSTREAM_DUMP(DirectiveTree::Code) #undef OSTREAM_DUMP namespace { // Makes choices about conditional branches. // // Generally it tries to maximize the amount of useful code we see. // // Caveat: each conditional is evaluated independently. Consider this code: // #ifdef WINDOWS // bool isWindows = true; // #endif // #ifndef WINDOWS // bool isWindows = false; // #endif // We take both branches and define isWindows twice. We could track more state // in order to produce a consistent view, but this is complex. class BranchChooser { public: BranchChooser(const TokenStream &Code) : Code(Code) {} // Describes code seen by making particular branch choices. Higher is better. struct Score { int Tokens = 0; // excluding comments and directives int Directives = 0; int Errors = 0; // #error directives bool operator>(const Score &Other) const { // Seeing errors is bad, other things are good. return std::make_tuple(-Errors, Tokens, Directives) > std::make_tuple(-Other.Errors, Other.Tokens, Other.Directives); } Score &operator+=(const Score &Other) { Tokens += Other.Tokens; Directives += Other.Directives; Errors += Other.Errors; return *this; } }; Score operator()(DirectiveTree::Code &C) { Score S; for (const Token &T : Code.tokens(C.Tokens)) if (T.Kind != tok::comment) ++S.Tokens; return S; } Score operator()(DirectiveTree::Directive &D) { Score S; S.Directives = 1; S.Errors = D.Kind == tok::pp_error; return S; } Score operator()(DirectiveTree::Conditional &C) { Score Best; bool MayTakeTrivial = true; bool TookTrivial = false; for (unsigned I = 0; I < C.Branches.size(); ++I) { // Walk the branch to make its nested choices in any case. Score BranchScore = walk(C.Branches[I].second); // If we already took an #if 1, don't consider any other branches. if (TookTrivial) continue; // Is this a trivial #if 0 or #if 1? if (auto TriviallyTaken = isTakenWhenReached(C.Branches[I].first)) { if (!*TriviallyTaken) continue; // Don't consider #if 0 even if it scores well. if (MayTakeTrivial) TookTrivial = true; } else { // After a nontrivial condition, #elif 1 isn't guaranteed taken. MayTakeTrivial = false; } // Is this the best branch so far? (Including if it's #if 1). if (TookTrivial || !C.Taken || BranchScore > Best) { Best = BranchScore; C.Taken = I; } } return Best; } Score walk(DirectiveTree &M) { Score S; for (auto &C : M.Chunks) S += std::visit(*this, C); return S; } private: // Return true if the directive starts an always-taken conditional branch, // false if the branch is never taken, and std::nullopt otherwise. std::optional isTakenWhenReached(const DirectiveTree::Directive &Dir) { switch (Dir.Kind) { case clang::tok::pp_if: case clang::tok::pp_elif: break; // handled below case clang::tok::pp_else: return true; default: // #ifdef etc return std::nullopt; } const auto &Tokens = Code.tokens(Dir.Tokens); assert(!Tokens.empty() && Tokens.front().Kind == tok::hash); const Token &Name = Tokens.front().nextNC(); const Token &Value = Name.nextNC(); // Does the condition consist of exactly one token? if (&Value >= Tokens.end() || &Value.nextNC() < Tokens.end()) return std::nullopt; return llvm::StringSwitch>(Value.text()) .Cases("true", "1", true) .Cases("false", "0", false) .Default(std::nullopt); } const TokenStream &Code; }; } // namespace void chooseConditionalBranches(DirectiveTree &Tree, const TokenStream &Code) { BranchChooser{Code}.walk(Tree); } namespace { class Preprocessor { const TokenStream &In; TokenStream &Out; public: Preprocessor(const TokenStream &In, TokenStream &Out) : In(In), Out(Out) {} ~Preprocessor() { Out.finalize(); } void walk(const DirectiveTree &T) { for (const auto &C : T.Chunks) std::visit(*this, C); } void operator()(const DirectiveTree::Code &C) { for (const auto &Tok : In.tokens(C.Tokens)) Out.push(Tok); } void operator()(const DirectiveTree::Directive &) {} void operator()(const DirectiveTree::Conditional &C) { if (C.Taken) walk(C.Branches[*C.Taken].second); } }; } // namespace TokenStream DirectiveTree::stripDirectives(const TokenStream &In) const { TokenStream Out; Preprocessor(In, Out).walk(*this); return Out; } } // namespace pseudo } // namespace clang