//===--- LexerUtils.cpp - clang-tidy---------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// #include "LexerUtils.h" #include "clang/AST/AST.h" #include "clang/Basic/SourceManager.h" #include #include namespace clang::tidy::utils::lexer { std::pair getPreviousTokenAndStart(SourceLocation Location, const SourceManager &SM, const LangOptions &LangOpts, bool SkipComments) { Token Token; Token.setKind(tok::unknown); Location = Location.getLocWithOffset(-1); if (Location.isInvalid()) return {Token, Location}; auto StartOfFile = SM.getLocForStartOfFile(SM.getFileID(Location)); while (Location != StartOfFile) { Location = Lexer::GetBeginningOfToken(Location, SM, LangOpts); if (!Lexer::getRawToken(Location, Token, SM, LangOpts) && (!SkipComments || !Token.is(tok::comment))) { break; } Location = Location.getLocWithOffset(-1); } return {Token, Location}; } Token getPreviousToken(SourceLocation Location, const SourceManager &SM, const LangOptions &LangOpts, bool SkipComments) { auto [Token, Start] = getPreviousTokenAndStart(Location, SM, LangOpts, SkipComments); return Token; } SourceLocation findPreviousTokenStart(SourceLocation Start, const SourceManager &SM, const LangOptions &LangOpts) { if (Start.isInvalid() || Start.isMacroID()) return {}; SourceLocation BeforeStart = Start.getLocWithOffset(-1); if (BeforeStart.isInvalid() || BeforeStart.isMacroID()) return {}; return Lexer::GetBeginningOfToken(BeforeStart, SM, LangOpts); } SourceLocation findPreviousTokenKind(SourceLocation Start, const SourceManager &SM, const LangOptions &LangOpts, tok::TokenKind TK) { if (Start.isInvalid() || Start.isMacroID()) return {}; while (true) { SourceLocation L = findPreviousTokenStart(Start, SM, LangOpts); if (L.isInvalid() || L.isMacroID()) return {}; Token T; if (Lexer::getRawToken(L, T, SM, LangOpts, /*IgnoreWhiteSpace=*/true)) return {}; if (T.is(TK)) return T.getLocation(); Start = L; } } SourceLocation findNextTerminator(SourceLocation Start, const SourceManager &SM, const LangOptions &LangOpts) { return findNextAnyTokenKind(Start, SM, LangOpts, tok::comma, tok::semi); } std::optional findNextTokenIncludingComments(SourceLocation Start, const SourceManager &SM, const LangOptions &LangOpts) { // `Lexer::findNextToken` will ignore comment if (Start.isMacroID()) return std::nullopt; Start = Lexer::getLocForEndOfToken(Start, 0, SM, LangOpts); // Break down the source location. std::pair LocInfo = SM.getDecomposedLoc(Start); bool InvalidTemp = false; StringRef File = SM.getBufferData(LocInfo.first, &InvalidTemp); if (InvalidTemp) return std::nullopt; // Lex from the start of the given location. Lexer L(SM.getLocForStartOfFile(LocInfo.first), LangOpts, File.begin(), File.data() + LocInfo.second, File.end()); L.SetCommentRetentionState(true); // Find the token. Token Tok; L.LexFromRawLexer(Tok); return Tok; } std::optional findNextTokenSkippingComments(SourceLocation Start, const SourceManager &SM, const LangOptions &LangOpts) { while (Start.isValid()) { std::optional CurrentToken = Lexer::findNextToken(Start, SM, LangOpts); if (!CurrentToken || !CurrentToken->is(tok::comment)) return CurrentToken; Start = CurrentToken->getLocation(); } return std::nullopt; } bool rangeContainsExpansionsOrDirectives(SourceRange Range, const SourceManager &SM, const LangOptions &LangOpts) { assert(Range.isValid() && "Invalid Range for relexing provided"); SourceLocation Loc = Range.getBegin(); while (Loc <= Range.getEnd()) { if (Loc.isMacroID()) return true; std::optional Tok = Lexer::findNextToken(Loc, SM, LangOpts); if (!Tok) return true; if (Tok->is(tok::hash)) return true; Loc = Tok->getLocation(); } return false; } std::optional getQualifyingToken(tok::TokenKind TK, CharSourceRange Range, const ASTContext &Context, const SourceManager &SM) { assert((TK == tok::kw_const || TK == tok::kw_volatile || TK == tok::kw_restrict) && "TK is not a qualifier keyword"); std::pair LocInfo = SM.getDecomposedLoc(Range.getBegin()); StringRef File = SM.getBufferData(LocInfo.first); Lexer RawLexer(SM.getLocForStartOfFile(LocInfo.first), Context.getLangOpts(), File.begin(), File.data() + LocInfo.second, File.end()); std::optional LastMatchBeforeTemplate; std::optional LastMatchAfterTemplate; bool SawTemplate = false; Token Tok; while (!RawLexer.LexFromRawLexer(Tok) && Range.getEnd() != Tok.getLocation() && !SM.isBeforeInTranslationUnit(Range.getEnd(), Tok.getLocation())) { if (Tok.is(tok::raw_identifier)) { IdentifierInfo &Info = Context.Idents.get( StringRef(SM.getCharacterData(Tok.getLocation()), Tok.getLength())); Tok.setIdentifierInfo(&Info); Tok.setKind(Info.getTokenID()); } if (Tok.is(tok::less)) SawTemplate = true; else if (Tok.isOneOf(tok::greater, tok::greatergreater)) LastMatchAfterTemplate = std::nullopt; else if (Tok.is(TK)) { if (SawTemplate) LastMatchAfterTemplate = Tok; else LastMatchBeforeTemplate = Tok; } } return LastMatchAfterTemplate != std::nullopt ? LastMatchAfterTemplate : LastMatchBeforeTemplate; } static bool breakAndReturnEnd(const Stmt &S) { return isa(S); } static bool breakAndReturnEndPlus1Token(const Stmt &S) { return isa(S); } // Given a Stmt which does not include it's semicolon this method returns the // SourceLocation of the semicolon. static SourceLocation getSemicolonAfterStmtEndLoc(const SourceLocation &EndLoc, const SourceManager &SM, const LangOptions &LangOpts) { if (EndLoc.isMacroID()) { // Assuming EndLoc points to a function call foo within macro F. // This method is supposed to return location of the semicolon within // those macro arguments: // F ( foo() ; ) // ^ EndLoc ^ SpellingLoc ^ next token of SpellingLoc const SourceLocation SpellingLoc = SM.getSpellingLoc(EndLoc); std::optional NextTok = findNextTokenSkippingComments(SpellingLoc, SM, LangOpts); // Was the next token found successfully? // All macro issues are simply resolved by ensuring it's a semicolon. if (NextTok && NextTok->is(tok::TokenKind::semi)) { // Ideally this would return `F` with spelling location `;` (NextTok) // following the example above. For now simply return NextTok location. return NextTok->getLocation(); } // Fallthrough to 'normal handling'. // F ( foo() ) ; // ^ EndLoc ^ SpellingLoc ) ^ next token of EndLoc } std::optional NextTok = findNextTokenSkippingComments(EndLoc, SM, LangOpts); // Testing for semicolon again avoids some issues with macros. if (NextTok && NextTok->is(tok::TokenKind::semi)) return NextTok->getLocation(); return {}; } SourceLocation getUnifiedEndLoc(const Stmt &S, const SourceManager &SM, const LangOptions &LangOpts) { const Stmt *LastChild = &S; while (!LastChild->children().empty() && !breakAndReturnEnd(*LastChild) && !breakAndReturnEndPlus1Token(*LastChild)) { for (const Stmt *Child : LastChild->children()) LastChild = Child; } if (!breakAndReturnEnd(*LastChild) && breakAndReturnEndPlus1Token(*LastChild)) return getSemicolonAfterStmtEndLoc(S.getEndLoc(), SM, LangOpts); return S.getEndLoc(); } SourceLocation getLocationForNoexceptSpecifier(const FunctionDecl *FuncDecl, const SourceManager &SM) { if (!FuncDecl) return {}; const LangOptions &LangOpts = FuncDecl->getLangOpts(); if (FuncDecl->getNumParams() == 0) { // Start at the beginning of the function declaration, and find the closing // parenthesis after which we would place the noexcept specifier. Token CurrentToken; SourceLocation CurrentLocation = FuncDecl->getBeginLoc(); while (!Lexer::getRawToken(CurrentLocation, CurrentToken, SM, LangOpts, true)) { if (CurrentToken.is(tok::r_paren)) return CurrentLocation.getLocWithOffset(1); CurrentLocation = CurrentToken.getEndLoc(); } // Failed to find the closing parenthesis, so just return an invalid // SourceLocation. return {}; } // FunctionDecl with parameters const SourceLocation NoexceptLoc = FuncDecl->getParamDecl(FuncDecl->getNumParams() - 1)->getEndLoc(); if (NoexceptLoc.isValid()) return Lexer::findLocationAfterToken( NoexceptLoc, tok::r_paren, SM, LangOpts, /*SkipTrailingWhitespaceAndNewLine=*/true); return {}; } } // namespace clang::tidy::utils::lexer