//===--- LexerUtils.cpp - clang-tidy---------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "LexerUtils.h"
#include "clang/AST/AST.h"
#include "clang/Basic/SourceManager.h"
#include <optional>
#include <utility>

namespace clang::tidy::utils::lexer {

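// Scans backwards from Location for the previous raw token and returns it
// together with the location where that token starts. Comment tokens are
// skipped when SkipComments is true.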
std::pair<Token, SourceLocation>
getPreviousTokenAndStart(SourceLocation Location, const SourceManager &SM,
                         const LangOptions &LangOpts, bool SkipComments) {
  Token Token;
  Token.setKind(tok::unknown);

  Location = Location.getLocWithOffset(-1);
  if (Location.isInvalid())
    return {Token, Location};

  auto StartOfFile = SM.getLocForStartOfFile(SM.getFileID(Location));
  while (Location != StartOfFile) {
    Location = Lexer::GetBeginningOfToken(Location, SM, LangOpts);
    if (!Lexer::getRawToken(Location, Token, SM, LangOpts) &&
        (!SkipComments || !Token.is(tok::comment))) {
      break;
    }
    Location = Location.getLocWithOffset(-1);
  }
  return {Token, Location};
}

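// Convenience wrapper around getPreviousTokenAndStart() that returns only the
// token and discards its start location.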
Token getPreviousToken(SourceLocation Location, const SourceManager &SM,
                       const LangOptions &LangOpts, bool SkipComments) {
  auto [Token, Start] =
      getPreviousTokenAndStart(Location, SM, LangOpts, SkipComments);
  return Token;
}

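// Steps back one character from Start and returns the beginning of the token
// found there; returns an invalid location if Start is invalid or inside a
// macro expansion.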
SourceLocation findPreviousTokenStart(SourceLocation Start,
                                      const SourceManager &SM,
                                      const LangOptions &LangOpts) {
  if (Start.isInvalid() || Start.isMacroID())
    return {};

  SourceLocation BeforeStart = Start.getLocWithOffset(-1);
  if (BeforeStart.isInvalid() || BeforeStart.isMacroID())
    return {};

  return Lexer::GetBeginningOfToken(BeforeStart, SM, LangOpts);
}

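// Walks backwards from Start token by token until a token of kind TK is found
// and returns its location; returns an invalid location if no such token is
// found before reaching a macro expansion or failing to lex a token.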
SourceLocation findPreviousTokenKind(SourceLocation Start,
                                     const SourceManager &SM,
                                     const LangOptions &LangOpts,
                                     tok::TokenKind TK) {
  if (Start.isInvalid() || Start.isMacroID())
    return {};

  while (true) {
    SourceLocation L = findPreviousTokenStart(Start, SM, LangOpts);
    if (L.isInvalid() || L.isMacroID())
      return {};

    Token T;
    if (Lexer::getRawToken(L, T, SM, LangOpts, /*IgnoreWhiteSpace=*/true))
      return {};

    if (T.is(TK))
      return T.getLocation();

    Start = L;
  }
}

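// Returns the location of the next ',' or ';' token after Start.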
SourceLocation findNextTerminator(SourceLocation Start, const SourceManager &SM,
                                  const LangOptions &LangOpts) {
  return findNextAnyTokenKind(Start, SM, LangOpts, tok::comma, tok::semi);
}

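// Lexes and returns the token following Start without discarding comments,
// unlike Lexer::findNextToken. Returns std::nullopt for macro locations or if
// the file buffer cannot be read.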
std::optional<Token>
findNextTokenIncludingComments(SourceLocation Start, const SourceManager &SM,
                               const LangOptions &LangOpts) {
  // We cannot use `Lexer::findNextToken` here because it ignores comments.
  if (Start.isMacroID())
    return std::nullopt;
  Start = Lexer::getLocForEndOfToken(Start, 0, SM, LangOpts);
  // Break down the source location.
  std::pair<FileID, unsigned> LocInfo = SM.getDecomposedLoc(Start);
  bool InvalidTemp = false;
  StringRef File = SM.getBufferData(LocInfo.first, &InvalidTemp);
  if (InvalidTemp)
    return std::nullopt;
  // Lex from the start of the given location.
  Lexer L(SM.getLocForStartOfFile(LocInfo.first), LangOpts, File.begin(),
          File.data() + LocInfo.second, File.end());
  L.SetCommentRetentionState(true);
  // Find the token.
  Token Tok;
  L.LexFromRawLexer(Tok);
  return Tok;
}

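// Returns the first token after Start that is not a comment, or std::nullopt
// if no such token can be found.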
std::optional<Token>
findNextTokenSkippingComments(SourceLocation Start, const SourceManager &SM,
                              const LangOptions &LangOpts) {
  while (Start.isValid()) {
    std::optional<Token> CurrentToken =
        Lexer::findNextToken(Start, SM, LangOpts);
    if (!CurrentToken || !CurrentToken->is(tok::comment))
      return CurrentToken;

    Start = CurrentToken->getLocation();
  }

  return std::nullopt;
}

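// Returns true if re-lexing Range encounters a macro expansion, a preprocessor
// directive introducer ('#'), or a location that cannot be lexed.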
bool rangeContainsExpansionsOrDirectives(SourceRange Range,
                                         const SourceManager &SM,
                                         const LangOptions &LangOpts) {
  assert(Range.isValid() && "Invalid Range for relexing provided");
  SourceLocation Loc = Range.getBegin();

  while (Loc <= Range.getEnd()) {
    if (Loc.isMacroID())
      return true;

    std::optional<Token> Tok = Lexer::findNextToken(Loc, SM, LangOpts);

    if (!Tok)
      return true;

    if (Tok->is(tok::hash))
      return true;

    Loc = Tok->getLocation();
  }

  return false;
}

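// Re-lexes Range looking for the qualifier keyword TK (const, volatile or
// restrict). A match that appears after a template argument list is preferred
// over one that appears before it, and matches inside the argument list are
// dropped once the closing '>' is seen.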
std::optional<Token> getQualifyingToken(tok::TokenKind TK,
                                        CharSourceRange Range,
                                        const ASTContext &Context,
                                        const SourceManager &SM) {
  assert((TK == tok::kw_const || TK == tok::kw_volatile ||
          TK == tok::kw_restrict) &&
         "TK is not a qualifier keyword");
  std::pair<FileID, unsigned> LocInfo = SM.getDecomposedLoc(Range.getBegin());
  StringRef File = SM.getBufferData(LocInfo.first);
  Lexer RawLexer(SM.getLocForStartOfFile(LocInfo.first), Context.getLangOpts(),
                 File.begin(), File.data() + LocInfo.second, File.end());
  std::optional<Token> LastMatchBeforeTemplate;
  std::optional<Token> LastMatchAfterTemplate;
  bool SawTemplate = false;
  Token Tok;
  while (!RawLexer.LexFromRawLexer(Tok) &&
         Range.getEnd() != Tok.getLocation() &&
         !SM.isBeforeInTranslationUnit(Range.getEnd(), Tok.getLocation())) {
    if (Tok.is(tok::raw_identifier)) {
      IdentifierInfo &Info = Context.Idents.get(
          StringRef(SM.getCharacterData(Tok.getLocation()), Tok.getLength()));
      Tok.setIdentifierInfo(&Info);
      Tok.setKind(Info.getTokenID());
    }
    if (Tok.is(tok::less))
      SawTemplate = true;
    else if (Tok.isOneOf(tok::greater, tok::greatergreater))
      LastMatchAfterTemplate = std::nullopt;
    else if (Tok.is(TK)) {
      if (SawTemplate)
        LastMatchAfterTemplate = Tok;
      else
        LastMatchBeforeTemplate = Tok;
    }
  }
  return LastMatchAfterTemplate != std::nullopt ? LastMatchAfterTemplate
                                                : LastMatchBeforeTemplate;
}

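// Statements of these kinds already include their terminator ('}' or ';') in
// their source range, so getUnifiedEndLoc() can return the end location as-is.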
static bool breakAndReturnEnd(const Stmt &S) {
  return isa<CompoundStmt, DeclStmt, NullStmt>(S);
}

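// Statements of these kinds end before their trailing semicolon, so
// getUnifiedEndLoc() has to locate the semicolon that follows separately.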
static bool breakAndReturnEndPlus1Token(const Stmt &S) {
  return isa<Expr, DoStmt, ReturnStmt, BreakStmt, ContinueStmt, GotoStmt,
             SEHLeaveStmt>(S);
}

// Given a Stmt which does not include its semicolon, this method returns the
// SourceLocation of the semicolon.
static SourceLocation getSemicolonAfterStmtEndLoc(const SourceLocation &EndLoc,
                                                  const SourceManager &SM,
                                                  const LangOptions &LangOpts) {

  if (EndLoc.isMacroID()) {
    // Assuming EndLoc points to a function call foo within macro F.
    // This method is supposed to return location of the semicolon within
    // those macro arguments:
    //   F ( foo() ; )
    //   ^ EndLoc
    //       ^ SpellingLoc
    //             ^ next token of SpellingLoc
    const SourceLocation SpellingLoc = SM.getSpellingLoc(EndLoc);
    std::optional<Token> NextTok =
        findNextTokenSkippingComments(SpellingLoc, SM, LangOpts);

    // Was the next token found successfully?
    // All macro issues are simply resolved by ensuring it's a semicolon.
    if (NextTok && NextTok->is(tok::TokenKind::semi)) {
      // Ideally this would return `F` with spelling location `;` (NextTok)
      // following the example above. For now simply return NextTok location.
      return NextTok->getLocation();
    }

    // Fall through to 'normal handling'.
    //   F ( foo() ) ;
    //   ^ EndLoc
    //       ^ SpellingLoc
    //               ^ next token of EndLoc
  }

  std::optional<Token> NextTok =
      findNextTokenSkippingComments(EndLoc, SM, LangOpts);

  // Testing for semicolon again avoids some issues with macros.
  if (NextTok && NextTok->is(tok::TokenKind::semi))
    return NextTok->getLocation();

  return {};
}

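// Returns an end location for S that uniformly covers the statement's
// terminator: the deepest trailing child decides whether S.getEndLoc() already
// includes it or whether the following semicolon has to be found.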
SourceLocation getUnifiedEndLoc(const Stmt &S, const SourceManager &SM,
                                const LangOptions &LangOpts) {

  const Stmt *LastChild = &S;
  while (!LastChild->children().empty() && !breakAndReturnEnd(*LastChild) &&
         !breakAndReturnEndPlus1Token(*LastChild)) {
    for (const Stmt *Child : LastChild->children())
      LastChild = Child;
  }

  if (!breakAndReturnEnd(*LastChild) && breakAndReturnEndPlus1Token(*LastChild))
    return getSemicolonAfterStmtEndLoc(S.getEndLoc(), SM, LangOpts);

  return S.getEndLoc();
}

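// Computes the location immediately after the closing ')' of FuncDecl's
// parameter list, i.e. where a noexcept specifier could be inserted; returns
// an invalid location on failure.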
SourceLocation getLocationForNoexceptSpecifier(const FunctionDecl *FuncDecl,
                                               const SourceManager &SM) {
  if (!FuncDecl)
    return {};

  const LangOptions &LangOpts = FuncDecl->getLangOpts();

  if (FuncDecl->getNumParams() == 0) {
    // Start at the beginning of the function declaration, and find the closing
    // parenthesis after which we would place the noexcept specifier.
    Token CurrentToken;
    SourceLocation CurrentLocation = FuncDecl->getBeginLoc();
    while (!Lexer::getRawToken(CurrentLocation, CurrentToken, SM, LangOpts,
                               true)) {
      if (CurrentToken.is(tok::r_paren))
        return CurrentLocation.getLocWithOffset(1);

      CurrentLocation = CurrentToken.getEndLoc();
    }

    // Failed to find the closing parenthesis, so just return an invalid
    // SourceLocation.
    return {};
  }

  // FunctionDecl with parameters
  const SourceLocation NoexceptLoc =
      FuncDecl->getParamDecl(FuncDecl->getNumParams() - 1)->getEndLoc();
  if (NoexceptLoc.isValid())
    return Lexer::findLocationAfterToken(
        NoexceptLoc, tok::r_paren, SM, LangOpts,
        /*SkipTrailingWhitespaceAndNewLine=*/true);

  return {};
}

} // namespace clang::tidy::utils::lexer