//===--- SymbolCollector.cpp -------------------------------------*- C++-*-===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// #include "SymbolCollector.h" #include "AST.h" #include "CodeComplete.h" #include "CodeCompletionStrings.h" #include "ExpectedTypes.h" #include "SourceCode.h" #include "URI.h" #include "clang-include-cleaner/Analysis.h" #include "clang-include-cleaner/IncludeSpeller.h" #include "clang-include-cleaner/Record.h" #include "clang-include-cleaner/Types.h" #include "index/CanonicalIncludes.h" #include "index/Relation.h" #include "index/Symbol.h" #include "index/SymbolID.h" #include "index/SymbolLocation.h" #include "clang/AST/Decl.h" #include "clang/AST/DeclBase.h" #include "clang/AST/DeclObjC.h" #include "clang/AST/DeclTemplate.h" #include "clang/AST/DeclarationName.h" #include "clang/AST/Expr.h" #include "clang/Basic/FileEntry.h" #include "clang/Basic/LangOptions.h" #include "clang/Basic/SourceLocation.h" #include "clang/Basic/SourceManager.h" #include "clang/Index/IndexSymbol.h" #include "clang/Lex/Preprocessor.h" #include "clang/Lex/Token.h" #include "clang/Tooling/Inclusions/HeaderAnalysis.h" #include "clang/Tooling/Inclusions/StandardLibrary.h" #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringRef.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/FileSystem.h" #include "llvm/Support/Path.h" #include #include #include #include #include namespace clang { namespace clangd { namespace { /// If \p ND is a template specialization, returns the described template. /// Otherwise, returns \p ND. const NamedDecl &getTemplateOrThis(const NamedDecl &ND) { if (auto *T = ND.getDescribedTemplate()) return *T; return ND; } // Checks whether the decl is a private symbol in a header generated by // protobuf compiler. // FIXME: make filtering extensible when there are more use cases for symbol // filters. bool isPrivateProtoDecl(const NamedDecl &ND) { const auto &SM = ND.getASTContext().getSourceManager(); if (!isProtoFile(nameLocation(ND, SM), SM)) return false; // ND without identifier can be operators. if (ND.getIdentifier() == nullptr) return false; auto Name = ND.getIdentifier()->getName(); if (!Name.contains('_')) return false; // Nested proto entities (e.g. Message::Nested) have top-level decls // that shouldn't be used (Message_Nested). Ignore them completely. // The nested entities are dangling type aliases, we may want to reconsider // including them in the future. // For enum constants, SOME_ENUM_CONSTANT is not private and should be // indexed. Outer_INNER is private. This heuristic relies on naming style, it // will include OUTER_INNER and exclude some_enum_constant. // FIXME: the heuristic relies on naming style (i.e. no underscore in // user-defined names) and can be improved. return (ND.getKind() != Decl::EnumConstant) || llvm::any_of(Name, islower); } // We only collect #include paths for symbols that are suitable for global code // completion, except for namespaces since #include path for a namespace is hard // to define. Symbol::IncludeDirective shouldCollectIncludePath(index::SymbolKind Kind) { using SK = index::SymbolKind; switch (Kind) { case SK::Macro: case SK::Enum: case SK::Struct: case SK::Class: case SK::Union: case SK::TypeAlias: case SK::Using: case SK::Function: case SK::Variable: case SK::EnumConstant: case SK::Concept: return Symbol::Include | Symbol::Import; case SK::Protocol: return Symbol::Import; default: return Symbol::Invalid; } } // Return the symbol range of the token at \p TokLoc. std::pair getTokenRange(SourceLocation TokLoc, const SourceManager &SM, const LangOptions &LangOpts) { auto CreatePosition = [&SM](SourceLocation Loc) { auto LSPLoc = sourceLocToPosition(SM, Loc); SymbolLocation::Position Pos; Pos.setLine(LSPLoc.line); Pos.setColumn(LSPLoc.character); return Pos; }; auto TokenLength = clang::Lexer::MeasureTokenLength(TokLoc, SM, LangOpts); return {CreatePosition(TokLoc), CreatePosition(TokLoc.getLocWithOffset(TokenLength))}; } // Checks whether \p ND is a good candidate to be the *canonical* declaration of // its symbol (e.g. a go-to-declaration target). This overrides the default of // using Clang's canonical declaration, which is the first in the TU. // // Example: preferring a class declaration over its forward declaration. bool isPreferredDeclaration(const NamedDecl &ND, index::SymbolRoleSet Roles) { const auto &SM = ND.getASTContext().getSourceManager(); if (isa(ND)) return (Roles & static_cast(index::SymbolRole::Definition)) && !isInsideMainFile(ND.getLocation(), SM); if (const auto *ID = dyn_cast(&ND)) return ID->isThisDeclarationADefinition(); if (const auto *PD = dyn_cast(&ND)) return PD->isThisDeclarationADefinition(); return false; } RefKind toRefKind(index::SymbolRoleSet Roles, bool Spelled = false) { RefKind Result = RefKind::Unknown; if (Roles & static_cast(index::SymbolRole::Declaration)) Result |= RefKind::Declaration; if (Roles & static_cast(index::SymbolRole::Definition)) Result |= RefKind::Definition; if (Roles & static_cast(index::SymbolRole::Reference)) Result |= RefKind::Reference; if (Spelled) Result |= RefKind::Spelled; return Result; } std::optional indexableRelation(const index::SymbolRelation &R) { if (R.Roles & static_cast(index::SymbolRole::RelationBaseOf)) return RelationKind::BaseOf; if (R.Roles & static_cast(index::SymbolRole::RelationOverrideOf)) return RelationKind::OverriddenBy; return std::nullopt; } // Check if there is an exact spelling of \p ND at \p Loc. bool isSpelled(SourceLocation Loc, const NamedDecl &ND) { auto Name = ND.getDeclName(); const auto NameKind = Name.getNameKind(); if (NameKind != DeclarationName::Identifier && NameKind != DeclarationName::CXXConstructorName) return false; const auto &AST = ND.getASTContext(); const auto &SM = AST.getSourceManager(); const auto &LO = AST.getLangOpts(); clang::Token Tok; if (clang::Lexer::getRawToken(Loc, Tok, SM, LO)) return false; auto StrName = Name.getAsString(); return clang::Lexer::getSpelling(Tok, SM, LO) == StrName; } } // namespace // Encapsulates decisions about how to record header paths in the index, // including filename normalization, URI conversion etc. // Expensive checks are cached internally. class SymbolCollector::HeaderFileURICache { struct FrameworkUmbrellaSpelling { // Spelling for the public umbrella header, e.g. std::optional PublicHeader; // Spelling for the private umbrella header, e.g. // std::optional PrivateHeader; }; // Weird double-indirect access to PP, which might not be ready yet when // HeaderFiles is created but will be by the time it's used. // (IndexDataConsumer::setPreprocessor can happen before or after initialize) Preprocessor *&PP; const SourceManager &SM; const include_cleaner::PragmaIncludes *PI; llvm::StringRef FallbackDir; llvm::DenseMap CacheFEToURI; llvm::StringMap CachePathToURI; llvm::DenseMap CacheFIDToInclude; llvm::StringMap CachePathToFrameworkSpelling; llvm::StringMap CacheFrameworkToUmbrellaHeaderSpelling; public: HeaderFileURICache(Preprocessor *&PP, const SourceManager &SM, const SymbolCollector::Options &Opts) : PP(PP), SM(SM), PI(Opts.PragmaIncludes), FallbackDir(Opts.FallbackDir) { } // Returns a canonical URI for the file \p FE. // We attempt to make the path absolute first. const std::string &toURI(const FileEntryRef FE) { auto R = CacheFEToURI.try_emplace(FE); if (R.second) { auto CanonPath = getCanonicalPath(FE, SM.getFileManager()); R.first->second = &toURIInternal(CanonPath ? *CanonPath : FE.getName()); } return *R.first->second; } // Returns a canonical URI for \p Path. // If the file is in the FileManager, use that to canonicalize the path. // We attempt to make the path absolute in any case. const std::string &toURI(llvm::StringRef Path) { if (auto File = SM.getFileManager().getFileRef(Path)) return toURI(*File); return toURIInternal(Path); } // Gets a canonical include (URI of the header or
or "header") for // header of \p FID (which should usually be the *expansion* file). // This does not account for any per-symbol overrides! // Returns "" if includes should not be inserted for this file. llvm::StringRef getIncludeHeader(FileID FID) { auto R = CacheFIDToInclude.try_emplace(FID); if (R.second) R.first->second = getIncludeHeaderUncached(FID); return R.first->second; } // If a file is mapped by canonical headers, use that mapping, regardless // of whether it's an otherwise-good header (header guards etc). llvm::StringRef mapCanonical(llvm::StringRef HeaderPath) { if (!PP) return ""; // Populate the system header mapping as late as possible to // ensure the preprocessor has been set already. CanonicalIncludes SysHeaderMapping; SysHeaderMapping.addSystemHeadersMapping(PP->getLangOpts()); auto Canonical = SysHeaderMapping.mapHeader(HeaderPath); if (Canonical.empty()) return ""; // If we had a mapping, always use it. assert(Canonical.starts_with("<") || Canonical.starts_with("\"")); return Canonical; } private: // This takes care of making paths absolute and path->URI caching, but no // FileManager-based canonicalization. const std::string &toURIInternal(llvm::StringRef Path) { auto R = CachePathToURI.try_emplace(Path); if (R.second) { llvm::SmallString<256> AbsPath = Path; if (!llvm::sys::path::is_absolute(AbsPath) && !FallbackDir.empty()) llvm::sys::fs::make_absolute(FallbackDir, AbsPath); assert(llvm::sys::path::is_absolute(AbsPath) && "If the VFS can't make paths absolute, a FallbackDir must be " "provided"); llvm::sys::path::remove_dots(AbsPath, /*remove_dot_dot=*/true); R.first->second = URI::create(AbsPath).toString(); } return R.first->second; } struct FrameworkHeaderPath { // Path to the framework directory containing the Headers/PrivateHeaders // directories e.g. /Frameworks/Foundation.framework/ llvm::StringRef HeadersParentDir; // Subpath relative to the Headers or PrivateHeaders dir, e.g. NSObject.h // Note: This is NOT relative to the `HeadersParentDir`. llvm::StringRef HeaderSubpath; // Whether this header is under the PrivateHeaders dir bool IsPrivateHeader; }; std::optional splitFrameworkHeaderPath(llvm::StringRef Path) { using namespace llvm::sys; path::reverse_iterator I = path::rbegin(Path); path::reverse_iterator Prev = I; path::reverse_iterator E = path::rend(Path); while (I != E) { if (*I == "Headers") { FrameworkHeaderPath HeaderPath; HeaderPath.HeadersParentDir = Path.substr(0, I - E); HeaderPath.HeaderSubpath = Path.substr(Prev - E); HeaderPath.IsPrivateHeader = false; return HeaderPath; } if (*I == "PrivateHeaders") { FrameworkHeaderPath HeaderPath; HeaderPath.HeadersParentDir = Path.substr(0, I - E); HeaderPath.HeaderSubpath = Path.substr(Prev - E); HeaderPath.IsPrivateHeader = true; return HeaderPath; } Prev = I; ++I; } // Unexpected, must not be a framework header. return std::nullopt; } // Frameworks typically have an umbrella header of the same name, e.g. // instead of or // instead of // which should be used instead of directly // importing the header. std::optional getFrameworkUmbrellaSpelling(llvm::StringRef Framework, const HeaderSearch &HS, FrameworkHeaderPath &HeaderPath) { auto Res = CacheFrameworkToUmbrellaHeaderSpelling.try_emplace(Framework); auto *CachedSpelling = &Res.first->second; if (!Res.second) { return HeaderPath.IsPrivateHeader ? CachedSpelling->PrivateHeader : CachedSpelling->PublicHeader; } SmallString<256> UmbrellaPath(HeaderPath.HeadersParentDir); llvm::sys::path::append(UmbrellaPath, "Headers", Framework + ".h"); llvm::vfs::Status Status; auto StatErr = HS.getFileMgr().getNoncachedStatValue(UmbrellaPath, Status); if (!StatErr) CachedSpelling->PublicHeader = llvm::formatv("<{0}/{0}.h>", Framework); UmbrellaPath = HeaderPath.HeadersParentDir; llvm::sys::path::append(UmbrellaPath, "PrivateHeaders", Framework + "_Private.h"); StatErr = HS.getFileMgr().getNoncachedStatValue(UmbrellaPath, Status); if (!StatErr) CachedSpelling->PrivateHeader = llvm::formatv("<{0}/{0}_Private.h>", Framework); return HeaderPath.IsPrivateHeader ? CachedSpelling->PrivateHeader : CachedSpelling->PublicHeader; } // Compute the framework include spelling for `FE` which is in a framework // named `Framework`, e.g. `NSObject.h` in framework `Foundation` would // give if the umbrella header exists, otherwise // . std::optional getFrameworkHeaderIncludeSpelling(FileEntryRef FE, llvm::StringRef Framework, HeaderSearch &HS) { auto Res = CachePathToFrameworkSpelling.try_emplace(FE.getName()); auto *CachedHeaderSpelling = &Res.first->second; if (!Res.second) return llvm::StringRef(*CachedHeaderSpelling); auto HeaderPath = splitFrameworkHeaderPath(FE.getName()); if (!HeaderPath) { // Unexpected: must not be a proper framework header, don't cache the // failure. CachePathToFrameworkSpelling.erase(Res.first); return std::nullopt; } if (auto UmbrellaSpelling = getFrameworkUmbrellaSpelling(Framework, HS, *HeaderPath)) { *CachedHeaderSpelling = *UmbrellaSpelling; return llvm::StringRef(*CachedHeaderSpelling); } *CachedHeaderSpelling = llvm::formatv("<{0}/{1}>", Framework, HeaderPath->HeaderSubpath).str(); return llvm::StringRef(*CachedHeaderSpelling); } llvm::StringRef getIncludeHeaderUncached(FileID FID) { const auto FE = SM.getFileEntryRefForID(FID); if (!FE || FE->getName().empty()) return ""; if (auto Verbatim = PI->getPublic(*FE); !Verbatim.empty()) return Verbatim; llvm::StringRef Filename = FE->getName(); if (auto Canonical = mapCanonical(Filename); !Canonical.empty()) return Canonical; // Framework headers are spelled as , not // "path/FrameworkName.framework/Headers/Foo.h". auto &HS = PP->getHeaderSearchInfo(); if (const auto *HFI = HS.getExistingFileInfo(*FE, /*WantExternal*/ false)) if (!HFI->Framework.empty()) if (auto Spelling = getFrameworkHeaderIncludeSpelling(*FE, HFI->Framework, HS)) return *Spelling; if (!tooling::isSelfContainedHeader(*FE, PP->getSourceManager(), PP->getHeaderSearchInfo())) { // A .inc or .def file is often included into a real header to define // symbols (e.g. LLVM tablegen files). if (Filename.ends_with(".inc") || Filename.ends_with(".def")) // Don't use cache reentrantly due to iterator invalidation. return getIncludeHeaderUncached(SM.getFileID(SM.getIncludeLoc(FID))); // Conservatively refuse to insert #includes to files without guards. return ""; } // Standard case: just insert the file itself. return toURI(*FE); } }; // Return the symbol location of the token at \p TokLoc. std::optional SymbolCollector::getTokenLocation(SourceLocation TokLoc) { const auto &SM = ASTCtx->getSourceManager(); const auto FE = SM.getFileEntryRefForID(SM.getFileID(TokLoc)); if (!FE) return std::nullopt; SymbolLocation Result; Result.FileURI = HeaderFileURIs->toURI(*FE).c_str(); auto Range = getTokenRange(TokLoc, SM, ASTCtx->getLangOpts()); Result.Start = Range.first; Result.End = Range.second; return Result; } SymbolCollector::SymbolCollector(Options Opts) : Opts(std::move(Opts)) {} SymbolCollector::~SymbolCollector() = default; void SymbolCollector::initialize(ASTContext &Ctx) { ASTCtx = &Ctx; HeaderFileURIs = std::make_unique( this->PP, ASTCtx->getSourceManager(), Opts); CompletionAllocator = std::make_shared(); CompletionTUInfo = std::make_unique(CompletionAllocator); } bool SymbolCollector::shouldCollectSymbol(const NamedDecl &ND, const ASTContext &ASTCtx, const Options &Opts, bool IsMainFileOnly) { // Skip anonymous declarations, e.g (anonymous enum/class/struct). if (ND.getDeclName().isEmpty()) return false; // Skip main-file symbols if we are not collecting them. if (IsMainFileOnly && !Opts.CollectMainFileSymbols) return false; // Skip symbols in anonymous namespaces in header files. if (!IsMainFileOnly && ND.isInAnonymousNamespace()) return false; // For function local symbols, index only classes and its member functions. if (index::isFunctionLocalSymbol(&ND)) return isa(ND) || (ND.isCXXInstanceMember() && ND.isFunctionOrFunctionTemplate()); // We want most things but not "local" symbols such as symbols inside // FunctionDecl, BlockDecl, ObjCMethodDecl and OMPDeclareReductionDecl. // FIXME: Need a matcher for ExportDecl in order to include symbols declared // within an export. const auto *DeclCtx = ND.getDeclContext(); switch (DeclCtx->getDeclKind()) { case Decl::TranslationUnit: case Decl::Namespace: case Decl::LinkageSpec: case Decl::Enum: case Decl::ObjCProtocol: case Decl::ObjCInterface: case Decl::ObjCCategory: case Decl::ObjCCategoryImpl: case Decl::ObjCImplementation: break; default: // Record has a few derivations (e.g. CXXRecord, Class specialization), it's // easier to cast. if (!isa(DeclCtx)) return false; } // Avoid indexing internal symbols in protobuf generated headers. if (isPrivateProtoDecl(ND)) return false; if (!Opts.CollectReserved && (hasReservedName(ND) || hasReservedScope(*ND.getDeclContext())) && ASTCtx.getSourceManager().isInSystemHeader(ND.getLocation())) return false; return true; } const Decl * SymbolCollector::getRefContainer(const Decl *Enclosing, const SymbolCollector::Options &Opts) { while (Enclosing) { const auto *ND = dyn_cast(Enclosing); if (ND && shouldCollectSymbol(*ND, ND->getASTContext(), Opts, true)) { break; } Enclosing = dyn_cast_or_null(Enclosing->getDeclContext()); } return Enclosing; } // Always return true to continue indexing. bool SymbolCollector::handleDeclOccurrence( const Decl *D, index::SymbolRoleSet Roles, llvm::ArrayRef Relations, SourceLocation Loc, index::IndexDataConsumer::ASTNodeInfo ASTNode) { assert(ASTCtx && PP && HeaderFileURIs); assert(CompletionAllocator && CompletionTUInfo); assert(ASTNode.OrigD); // Indexing API puts canonical decl into D, which might not have a valid // source location for implicit/built-in decls. Fallback to original decl in // such cases. if (D->getLocation().isInvalid()) D = ASTNode.OrigD; // If OrigD is an declaration associated with a friend declaration and it's // not a definition, skip it. Note that OrigD is the occurrence that the // collector is currently visiting. if ((ASTNode.OrigD->getFriendObjectKind() != Decl::FriendObjectKind::FOK_None) && !(Roles & static_cast(index::SymbolRole::Definition))) return true; // A declaration created for a friend declaration should not be used as the // canonical declaration in the index. Use OrigD instead, unless we've already // picked a replacement for D if (D->getFriendObjectKind() != Decl::FriendObjectKind::FOK_None) D = CanonicalDecls.try_emplace(D, ASTNode.OrigD).first->second; // Flag to mark that D should be considered canonical meaning its declaration // will override any previous declaration for the Symbol. bool DeclIsCanonical = false; // Avoid treating ObjCImplementationDecl as a canonical declaration if it has // a corresponding non-implicit and non-forward declared ObjcInterfaceDecl. if (const auto *IID = dyn_cast(D)) { DeclIsCanonical = true; if (const auto *CID = IID->getClassInterface()) if (const auto *DD = CID->getDefinition()) if (!DD->isImplicitInterfaceDecl()) D = DD; } // Avoid treating ObjCCategoryImplDecl as a canonical declaration in favor of // its ObjCCategoryDecl if it has one. if (const auto *CID = dyn_cast(D)) { DeclIsCanonical = true; if (const auto *CD = CID->getCategoryDecl()) D = CD; } const NamedDecl *ND = dyn_cast(D); if (!ND) return true; auto ID = getSymbolIDCached(ND); if (!ID) return true; // Mark D as referenced if this is a reference coming from the main file. // D may not be an interesting symbol, but it's cheaper to check at the end. auto &SM = ASTCtx->getSourceManager(); if (Opts.CountReferences && (Roles & static_cast(index::SymbolRole::Reference)) && SM.getFileID(SM.getSpellingLoc(Loc)) == SM.getMainFileID()) ReferencedSymbols.insert(ID); // ND is the canonical (i.e. first) declaration. If it's in the main file // (which is not a header), then no public declaration was visible, so assume // it's main-file only. bool IsMainFileOnly = SM.isWrittenInMainFile(SM.getExpansionLoc(ND->getBeginLoc())) && !isHeaderFile(SM.getFileEntryRefForID(SM.getMainFileID())->getName(), ASTCtx->getLangOpts()); // In C, printf is a redecl of an implicit builtin! So check OrigD instead. if (ASTNode.OrigD->isImplicit() || !shouldCollectSymbol(*ND, *ASTCtx, Opts, IsMainFileOnly)) return true; // Note: we need to process relations for all decl occurrences, including // refs, because the indexing code only populates relations for specific // occurrences. For example, RelationBaseOf is only populated for the // occurrence inside the base-specifier. processRelations(*ND, ID, Relations); bool CollectRef = static_cast(Opts.RefFilter & toRefKind(Roles)); // Unlike other fields, e.g. Symbols (which use spelling locations), we use // file locations for references (as it aligns the behavior of clangd's // AST-based xref). // FIXME: we should try to use the file locations for other fields. if (CollectRef && (!IsMainFileOnly || Opts.CollectMainFileRefs || ND->isExternallyVisible()) && !isa(ND)) { auto FileLoc = SM.getFileLoc(Loc); auto FID = SM.getFileID(FileLoc); if (Opts.RefsInHeaders || FID == SM.getMainFileID()) { addRef(ID, SymbolRef{FileLoc, FID, Roles, getRefContainer(ASTNode.Parent, Opts), isSpelled(FileLoc, *ND)}); } } // Don't continue indexing if this is a mere reference. if (!(Roles & (static_cast(index::SymbolRole::Declaration) | static_cast(index::SymbolRole::Definition)))) return true; // FIXME: ObjCPropertyDecl are not properly indexed here: // - ObjCPropertyDecl may have an OrigD of ObjCPropertyImplDecl, which is // not a NamedDecl. auto *OriginalDecl = dyn_cast(ASTNode.OrigD); if (!OriginalDecl) return true; const Symbol *BasicSymbol = Symbols.find(ID); if (isPreferredDeclaration(*OriginalDecl, Roles)) // If OriginalDecl is preferred, replace/create the existing canonical // declaration (e.g. a class forward declaration). There should be at most // one duplicate as we expect to see only one preferred declaration per // TU, because in practice they are definitions. BasicSymbol = addDeclaration(*OriginalDecl, std::move(ID), IsMainFileOnly); else if (!BasicSymbol || DeclIsCanonical) BasicSymbol = addDeclaration(*ND, std::move(ID), IsMainFileOnly); if (Roles & static_cast(index::SymbolRole::Definition)) addDefinition(*OriginalDecl, *BasicSymbol); return true; } void SymbolCollector::handleMacros(const MainFileMacros &MacroRefsToIndex) { assert(HeaderFileURIs && PP); const auto &SM = PP->getSourceManager(); const auto MainFileEntryRef = SM.getFileEntryRefForID(SM.getMainFileID()); assert(MainFileEntryRef); const std::string &MainFileURI = HeaderFileURIs->toURI(*MainFileEntryRef); // Add macro references. for (const auto &IDToRefs : MacroRefsToIndex.MacroRefs) { for (const auto &MacroRef : IDToRefs.second) { const auto &Range = MacroRef.toRange(SM); bool IsDefinition = MacroRef.IsDefinition; Ref R; R.Location.Start.setLine(Range.start.line); R.Location.Start.setColumn(Range.start.character); R.Location.End.setLine(Range.end.line); R.Location.End.setColumn(Range.end.character); R.Location.FileURI = MainFileURI.c_str(); R.Kind = IsDefinition ? RefKind::Definition : RefKind::Reference; Refs.insert(IDToRefs.first, R); if (IsDefinition) { Symbol S; S.ID = IDToRefs.first; auto StartLoc = cantFail(sourceLocationInMainFile(SM, Range.start)); auto EndLoc = cantFail(sourceLocationInMainFile(SM, Range.end)); S.Name = toSourceCode(SM, SourceRange(StartLoc, EndLoc)); S.SymInfo.Kind = index::SymbolKind::Macro; S.SymInfo.SubKind = index::SymbolSubKind::None; S.SymInfo.Properties = index::SymbolPropertySet(); S.SymInfo.Lang = index::SymbolLanguage::C; S.Origin = Opts.Origin; S.CanonicalDeclaration = R.Location; // Make the macro visible for code completion if main file is an // include-able header. if (!HeaderFileURIs->getIncludeHeader(SM.getMainFileID()).empty()) { S.Flags |= Symbol::IndexedForCodeCompletion; S.Flags |= Symbol::VisibleOutsideFile; } Symbols.insert(S); } } } } bool SymbolCollector::handleMacroOccurrence(const IdentifierInfo *Name, const MacroInfo *MI, index::SymbolRoleSet Roles, SourceLocation Loc) { assert(PP); // Builtin macros don't have useful locations and aren't needed in completion. if (MI->isBuiltinMacro()) return true; const auto &SM = PP->getSourceManager(); auto DefLoc = MI->getDefinitionLoc(); // Also avoid storing macros that aren't defined in any file, i.e. predefined // macros like __DBL_MIN__ and those defined on the command line. if (SM.isWrittenInBuiltinFile(DefLoc) || SM.isWrittenInCommandLineFile(DefLoc) || Name->getName() == "__GCC_HAVE_DWARF2_CFI_ASM") return true; auto ID = getSymbolIDCached(Name->getName(), MI, SM); if (!ID) return true; auto SpellingLoc = SM.getSpellingLoc(Loc); bool IsMainFileOnly = SM.isInMainFile(SM.getExpansionLoc(DefLoc)) && !isHeaderFile(SM.getFileEntryRefForID(SM.getMainFileID())->getName(), ASTCtx->getLangOpts()); // Do not store references to main-file macros. if ((static_cast(Opts.RefFilter) & Roles) && !IsMainFileOnly && (Opts.RefsInHeaders || SM.getFileID(SpellingLoc) == SM.getMainFileID())) { // FIXME: Populate container information for macro references. // FIXME: All MacroRefs are marked as Spelled now, but this should be // checked. addRef(ID, SymbolRef{Loc, SM.getFileID(Loc), Roles, /*Container=*/nullptr, /*Spelled=*/true}); } // Collect symbols. if (!Opts.CollectMacro) return true; // Skip main-file macros if we are not collecting them. if (IsMainFileOnly && !Opts.CollectMainFileSymbols) return false; // Mark the macro as referenced if this is a reference coming from the main // file. The macro may not be an interesting symbol, but it's cheaper to check // at the end. if (Opts.CountReferences && (Roles & static_cast(index::SymbolRole::Reference)) && SM.getFileID(SpellingLoc) == SM.getMainFileID()) ReferencedSymbols.insert(ID); // Don't continue indexing if this is a mere reference. // FIXME: remove macro with ID if it is undefined. if (!(Roles & static_cast(index::SymbolRole::Declaration) || Roles & static_cast(index::SymbolRole::Definition))) return true; // Only collect one instance in case there are multiple. if (Symbols.find(ID) != nullptr) return true; Symbol S; S.ID = std::move(ID); S.Name = Name->getName(); if (!IsMainFileOnly) { S.Flags |= Symbol::IndexedForCodeCompletion; S.Flags |= Symbol::VisibleOutsideFile; } S.SymInfo = index::getSymbolInfoForMacro(*MI); S.Origin = Opts.Origin; // FIXME: use the result to filter out symbols. shouldIndexFile(SM.getFileID(Loc)); if (auto DeclLoc = getTokenLocation(DefLoc)) S.CanonicalDeclaration = *DeclLoc; CodeCompletionResult SymbolCompletion(Name); const auto *CCS = SymbolCompletion.CreateCodeCompletionStringForMacro( *PP, *CompletionAllocator, *CompletionTUInfo); std::string Signature; std::string SnippetSuffix; getSignature(*CCS, &Signature, &SnippetSuffix, SymbolCompletion.Kind, SymbolCompletion.CursorKind); S.Signature = Signature; S.CompletionSnippetSuffix = SnippetSuffix; IndexedMacros.insert(Name); setIncludeLocation(S, DefLoc, include_cleaner::Macro{Name, DefLoc}); Symbols.insert(S); return true; } void SymbolCollector::processRelations( const NamedDecl &ND, const SymbolID &ID, ArrayRef Relations) { for (const auto &R : Relations) { auto RKind = indexableRelation(R); if (!RKind) continue; const Decl *Object = R.RelatedSymbol; auto ObjectID = getSymbolIDCached(Object); if (!ObjectID) continue; // Record the relation. // TODO: There may be cases where the object decl is not indexed for some // reason. Those cases should probably be removed in due course, but for // now there are two possible ways to handle it: // (A) Avoid storing the relation in such cases. // (B) Store it anyways. Clients will likely lookup() the SymbolID // in the index and find nothing, but that's a situation they // probably need to handle for other reasons anyways. // We currently do (B) because it's simpler. if (*RKind == RelationKind::BaseOf) this->Relations.insert({ID, *RKind, ObjectID}); else if (*RKind == RelationKind::OverriddenBy) this->Relations.insert({ObjectID, *RKind, ID}); } } void SymbolCollector::setIncludeLocation(const Symbol &S, SourceLocation DefLoc, const include_cleaner::Symbol &Sym) { const auto &SM = PP->getSourceManager(); if (!Opts.CollectIncludePath || shouldCollectIncludePath(S.SymInfo.Kind) == Symbol::Invalid) return; // Use the expansion location to get the #include header since this is // where the symbol is exposed. if (FileID FID = SM.getDecomposedExpansionLoc(DefLoc).first; FID.isValid()) IncludeFiles[S.ID] = FID; // We update providers for a symbol with each occurence, as SymbolCollector // might run while parsing, rather than at the end of a translation unit. // Hence we see more and more redecls over time. SymbolProviders[S.ID] = include_cleaner::headersForSymbol(Sym, SM, Opts.PragmaIncludes); } llvm::StringRef getStdHeader(const Symbol *S, const LangOptions &LangOpts) { tooling::stdlib::Lang Lang = tooling::stdlib::Lang::CXX; if (LangOpts.C11) Lang = tooling::stdlib::Lang::C; else if(!LangOpts.CPlusPlus) return ""; if (S->Scope == "std::" && S->Name == "move") { if (!S->Signature.contains(',')) return ""; return ""; } if (auto StdSym = tooling::stdlib::Symbol::named(S->Scope, S->Name, Lang)) if (auto Header = StdSym->header()) return Header->name(); return ""; } void SymbolCollector::finish() { // At the end of the TU, add 1 to the refcount of all referenced symbols. for (const auto &ID : ReferencedSymbols) { if (const auto *S = Symbols.find(ID)) { // SymbolSlab::Builder returns const symbols because strings are interned // and modifying returned symbols without inserting again wouldn't go // well. const_cast is safe here as we're modifying a data owned by the // Symbol. This reduces time spent in SymbolCollector by ~1%. ++const_cast(S)->References; } } if (Opts.CollectMacro) { assert(PP); // First, drop header guards. We can't identify these until EOF. for (const IdentifierInfo *II : IndexedMacros) { if (const auto *MI = PP->getMacroDefinition(II).getMacroInfo()) if (auto ID = getSymbolIDCached(II->getName(), MI, PP->getSourceManager())) if (MI->isUsedForHeaderGuard()) Symbols.erase(ID); } } llvm::DenseMap FileToContainsImportsOrObjC; llvm::DenseMap HeaderSpelling; // Fill in IncludeHeaders. // We delay this until end of TU so header guards are all resolved. for (const auto &[SID, Providers] : SymbolProviders) { const Symbol *S = Symbols.find(SID); if (!S) continue; FileID FID = IncludeFiles.lookup(SID); // Determine if the FID is #include'd or #import'ed. Symbol::IncludeDirective Directives = Symbol::Invalid; auto CollectDirectives = shouldCollectIncludePath(S->SymInfo.Kind); if ((CollectDirectives & Symbol::Include) != 0) Directives |= Symbol::Include; // Only allow #import for symbols from ObjC-like files. if ((CollectDirectives & Symbol::Import) != 0 && FID.isValid()) { auto [It, Inserted] = FileToContainsImportsOrObjC.try_emplace(FID); if (Inserted) It->second = FilesWithObjCConstructs.contains(FID) || tooling::codeContainsImports( ASTCtx->getSourceManager().getBufferData(FID)); if (It->second) Directives |= Symbol::Import; } if (Directives == Symbol::Invalid) continue; // Use the include location-based logic for Objective-C symbols. if (Directives & Symbol::Import) { llvm::StringRef IncludeHeader = getStdHeader(S, ASTCtx->getLangOpts()); if (IncludeHeader.empty()) IncludeHeader = HeaderFileURIs->getIncludeHeader(FID); if (!IncludeHeader.empty()) { auto NewSym = *S; NewSym.IncludeHeaders.push_back({IncludeHeader, 1, Directives}); Symbols.insert(NewSym); } // FIXME: use providers from include-cleaner library once it's polished // for Objective-C. continue; } // For #include's, use the providers computed by the include-cleaner // library. assert(Directives == Symbol::Include); // Ignore providers that are not self-contained, this is especially // important for symbols defined in the main-file. We want to prefer the // header, if possible. // TODO: Limit this to specifically ignore main file, when we're indexing a // non-header file? auto SelfContainedProvider = [this](llvm::ArrayRef Providers) -> std::optional { for (const auto &H : Providers) { if (H.kind() != include_cleaner::Header::Physical) return H; if (tooling::isSelfContainedHeader(H.physical(), PP->getSourceManager(), PP->getHeaderSearchInfo())) return H; } return std::nullopt; }; const auto OptionalProvider = SelfContainedProvider(Providers); if (!OptionalProvider) continue; const auto &H = *OptionalProvider; const auto [SpellingIt, Inserted] = HeaderSpelling.try_emplace(H); if (Inserted) { auto &SM = ASTCtx->getSourceManager(); if (H.kind() == include_cleaner::Header::Kind::Physical) { // FIXME: Get rid of this once include-cleaner has support for system // headers. if (auto Canonical = HeaderFileURIs->mapCanonical(H.physical().getName()); !Canonical.empty()) SpellingIt->second = Canonical; // For physical files, prefer URIs as spellings might change // depending on the translation unit. else if (tooling::isSelfContainedHeader(H.physical(), SM, PP->getHeaderSearchInfo())) SpellingIt->second = HeaderFileURIs->toURI(H.physical()); } else { SpellingIt->second = include_cleaner::spellHeader( {H, PP->getHeaderSearchInfo(), SM.getFileEntryForID(SM.getMainFileID())}); } } if (!SpellingIt->second.empty()) { auto NewSym = *S; NewSym.IncludeHeaders.push_back({SpellingIt->second, 1, Directives}); Symbols.insert(NewSym); } } ReferencedSymbols.clear(); IncludeFiles.clear(); SymbolProviders.clear(); FilesWithObjCConstructs.clear(); } const Symbol *SymbolCollector::addDeclaration(const NamedDecl &ND, SymbolID ID, bool IsMainFileOnly) { auto &Ctx = ND.getASTContext(); auto &SM = Ctx.getSourceManager(); Symbol S; S.ID = std::move(ID); std::string QName = printQualifiedName(ND); // FIXME: this returns foo:bar: for objective-C methods, we prefer only foo: // for consistency with CodeCompletionString and a clean name/signature split. std::tie(S.Scope, S.Name) = splitQualifiedName(QName); std::string TemplateSpecializationArgs = printTemplateSpecializationArgs(ND); S.TemplateSpecializationArgs = TemplateSpecializationArgs; // We collect main-file symbols, but do not use them for code completion. if (!IsMainFileOnly && isIndexedForCodeCompletion(ND, Ctx)) S.Flags |= Symbol::IndexedForCodeCompletion; if (isImplementationDetail(&ND)) S.Flags |= Symbol::ImplementationDetail; if (!IsMainFileOnly) S.Flags |= Symbol::VisibleOutsideFile; S.SymInfo = index::getSymbolInfo(&ND); auto Loc = nameLocation(ND, SM); assert(Loc.isValid() && "Invalid source location for NamedDecl"); // FIXME: use the result to filter out symbols. auto FID = SM.getFileID(Loc); shouldIndexFile(FID); if (auto DeclLoc = getTokenLocation(Loc)) S.CanonicalDeclaration = *DeclLoc; S.Origin = Opts.Origin; if (ND.getAvailability() == AR_Deprecated) S.Flags |= Symbol::Deprecated; // Add completion info. // FIXME: we may want to choose a different redecl, or combine from several. assert(ASTCtx && PP && "ASTContext and Preprocessor must be set."); // We use the primary template, as clang does during code completion. CodeCompletionResult SymbolCompletion(&getTemplateOrThis(ND), 0); const auto *CCS = SymbolCompletion.CreateCodeCompletionString( *ASTCtx, *PP, CodeCompletionContext::CCC_Symbol, *CompletionAllocator, *CompletionTUInfo, /*IncludeBriefComments*/ false); std::string Documentation = formatDocumentation(*CCS, getDocComment(Ctx, SymbolCompletion, /*CommentsFromHeaders=*/true)); if (!(S.Flags & Symbol::IndexedForCodeCompletion)) { if (Opts.StoreAllDocumentation) S.Documentation = Documentation; Symbols.insert(S); return Symbols.find(S.ID); } S.Documentation = Documentation; std::string Signature; std::string SnippetSuffix; getSignature(*CCS, &Signature, &SnippetSuffix, SymbolCompletion.Kind, SymbolCompletion.CursorKind); S.Signature = Signature; S.CompletionSnippetSuffix = SnippetSuffix; std::string ReturnType = getReturnType(*CCS); S.ReturnType = ReturnType; std::optional TypeStorage; if (S.Flags & Symbol::IndexedForCodeCompletion) { TypeStorage = OpaqueType::fromCompletionResult(*ASTCtx, SymbolCompletion); if (TypeStorage) S.Type = TypeStorage->raw(); } Symbols.insert(S); setIncludeLocation(S, ND.getLocation(), include_cleaner::Symbol{ND}); if (S.SymInfo.Lang == index::SymbolLanguage::ObjC) FilesWithObjCConstructs.insert(FID); return Symbols.find(S.ID); } void SymbolCollector::addDefinition(const NamedDecl &ND, const Symbol &DeclSym) { if (DeclSym.Definition) return; const auto &SM = ND.getASTContext().getSourceManager(); auto Loc = nameLocation(ND, SM); shouldIndexFile(SM.getFileID(Loc)); auto DefLoc = getTokenLocation(Loc); // If we saw some forward declaration, we end up copying the symbol. // This is not ideal, but avoids duplicating the "is this a definition" check // in clang::index. We should only see one definition. if (!DefLoc) return; Symbol S = DeclSym; // FIXME: use the result to filter out symbols. S.Definition = *DefLoc; Symbols.insert(S); } bool SymbolCollector::shouldIndexFile(FileID FID) { if (!Opts.FileFilter) return true; auto I = FilesToIndexCache.try_emplace(FID); if (I.second) I.first->second = Opts.FileFilter(ASTCtx->getSourceManager(), FID); return I.first->second; } void SymbolCollector::addRef(SymbolID ID, const SymbolRef &SR) { const auto &SM = ASTCtx->getSourceManager(); // FIXME: use the result to filter out references. shouldIndexFile(SR.FID); if (const auto FE = SM.getFileEntryRefForID(SR.FID)) { auto Range = getTokenRange(SR.Loc, SM, ASTCtx->getLangOpts()); Ref R; R.Location.Start = Range.first; R.Location.End = Range.second; R.Location.FileURI = HeaderFileURIs->toURI(*FE).c_str(); R.Kind = toRefKind(SR.Roles, SR.Spelled); R.Container = getSymbolIDCached(SR.Container); Refs.insert(ID, R); } } SymbolID SymbolCollector::getSymbolIDCached(const Decl *D) { auto It = DeclToIDCache.try_emplace(D, SymbolID{}); if (It.second) It.first->second = getSymbolID(D); return It.first->second; } SymbolID SymbolCollector::getSymbolIDCached(const llvm::StringRef MacroName, const MacroInfo *MI, const SourceManager &SM) { auto It = MacroToIDCache.try_emplace(MI, SymbolID{}); if (It.second) It.first->second = getSymbolID(MacroName, MI, SM); return It.first->second; } } // namespace clangd } // namespace clang