diff --git a/infra/base-images/base-builder/indexer/manifest_types.py b/infra/base-images/base-builder/indexer/manifest_types.py index 716ca6a99816..a501d4357057 100644 --- a/infra/base-images/base-builder/indexer/manifest_types.py +++ b/infra/base-images/base-builder/indexer/manifest_types.py @@ -40,7 +40,6 @@ import manifest_constants import pathlib - SRC_DIR = manifest_constants.SRC_DIR OBJ_DIR = manifest_constants.OBJ_DIR INDEX_DIR = manifest_constants.INDEX_DIR @@ -406,8 +405,27 @@ def save_build( archive_path: pathlib.PurePath, out_dir: pathlib.PurePath = pathlib.Path("/out"), overwrite: bool = True, + sanitize_source_dir: bool = True, ) -> Self: - """Saves a build archive with this Manifest.""" + """Saves a build archive with this Manifest. + + Args: + source_dir: The directory containing the source code, or None. + build_dir: The directory containing the build artifacts. + index_dir: The directory containing the source code index files. + archive_path: The path where the build archive should be saved. + out_dir: The output directory path in the container. + overwrite: Whether to overwrite the archive if it already exists. + sanitize_source_dir: Whether to sanitize the source directory (excluding + large or unnecessary files like .git directories, seed corpora, and + build artifacts) when saving. + + Returns: + This Manifest instance. + + Raises: + FileExistsError: If the archive already exists and overwrite is False. + """ if os.path.exists(archive_path) and not overwrite: raise FileExistsError(f"Not overwriting existing archive {archive_path}") @@ -420,39 +438,56 @@ def save_build( def _save_dir( path: pathlib.PurePath, prefix: pathlib.Path, + *, + sanitize: bool = True, exclude_build_artifacts: bool = False, only_include_target: str | None = None, - ): + ) -> None: + """Saves a directory to the build archive. + + Args: + path: The directory path to save. + prefix: The prefix path in the archive. + sanitize: Whether to sanitize the directory files. + exclude_build_artifacts: Whether to exclude ELF files. + only_include_target: If set, only this specific ELF target and .so + files are included. + """ + if not sanitize: + tar.add(path.as_posix(), arcname=prefix) + return + prefix = prefix.as_posix() + "/" for root, _, files in os.walk(path): for file in files: - if file.endswith("_seed_corpus.zip"): + file_path = pathlib.Path(root, file) + + if file_path.name.endswith("_seed_corpus.zip"): # Don't copy over the seed corpus -- it's not necessary. continue - if "/.git/" in root or root.endswith("/.git"): + if any(p.name == ".git" for p in file_path.parents): # Skip the .git directory -- it can be large. continue - - file = pathlib.Path(root, file) - if exclude_build_artifacts and _is_elf(file): + if exclude_build_artifacts and _is_elf(file_path): continue - if only_include_target and _is_elf(file): - # Skip ELF files that aren't the relevant target (unless it's a - # shared library). - if ( - file.name != only_include_target - and ".so" not in file.name - and not file.absolute().is_relative_to(out_dir / "lib") - ): - continue + if ( + only_include_target + and _is_elf(file_path) + and file_path.name != only_include_target + and ".so" not in file_path.name + and not file_path.absolute().is_relative_to(out_dir / "lib") + ): + # Skip ELF files that aren't the relevant target (unless it's + # a shared library). + continue tar.add( # Don't try to replicate symlinks in the tarfile, because they # can lead to various issues (e.g. absolute symlinks). - file.resolve().as_posix(), - arcname=prefix + str(file.relative_to(path)), + file_path.resolve().as_posix(), + arcname=f"{prefix}{file_path.relative_to(path)}", ) dumped_self = self @@ -480,7 +515,12 @@ def _save_dir( _save_dir(index_dir, INDEX_DIR) if source_dir: - _save_dir(source_dir, SRC_DIR, exclude_build_artifacts=True) + _save_dir( + source_dir, + SRC_DIR, + sanitize=sanitize_source_dir, + exclude_build_artifacts=True, + ) # Only include the relevant target for the snapshot, to save on disk # space. diff --git a/infra/indexer/frontend/ast_visitor.cc b/infra/indexer/frontend/ast_visitor.cc index 4c0feb8a1c1a..dca1e0206407 100644 --- a/infra/indexer/frontend/ast_visitor.cc +++ b/infra/indexer/frontend/ast_visitor.cc @@ -108,10 +108,19 @@ bool IsParentADefinition(const clang::Decl* decl) { const clang::ClassTemplateDecl* GetClassTemplateDefinition( const clang::ClassTemplateDecl* class_template_decl) { - if (class_template_decl->getTemplatedDecl()->getDefinition()) { - class_template_decl = class_template_decl->getTemplatedDecl() - ->getDefinition() - ->getDescribedClassTemplate(); + if (!class_template_decl) { + return nullptr; + } + if (class_template_decl->getTemplatedDecl() && + class_template_decl->getTemplatedDecl()->getDefinition()) { + const auto* definition = + class_template_decl->getTemplatedDecl()->getDefinition(); + if (definition) { + const auto* described = definition->getDescribedClassTemplate(); + if (described) { + return described; + } + } } return class_template_decl; } @@ -141,7 +150,13 @@ const clang::Decl* GetSpecializationDecl( const clang::ClassTemplateDecl* class_template_decl, const llvm::ArrayRef template_arguments, const clang::ASTContext& context) { + if (!class_template_decl) { + return nullptr; + } class_template_decl = GetClassTemplateDefinition(class_template_decl); + if (!class_template_decl) { + return nullptr; + } const clang::Decl* decl = class_template_decl; const auto* specialization_decl = FindSpecialization(class_template_decl, template_arguments, context); @@ -280,8 +295,7 @@ const clang::NamedDecl* GetTemplatePrototypeNamedDecl( named_decl->getDeclContext())) { if (const clang::FunctionDecl* instantiation_pattern = function_decl->getTemplateInstantiationPattern()) { - template_context = instantiation_pattern; - template_decl = instantiation_pattern->getDescribedFunctionTemplate(); + template_context = instantiation_pattern->getDefinition(); } else if (function_decl->getDescribedFunctionTemplate() && function_decl->getDescribedFunctionTemplate() ->getInstantiatedFromMemberTemplate()) { @@ -1046,13 +1060,21 @@ LocationId AstVisitor::GetLocationId(clang::SourceLocation start, } LocationId AstVisitor::GetLocationId(const clang::Decl* decl) { + if (!decl) { + return kInvalidLocationId; + } + // If we have a template specialization or instantiation, we should make // sure we use the source location that matches the closest explicit // specialization instead of the base template. if (llvm::isa(decl)) { const auto* specialization_decl = llvm::cast(decl); - decl = GetSpecializationDecl(specialization_decl, context_); + const auto* resolved_decl = + GetSpecializationDecl(specialization_decl, context_); + if (resolved_decl) { + decl = resolved_decl; + } } // For class template definitions, the AST has two nodes: @@ -1070,7 +1092,11 @@ LocationId AstVisitor::GetLocationId(const clang::Decl* decl) { const auto* class_template_decl = cxx_record_decl->getDescribedClassTemplate(); if (class_template_decl) { - decl = GetClassTemplateDefinition(class_template_decl); + const auto* resolved_template = + GetClassTemplateDefinition(class_template_decl); + if (resolved_template) { + decl = resolved_template; + } } } @@ -1083,16 +1109,26 @@ LocationId AstVisitor::GetLocationId(const clang::Decl* decl) { if (llvm::isa(decl)) { const auto* function_decl = llvm::cast(decl); if (function_decl->isTemplateInstantiation()) { - function_decl = function_decl->getTemplateInstantiationPattern(); + const auto* pattern = function_decl->getTemplateInstantiationPattern(); + if (pattern) { + function_decl = pattern; + if (function_decl->getDefinition()) { + function_decl = function_decl->getDefinition(); + } + } } else if (function_decl->getTemplateSpecializationInfo()) { const auto* tmp_info = function_decl->getTemplateSpecializationInfo(); - function_decl = tmp_info->getFunction(); + if (tmp_info && tmp_info->getFunction()) { + function_decl = tmp_info->getFunction(); + } } decl = function_decl; - const auto* func_template = function_decl->getDescribedFunctionTemplate(); - if (func_template) { - decl = func_template; + if (function_decl) { + const auto* func_template = function_decl->getDescribedFunctionTemplate(); + if (func_template) { + decl = func_template; + } } } @@ -1420,8 +1456,9 @@ std::optional AstVisitor::GetEntityForDecl(const clang::Decl* decl, // Check for template instantiation. const clang::Decl* function_template = nullptr; - if (function_decl->getTemplateInstantiationPattern()) { - function_template = function_decl->getTemplateInstantiationPattern(); + if (const auto* instantiation_pattern = + function_decl->getTemplateInstantiationPattern()) { + function_template = instantiation_pattern->getDefinition(); } else if (function_decl->getDescribedFunctionTemplate() && function_decl->getDescribedFunctionTemplate() ->getInstantiatedFromMemberTemplate()) { diff --git a/infra/indexer/main.cc b/infra/indexer/main.cc index 43b5e257fdd5..35a51a3a94ff 100644 --- a/infra/indexer/main.cc +++ b/infra/indexer/main.cc @@ -19,7 +19,6 @@ #include #include -#include "init.h" #include "indexer/frontend/frontend.h" #include "indexer/index/file_copier.h" #include "indexer/index/sqlite.h" @@ -28,6 +27,7 @@ #include "absl/log/check.h" #include "absl/log/log.h" #include "absl/strings/string_view.h" +#include "init.h" #include "clang/Tooling/AllTUsExecution.h" #include "clang/Tooling/CompilationDatabase.h" #include "clang/Tooling/Tooling.h"