//===--- DebugIR.cpp - Transform debug metadata to allow debugging IR -----===// // // The LLVM Compiler Infrastructure // // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. // //===----------------------------------------------------------------------===// // // A Module transform pass that emits a succinct version of the IR and replaces // the source file metadata to allow debuggers to step through the IR. // // FIXME: instead of replacing debug metadata, this pass should allow for // additional metadata to be used to point capable debuggers to the IR file // without destroying the mapping to the original source file. // //===----------------------------------------------------------------------===// #define DEBUG_TYPE "debug-ir" #include "llvm/ADT/ValueMap.h" #include "llvm/Assembly/AssemblyAnnotationWriter.h" #include "llvm/DebugInfo.h" #include "llvm/DIBuilder.h" #include "llvm/InstVisitor.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/Instruction.h" #include "llvm/IR/Module.h" #include "llvm/Transforms/Instrumentation.h" #include "llvm/Transforms/Utils/Cloning.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ToolOutputFile.h" #include "llvm/Support/FormattedStream.h" #include "llvm/Support/FileSystem.h" #include "llvm/Support/Path.h" #include "DebugIR.h" #include <string> #define STR_HELPER(x) #x #define STR(x) STR_HELPER(x) using namespace llvm; namespace { /// Builds a map of Value* to line numbers on which the Value appears in a /// textual representation of the IR by plugging into the AssemblyWriter by /// masquerading as an AssemblyAnnotationWriter. class ValueToLineMap : public AssemblyAnnotationWriter { ValueMap<const Value *, unsigned int> Lines; typedef ValueMap<const Value *, unsigned int>::const_iterator LineIter; void addEntry(const Value *V, formatted_raw_ostream &Out) { Out.flush(); Lines.insert(std::make_pair(V, Out.getLine() + 1)); } public: /// Prints Module to a null buffer in order to build the map of Value pointers /// to line numbers. ValueToLineMap(const Module *M) { raw_null_ostream ThrowAway; M->print(ThrowAway, this); } // This function is called after an Instruction, GlobalValue, or GlobalAlias // is printed. void printInfoComment(const Value &V, formatted_raw_ostream &Out) { addEntry(&V, Out); } void emitFunctionAnnot(const Function *F, formatted_raw_ostream &Out) { addEntry(F, Out); } /// If V appears on a line in the textual IR representation, sets Line to the /// line number and returns true, otherwise returns false. bool getLine(const Value *V, unsigned int &Line) const { LineIter i = Lines.find(V); if (i != Lines.end()) { Line = i->second; return true; } return false; } }; /// Removes debug intrisncs like llvm.dbg.declare and llvm.dbg.value. class DebugIntrinsicsRemover : public InstVisitor<DebugIntrinsicsRemover> { void remove(Instruction &I) { I.eraseFromParent(); } public: static void process(Module &M) { DebugIntrinsicsRemover Remover; Remover.visit(&M); } void visitDbgDeclareInst(DbgDeclareInst &I) { remove(I); } void visitDbgValueInst(DbgValueInst &I) { remove(I); } void visitDbgInfoIntrinsic(DbgInfoIntrinsic &I) { remove(I); } }; /// Removes debug metadata (!dbg) nodes from all instructions, and optionally /// metadata named "llvm.dbg.cu" if RemoveNamedInfo is true. class DebugMetadataRemover : public InstVisitor<DebugMetadataRemover> { bool RemoveNamedInfo; public: static void process(Module &M, bool RemoveNamedInfo = true) { DebugMetadataRemover Remover(RemoveNamedInfo); Remover.run(&M); } DebugMetadataRemover(bool RemoveNamedInfo) : RemoveNamedInfo(RemoveNamedInfo) {} void visitInstruction(Instruction &I) { if (I.getMetadata(LLVMContext::MD_dbg)) I.setMetadata(LLVMContext::MD_dbg, 0); } void run(Module *M) { // Remove debug metadata attached to instructions visit(M); if (RemoveNamedInfo) { // Remove CU named metadata (and all children nodes) NamedMDNode *Node = M->getNamedMetadata("llvm.dbg.cu"); if (Node) M->eraseNamedMetadata(Node); } } }; /// Updates debug metadata in a Module: /// - changes Filename/Directory to values provided on construction /// - adds/updates line number (DebugLoc) entries associated with each /// instruction to reflect the instruction's location in an LLVM IR file class DIUpdater : public InstVisitor<DIUpdater> { /// Builder of debug information DIBuilder Builder; /// Helper for type attributes/sizes/etc DataLayout Layout; /// Map of Value* to line numbers const ValueToLineMap LineTable; /// Map of Value* (in original Module) to Value* (in optional cloned Module) const ValueToValueMapTy *VMap; /// Directory of debug metadata DebugInfoFinder Finder; /// Source filename and directory StringRef Filename; StringRef Directory; // CU nodes needed when creating DI subprograms MDNode *FileNode; MDNode *LexicalBlockFileNode; const MDNode *CUNode; ValueMap<const Function *, MDNode *> SubprogramDescriptors; DenseMap<const Type *, MDNode *> TypeDescriptors; public: DIUpdater(Module &M, StringRef Filename = StringRef(), StringRef Directory = StringRef(), const Module *DisplayM = 0, const ValueToValueMapTy *VMap = 0) : Builder(M), Layout(&M), LineTable(DisplayM ? DisplayM : &M), VMap(VMap), Finder(), Filename(Filename), Directory(Directory), FileNode(0), LexicalBlockFileNode(0), CUNode(0) { Finder.processModule(M); visit(&M); } ~DIUpdater() { Builder.finalize(); } void visitModule(Module &M) { if (Finder.compile_unit_count() > 1) report_fatal_error("DebugIR pass supports only a signle compile unit per " "Module."); createCompileUnit( Finder.compile_unit_count() == 1 ? *Finder.compile_unit_begin() : 0); } void visitFunction(Function &F) { if (F.isDeclaration() || findDISubprogram(&F)) return; StringRef MangledName = F.getName(); DICompositeType Sig = createFunctionSignature(&F); // find line of function declaration unsigned Line = 0; if (!findLine(&F, Line)) { DEBUG(dbgs() << "WARNING: No line for Function " << F.getName().str() << "\n"); return; } Instruction *FirstInst = F.begin()->begin(); unsigned ScopeLine = 0; if (!findLine(FirstInst, ScopeLine)) { DEBUG(dbgs() << "WARNING: No line for 1st Instruction in Function " << F.getName().str() << "\n"); return; } bool Local = F.hasInternalLinkage(); bool IsDefinition = !F.isDeclaration(); bool IsOptimized = false; int FuncFlags = llvm::DIDescriptor::FlagPrototyped; assert(CUNode && FileNode); DISubprogram Sub = Builder.createFunction( DICompileUnit(CUNode), F.getName(), MangledName, DIFile(FileNode), Line, Sig, Local, IsDefinition, ScopeLine, FuncFlags, IsOptimized, &F); assert(Sub.isSubprogram()); DEBUG(dbgs() << "create subprogram mdnode " << *Sub << ": " << "\n"); SubprogramDescriptors.insert(std::make_pair(&F, Sub)); } void visitInstruction(Instruction &I) { DebugLoc Loc(I.getDebugLoc()); /// If a ValueToValueMap is provided, use it to get the real instruction as /// the line table was generated on a clone of the module on which we are /// operating. Value *RealInst = 0; if (VMap) RealInst = VMap->lookup(&I); if (!RealInst) RealInst = &I; unsigned Col = 0; // FIXME: support columns unsigned Line; if (!LineTable.getLine(RealInst, Line)) { // Instruction has no line, it may have been removed (in the module that // will be passed to the debugger) so there is nothing to do here. DEBUG(dbgs() << "WARNING: no LineTable entry for instruction " << RealInst << "\n"); DEBUG(RealInst->dump()); return; } DebugLoc NewLoc; if (!Loc.isUnknown()) // I had a previous debug location: re-use the DebugLoc NewLoc = DebugLoc::get(Line, Col, Loc.getScope(RealInst->getContext()), Loc.getInlinedAt(RealInst->getContext())); else if (MDNode *scope = findScope(&I)) NewLoc = DebugLoc::get(Line, Col, scope, 0); else { DEBUG(dbgs() << "WARNING: no valid scope for instruction " << &I << ". no DebugLoc will be present." << "\n"); return; } addDebugLocation(I, NewLoc); } private: void createCompileUnit(MDNode *CUToReplace) { std::string Flags; bool IsOptimized = false; StringRef Producer; unsigned RuntimeVersion(0); StringRef SplitName; if (CUToReplace) { // save fields from existing CU to re-use in the new CU DICompileUnit ExistingCU(CUToReplace); Producer = ExistingCU.getProducer(); IsOptimized = ExistingCU.isOptimized(); Flags = ExistingCU.getFlags(); RuntimeVersion = ExistingCU.getRunTimeVersion(); SplitName = ExistingCU.getSplitDebugFilename(); } else { Producer = "LLVM Version " STR(LLVM_VERSION_MAJOR) "." STR(LLVM_VERSION_MINOR); } CUNode = Builder.createCompileUnit(dwarf::DW_LANG_C99, Filename, Directory, Producer, IsOptimized, Flags, RuntimeVersion); if (CUToReplace) CUToReplace->replaceAllUsesWith(const_cast<MDNode *>(CUNode)); DICompileUnit CU(CUNode); FileNode = Builder.createFile(Filename, Directory); LexicalBlockFileNode = Builder.createLexicalBlockFile(CU, DIFile(FileNode)); } /// Returns the MDNode* that represents the DI scope to associate with I MDNode *findScope(const Instruction *I) { const Function *F = I->getParent()->getParent(); if (MDNode *ret = findDISubprogram(F)) return ret; DEBUG(dbgs() << "WARNING: Using fallback lexical block file scope " << LexicalBlockFileNode << " as scope for instruction " << I << "\n"); return LexicalBlockFileNode; } /// Returns the MDNode* that is the descriptor for F MDNode *findDISubprogram(const Function *F) { typedef ValueMap<const Function *, MDNode *>::const_iterator FuncNodeIter; FuncNodeIter i = SubprogramDescriptors.find(F); if (i != SubprogramDescriptors.end()) return i->second; DEBUG(dbgs() << "searching for DI scope node for Function " << F << " in a list of " << Finder.subprogram_count() << " subprogram nodes" << "\n"); for (DebugInfoFinder::iterator i = Finder.subprogram_begin(), e = Finder.subprogram_end(); i != e; ++i) { DISubprogram S(*i); if (S.getFunction() == F) { DEBUG(dbgs() << "Found DISubprogram " << *i << " for function " << S.getFunction() << "\n"); return *i; } } DEBUG(dbgs() << "unable to find DISubprogram node for function " << F->getName().str() << "\n"); return 0; } /// Sets Line to the line number on which V appears and returns true. If a /// line location for V is not found, returns false. bool findLine(const Value *V, unsigned &Line) { if (LineTable.getLine(V, Line)) return true; if (VMap) { Value *mapped = VMap->lookup(V); if (mapped && LineTable.getLine(mapped, Line)) return true; } return false; } std::string getTypeName(Type *T) { std::string TypeName; raw_string_ostream TypeStream(TypeName); T->print(TypeStream); TypeStream.flush(); return TypeName; } /// Returns the MDNode that represents type T if it is already created, or 0 /// if it is not. MDNode *getType(const Type *T) { typedef DenseMap<const Type *, MDNode *>::const_iterator TypeNodeIter; TypeNodeIter i = TypeDescriptors.find(T); if (i != TypeDescriptors.end()) return i->second; return 0; } /// Returns a DebugInfo type from an LLVM type T. DIDerivedType getOrCreateType(Type *T) { MDNode *N = getType(T); if (N) return DIDerivedType(N); else if (T->isVoidTy()) return DIDerivedType(0); else if (T->isStructTy()) { N = Builder.createStructType( DIScope(LexicalBlockFileNode), T->getStructName(), DIFile(FileNode), 0, Layout.getTypeSizeInBits(T), Layout.getABITypeAlignment(T), 0, DIType(0), DIArray(0)); // filled in later // N is added to the map (early) so that element search below can find it, // so as to avoid infinite recursion for structs that contain pointers to // their own type. TypeDescriptors[T] = N; DICompositeType StructDescriptor(N); SmallVector<Value *, 4> Elements; for (unsigned i = 0; i < T->getStructNumElements(); ++i) Elements.push_back(getOrCreateType(T->getStructElementType(i))); // set struct elements StructDescriptor.setTypeArray(Builder.getOrCreateArray(Elements)); } else if (T->isPointerTy()) { Type *PointeeTy = T->getPointerElementType(); if (!(N = getType(PointeeTy))) N = Builder.createPointerType( getOrCreateType(PointeeTy), Layout.getPointerSizeInBits(), Layout.getPrefTypeAlignment(T), getTypeName(T)); } else if (T->isArrayTy()) { SmallVector<Value *, 1> Subrange; Subrange.push_back( Builder.getOrCreateSubrange(0, T->getArrayNumElements() - 1)); N = Builder.createArrayType(Layout.getTypeSizeInBits(T), Layout.getPrefTypeAlignment(T), getOrCreateType(T->getArrayElementType()), Builder.getOrCreateArray(Subrange)); } else { int encoding = llvm::dwarf::DW_ATE_signed; if (T->isIntegerTy()) encoding = llvm::dwarf::DW_ATE_unsigned; else if (T->isFloatingPointTy()) encoding = llvm::dwarf::DW_ATE_float; N = Builder.createBasicType(getTypeName(T), T->getPrimitiveSizeInBits(), 0, encoding); } TypeDescriptors[T] = N; return DIDerivedType(N); } /// Returns a DebugInfo type that represents a function signature for Func. DICompositeType createFunctionSignature(const Function *Func) { SmallVector<Value *, 4> Params; DIDerivedType ReturnType(getOrCreateType(Func->getReturnType())); Params.push_back(ReturnType); const Function::ArgumentListType &Args(Func->getArgumentList()); for (Function::ArgumentListType::const_iterator i = Args.begin(), e = Args.end(); i != e; ++i) { Type *T(i->getType()); Params.push_back(getOrCreateType(T)); } DIArray ParamArray = Builder.getOrCreateArray(Params); return Builder.createSubroutineType(DIFile(FileNode), ParamArray); } /// Associates Instruction I with debug location Loc. void addDebugLocation(Instruction &I, DebugLoc Loc) { MDNode *MD = Loc.getAsMDNode(I.getContext()); I.setMetadata(LLVMContext::MD_dbg, MD); } }; /// Sets Filename/Directory from the Module identifier and returns true, or /// false if source information is not present. bool getSourceInfoFromModule(const Module &M, std::string &Directory, std::string &Filename) { std::string PathStr(M.getModuleIdentifier()); if (PathStr.length() == 0 || PathStr == "<stdin>") return false; Filename = sys::path::filename(PathStr); SmallVector<char, 16> Path(PathStr.begin(), PathStr.end()); sys::path::remove_filename(Path); Directory = StringRef(Path.data(), Path.size()); return true; } // Sets Filename/Directory from debug information in M and returns true, or // false if no debug information available, or cannot be parsed. bool getSourceInfoFromDI(const Module &M, std::string &Directory, std::string &Filename) { NamedMDNode *CUNode = M.getNamedMetadata("llvm.dbg.cu"); if (!CUNode || CUNode->getNumOperands() == 0) return false; DICompileUnit CU(CUNode->getOperand(0)); if (!CU.Verify()) return false; Filename = CU.getFilename(); Directory = CU.getDirectory(); return true; } } // anonymous namespace namespace llvm { bool DebugIR::getSourceInfo(const Module &M) { ParsedPath = getSourceInfoFromDI(M, Directory, Filename) || getSourceInfoFromModule(M, Directory, Filename); return ParsedPath; } bool DebugIR::updateExtension(StringRef NewExtension) { size_t dot = Filename.find_last_of("."); if (dot == std::string::npos) return false; Filename.erase(dot); Filename += NewExtension.str(); return true; } void DebugIR::generateFilename(OwningPtr<int> &fd) { SmallVector<char, 16> PathVec; fd.reset(new int); sys::fs::createTemporaryFile("debug-ir", "ll", *fd, PathVec); StringRef Path(PathVec.data(), PathVec.size()); Filename = sys::path::filename(Path); sys::path::remove_filename(PathVec); Directory = StringRef(PathVec.data(), PathVec.size()); GeneratedPath = true; } std::string DebugIR::getPath() { SmallVector<char, 16> Path; sys::path::append(Path, Directory, Filename); Path.resize(Filename.size() + Directory.size() + 2); Path[Filename.size() + Directory.size() + 1] = '\0'; return std::string(Path.data()); } void DebugIR::writeDebugBitcode(const Module *M, int *fd) { OwningPtr<raw_fd_ostream> Out; std::string error; if (!fd) { std::string Path = getPath(); Out.reset(new raw_fd_ostream(Path.c_str(), error)); DEBUG(dbgs() << "WRITING debug bitcode from Module " << M << " to file " << Path << "\n"); } else { DEBUG(dbgs() << "WRITING debug bitcode from Module " << M << " to fd " << *fd << "\n"); Out.reset(new raw_fd_ostream(*fd, true)); } M->print(*Out, 0); Out->close(); } void DebugIR::createDebugInfo(Module &M, OwningPtr<Module> &DisplayM) { if (M.getFunctionList().size() == 0) // no functions -- no debug info needed return; OwningPtr<ValueToValueMapTy> VMap; if (WriteSourceToDisk && (HideDebugIntrinsics || HideDebugMetadata)) { VMap.reset(new ValueToValueMapTy); DisplayM.reset(CloneModule(&M, *VMap)); if (HideDebugIntrinsics) DebugIntrinsicsRemover::process(*DisplayM); if (HideDebugMetadata) DebugMetadataRemover::process(*DisplayM); } DIUpdater R(M, Filename, Directory, DisplayM.get(), VMap.get()); } bool DebugIR::isMissingPath() { return Filename.empty() || Directory.empty(); } bool DebugIR::runOnModule(Module &M) { OwningPtr<int> fd; if (isMissingPath() && !getSourceInfo(M)) { if (!WriteSourceToDisk) report_fatal_error("DebugIR unable to determine file name in input. " "Ensure Module contains an identifier, a valid " "DICompileUnit, or construct DebugIR with " "non-empty Filename/Directory parameters."); else generateFilename(fd); } if (!GeneratedPath && WriteSourceToDisk) updateExtension(".debug-ll"); // Clear line numbers. Keep debug info (if any) if we were able to read the // file name from the DICompileUnit descriptor. DebugMetadataRemover::process(M, !ParsedPath); OwningPtr<Module> DisplayM; createDebugInfo(M, DisplayM); if (WriteSourceToDisk) { Module *OutputM = DisplayM.get() ? DisplayM.get() : &M; writeDebugBitcode(OutputM, fd.get()); } DEBUG(M.dump()); return true; } bool DebugIR::runOnModule(Module &M, std::string &Path) { bool result = runOnModule(M); Path = getPath(); return result; } } // llvm namespace char DebugIR::ID = 0; INITIALIZE_PASS(DebugIR, "debug-ir", "Enable debugging IR", false, false) ModulePass *llvm::createDebugIRPass(bool HideDebugIntrinsics, bool HideDebugMetadata, StringRef Directory, StringRef Filename) { return new DebugIR(HideDebugIntrinsics, HideDebugMetadata, Directory, Filename); } ModulePass *llvm::createDebugIRPass() { return new DebugIR(); }