//===- GNUArchiveReader.cpp -----------------------------------------------===// // // The MCLinker Project // // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. // //===----------------------------------------------------------------------===// #include "mcld/LD/GNUArchiveReader.h" #include "mcld/InputTree.h" #include "mcld/LinkerConfig.h" #include "mcld/Module.h" #include "mcld/ADT/SizeTraits.h" #include "mcld/MC/Attribute.h" #include "mcld/MC/Input.h" #include "mcld/LD/ELFObjectReader.h" #include "mcld/LD/ResolveInfo.h" #include "mcld/Support/FileHandle.h" #include "mcld/Support/FileSystem.h" #include "mcld/Support/MemoryArea.h" #include "mcld/Support/MsgHandling.h" #include "mcld/Support/Path.h" #include <llvm/ADT/StringRef.h> #include <llvm/Support/Host.h> #include <cstdlib> #include <cstring> namespace mcld { GNUArchiveReader::GNUArchiveReader(Module& pModule, ELFObjectReader& pELFObjectReader) : m_Module(pModule), m_ELFObjectReader(pELFObjectReader) { } GNUArchiveReader::~GNUArchiveReader() { } /// isMyFormat bool GNUArchiveReader::isMyFormat(Input& pInput, bool& pContinue) const { assert(pInput.hasMemArea()); if (pInput.memArea()->size() < Archive::MAGIC_LEN) return false; llvm::StringRef region = pInput.memArea()->request(pInput.fileOffset(), Archive::MAGIC_LEN); const char* str = region.begin(); bool result = false; assert(str != NULL); pContinue = true; if (isArchive(str) || isThinArchive(str)) result = true; return result; } /// isArchive bool GNUArchiveReader::isArchive(const char* pStr) const { return (memcmp(pStr, Archive::MAGIC, Archive::MAGIC_LEN) == 0); } /// isThinArchive bool GNUArchiveReader::isThinArchive(const char* pStr) const { return (memcmp(pStr, Archive::THIN_MAGIC, Archive::MAGIC_LEN) == 0); } /// isThinArchive bool GNUArchiveReader::isThinArchive(Input& pInput) const { assert(pInput.hasMemArea()); llvm::StringRef region = pInput.memArea()->request(pInput.fileOffset(), Archive::MAGIC_LEN); const char* str = region.begin(); bool result = false; assert(str != NULL); if (isThinArchive(str)) result = true; return result; } bool GNUArchiveReader::readArchive(const LinkerConfig& pConfig, Archive& pArchive) { // bypass the empty archive if (Archive::MAGIC_LEN == pArchive.getARFile().memArea()->size()) return true; if (pArchive.getARFile().attribute()->isWholeArchive()) return includeAllMembers(pConfig, pArchive); // if this is the first time read this archive, setup symtab and strtab if (pArchive.getSymbolTable().empty()) { // read the symtab of the archive readSymbolTable(pArchive); // read the strtab of the archive readStringTable(pArchive); // add root archive to ArchiveMemberMap pArchive.addArchiveMember(pArchive.getARFile().name(), pArchive.inputs().root(), &InputTree::Downward); } // include the needed members in the archive and build up the input tree bool willSymResolved; do { willSymResolved = false; for (size_t idx = 0; idx < pArchive.numOfSymbols(); ++idx) { // bypass if we already decided to include this symbol or not if (Archive::Symbol::Unknown != pArchive.getSymbolStatus(idx)) continue; // bypass if another symbol with the same object file offset is included if (pArchive.hasObjectMember(pArchive.getObjFileOffset(idx))) { pArchive.setSymbolStatus(idx, Archive::Symbol::Include); continue; } // check if we should include this defined symbol Archive::Symbol::Status status = shouldIncludeSymbol(pArchive.getSymbolName(idx)); if (Archive::Symbol::Unknown != status) pArchive.setSymbolStatus(idx, status); if (Archive::Symbol::Include == status) { // include the object member from the given offset includeMember(pConfig, pArchive, pArchive.getObjFileOffset(idx)); willSymResolved = true; } // end of if } // end of for } while (willSymResolved); return true; } /// readMemberHeader - read the header of a member in a archive file and then /// return the corresponding archive member (it may be an input object or /// another archive) /// @param pArchiveRoot - the archive root that holds the strtab (extended /// name table) /// @param pArchiveFile - the archive that contains the needed object /// @param pFileOffset - file offset of the member header in the archive /// @param pNestedOffset - used when we find a nested archive /// @param pMemberSize - the file size of this member Input* GNUArchiveReader::readMemberHeader(Archive& pArchiveRoot, Input& pArchiveFile, uint32_t pFileOffset, uint32_t& pNestedOffset, size_t& pMemberSize) { assert(pArchiveFile.hasMemArea()); llvm::StringRef header_region = pArchiveFile.memArea()->request( (pArchiveFile.fileOffset() + pFileOffset), sizeof(Archive::MemberHeader)); const Archive::MemberHeader* header = reinterpret_cast<const Archive::MemberHeader*>(header_region.begin()); assert(memcmp(header->fmag, Archive::MEMBER_MAGIC, sizeof(header->fmag)) == 0); pMemberSize = atoi(header->size); // parse the member name and nested offset if any std::string member_name; llvm::StringRef name_field(header->name, sizeof(header->name)); if (header->name[0] != '/') { // this is an object file in an archive size_t pos = name_field.find_first_of('/'); member_name.assign(name_field.substr(0, pos).str()); } else { // this is an object/archive file in a thin archive size_t begin = 1; size_t end = name_field.find_first_of(" :"); uint32_t name_offset = 0; // parse the name offset name_field.substr(begin, end - begin).getAsInteger(10, name_offset); if (name_field[end] == ':') { // there is a nested offset begin = end + 1; end = name_field.find_first_of(' ', begin); name_field.substr(begin, end - begin).getAsInteger(10, pNestedOffset); } // get the member name from the extended name table assert(pArchiveRoot.hasStrTable()); begin = name_offset; end = pArchiveRoot.getStrTable().find_first_of('\n', begin); member_name.assign( pArchiveRoot.getStrTable().substr(begin, end - begin - 1)); } Input* member = NULL; bool isThinAR = isThinArchive(pArchiveFile); if (!isThinAR) { // this is an object file in an archive member = pArchiveRoot.getMemberFile( pArchiveFile, isThinAR, member_name, pArchiveFile.path(), (pFileOffset + sizeof(Archive::MemberHeader))); } else { // this is a member in a thin archive // try to find if this is a archive already in the map first Archive::ArchiveMember* ar_member = pArchiveRoot.getArchiveMember(member_name); if (ar_member != NULL) { return ar_member->file; } // get nested file path, the nested file's member name is the relative // path to the archive containing it. sys::fs::Path input_path(pArchiveFile.path().parent_path()); if (!input_path.empty()) input_path.append(sys::fs::Path(member_name)); else input_path.assign(member_name); member = pArchiveRoot.getMemberFile( pArchiveFile, isThinAR, member_name, input_path); } return member; } template <size_t SIZE> static void readSymbolTableEntries(Archive& pArchive, llvm::StringRef pMemRegion) { typedef typename SizeTraits<SIZE>::Offset Offset; const Offset* data = reinterpret_cast<const Offset*>(pMemRegion.begin()); // read the number of symbols Offset number = 0; if (llvm::sys::IsLittleEndianHost) number = mcld::bswap<SIZE>(*data); else number = *data; // set up the pointers for file offset and name offset ++data; const char* name = reinterpret_cast<const char*>(data + number); // add the archive symbols for (Offset i = 0; i < number; ++i) { if (llvm::sys::IsLittleEndianHost) pArchive.addSymbol(name, mcld::bswap<SIZE>(*data)); else pArchive.addSymbol(name, *data); name += strlen(name) + 1; ++data; } } /// readSymbolTable - read the archive symbol map (armap) bool GNUArchiveReader::readSymbolTable(Archive& pArchive) { assert(pArchive.getARFile().hasMemArea()); MemoryArea* memory_area = pArchive.getARFile().memArea(); llvm::StringRef header_region = memory_area->request( (pArchive.getARFile().fileOffset() + Archive::MAGIC_LEN), sizeof(Archive::MemberHeader)); const Archive::MemberHeader* header = reinterpret_cast<const Archive::MemberHeader*>(header_region.begin()); assert(memcmp(header->fmag, Archive::MEMBER_MAGIC, sizeof(header->fmag)) == 0); int symtab_size = atoi(header->size); pArchive.setSymTabSize(symtab_size); if (!pArchive.getARFile().attribute()->isWholeArchive()) { llvm::StringRef symtab_region = memory_area->request( (pArchive.getARFile().fileOffset() + Archive::MAGIC_LEN + sizeof(Archive::MemberHeader)), symtab_size); if (strncmp(header->name, Archive::SVR4_SYMTAB_NAME, strlen(Archive::SVR4_SYMTAB_NAME)) == 0) readSymbolTableEntries<32>(pArchive, symtab_region); else if (strncmp(header->name, Archive::IRIX6_SYMTAB_NAME, strlen(Archive::IRIX6_SYMTAB_NAME)) == 0) readSymbolTableEntries<64>(pArchive, symtab_region); else unreachable(diag::err_unsupported_archive); } return true; } /// readStringTable - read the strtab for long file name of the archive bool GNUArchiveReader::readStringTable(Archive& pArchive) { size_t offset = Archive::MAGIC_LEN + sizeof(Archive::MemberHeader) + pArchive.getSymTabSize(); if ((offset & 1) != 0x0) ++offset; assert(pArchive.getARFile().hasMemArea()); MemoryArea* memory_area = pArchive.getARFile().memArea(); llvm::StringRef header_region = memory_area->request((pArchive.getARFile().fileOffset() + offset), sizeof(Archive::MemberHeader)); const Archive::MemberHeader* header = reinterpret_cast<const Archive::MemberHeader*>(header_region.begin()); assert(memcmp(header->fmag, Archive::MEMBER_MAGIC, sizeof(header->fmag)) == 0); if (memcmp(header->name, Archive::STRTAB_NAME, sizeof(header->name)) == 0) { // read the extended name table int strtab_size = atoi(header->size); llvm::StringRef strtab_region = memory_area->request((pArchive.getARFile().fileOffset() + offset + sizeof(Archive::MemberHeader)), strtab_size); const char* strtab = strtab_region.begin(); pArchive.getStrTable().assign(strtab, strtab_size); } return true; } /// shouldIncludeStatus - given a sym name from armap and check if including /// the corresponding archive member, and then return the decision enum Archive::Symbol::Status GNUArchiveReader::shouldIncludeSymbol( const llvm::StringRef& pSymName) const { // TODO: handle symbol version issue and user defined symbols const ResolveInfo* info = m_Module.getNamePool().findInfo(pSymName); if (info != NULL) { if (!info->isUndef()) return Archive::Symbol::Exclude; if (info->isWeak()) return Archive::Symbol::Unknown; return Archive::Symbol::Include; } return Archive::Symbol::Unknown; } /// includeMember - include the object member in the given file offset, and /// return the size of the object /// @param pConfig - LinkerConfig /// @param pArchiveRoot - the archive root /// @param pFileOffset - file offset of the member header in the archive size_t GNUArchiveReader::includeMember(const LinkerConfig& pConfig, Archive& pArchive, uint32_t pFileOffset) { Input* cur_archive = &(pArchive.getARFile()); Input* member = NULL; uint32_t file_offset = pFileOffset; size_t size = 0; do { uint32_t nested_offset = 0; // use the file offset in current archive to find out the member we // want to include member = readMemberHeader( pArchive, *cur_archive, file_offset, nested_offset, size); assert(member != NULL); // bypass if we get an archive that is already in the map if (Input::Archive == member->type()) { cur_archive = member; file_offset = nested_offset; continue; } // insert a node into the subtree of current archive. Archive::ArchiveMember* parent = pArchive.getArchiveMember(cur_archive->name()); assert(parent != NULL); pArchive.inputs().insert(parent->lastPos, *(parent->move), *member); // move the iterator to new created node, and also adjust the // direction to Afterward for next insertion in this subtree parent->move->move(parent->lastPos); parent->move = &InputTree::Afterward; bool doContinue = false; if (m_ELFObjectReader.isMyFormat(*member, doContinue)) { member->setType(Input::Object); // Set this object as no export if the archive is in the exclude libs. if (pArchive.getARFile().noExport()) { member->setNoExport(); } pArchive.addObjectMember(pFileOffset, parent->lastPos); m_ELFObjectReader.readHeader(*member); m_ELFObjectReader.readSections(*member); m_ELFObjectReader.readSymbols(*member); m_Module.getObjectList().push_back(member); } else if (doContinue && isMyFormat(*member, doContinue)) { member->setType(Input::Archive); // when adding a new archive node, set the iterator to archive // itself, and set the direction to Downward pArchive.addArchiveMember( member->name(), parent->lastPos, &InputTree::Downward); cur_archive = member; file_offset = nested_offset; } else { warning(diag::warn_unrecognized_input_file) << member->path() << pConfig.targets().triple().str(); } } while (Input::Object != member->type()); return size; } /// includeAllMembers - include all object members. This is called if /// --whole-archive is the attribute for this archive file. bool GNUArchiveReader::includeAllMembers(const LinkerConfig& pConfig, Archive& pArchive) { // read the symtab of the archive readSymbolTable(pArchive); // read the strtab of the archive readStringTable(pArchive); // add root archive to ArchiveMemberMap pArchive.addArchiveMember(pArchive.getARFile().name(), pArchive.inputs().root(), &InputTree::Downward); bool isThinAR = isThinArchive(pArchive.getARFile()); uint32_t begin_offset = pArchive.getARFile().fileOffset() + Archive::MAGIC_LEN + sizeof(Archive::MemberHeader) + pArchive.getSymTabSize(); if (pArchive.hasStrTable()) { if ((begin_offset & 1) != 0x0) ++begin_offset; begin_offset += sizeof(Archive::MemberHeader) + pArchive.getStrTable().size(); } uint32_t end_offset = pArchive.getARFile().memArea()->size(); for (uint32_t offset = begin_offset; offset < end_offset; offset += sizeof(Archive::MemberHeader)) { size_t size = includeMember(pConfig, pArchive, offset); if (!isThinAR) { offset += size; } if ((offset & 1) != 0x0) ++offset; } return true; } } // namespace mcld