//===- GNUArchiveReader.cpp -----------------------------------------------===// // // The MCLinker Project // // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. // //===----------------------------------------------------------------------===// #include <mcld/MC/MCLDInfo.h> #include <mcld/MC/MCLDInput.h> #include <mcld/MC/InputTree.h> #include <mcld/LD/GNUArchiveReader.h> #include <mcld/LD/ResolveInfo.h> #include <mcld/LD/ELFObjectReader.h> #include <mcld/Support/FileSystem.h> #include <mcld/Support/FileHandle.h> #include <mcld/Support/MemoryArea.h> #include <mcld/Support/MemoryRegion.h> #include <mcld/Support/MemoryAreaFactory.h> #include <mcld/Support/MsgHandling.h> #include <mcld/Support/Path.h> #include <mcld/ADT/SizeTraits.h> #include <llvm/ADT/StringRef.h> #include <llvm/Support/Host.h> #include <cstring> #include <cstdlib> using namespace mcld; GNUArchiveReader::GNUArchiveReader(MCLDInfo& pLDInfo, MemoryAreaFactory& pMemAreaFactory, ELFObjectReader& pELFObjectReader) : m_LDInfo(pLDInfo), m_MemAreaFactory(pMemAreaFactory), m_ELFObjectReader(pELFObjectReader) { } GNUArchiveReader::~GNUArchiveReader() { } /// isMyFormat bool GNUArchiveReader::isMyFormat(Input& pInput) const { assert(pInput.hasMemArea()); MemoryRegion* region = pInput.memArea()->request(pInput.fileOffset(), Archive::MAGIC_LEN); const char* str = reinterpret_cast<const char*>(region->getBuffer()); bool result = false; assert(NULL != str); if (isArchive(str) || isThinArchive(str)) result = true; pInput.memArea()->release(region); return result; } /// isArchive bool GNUArchiveReader::isArchive(const char* pStr) const { return (0 == memcmp(pStr, Archive::MAGIC, Archive::MAGIC_LEN)); } /// isThinArchive bool GNUArchiveReader::isThinArchive(const char* pStr) const { return (0 == memcmp(pStr, Archive::THIN_MAGIC, Archive::MAGIC_LEN)); } /// isThinArchive bool GNUArchiveReader::isThinArchive(Input& pInput) const { assert(pInput.hasMemArea()); MemoryRegion* region = pInput.memArea()->request(pInput.fileOffset(), Archive::MAGIC_LEN); const char* str = reinterpret_cast<const char*>(region->getBuffer()); bool result = false; assert(NULL != str); if (isThinArchive(str)) result = true; pInput.memArea()->release(region); return result; } bool GNUArchiveReader::readArchive(Archive& pArchive) { // read the symtab of the archive readSymbolTable(pArchive); // read the strtab of the archive readStringTable(pArchive); // add root archive to ArchiveMemberMap pArchive.addArchiveMember(pArchive.getARFile().name(), pArchive.inputs().root(), &InputTree::Downward); // include the needed members in the archive and build up the input tree bool willSymResolved; do { willSymResolved = false; for (size_t idx = 0; idx < pArchive.numOfSymbols(); ++idx) { // bypass if we already decided to include this symbol or not if (Archive::Symbol::Unknown != pArchive.getSymbolStatus(idx)) continue; // bypass if another symbol with the same object file offset is included if (pArchive.hasObjectMember(pArchive.getObjFileOffset(idx))) { pArchive.setSymbolStatus(idx, Archive::Symbol::Include); continue; } // check if we should include this defined symbol Archive::Symbol::Status status = shouldIncludeSymbol(pArchive.getSymbolName(idx)); if (Archive::Symbol::Unknown != status) pArchive.setSymbolStatus(idx, status); if (Archive::Symbol::Include == status) { Input* cur_archive = &(pArchive.getARFile()); Input* member = cur_archive; uint32_t file_offset = pArchive.getObjFileOffset(idx); while ((member != NULL) && (Input::Object != member->type())) { uint32_t nested_offset = 0; // use the file offset in current archive to find out the member we // want to include member = readMemberHeader(pArchive, *cur_archive, file_offset, nested_offset); assert(member != NULL); // bypass if we get an archive that is already in the map if (Input::Archive == member->type()) { cur_archive = member; file_offset = nested_offset; continue; } // insert a node into the subtree of current archive. Archive::ArchiveMember* parent = pArchive.getArchiveMember(cur_archive->name()); assert(NULL != parent); pArchive.inputs().insert(parent->lastPos, *(parent->move), *member); // move the iterator to new created node, and also adjust the // direction to Afterward for next insertion in this subtree parent->move->move(parent->lastPos); parent->move = &InputTree::Afterward; if (m_ELFObjectReader.isMyFormat(*member)) { member->setType(Input::Object); pArchive.addObjectMember(pArchive.getObjFileOffset(idx), parent->lastPos); m_ELFObjectReader.readObject(*member); m_ELFObjectReader.readSections(*member); m_ELFObjectReader.readSymbols(*member); } else if (isMyFormat(*member)) { member->setType(Input::Archive); // when adding a new archive node, set the iterator to archive // itself, and set the direction to Downward pArchive.addArchiveMember(member->name(), parent->lastPos, &InputTree::Downward); cur_archive = member; file_offset = nested_offset; } } // end of while willSymResolved = true; } // end of if } // end of for } while (willSymResolved); return true; } /// readMemberHeader - read the header of a member in a archive file and then /// return the corresponding archive member (it may be an input object or /// another archive) /// @param pArchiveRoot - the archive root that holds the strtab (extended /// name table) /// @param pArchiveFile - the archive that contains the needed object /// @param pFileOffset - file offset of the member header in the archive /// @param pNestedOffset - used when we find a nested archive Input* GNUArchiveReader::readMemberHeader(Archive& pArchiveRoot, Input& pArchiveFile, uint32_t pFileOffset, uint32_t& pNestedOffset) { assert(pArchiveFile.hasMemArea()); MemoryRegion* header_region = pArchiveFile.memArea()->request((pArchiveFile.fileOffset() + pFileOffset), sizeof(Archive::MemberHeader)); const Archive::MemberHeader* header = reinterpret_cast<const Archive::MemberHeader*>(header_region->getBuffer()); assert(0 == memcmp(header->fmag, Archive::MEMBER_MAGIC, 2)); // int size = atoi(header->size); // parse the member name and nested offset if any std::string member_name; llvm::StringRef name_field(header->name, 16); if ('/' != header->name[0]) { // this is an object file in an archive size_t pos = name_field.find_first_of('/'); member_name.assign(name_field.substr(0, pos).str()); } else { // this is an object/archive file in a thin archive size_t begin = 1; size_t end = name_field.find_first_of(" :"); uint32_t name_offset = 0; // parse the name offset name_field.substr(begin, end - begin).getAsInteger(10, name_offset); if (':' == name_field[end]) { // there is a nested offset begin = end + 1; end = name_field.find_first_of(' ', begin); name_field.substr(begin, end - begin).getAsInteger(10, pNestedOffset); } // get the member name from the extended name table begin = name_offset; end = pArchiveRoot.getStrTable().find_first_of('\n', begin); member_name.assign(pArchiveRoot.getStrTable().substr(begin, end - begin -1)); } Input* member = NULL; if (!isThinArchive(pArchiveFile)) { // this is an object file in an archive member = m_LDInfo.inputFactory().produce(member_name, pArchiveFile.path(), Input::Unknown, (pFileOffset + sizeof(Archive::MemberHeader))); assert(member != NULL); member->setMemArea(pArchiveFile.memArea()); LDContext *input_context = m_LDInfo.contextFactory().produce(); member->setContext(input_context); } else { // this is a member in a thin archive // try to find if this is a archive already in the map first Archive::ArchiveMember* ar_member = pArchiveRoot.getArchiveMember(member_name); if (NULL != ar_member) { return ar_member->file; } // get nested file path, the nested file's member name is the relative // path to the archive containing it. sys::fs::Path input_path(pArchiveFile.path().parent_path()); if (!input_path.empty()) input_path.append(member_name); else input_path.assign(member_name); member = m_LDInfo.inputFactory().produce(member_name, input_path, Input::Unknown); assert(member != NULL); MemoryArea* input_memory = m_MemAreaFactory.produce(member->path(), FileHandle::ReadOnly); if (input_memory->handler()->isGood()) { member->setMemArea(input_memory); } else { error(diag::err_cannot_open_input) << member->name() << member->path(); return NULL; } LDContext *input_context = m_LDInfo.contextFactory().produce(input_path); member->setContext(input_context); } pArchiveFile.memArea()->release(header_region); return member; } /// readSymbolTable - read the archive symbol map (armap) bool GNUArchiveReader::readSymbolTable(Archive& pArchive) { assert(pArchive.getARFile().hasMemArea()); MemoryRegion* header_region = pArchive.getARFile().memArea()->request((pArchive.getARFile().fileOffset() + Archive::MAGIC_LEN), sizeof(Archive::MemberHeader)); const Archive::MemberHeader* header = reinterpret_cast<const Archive::MemberHeader*>(header_region->getBuffer()); assert(0 == memcmp(header->fmag, Archive::MEMBER_MAGIC, 2)); int symtab_size = atoi(header->size); pArchive.setSymTabSize(symtab_size); MemoryRegion* symtab_region = pArchive.getARFile().memArea()->request((pArchive.getARFile().fileOffset() + Archive::MAGIC_LEN + sizeof(Archive::MemberHeader)), symtab_size); const uint32_t* data = reinterpret_cast<const uint32_t*>(symtab_region->getBuffer()); // read the number of symbols uint32_t number = 0; if (llvm::sys::isLittleEndianHost()) number = bswap32(*data); else number = *data; // set up the pointers for file offset and name offset ++data; const char* name = reinterpret_cast<const char*>(data + number); // add the archive symbols for (uint32_t i = 0; i < number; ++i) { if (llvm::sys::isLittleEndianHost()) pArchive.addSymbol(name, bswap32(*data)); else pArchive.addSymbol(name, *data); name += strlen(name) + 1; ++data; } pArchive.getARFile().memArea()->release(header_region); pArchive.getARFile().memArea()->release(symtab_region); return true; } /// readStringTable - read the strtab for long file name of the archive bool GNUArchiveReader::readStringTable(Archive& pArchive) { size_t offset = Archive::MAGIC_LEN + sizeof(Archive::MemberHeader) + pArchive.getSymTabSize(); if (0x0 != (offset & 1)) ++offset; assert(pArchive.getARFile().hasMemArea()); MemoryRegion* header_region = pArchive.getARFile().memArea()->request((pArchive.getARFile().fileOffset() + offset), sizeof(Archive::MemberHeader)); const Archive::MemberHeader* header = reinterpret_cast<const Archive::MemberHeader*>(header_region->getBuffer()); assert(0 == memcmp(header->fmag, Archive::MEMBER_MAGIC, 2)); int strtab_size = atoi(header->size); MemoryRegion* strtab_region = pArchive.getARFile().memArea()->request((pArchive.getARFile().fileOffset() + offset + sizeof(Archive::MemberHeader)), strtab_size); const char* strtab = reinterpret_cast<const char*>(strtab_region->getBuffer()); pArchive.getStrTable().assign(strtab, strtab_size); pArchive.getARFile().memArea()->release(header_region); pArchive.getARFile().memArea()->release(strtab_region); return true; } /// shouldIncludeStatus - given a sym name from armap and check if including /// the corresponding archive member, and then return the decision enum Archive::Symbol::Status GNUArchiveReader::shouldIncludeSymbol(const llvm::StringRef& pSymName) const { // TODO: handle symbol version issue and user defined symbols ResolveInfo* info = m_LDInfo.getNamePool().findInfo(pSymName); if (NULL != info) { if (!info->isUndef()) return Archive::Symbol::Exclude; if (info->isWeak()) return Archive::Symbol::Unknown; return Archive::Symbol::Include; } return Archive::Symbol::Unknown; }