// Copyright (C) 2016 The Android Open Source Project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <header_abi_util.h>
#include <ir_representation.h>
#include <llvm/Support/CommandLine.h>
#include <llvm/Support/raw_ostream.h>
#include <memory>
#include <mutex>
#include <fstream>
#include <iostream>
#include <string>
#include <thread>
#include <vector>
#include <stdlib.h>
static constexpr std::size_t kSourcesPerBatchThread = 7;
static llvm::cl::OptionCategory header_linker_category(
"header-abi-linker options");
static llvm::cl::list<std::string> dump_files(
llvm::cl::Positional, llvm::cl::desc("<dump-files>"), llvm::cl::Required,
llvm::cl::cat(header_linker_category), llvm::cl::OneOrMore);
static llvm::cl::opt<std::string> linked_dump(
"o", llvm::cl::desc("<linked dump>"), llvm::cl::Required,
llvm::cl::cat(header_linker_category));
static llvm::cl::list<std::string> exported_header_dirs(
"I", llvm::cl::desc("<export_include_dirs>"), llvm::cl::Prefix,
llvm::cl::ZeroOrMore, llvm::cl::cat(header_linker_category));
static llvm::cl::opt<std::string> version_script(
"v", llvm::cl::desc("<version_script>"), llvm::cl::Optional,
llvm::cl::cat(header_linker_category));
static llvm::cl::opt<std::string> api(
"api", llvm::cl::desc("<api>"), llvm::cl::Optional,
llvm::cl::cat(header_linker_category));
static llvm::cl::opt<std::string> arch(
"arch", llvm::cl::desc("<arch>"), llvm::cl::Optional,
llvm::cl::cat(header_linker_category));
static llvm::cl::opt<bool> no_filter(
"no-filter", llvm::cl::desc("Do not filter any abi"), llvm::cl::Optional,
llvm::cl::cat(header_linker_category));
static llvm::cl::opt<std::string> so_file(
"so", llvm::cl::desc("<path to so file>"), llvm::cl::Optional,
llvm::cl::cat(header_linker_category));
static llvm::cl::opt<abi_util::TextFormatIR> text_format(
"text-format", llvm::cl::desc("Specify text format of abi dumps"),
llvm::cl::values(clEnumValN(abi_util::TextFormatIR::ProtobufTextFormat,
"ProtobufTextFormat", "ProtobufTextFormat"),
clEnumValEnd),
llvm::cl::init(abi_util::TextFormatIR::ProtobufTextFormat),
llvm::cl::cat(header_linker_category));
class HeaderAbiLinker {
public:
HeaderAbiLinker(
const std::vector<std::string> &dump_files,
const std::vector<std::string> &exported_header_dirs,
const std::string &version_script,
const std::string &so_file,
const std::string &linked_dump,
const std::string &arch,
const std::string &api)
: dump_files_(dump_files), exported_header_dirs_(exported_header_dirs),
version_script_(version_script), so_file_(so_file),
out_dump_name_(linked_dump), arch_(arch), api_(api) {};
bool LinkAndDump();
private:
template <typename T>
bool LinkDecl(abi_util::IRDumper *dst,
std::set<std::string> *link_set,
std::set<std::string> *regex_matched_link_set,
const std::regex *vs_regex,
const abi_util::AbiElementMap<T> &src,
bool use_version_script);
bool ParseVersionScriptFiles();
bool ParseSoFile();
bool LinkTypes(const abi_util::TextFormatToIRReader *ir_reader,
abi_util::IRDumper *ir_dumper);
bool LinkFunctions(const abi_util::TextFormatToIRReader *ir_reader,
abi_util::IRDumper *ir_dumper);
bool LinkGlobalVars(const abi_util::TextFormatToIRReader *ir_reader,
abi_util::IRDumper *ir_dumper);
bool AddElfSymbols(abi_util::IRDumper *ir_dumper);
private:
const std::vector<std::string> &dump_files_;
const std::vector<std::string> &exported_header_dirs_;
const std::string &version_script_;
const std::string &so_file_;
const std::string &out_dump_name_;
const std::string &arch_;
const std::string &api_;
// TODO: Add to a map of std::sets instead.
std::set<std::string> exported_headers_;
std::set<std::string> types_set_;
std::set<std::string> function_decl_set_;
std::set<std::string> globvar_decl_set_;
// Version Script Regex Matching.
std::set<std::string> functions_regex_matched_set;
std::regex functions_vs_regex_;
// Version Script Regex Matching.
std::set<std::string> globvars_regex_matched_set;
std::regex globvars_vs_regex_;
};
template <typename T, typename Iterable>
static bool AddElfSymbols(abi_util::IRDumper *dst, const Iterable &symbols) {
for (auto &&symbol : symbols) {
T elf_symbol(symbol);
if (!dst->AddElfSymbolMessageIR(&elf_symbol)) {
return false;
}
}
return true;
}
// To be called right after parsing the .so file / version script.
bool HeaderAbiLinker::AddElfSymbols(abi_util::IRDumper *ir_dumper) {
return ::AddElfSymbols<abi_util::ElfFunctionIR>(ir_dumper,
function_decl_set_) &&
::AddElfSymbols<abi_util::ElfObjectIR>(ir_dumper,
globvar_decl_set_);
}
static void DeDuplicateAbiElementsThread(
const std::vector<std::string> &dump_files,
const std::set<std::string> *exported_headers,
abi_util::TextFormatToIRReader *greader, std::mutex *greader_lock,
std::atomic<std::size_t> *cnt) {
std::unique_ptr<abi_util::TextFormatToIRReader> local_reader =
abi_util::TextFormatToIRReader::CreateTextFormatToIRReader(
text_format, exported_headers);
auto begin_it = dump_files.begin();
std::size_t num_sources = dump_files.size();
while (1) {
std::size_t i = cnt->fetch_add(kSourcesPerBatchThread);
if (i >= num_sources) {
break;
}
std::size_t end = std::min(i + kSourcesPerBatchThread, num_sources);
for (auto it = begin_it; it != begin_it + end; it++) {
std::unique_ptr<abi_util::TextFormatToIRReader> reader =
abi_util::TextFormatToIRReader::CreateTextFormatToIRReader(
text_format, exported_headers);
assert(reader != nullptr);
if (!reader->ReadDump(*it)) {
llvm::errs() << "ReadDump failed\n";
::exit(1);
}
// This merge is needed since the iterators might not be contigous.
local_reader->MergeGraphs(*reader);
}
}
std::lock_guard<std::mutex> lock(*greader_lock);
greader->MergeGraphs(*local_reader);
}
bool HeaderAbiLinker::LinkAndDump() {
// If the user specifies that a version script should be used, use that.
if (!so_file_.empty()) {
exported_headers_ =
abi_util::CollectAllExportedHeaders(exported_header_dirs_);
if (!ParseSoFile()) {
llvm::errs() << "Couldn't parse so file\n";
return false;
}
} else if (!ParseVersionScriptFiles()) {
llvm::errs() << "Failed to parse stub files for exported symbols\n";
return false;
}
std::unique_ptr<abi_util::IRDumper> ir_dumper =
abi_util::IRDumper::CreateIRDumper(text_format, out_dump_name_);
assert(ir_dumper != nullptr);
AddElfSymbols(ir_dumper.get());
// Create a reader, on which we never actually call ReadDump(), since multiple
// dump files are associated with it.
std::unique_ptr<abi_util::TextFormatToIRReader> greader =
abi_util::TextFormatToIRReader::CreateTextFormatToIRReader(
text_format, &exported_headers_);
std::size_t max_threads = std::thread::hardware_concurrency();
std::size_t num_threads = kSourcesPerBatchThread < dump_files_.size() ?
std::min(dump_files_.size() / kSourcesPerBatchThread,
max_threads) : 0;
std::vector<std::thread> threads;
std::atomic<std::size_t> cnt(0);
std::mutex greader_lock;
for (std::size_t i = 1; i < num_threads; i++) {
threads.emplace_back(DeDuplicateAbiElementsThread, dump_files_,
&exported_headers_, greader.get(), &greader_lock,
&cnt);
}
DeDuplicateAbiElementsThread(dump_files_, &exported_headers_, greader.get(),
&greader_lock, &cnt);
for (auto &thread : threads) {
thread.join();
}
if (!LinkTypes(greader.get(), ir_dumper.get()) ||
!LinkFunctions(greader.get(), ir_dumper.get()) ||
!LinkGlobalVars(greader.get(), ir_dumper.get())) {
llvm::errs() << "Failed to link elements\n";
return false;
}
if (!ir_dumper->Dump()) {
llvm::errs() << "Serialization to ostream failed\n";
return false;
}
return true;
}
static bool QueryRegexMatches(std::set<std::string> *regex_matched_link_set,
const std::regex *vs_regex,
const std::string &symbol) {
assert(regex_matched_link_set != nullptr);
assert(vs_regex != nullptr);
if (regex_matched_link_set->find(symbol) != regex_matched_link_set->end()) {
return false;
}
if (std::regex_search(symbol, *vs_regex)) {
regex_matched_link_set->insert(symbol);
return true;
}
return false;
}
static std::regex CreateRegexMatchExprFromSet(
const std::set<std::string> &link_set) {
std::string all_regex_match_str = "";
std::set<std::string>::iterator it = link_set.begin();
while (it != link_set.end()) {
std::string regex_match_str_find_glob =
abi_util::FindAndReplace(*it, "\\*", ".*");
all_regex_match_str += "(\\b" + regex_match_str_find_glob + "\\b)";
if (++it != link_set.end()) {
all_regex_match_str += "|";
}
}
if (all_regex_match_str == "") {
return std::regex();
}
return std::regex(all_regex_match_str);
}
template <typename T>
bool HeaderAbiLinker::LinkDecl(
abi_util::IRDumper *dst, std::set<std::string> *link_set,
std::set<std::string> *regex_matched_link_set, const std::regex *vs_regex,
const abi_util::AbiElementMap<T> &src, bool use_version_script_or_so) {
assert(dst != nullptr);
assert(link_set != nullptr);
for (auto &&element : src) {
// If we are not using a version script and exported headers are available,
// filter out unexported abi.
std::string source_file = element.second.GetSourceFile();
// Builtin types will not have source file information.
if (!exported_headers_.empty() && !source_file.empty() &&
exported_headers_.find(source_file) ==
exported_headers_.end()) {
continue;
}
const std::string &element_str = element.first;
// Check for the existence of the element in linked dump / symbol file.
if (use_version_script_or_so) {
std::set<std::string>::iterator it =
link_set->find(element_str);
if (it == link_set->end()) {
if (!QueryRegexMatches(regex_matched_link_set, vs_regex, element_str)) {
continue;
}
} else {
// We get a pre-filled link name set while using version script.
link_set->erase(*it); // Avoid multiple instances of the same symbol.
}
}
if (!dst->AddLinkableMessageIR(&(element.second))) {
llvm::errs() << "Failed to add element to linked dump\n";
return false;
}
}
return true;
}
bool HeaderAbiLinker::LinkTypes(const abi_util::TextFormatToIRReader *reader,
abi_util::IRDumper *ir_dumper) {
assert(reader != nullptr);
assert(ir_dumper != nullptr);
// Even if version scripts are available we take in types, since the symbols
// in the version script might reference a type exposed by the library.
return LinkDecl(ir_dumper, &types_set_, nullptr,
nullptr, reader->GetRecordTypes(), false) &&
LinkDecl(ir_dumper, &types_set_, nullptr,
nullptr, reader->GetEnumTypes(), false) &&
LinkDecl(ir_dumper, &types_set_, nullptr, nullptr,
reader->GetFunctionTypes(), false) &&
LinkDecl(ir_dumper, &types_set_, nullptr,
nullptr, reader->GetBuiltinTypes(), false) &&
LinkDecl(ir_dumper, &types_set_, nullptr,
nullptr, reader->GetPointerTypes(), false) &&
LinkDecl(ir_dumper, &types_set_, nullptr,
nullptr, reader->GetRvalueReferenceTypes(), false) &&
LinkDecl(ir_dumper, &types_set_, nullptr,
nullptr, reader->GetLvalueReferenceTypes(), false) &&
LinkDecl(ir_dumper, &types_set_, nullptr,
nullptr, reader->GetArrayTypes(), false) &&
LinkDecl(ir_dumper, &types_set_, nullptr,
nullptr, reader->GetQualifiedTypes(), false);
}
bool HeaderAbiLinker::LinkFunctions(
const abi_util::TextFormatToIRReader *reader,
abi_util::IRDumper *ir_dumper) {
assert(reader != nullptr);
return LinkDecl(ir_dumper, &function_decl_set_,
&functions_regex_matched_set, &functions_vs_regex_,
reader->GetFunctions(),
(!version_script_.empty() || !so_file_.empty()));
}
bool HeaderAbiLinker::LinkGlobalVars(
const abi_util::TextFormatToIRReader *reader,
abi_util::IRDumper *ir_dumper) {
assert(reader != nullptr);
return LinkDecl(ir_dumper, &globvar_decl_set_,
&globvars_regex_matched_set, &globvars_vs_regex_,
reader->GetGlobalVariables(),
(!version_script.empty() || !so_file_.empty()));
}
bool HeaderAbiLinker::ParseVersionScriptFiles() {
abi_util::VersionScriptParser version_script_parser(version_script_, arch_,
api_);
if (!version_script_parser.Parse()) {
llvm::errs() << "Failed to parse version script\n";
return false;
}
function_decl_set_ = version_script_parser.GetFunctions();
globvar_decl_set_ = version_script_parser.GetGlobVars();
std::set<std::string> function_regexs =
version_script_parser.GetFunctionRegexs();
std::set<std::string> globvar_regexs =
version_script_parser.GetGlobVarRegexs();
functions_vs_regex_ = CreateRegexMatchExprFromSet(function_regexs);
globvars_vs_regex_ = CreateRegexMatchExprFromSet(globvar_regexs);
return true;
}
bool HeaderAbiLinker::ParseSoFile() {
auto Binary = llvm::object::createBinary(so_file_);
if (!Binary) {
llvm::errs() << "Couldn't really create object File \n";
return false;
}
llvm::object::ObjectFile *objfile =
llvm::dyn_cast<llvm::object::ObjectFile>(&(*Binary.get().getBinary()));
if (!objfile) {
llvm::errs() << "Not an object file\n";
return false;
}
std::unique_ptr<abi_util::SoFileParser> so_parser =
abi_util::SoFileParser::Create(objfile);
if (so_parser == nullptr) {
llvm::errs() << "Couldn't create soFile Parser\n";
return false;
}
so_parser->GetSymbols();
function_decl_set_ = so_parser->GetFunctions();
globvar_decl_set_ = so_parser->GetGlobVars();
return true;
}
int main(int argc, const char **argv) {
llvm::cl::ParseCommandLineOptions(argc, argv, "header-linker");
if (so_file.empty() && version_script.empty()) {
llvm::errs() << "One of -so or -v needs to be specified\n";
return -1;
}
if (no_filter) {
static_cast<std::vector<std::string> &>(exported_header_dirs).clear();
}
HeaderAbiLinker Linker(dump_files, exported_header_dirs, version_script,
so_file, linked_dump, arch, api);
if (!Linker.LinkAndDump()) {
llvm::errs() << "Failed to link and dump elements\n";
return -1;
}
return 0;
}