#!/usr/bin/env python
# Copyright (c) 2012 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

"""Dump functions called by static initializers in a Linux Release binary.

Usage example:
  tools/linux/dump-static-initializers.py out/Release/chrome

A brief overview of static initialization:
1) the compiler writes out, per object file, a function that contains the
   static initializers for that file.
2) the compiler also writes out a pointer to that function in a special
   section.
3) at link time, the linker concatenates the function pointer sections
   into a single list of all initializers.
4) at run time, on startup the binary runs all function pointers.

The functions in (1) all have mangled names of the form
  _GLOBAL__I_foobar.cc
Using objdump, we can disassemble those functions and dump all symbols that
they reference.
"""

import optparse
import re
import subprocess
import sys

# A map of symbol => informative text about it.
NOTES = {
    '__cxa_atexit@plt': 'registers a dtor to run at exit',
    'std::__ioinit': '#includes <iostream>, use <ostream> instead',
}


def _IsGitWorkspace():
    """Return True if `git rev-parse` succeeds in the current directory.

    A missing git executable is treated as "not a git checkout" instead of
    aborting the import of this module.
    """
    try:
        git = subprocess.Popen(['git', 'rev-parse'], stderr=subprocess.PIPE)
    except OSError:
        return False
    # communicate() drains stderr and reaps the child, so a chatty git cannot
    # block on a full pipe.
    git.communicate()
    return git.returncode == 0


# Determine whether this is a git checkout (as opposed to e.g. svn).
IS_GIT_WORKSPACE = _IsGitWorkspace()


class Demangler(object):
    """A wrapper around c++filt to provide a function to demangle symbols."""

    def __init__(self):
        # Text-mode pipes let the same code exchange str with c++filt on both
        # Python 2 and Python 3.
        self.cppfilt = subprocess.Popen(['c++filt'],
                                        stdin=subprocess.PIPE,
                                        stdout=subprocess.PIPE,
                                        universal_newlines=True)

    def Demangle(self, sym):
        """Given mangled symbol |sym|, return its demangled form."""
        self.cppfilt.stdin.write(sym + '\n')
        # Without a flush the request can sit in our write buffer while we
        # block forever waiting for the reply.
        self.cppfilt.stdin.flush()
        return self.cppfilt.stdout.readline().strip()


def _UniqueOutputLine(cmd, default):
    """Run |cmd| and return its only line of output, stripped.

    Returns |default| when the command prints no lines or more than one line.
    The child process is always run to completion and reaped.
    """
    proc = subprocess.Popen(cmd, stdout=subprocess.PIPE,
                            universal_newlines=True)
    output, _ = proc.communicate()
    lines = output.splitlines()
    if len(lines) == 1:
        return lines[0].strip()
    return default


# Matches for example: "cert_logger.pb.cc", capturing "cert_logger".
protobuf_filename_re = re.compile(r'(.*)\.pb\.cc$')


def QualifyFilenameAsProto(filename):
    """Attempt to qualify a bare |filename| with a src-relative path, assuming
    it is a protoc-generated file.

    The lookup asks `git ls-files` for a .proto file with the same stem. If a
    single match is found, it is returned. Otherwise (not a git checkout, not
    a *.pb.cc name, no match, or several matches) the original filename is
    returned.
    """
    if not IS_GIT_WORKSPACE:
        return filename
    match = protobuf_filename_re.match(filename)
    if not match:
        return filename
    basename = match.group(1)
    return _UniqueOutputLine(
        ['git', 'ls-files', '--', '*/%s.proto' % basename], filename)


# Regex matching the substring of a symbol's demangled text representation most
# likely to appear in a source file.
# Example: "v8::internal::Builtins::InitBuiltinFunctionTable()" becomes
# "InitBuiltinFunctionTable", since the first (optional & non-capturing) group
# picks up any ::-qualification and the last fragment picks up a suffix that
# starts with an opener.
symbol_code_name_re = re.compile(r'^(?:[^(<[]*::)?([^:(<[]*).*?$')


def QualifyFilename(filename, symbol):
    """Given a bare filename and a symbol that occurs in it, attempt to
    qualify it with a src-relative path.

    The lookup uses `git grep -l` for the unqualified symbol name, restricted
    to files whose basename is |filename|. If exactly one file matches, its
    path is returned; otherwise the original filename is returned.
    """
    if not IS_GIT_WORKSPACE:
        return filename
    match = symbol_code_name_re.match(symbol)
    if not match:
        return filename
    symbol = match.group(1)
    return _UniqueOutputLine(
        ['git', 'grep', '-l', symbol, '--', '*/%s' % filename], filename)


# Regex matching nm output for the symbols we're interested in.
# See test_ParseNmLine for examples.
nm_re = re.compile(r'(\S+) (\S+) t (?:_ZN12)?_GLOBAL__(?:sub_)?I_(.*)')


def ParseNmLine(line):
    """Given a line of nm output, parse static initializers as a
    (file, start, size) tuple.

    Returns None when the line does not describe a static initializer.
    """
    match = nm_re.match(line)
    if not match:
        return None
    addr, size, filename = match.groups()
    return (filename, int(addr, 16), int(size, 16))


def test_ParseNmLine():
    """Verify the nm_re regex matches some sample lines."""
    parse = ParseNmLine(
        '0000000001919920 0000000000000008 t '
        '_ZN12_GLOBAL__I_safe_browsing_service.cc')
    assert parse == ('safe_browsing_service.cc', 26319136, 8), parse

    parse = ParseNmLine(
        '00000000026b9eb0 0000000000000024 t '
        '_GLOBAL__sub_I_extension_specifics.pb.cc')
    assert parse == ('extension_specifics.pb.cc', 40607408, 36), parse


# Just always run the test; it is fast enough.
test_ParseNmLine()


def ParseNm(binary):
    """Given a binary, yield static initializers as (file, start, size)
    tuples, in the order `nm -S` reports them."""
    # Text mode so the str regex works on Python 3 as well as Python 2.
    nm = subprocess.Popen(['nm', '-S', binary], stdout=subprocess.PIPE,
                          universal_newlines=True)
    try:
        for line in nm.stdout:
            parse = ParseNmLine(line)
            if parse:
                yield parse
    finally:
        # Reap the child even if the consumer stops iterating early.
        nm.stdout.close()
        nm.wait()


# Regex matching objdump output for the symbols we're interested in.
# Example line:
#     12354ab:  (disassembly, including <FunctionReference>)
disassembly_re = re.compile(r'^\s+[0-9a-f]+:.*<(\S+)>')

# Matches references to a static-initializer function itself, under either of
# the two names nm_re accepts ("_GLOBAL__I_" and "_GLOBAL__sub_I_").
initializer_ref_re = re.compile(r'_GLOBAL__(?:sub_)?I_')


def ExtractSymbolReferences(binary, start, end):
    """Given a span of addresses, returns symbol references from disassembly.

    Args:
      binary: path of the binary to disassemble with objdump.
      start: first address of the span (inclusive).
      end: address at which disassembly stops.

    Returns:
      A sorted list of the distinct referenced symbol names (still mangled).

    Raises:
      RuntimeError: the disassembly mentions
        __static_initialization_and_destruction, which suggests a Debug
        binary.
    """
    cmd = ['objdump', binary, '--disassemble',
           '--start-address=0x%x' % start, '--stop-address=0x%x' % end]
    objdump = subprocess.Popen(cmd, stdout=subprocess.PIPE,
                               universal_newlines=True)
    refs = set()
    try:
        for line in objdump.stdout:
            if '__static_initialization_and_destruction' in line:
                raise RuntimeError(
                    'code mentions '
                    '__static_initialization_and_destruction; '
                    'did you accidentally run this on a Debug binary?')
            match = disassembly_re.search(line)
            if not match:
                continue
            ref = match.group(1)
            if ref.startswith('.LC') or ref.startswith('_DYNAMIC'):
                # Ignore these, they are uninformative.
                continue
            if initializer_ref_re.match(ref):
                # Probably a relative jump within this function.
                continue
            refs.add(ref)
    finally:
        # Close the pipe and reap objdump on both the normal and error paths.
        objdump.stdout.close()
        objdump.wait()
    return sorted(refs)


def main():
    """Parse the command line and print the static initializers of a binary.

    Returns the process exit status (0 on success). Bad usage exits through
    parser.error() and never returns.
    """
    parser = optparse.OptionParser(usage='%prog [option] filename')
    parser.add_option('-d', '--diffable', dest='diffable',
                      action='store_true', default=False,
                      help='Prints the filename on each line, for more easily '
                           'diff-able output. (Used by sizes.py)')
    opts, args = parser.parse_args()
    if len(args) != 1:
        # parser.error() prints the usage message and exits with status 2.
        parser.error('missing filename argument')
    binary = args[0]

    demangler = Demangler()
    file_count = 0
    initializer_count = 0

    files = ParseNm(binary)
    if opts.diffable:
        # Sorted output keeps diffs between two runs stable.
        files = sorted(files)
    for filename, addr, size in files:
        file_count += 1
        ref_output = []

        qualified_filename = QualifyFilenameAsProto(filename)

        if size == 2:
            # gcc generates a two-byte 'repz retq' initializer when there is a
            # ctor even when the ctor is empty.  This is fixed in gcc 4.6, but
            # Android uses gcc 4.4.
            ref_output.append(
                '[empty ctor, but it still has cost on gcc <4.6]')
        else:
            for ref in ExtractSymbolReferences(binary, addr, addr + size):
                initializer_count += 1

                ref = demangler.Demangle(ref)
                if qualified_filename == filename:
                    # Still unqualified: try to locate the file through a
                    # symbol it references.
                    qualified_filename = QualifyFilename(filename, ref)

                note = ''
                if ref in NOTES:
                    note = NOTES[ref]
                elif ref.endswith('_2eproto()'):
                    note = 'protocol compiler bug: crbug.com/105626'

                if note:
                    ref_output.append('%s [%s]' % (ref, note))
                else:
                    ref_output.append(ref)

        # Single-argument print(...) calls behave identically on Python 2
        # and Python 3.
        if opts.diffable:
            print('\n'.join('# ' + qualified_filename + ' ' + r
                            for r in ref_output))
        else:
            print('%s (initializer offset 0x%x size 0x%x)' %
                  (qualified_filename, addr, size))
            print(''.join(' %s\n' % r for r in ref_output))

    summary = ('Found %d static initializers in %d files.' %
               (initializer_count, file_count))
    if opts.diffable:
        summary = '# ' + summary
    print(summary)

    return 0


if __name__ == '__main__':
    sys.exit(main())