#! /usr/bin/python

import os
import sys
import string
import re

## Hash from symbol name to the list of symbols with that name.
## Each symbol in that list is itself a list laid out as:
##   0 symname, 1 address, 2 references, 3 filename, 4 reached,
##   5 referenced-by, 6 backlinked, 7 approx size
symbols = {}
## Hash from the name of each root symbol (symbols exported by the object,
## plus the hardcoded vtable roots) to 1. Reachability marking starts
## from every key in this table.
roots = {}

def createBacklinks(name, syms):
    """Record `name` as a referencer of every symbol that `syms` refer to.

    name -- the symbol name shared by all entries of `syms`
    syms -- list of symbol records (see the layout in main()); field 2 of
            each record is the list of symbol names it references

    For every referenced name that is present in the global `symbols`
    table, `name` is appended to field 5 (referenced-by) of each record
    stored under that name. A referencer is listed at most once per
    record; references to names missing from `symbols` are ignored.
    """
    for s in syms:
        for r in s[2]:
            ## for each ref, add ourselves as a referencer
            for t in symbols.get(r, ()):
                if name not in t[5]:
                    t[5].append(name)

def markSymbol(frm, name):
    """Mark `name`, and everything reachable from it, as reached.

    frm  -- name of the referencing symbol (only used in diagnostics)
    name -- name of the symbol to mark

    Sets field 4 (reached) to 1 on every record stored under `name` in
    the global `symbols` table, then follows field 2 (references) of each
    newly marked record. Records that are already marked are not walked
    again, which is what terminates cycles in the reference graph.

    A name that is absent from `symbols` is reported and skipped. The walk
    uses an explicit worklist rather than recursion so that long reference
    chains cannot exceed the interpreter's recursion limit.
    """
    pending = [(frm, name)]
    while pending:
        (referrer, target) = pending.pop()
        if target not in symbols:
            print("%s referenced but was not in the objdump" % target)
            continue
        syms = symbols[target]
        ## print ambiguous references unless they are internal noise like ".L129"
        if len(syms) > 1 and target[0] != '.':
            print("Reference to symbol '%s' from '%s' is ambiguous, marking all '%s'" % (target, referrer, target))
            print(syms)
        for s in syms:
            if s[4]:
                continue ## already marked
            s[4] = 1
            ## pushed in reverse so references are visited in listed order
            for r in reversed(s[2]):
                pending.append((s[0], r))

def cmpFilename(a, b):
    """Three-way comparison of two entries by filename, then by name.

    a, b -- sequences whose element 1 is a filename and element 0 a
            symbol name

    Returns -1, 0 or 1 when `a` sorts before, equal to, or after `b`.
    Written without the cmp() builtin, so it behaves the same on
    interpreters that do not provide it.
    """
    key_a = (a[1], a[0])
    key_b = (b[1], b[0])
    return (key_a > key_b) - (key_a < key_b)

def sizeAsString(bytes):
    """Return a short human-readable rendering of a byte count.

    bytes -- size in bytes

    Values below 1024 are shown as "<n> bytes", values below 1024*1024 in
    units of K, and everything else in units of M.

    NOTE: the K and M forms use "%.2g", i.e. two significant digits, so
    scaled values of 100 or more come out in exponent notation
    (150K is rendered as "1.5e+02K").
    """
    if bytes < 1024:
        return "%d bytes" % bytes
    elif bytes < 1024*1024:
        return "%.2gK" % (bytes / 1024.0)
    else:
        return "%.2gM" % (bytes / 1024.0 / 1024.0)

def printLost():
    """Print every symbol that was not marked as reached, grouped by file.

    Reads the global `symbols` table. Only the first record stored under
    each name is inspected; names starting with '.' are skipped. For each
    unreached symbol the name, its approximate size and the names
    referencing it are printed under a per-file heading, followed by one
    summary line per file with a non-zero total and a grand total.
    """
    lost = []
    for (name, syms) in symbols.items():
        s = syms[0] ## we always mark all or none for now
        if not s[4] and name[0] != '.': ## skip .L129 type symbols
            lost.append((name, s[3] or "unknown file", s[5], s[7]))

    ## The loop below starts a new group whenever the filename changes, so
    ## entries of one file must be adjacent: order by filename, then name.
    lost.sort(key=lambda entry: (entry[1], entry[0]))

    file_summaries = []
    total_unused = 0
    total_this_file = 0
    filename = None
    for (name, next_filename, referrers, size) in lost:
        if next_filename != filename:
            if total_this_file > 0:
                file_summaries.append("  %s may be unused in %s" % (sizeAsString(total_this_file), filename))
            print("%s has these symbols not reachable from exported symbols:" % next_filename)
            filename = next_filename
            total_this_file = 0
        print("    %s %s" % (name, sizeAsString(size)))
        total_unused = total_unused + size
        total_this_file = total_this_file + size
        for trace in referrers:
            print("       referenced from %s" % trace)

    ## the final group has no following filename change to flush it
    if total_this_file > 0:
        file_summaries.append("  %s may be unused in %s" % (sizeAsString(total_this_file), filename))

    for fs in file_summaries:
        print(fs)
    print("%s total may be unused" % sizeAsString(total_unused))

def _readCommandOutput(command):
    """Announce and run `command` through the shell; return its output lines."""
    print("Running: %s" % command)
    f = os.popen(command)
    try:
        lines = f.readlines()
    finally:
        ## close() yields the exit status, or None when the command succeeded
        status = f.close()
    if status:
        sys.stderr.write("Warning: '%s' exited with status %s\n" % (command, status))
    return lines

def _parseDisassembly(lines):
    """Fill the global `symbols` table from `objdump -D -l` output lines."""
    ## 0001aa44 <_dbus_message_get_network_data>:
    sym_re = re.compile(r'([0-9a-f]+) <([^>]+)>:')
    ## 1aa49:       e8 00 00 00 00          call   1aa4e <_dbus_message_get_network_data+0xa>
    ref_re = re.compile(r' <([^>]+)> *$')
    ## /home/hp/dbus-cvs/dbus/dbus/dbus-message.c:139
    file_re = re.compile(r'^(\/[^:].*):[0-9]+$')
    ## _dbus_message_get_network_data+0xa
    funcname_re = re.compile(r'([^+]+)\+[0-9a-fx]+')

    current_sym = None
    for line in lines:
        match = sym_re.match(line)
        if match:
            ## 0 symname, 1 address, 2 references, 3 filename, 4 reached, 5 referenced-by 6 backlinked 7 approx size
            item = [match.group(2), match.group(1), [], None, 0, [], 0, 0]
            symbols.setdefault(item[0], []).append(item)

            if current_sym:
                ## The distance between two consecutive symbol addresses is
                ## the extent of the earlier symbol, so it is recorded there.
                size = int(item[1], 16) - int(current_sym[1], 16)
                if size < 0:
                    print("Computed negative size %d for %s" % (size, current_sym[0]))
                    size = 0
                current_sym[7] = size
            current_sym = item
            continue

        if not current_sym:
            continue ## nothing to attach a reference or filename to yet

        match = ref_re.search(line)
        if match:
            target = match.group(1)
            stripped = funcname_re.match(target)
            if stripped:
                ## dump the "+address"
                target = stripped.group(1)
            if target != current_sym[0]: ## skip self-references
                current_sym[2].append(target)
            continue

        match = file_re.match(line)
        if match:
            src_file = match.group(1)
            if src_file.startswith('/usr/include'):
                pass ## inlined libc thingy
            elif current_sym[0].startswith('.debug'):
                pass ## debug info
            elif current_sym[3] and current_sym[3] != src_file:
                raise Exception("%s in both %s and %s" % (current_sym[0], current_sym[3], src_file))
            else:
                current_sym[3] = src_file

def _collectExportedRoots(lines):
    """Add every 'T' symbol found in `nm -D` output lines to the global `roots`."""
    ## 00005410 T dbus_address_entries_free
    dynsym_re = re.compile(r'T ([^ \n]+)$')
    for line in lines:
        match = dynsym_re.search(line)
        if match:
            name = match.group(1)
            if name in roots:
                raise Exception("symbol %s exported twice?" % name)
            roots[name] = 1

def main():
    """Report symbols of an object file that no exported symbol reaches.

    The object file is given as the first command-line argument. Its
    disassembly (objdump) supplies the symbols and the references between
    them, its dynamic symbol table (nm -D) supplies the roots; everything
    reachable from a root is marked and the remainder is printed.

    Raises Exception when a symbol is attributed to two different source
    files, is exported twice, or a hardcoded vtable root is already an
    exported root.
    """
    if len(sys.argv) < 2:
        sys.stderr.write("Usage: %s OBJECT-FILE\n" % sys.argv[0])
        sys.exit(1)
    ## NOTE: the name is pasted into a shell command line unquoted, so
    ## paths containing spaces or shell metacharacters are not supported.
    filename = sys.argv[1]

    ## first we find which functions reference which other functions
    _parseDisassembly(_readCommandOutput("objdump -D --demangle -l %s" % filename))

    ## now we need to find the roots (exported symbols)
    _collectExportedRoots(_readCommandOutput("nm -D %s" % filename))

    print("%d symbols exported from this object" % len(roots))

    ## these functions are used only indirectly, so we don't
    ## notice they are used. Manually add them as roots...
    ## NOTE(review): only this one entry was present in the list as found;
    ## confirm whether further vtable roots are meant to be listed here.
    vtable_roots = ['unix_finalize']

    for vr in vtable_roots:
        if vr in roots:
            raise Exception("%s is already a root" % vr)
        roots[vr] = 1

    for k in roots:
        markSymbol("root", k)

    for (k, v) in symbols.items():
        createBacklinks(k, v)

    print("""

The symbols mentioned below don't appear to be reachable starting from
the dynamic exports of the library. However, this program is pretty
dumb; a limitation that creates false positives is that it can only
trace 'reachable' through hardcoded function calls, if a function is
called only through a vtable, it won't be marked reachable (and
neither will its children in the call graph).

Also, the sizes mentioned are more or less completely bogus.

""")

    print("The following are hardcoded in as vtable roots: %s" % vtable_roots)

    printLost()
if __name__ == "__main__":