普通文本  |  251行  |  8.9 KB

# Copyright 2017 syzkaller project authors. All rights reserved.
# Use of this source code is governed by Apache 2 LICENSE that can be found in the LICENSE file.

'''
This module provides classes which implement AST traversal in order to extract
items belonging to a struct.
'''

import collections
import logging

from pycparser import c_ast
from header_preprocessor import HeaderFilePreprocessor


class StructWalkerException(Exception):
    pass


class StructWalker(c_ast.NodeVisitor):
    '''
    Given an ast obtained by parsing a header file, return a hierarchy
    dictionary. The ast is expected to be of type pycparser.c_ast.FileAST.

    Usage :

    >>> import tempfile
    >>> t = tempfile.NamedTemporaryFile()
    >>> contents = """
    ... #define STRUCT_SIZE 1337
    ... struct ARRAY_OF_POINTERS_CONTAINER {
    ... 	unsigned int *ptr[10];
    ... 	int **n;
    ... };
    ... struct ARRAY_CONTAINER {
    ... 	  int g[10];
    ... 		  int h[20][30];
    ... };
    ... struct REGULAR_STRUCT {
    ... 	int x;
    ... 	char *y;
    ... 	void *ptr;
    ... };
    ... struct STRUCT_WITH_STRUCT_PTR {
    ... 	struct REGULAR_STRUCT *struct_ptr;
    ... 	int z;
    ... };
    ... struct STRUCT_WITH_STRUCT_INST {
    ... 	struct REGULAR_STRUCT regular_struct_inst;
    ... 	int a;
    ... };
    ... struct STRUCT_WITH_STRUCT_ARRAY {
    ... 	struct REGULAR_STRUCT regular_struct_array[100];
    ... 	int b;
    ... };
    ... struct STRUCT_WITH_ANONYMOUS_STRUCT {
    ... 	struct {
    ... 		int g;
    ... 		int h;
    ... 		int i;
    ... 	} anonymous_struct;
    ... };
    ... struct STRUCT_WITH_ANONYMOUS_UNION {
    ... 	union {
    ... 		int t;
    ... 		char r[100];
    ... 	} anonymous_union;
    ... };
    ... struct STRUCT_WITH_STRUCT_ARRAY_SIZE_MACRO {
    ... 	struct REGULAR_STRUCT regular_struct_array[STRUCT_SIZE];
    ... };
    ... struct STRUCT_WITH_2D_ARRAY_INST {
    ... 	struct REGULAR_STRUCT regular_struct_array_2D[10][10];
    ... };
    ... struct NESTED_ANONYMOUS_STRUCT {
    ... 	struct {
    ... 		int x;
    ... 		struct {
    ... 			int y;
    ... 			int z;
    ... 		} level_2;
    ... 	} level_1;
    ... };
    ... """
	>>> t.write(contents) ; t.flush()
    >>> struct_walker = StructWalker(filenames=[t.name])
    >>> local_hierarchy = struct_walker.generate_local_hierarchy()
    >>> for k in local_hierarchy:
    ...     print k
    ...     print local_hierarchy[k]
    ARRAY_OF_POINTERS_CONTAINER
    [('unsigned int*[10]', 'ptr'), ('int**', 'n')]
    STRUCT_WITH_STRUCT_ARRAY_SIZE_MACRO
    [('struct REGULAR_STRUCT[1337]', 'regular_struct_array')]
    STRUCT_WITH_2D_ARRAY_INST
    [('struct REGULAR_STRUCT[10][10]', 'regular_struct_array_2D')]
    STRUCT_WITH_STRUCT_ARRAY
    [('struct REGULAR_STRUCT[100]', 'regular_struct_array'), ('int', 'b')]
    NESTED_ANONYMOUS_STRUCT
    [('int', 'level_1.x'), ('int', 'level_1.level_2.y'), ('int', 'level_1.level_2.z')]
    STRUCT_WITH_ANONYMOUS_STRUCT
    [('int', 'anonymous_struct.g'), ('int', 'anonymous_struct.h'), ('int', 'anonymous_struct.i')]
    STRUCT_WITH_ANONYMOUS_UNION
    [('int', 'anonymous_union.t'), ('char[100]', 'anonymous_union.r')]
    STRUCT_WITH_STRUCT_INST
    [('struct REGULAR_STRUCT', 'regular_struct_inst'), ('int', 'a')]
    ARRAY_CONTAINER
    [('int[10]', 'g'), ('int[20][30]', 'h')]
    REGULAR_STRUCT
    [('int', 'x'), ('char*', 'y'), ('void*', 'ptr')]
    STRUCT_WITH_STRUCT_PTR
    [('struct REGULAR_STRUCT*', 'struct_ptr'), ('int', 'z')]
    '''

    def __init__(self, ast=None, filenames=[], include_lines='', loglvl=logging.INFO):
        super(StructWalker, self).__init__()
        self.ast = ast
        self.filenames = filenames

        if not filenames and not ast:
            raise StructWalkerException('Specify either "filename" or "ast" to create'
                                        'StructParser object')

        if not self.ast:
            self.ast = HeaderFilePreprocessor(self.filenames, include_lines=include_lines,
                                              loglvl=loglvl).get_ast()

        self.include_lines = include_lines
        self.local_structs_hierarchy = {}
        self._setuplogging(loglvl)

    def _setuplogging(self, loglvl):
        self.logger = logging.getLogger(self.__class__.__name__)
        formatter = logging.Formatter('DEBUG:%(name)s:%(message)s')
        sh = logging.StreamHandler()
        sh.setFormatter(formatter)
        sh.setLevel(loglvl)
        self.logger.addHandler(sh)
        self.logger.setLevel(loglvl)

    def _format_item(self, processed_item):
        fmt_type = processed_item['type']
        fmt_type = ' '.join(fmt_type)

        self.logger.debug('_format_item : %s', processed_item)

        if 'is_ptr' in processed_item and 'is_fnptr' not in processed_item:
            fmt_type = '%s%s' % (fmt_type, '*' * processed_item['is_ptr'])

        if 'is_array' in processed_item and 'array_size' in processed_item:
            size_str = str(processed_item['array_size']).replace(', ', '][')
            fmt_type = '%s%s' % (fmt_type, size_str)

        fmt_identifier = processed_item['identifier']

        return [(fmt_type, fmt_identifier)]

    def _recursive_process_item(self, item_ast, processed_item, parent):
        self.logger.debug('--- _recursive_process_item : %s', type(item_ast))
        if isinstance(item_ast, c_ast.Decl):
            processed_item['identifier'] = item_ast.name
            return self._recursive_process_item(item_ast.type, processed_item, item_ast)

        elif isinstance(item_ast, c_ast.TypeDecl):
            return self._recursive_process_item(item_ast.type, processed_item, item_ast)

        elif isinstance(item_ast, c_ast.IdentifierType):
            if len(item_ast.names) > 0:
                processed_item['type'] = item_ast.names
                return self._format_item(processed_item)

        elif (isinstance(item_ast, c_ast.Struct) or
              isinstance(item_ast, c_ast.Union)):
            if not item_ast.name:
                nodename, _items_list = self._traverse_ast(item_ast, toplevel=False)
                try:
                    items_list = [(i[0], '%s.%s' % (parent.declname, i[1])) for i in _items_list]
                except AttributeError as e:
                    self.logger.info('-- Encountered anonymous_struct/anonymous_union with no name')
                    raise StructWalkerException('Encountered anonymous_struct/anonymous_union with no name')

                return items_list
            else:
                processed_item['type'] = ['struct %s' % (item_ast.name)]
                return self._format_item(processed_item)

        elif isinstance(item_ast, c_ast.PtrDecl):
            if 'is_ptr' not in processed_item:
                processed_item['is_ptr'] = 0
            processed_item['is_ptr'] = processed_item['is_ptr'] + 1
            return self._recursive_process_item(item_ast.type, processed_item, item_ast)

        elif isinstance(item_ast, c_ast.ArrayDecl):
            processed_item['is_array'] = True
            if 'array_size' not in processed_item:
                processed_item['array_size'] = []
            processed_item['array_size'].append(int(item_ast.dim.value))
            return self._recursive_process_item(item_ast.type, processed_item, item_ast)

        elif isinstance(item_ast, c_ast.Enum):
            processed_item['type'] = ['enum %s' % (item_ast.name)]
            return self._format_item(processed_item)

        elif isinstance(item_ast, c_ast.FuncDecl):
            processed_item['is_fnptr'] = True
            processed_item['type'] = ['void (*)()']
            return self._format_item(processed_item)

    def _traverse_ast(self, node, toplevel=True):
        items_list = []

        # Argument structs are used as types, hence anonymous top-level
        # structs are ignored.
        if toplevel and not node.name:
            return None

        if not node.children():
            return None

        self.logger.debug('>>> Struct name = %s, coord: %s', node.name, node.coord)
        for child in node.children():
            item = self._recursive_process_item(child[1], {}, None)
            items_list.extend(item)

        self.logger.debug('_traverse_ast returns: %s', str((node.name, items_list)))
        return (node.name, items_list)

    def visit_Struct(self, node, *a):
        if node.name in self.local_structs_hierarchy:
            self.logger.info('Encountered %s again. Ignoring.', repr(node.name))
            return

        try:
            desc = self._traverse_ast(node)
        except StructWalkerException as e:
            self.logger.info('-- Exception raised by StructWalkerException in %s,'
                             'inspect manually.',
                             repr(node.name))
            self.logger.info(str(e))
            return

        if not desc:
            return

        struct_name, struct_items = desc
        self.local_structs_hierarchy[struct_name] = struct_items

    def generate_local_hierarchy(self):
        self.visit(self.ast)
        return self.local_structs_hierarchy