cpp.py - Android社区 - https://www.androidos.net.cn/

#!/usr/bin/python
"""A glorified C pre-processor parser."""

import ctypes
import logging
import os
import re
import site
import utils

top = os.getenv('ANDROID_BUILD_TOP')
if top is None:
    utils.panic('ANDROID_BUILD_TOP not set.\n')

# Set up the env vars for libclang.
site.addsitedir(os.path.join(top, 'external/clang/bindings/python'))

import clang.cindex
from clang.cindex import conf
from clang.cindex import Cursor
from clang.cindex import CursorKind
from clang.cindex import SourceLocation
from clang.cindex import SourceRange
from clang.cindex import TokenGroup
from clang.cindex import TokenKind
from clang.cindex import TranslationUnit

# Set up LD_LIBRARY_PATH to include libclang.so, libLLVM.so, and etc.
# Note that setting LD_LIBRARY_PATH with os.putenv() sometimes doesn't help.
clang.cindex.Config.set_library_path(os.path.join(top, 'prebuilts/sdk/tools/linux/lib64'))

from defaults import kCppUndefinedMacro
from defaults import kernel_remove_config_macros
from defaults import kernel_token_replacements

debugBlockParser = False
debugCppExpr = False
debugOptimIf01 = False

###############################################################################
###############################################################################
#####                                                                     #####
#####           C P P   T O K E N S                                       #####
#####                                                                     #####
###############################################################################
###############################################################################

# the list of supported C-preprocessor tokens
# plus a couple of C tokens as well
tokEOF = "\0"
tokLN = "\n"
tokSTRINGIFY = "#"
tokCONCAT = "##"
tokLOGICAND = "&&"
tokLOGICOR = "||"
tokSHL = "<<"
tokSHR = ">>"
tokEQUAL = "=="
tokNEQUAL = "!="
tokLT = "<"
tokLTE = "<="
tokGT = ">"
tokGTE = ">="
tokELLIPSIS = "..."
tokSPACE = " "
tokDEFINED = "defined"
tokLPAREN = "("
tokRPAREN = ")"
tokNOT = "!"
tokPLUS = "+"
tokMINUS = "-"
tokMULTIPLY = "*"
tokDIVIDE = "/"
tokMODULUS = "%"
tokBINAND = "&"
tokBINOR = "|"
tokBINXOR = "^"
tokCOMMA = ","
tokLBRACE = "{"
tokRBRACE = "}"
tokARROW = "->"
tokINCREMENT = "++"
tokDECREMENT = "--"
tokNUMBER = "<number>"
tokIDENT = "<ident>"
tokSTRING = "<string>"

class Token(clang.cindex.Token):
    """A class that represents one token after parsing.

It inherits the class in libclang, with an extra id property to hold the
    new spelling of the token. The spelling property in the base class is
    defined as read-only. New names after macro instantiation are saved in
    their ids now. It also facilitates the renaming of directive optimizations
    like replacing 'ifndef X' with 'if !defined(X)'.

It also overrides the cursor property of the base class. Because the one
    in libclang always queries based on a single token, which usually doesn't
    hold useful information. The cursor in this class can be set by calling
    CppTokenizer.getTokensWithCursors(). Otherwise it returns the one in the
    base class.
    """

def __init__(self, tu=None, group=None, int_data=None, ptr_data=None,
                 cursor=None):
        clang.cindex.Token.__init__(self)
        self._id = None
        self._tu = tu
        self._group = group
        self._cursor = cursor
        # self.int_data and self.ptr_data are from the base class. But
        # self.int_data doesn't accept a None value.
        if int_data is not None:
            self.int_data = int_data
        self.ptr_data = ptr_data

@property
    def id(self):
        """Name of the token."""
        if self._id is None:
            return self.spelling
        else:
            return self._id

@id.setter
    def id(self, new_id):
        """Setting name of the token."""
        self._id = new_id

@property
    def cursor(self):
        if self._cursor is None:
            self._cursor = clang.cindex.Token.cursor
        return self._cursor

@cursor.setter
    def cursor(self, new_cursor):
        self._cursor = new_cursor

def __repr__(self):
        if self.id == 'defined':
            return self.id
        elif self.kind == TokenKind.IDENTIFIER:
            return "(ident %s)" % self.id

return self.id

def __str__(self):
        return self.id

class BadExpectedToken(Exception):
    """An exception that will be raised for unexpected tokens."""
    pass

# The __contains__ function in libclang SourceRange class contains a bug. It
# gives wrong result when dealing with single line range.
# Bug filed with upstream:
# http://llvm.org/bugs/show_bug.cgi?id=22243, http://reviews.llvm.org/D7277
def SourceRange__contains__(self, other):
    """Determine if a given location is inside the range."""
    if not isinstance(other, SourceLocation):
        return False
    if other.file is None and self.start.file is None:
        pass
    elif (self.start.file.name != other.file.name or
          other.file.name != self.end.file.name):
        # same file name
        return False
    # same file, in between lines
    if self.start.line < other.line < self.end.line:
        return True
    # same file, same line
    elif self.start.line == other.line == self.end.line:
        if self.start.column <= other.column <= self.end.column:
            return True
    elif self.start.line == other.line:
        # same file first line
        if self.start.column <= other.column:
            return True
    elif other.line == self.end.line:
        # same file last line
        if other.column <= self.end.column:
            return True
    return False

SourceRange.__contains__ = SourceRange__contains__

################################################################################
################################################################################
#####                                                                      #####
#####           C P P   T O K E N I Z E R                                  #####
#####                                                                      #####
################################################################################
################################################################################

class CppTokenizer(object):
    """A tokenizer that converts some input text into a list of tokens.

It calls libclang's tokenizer to get the parsed tokens. In addition, it
    updates the cursor property in each token after parsing, by calling
    getTokensWithCursors().
    """

clang_flags = ['-E', '-x', 'c']
    options = TranslationUnit.PARSE_DETAILED_PROCESSING_RECORD

def __init__(self):
        """Initialize a new CppTokenizer object."""
        self._indexer = clang.cindex.Index.create()
        self._tu = None
        self._index = 0
        self.tokens = None

def _getTokensWithCursors(self):
        """Helper method to return all tokens with their cursors.

The cursor property in a clang Token doesn't provide enough
        information. Because it is queried based on single token each time
        without any context, i.e. via calling conf.lib.clang_annotateTokens()
        with only one token given. So we often see 'INVALID_FILE' in one
        token's cursor. In this function it passes all the available tokens
        to get more informative cursors.
        """

tokens_memory = ctypes.POINTER(clang.cindex.Token)()
        tokens_count = ctypes.c_uint()

conf.lib.clang_tokenize(self._tu, self._tu.cursor.extent,
                                ctypes.byref(tokens_memory),
                                ctypes.byref(tokens_count))

count = int(tokens_count.value)

# If we get no tokens, no memory was allocated. Be sure not to return
        # anything and potentially call a destructor on nothing.
        if count < 1:
            return

cursors = (Cursor * count)()
        cursors_memory = ctypes.cast(cursors, ctypes.POINTER(Cursor))

conf.lib.clang_annotateTokens(self._tu, tokens_memory, count,
                                      cursors_memory)

tokens_array = ctypes.cast(
            tokens_memory,
            ctypes.POINTER(clang.cindex.Token * count)).contents
        token_group = TokenGroup(self._tu, tokens_memory, tokens_count)

tokens = []
        for i in xrange(0, count):
            token = Token(self._tu, token_group,
                          int_data=tokens_array[i].int_data,
                          ptr_data=tokens_array[i].ptr_data,
                          cursor=cursors[i])
            # We only want non-comment tokens.
            if token.kind != TokenKind.COMMENT:
                tokens.append(token)

return tokens

def parseString(self, lines):
        """Parse a list of text lines into a BlockList object."""
        file_ = 'dummy.c'
        self._tu = self._indexer.parse(file_, self.clang_flags,
                                       unsaved_files=[(file_, lines)],
                                       options=self.options)
        self.tokens = self._getTokensWithCursors()

def parseFile(self, file_):
        """Parse a file into a BlockList object."""
        self._tu = self._indexer.parse(file_, self.clang_flags,
                                       options=self.options)
        self.tokens = self._getTokensWithCursors()

def nextToken(self):
        """Return next token from the list."""
        if self._index < len(self.tokens):
            t = self.tokens[self._index]
            self._index += 1
            return t
        else:
            return None

class CppStringTokenizer(CppTokenizer):
    """A CppTokenizer derived class that accepts a string of text as input."""

def __init__(self, line):
        CppTokenizer.__init__(self)
        self.parseString(line)

class CppFileTokenizer(CppTokenizer):
    """A CppTokenizer derived class that accepts a file as input."""

def __init__(self, file_):
        CppTokenizer.__init__(self)
        self.parseFile(file_)

# Unit testing
#
class CppTokenizerTester(object):
    """A class used to test CppTokenizer classes."""

def __init__(self, tokenizer=None):
        self._tokenizer = tokenizer
        self._token = None

def setTokenizer(self, tokenizer):
        self._tokenizer = tokenizer

def expect(self, id):
        self._token = self._tokenizer.nextToken()
        if self._token is None:
            tokid = ''
        else:
            tokid = self._token.id
        if tokid == id:
            return
        raise BadExpectedToken("###  BAD TOKEN: '%s' expecting '%s'" % (
            tokid, id))

def expectToken(self, id, line, col):
        self.expect(id)
        if self._token.location.line != line:
            raise BadExpectedToken(
                "###  BAD LINENO: token '%s' got '%d' expecting '%d'" % (
                    id, self._token.lineno, line))
        if self._token.location.column != col:
            raise BadExpectedToken("###  BAD COLNO: '%d' expecting '%d'" % (
                self._token.colno, col))

def expectTokens(self, tokens):
        for id, line, col in tokens:
            self.expectToken(id, line, col)

def expectList(self, list_):
        for item in list_:
            self.expect(item)

def test_CppTokenizer():
    tester = CppTokenizerTester()

tester.setTokenizer(CppStringTokenizer("#an/example  && (01923_xy)"))
    tester.expectList(["#", "an", "/", "example", tokLOGICAND, tokLPAREN,
                       "01923_xy", tokRPAREN])

tester.setTokenizer(CppStringTokenizer("FOO(BAR) && defined(BAZ)"))
    tester.expectList(["FOO", tokLPAREN, "BAR", tokRPAREN, tokLOGICAND,
                       "defined", tokLPAREN, "BAZ", tokRPAREN])

tester.setTokenizer(CppStringTokenizer("/*\n#\n*/"))
    tester.expectList([])

tester.setTokenizer(CppStringTokenizer("first\nsecond"))
    tester.expectList(["first", "second"])

tester.setTokenizer(CppStringTokenizer("first second\n  third"))
    tester.expectTokens([("first", 1, 1),
                         ("second", 1, 7),
                         ("third", 2, 3)])

tester.setTokenizer(CppStringTokenizer("boo /* what the\nhell */"))
    tester.expectTokens([("boo", 1, 1)])

tester.setTokenizer(CppStringTokenizer("an \\\n example"))
    tester.expectTokens([("an", 1, 1),
                         ("example", 2, 2)])
    return True

################################################################################
################################################################################
#####                                                                      #####
#####           C P P   E X P R E S S I O N S                              #####
#####                                                                      #####
################################################################################
################################################################################

class CppExpr(object):
    """A class that models the condition of #if directives into an expr tree.

Each node in the tree is of the form (op, arg) or (op, arg1, arg2) where
    "op" is a string describing the operation
    """

unaries = ["!", "~"]
    binaries = ["+", "-", "<", "<=", ">=", ">", "&&", "||", "*", "/", "%",
                "&", "|", "^", "<<", ">>", "==", "!=", "?", ":"]
    precedences = {
        "?": 1, ":": 1,
        "||": 2,
        "&&": 3,
        "|": 4,
        "^": 5,
        "&": 6,
        "==": 7, "!=": 7,
        "<": 8, "<=": 8, ">": 8, ">=": 8,
        "<<": 9, ">>": 9,
        "+": 10, "-": 10,
        "*": 11, "/": 11, "%": 11,
        "!": 12, "~": 12
    }

def __init__(self, tokens):
        """Initialize a CppExpr. 'tokens' must be a CppToken list."""
        self.tokens = tokens
        self._num_tokens = len(tokens)
        self._index = 0

if debugCppExpr:
            print "CppExpr: trying to parse %s" % repr(tokens)
        self.expr = self.parseExpression(0)
        if debugCppExpr:
            print "CppExpr: got " + repr(self.expr)
        if self._index != self._num_tokens:
            self.throw(BadExpectedToken, "crap at end of input (%d != %d): %s"
                       % (self._index, self._num_tokens, repr(tokens)))

def throw(self, exception, msg):
        if self._index < self._num_tokens:
            tok = self.tokens[self._index]
            print "%d:%d: %s" % (tok.location.line, tok.location.column, msg)
        else:
            print "EOF: %s" % msg
        raise exception(msg)

def expectId(self, id):
        """Check that a given token id is at the current position."""
        token = self.tokens[self._index]
        if self._index >= self._num_tokens or token.id != id:
            self.throw(BadExpectedToken,
                       "### expecting '%s' in expression, got '%s'" % (
                           id, token.id))
        self._index += 1

def is_decimal(self):
        token = self.tokens[self._index].id
        if token[-1] in "ULul":
            token = token[:-1]
        try:
            val = int(token, 10)
            self._index += 1
            return ('int', val)
        except ValueError:
            return None

def is_octal(self):
        token = self.tokens[self._index].id
        if token[-1] in "ULul":
            token = token[:-1]
        if len(token) < 2 or token[0] != '0':
            return None
        try:
            val = int(token, 8)
            self._index += 1
            return ('oct', val)
        except ValueError:
            return None

def is_hexadecimal(self):
        token = self.tokens[self._index].id
        if token[-1] in "ULul":
            token = token[:-1]
        if len(token) < 3 or (token[:2] != '0x' and token[:2] != '0X'):
            return None
        try:
            val = int(token, 16)
            self._index += 1
            return ('hex', val)
        except ValueError:
            return None

def is_integer(self):
        if self.tokens[self._index].kind != TokenKind.LITERAL:
            return None

c = self.is_hexadecimal()
        if c:
            return c

c = self.is_octal()
        if c:
            return c

c = self.is_decimal()
        if c:
            return c

return None

def is_number(self):
        t = self.tokens[self._index]
        if t.id == tokMINUS and self._index + 1 < self._num_tokens:
            self._index += 1
            c = self.is_integer()
            if c:
                op, val = c
                return (op, -val)
        if t.id == tokPLUS and self._index + 1 < self._num_tokens:
            self._index += 1
            c = self.is_integer()
            if c:
                return c

return self.is_integer()

def is_defined(self):
        t = self.tokens[self._index]
        if t.id != tokDEFINED:
            return None

# We have the defined keyword, check the rest.
        self._index += 1
        used_parens = False
        if (self._index < self._num_tokens and
            self.tokens[self._index].id == tokLPAREN):
            used_parens = True
            self._index += 1

if self._index >= self._num_tokens:
            self.throw(BadExpectedToken,
                       "### 'defined' must be followed by macro name or left "
                       "paren")

t = self.tokens[self._index]
        if t.kind != TokenKind.IDENTIFIER:
            self.throw(BadExpectedToken,
                       "### 'defined' must be followed by macro name")

self._index += 1
        if used_parens:
            self.expectId(tokRPAREN)

return ("defined", t.id)

def is_call_or_ident(self):
        if self._index >= self._num_tokens:
            return None

t = self.tokens[self._index]
        if t.kind != TokenKind.IDENTIFIER:
            return None

name = t.id

self._index += 1
        if (self._index >= self._num_tokens or
            self.tokens[self._index].id != tokLPAREN):
            return ("ident", name)

params = []
        depth = 1
        self._index += 1
        j = self._index
        while self._index < self._num_tokens:
            id = self.tokens[self._index].id
            if id == tokLPAREN:
                depth += 1
            elif depth == 1 and (id == tokCOMMA or id == tokRPAREN):
                k = self._index
                param = self.tokens[j:k]
                params.append(param)
                if id == tokRPAREN:
                    break
                j = self._index + 1
            elif id == tokRPAREN:
                depth -= 1
            self._index += 1

if self._index >= self._num_tokens:
            return None

self._index += 1
        return ("call", (name, params))

# Implements the "precedence climbing" algorithm from
    # http://www.engr.mun.ca/~theo/Misc/exp_parsing.htm.
    # The "classic" algorithm would be fine if we were using a tool to
    # generate the parser, but we're not. Dijkstra's "shunting yard"
    # algorithm hasn't been necessary yet.

def parseExpression(self, minPrecedence):
        if self._index >= self._num_tokens:
            return None

node = self.parsePrimary()
        while (self.token() and self.isBinary(self.token()) and
               self.precedence(self.token()) >= minPrecedence):
            op = self.token()
            self.nextToken()
            rhs = self.parseExpression(self.precedence(op) + 1)
            node = (op.id, node, rhs)

return node

def parsePrimary(self):
        op = self.token()
        if self.isUnary(op):
            self.nextToken()
            return (op.id, self.parseExpression(self.precedence(op)))

primary = None
        if op.id == tokLPAREN:
            self.nextToken()
            primary = self.parseExpression(0)
            self.expectId(tokRPAREN)
        elif op.id == "?":
            self.nextToken()
            primary = self.parseExpression(0)
            self.expectId(":")
        elif op.id == '+' or op.id == '-' or op.kind == TokenKind.LITERAL:
            primary = self.is_number()
        # Checking for 'defined' needs to come first now because 'defined' is
        # recognized as IDENTIFIER.
        elif op.id == tokDEFINED:
            primary = self.is_defined()
        elif op.kind == TokenKind.IDENTIFIER:
            primary = self.is_call_or_ident()
        else:
            self.throw(BadExpectedToken,
                       "didn't expect to see a %s in factor" % (
                           self.tokens[self._index].id))
        return primary

def isBinary(self, token):
        return token.id in self.binaries

def isUnary(self, token):
        return token.id in self.unaries

def precedence(self, token):
        return self.precedences.get(token.id)

def token(self):
        if self._index >= self._num_tokens:
            return None
        return self.tokens[self._index]

def nextToken(self):
        self._index += 1
        if self._index >= self._num_tokens:
            return None
        return self.tokens[self._index]

def dump_node(self, e):
        op = e[0]
        line = "(" + op
        if op == "int":
            line += " %d)" % e[1]
        elif op == "oct":
            line += " 0%o)" % e[1]
        elif op == "hex":
            line += " 0x%x)" % e[1]
        elif op == "ident":
            line += " %s)" % e[1]
        elif op == "defined":
            line += " %s)" % e[1]
        elif op == "call":
            arg = e[1]
            line += " %s [" % arg[0]
            prefix = ""
            for param in arg[1]:
                par = ""
                for tok in param:
                    par += str(tok)
                line += "%s%s" % (prefix, par)
                prefix = ","
            line += "])"
        elif op in CppExpr.unaries:
            line += " %s)" % self.dump_node(e[1])
        elif op in CppExpr.binaries:
            line += " %s %s)" % (self.dump_node(e[1]), self.dump_node(e[2]))
        else:
            line += " ?%s)" % repr(e[1])

return line

def __repr__(self):
        return self.dump_node(self.expr)

def source_node(self, e):
        op = e[0]
        if op == "int":
            return "%d" % e[1]
        if op == "hex":
            return "0x%x" % e[1]
        if op == "oct":
            return "0%o" % e[1]
        if op == "ident":
            # XXX: should try to expand
            return e[1]
        if op == "defined":
            return "defined(%s)" % e[1]

prec = CppExpr.precedences.get(op, 1000)
        arg = e[1]
        if op in CppExpr.unaries:
            arg_src = self.source_node(arg)
            arg_op = arg[0]
            arg_prec = CppExpr.precedences.get(arg_op, 1000)
            if arg_prec < prec:
                return "!(" + arg_src + ")"
            else:
                return "!" + arg_src
        if op in CppExpr.binaries:
            arg2 = e[2]
            arg1_op = arg[0]
            arg2_op = arg2[0]
            arg1_src = self.source_node(arg)
            arg2_src = self.source_node(arg2)
            if CppExpr.precedences.get(arg1_op, 1000) < prec:
                arg1_src = "(%s)" % arg1_src
            if CppExpr.precedences.get(arg2_op, 1000) < prec:
                arg2_src = "(%s)" % arg2_src

return "%s %s %s" % (arg1_src, op, arg2_src)
        return "???"

def __str__(self):
        return self.source_node(self.expr)

@staticmethod
    def int_node(e):
        if e[0] in ["int", "oct", "hex"]:
            return e[1]
        else:
            return None

def toInt(self):
        return self.int_node(self.expr)

def optimize_node(self, e, macros=None):
        if macros is None:
            macros = {}
        op = e[0]

if op == "defined":
            op, name = e
            if macros.has_key(name):
                if macros[name] == kCppUndefinedMacro:
                    return ("int", 0)
                else:
                    try:
                        value = int(macros[name])
                        return ("int", value)
                    except ValueError:
                        return ("defined", macros[name])

if kernel_remove_config_macros and name.startswith("CONFIG_"):
                return ("int", 0)

return e

elif op == "ident":
            op, name = e
            if macros.has_key(name):
                try:
                    value = int(macros[name])
                    expanded = ("int", value)
                except ValueError:
                    expanded = ("ident", macros[name])
                return self.optimize_node(expanded, macros)
            return e

elif op == "!":
            op, v = e
            v = self.optimize_node(v, macros)
            if v[0] == "int":
                if v[1] == 0:
                    return ("int", 1)
                else:
                    return ("int", 0)
            return ('!', v)

elif op == "&&":
            op, l, r = e
            l = self.optimize_node(l, macros)
            r = self.optimize_node(r, macros)
            li = self.int_node(l)
            ri = self.int_node(r)
            if li is not None:
                if li == 0:
                    return ("int", 0)
                else:
                    return r
            elif ri is not None:
                if ri == 0:
                    return ("int", 0)
                else:
                    return l
            return (op, l, r)

elif op == "||":
            op, l, r = e
            l = self.optimize_node(l, macros)
            r = self.optimize_node(r, macros)
            li = self.int_node(l)
            ri = self.int_node(r)
            if li is not None:
                if li == 0:
                    return r
                else:
                    return ("int", 1)
            elif ri is not None:
                if ri == 0:
                    return l
                else:
                    return ("int", 1)
            return (op, l, r)

else:
            return e

def optimize(self, macros=None):
        if macros is None:
            macros = {}
        self.expr = self.optimize_node(self.expr, macros)

def test_cpp_expr(expr, expected):
    e = CppExpr(CppStringTokenizer(expr).tokens)
    s1 = repr(e)
    if s1 != expected:
        print ("[FAIL]: expression '%s' generates '%s', should be "
               "'%s'" % (expr, s1, expected))
        global failure_count
        failure_count += 1

def test_cpp_expr_optim(expr, expected, macros=None):
    if macros is None:
        macros = {}
    e = CppExpr(CppStringTokenizer(expr).tokens)
    e.optimize(macros)
    s1 = repr(e)
    if s1 != expected:
        print ("[FAIL]: optimized expression '%s' generates '%s' with "
               "macros %s, should be '%s'" % (expr, s1, macros, expected))
        global failure_count
        failure_count += 1

def test_cpp_expr_source(expr, expected):
    e = CppExpr(CppStringTokenizer(expr).tokens)
    s1 = str(e)
    if s1 != expected:
        print ("[FAIL]: source expression '%s' generates '%s', should "
               "be '%s'" % (expr, s1, expected))
        global failure_count
        failure_count += 1

def test_CppExpr():
    test_cpp_expr("0", "(int 0)")
    test_cpp_expr("1", "(int 1)")
    test_cpp_expr("-5", "(int -5)")
    test_cpp_expr("+1", "(int 1)")
    test_cpp_expr("0U", "(int 0)")
    test_cpp_expr("015", "(oct 015)")
    test_cpp_expr("015l", "(oct 015)")
    test_cpp_expr("0x3e", "(hex 0x3e)")
    test_cpp_expr("(0)", "(int 0)")
    test_cpp_expr("1 && 1", "(&& (int 1) (int 1))")
    test_cpp_expr("1 && 0", "(&& (int 1) (int 0))")
    test_cpp_expr("EXAMPLE", "(ident EXAMPLE)")
    test_cpp_expr("EXAMPLE - 3", "(- (ident EXAMPLE) (int 3))")
    test_cpp_expr("defined(EXAMPLE)", "(defined EXAMPLE)")
    test_cpp_expr("defined ( EXAMPLE ) ", "(defined EXAMPLE)")
    test_cpp_expr("!defined(EXAMPLE)", "(! (defined EXAMPLE))")
    test_cpp_expr("defined(ABC) || defined(BINGO)",
                  "(|| (defined ABC) (defined BINGO))")
    test_cpp_expr("FOO(BAR,5)", "(call FOO [BAR,5])")
    test_cpp_expr("A == 1 || defined(B)",
                  "(|| (== (ident A) (int 1)) (defined B))")

test_cpp_expr_optim("0", "(int 0)")
    test_cpp_expr_optim("1", "(int 1)")
    test_cpp_expr_optim("1 && 1", "(int 1)")
    test_cpp_expr_optim("1 && +1", "(int 1)")
    test_cpp_expr_optim("0x1 && 01", "(oct 01)")
    test_cpp_expr_optim("1 && 0", "(int 0)")
    test_cpp_expr_optim("0 && 1", "(int 0)")
    test_cpp_expr_optim("0 && 0", "(int 0)")
    test_cpp_expr_optim("1 || 1", "(int 1)")
    test_cpp_expr_optim("1 || 0", "(int 1)")
    test_cpp_expr_optim("0 || 1", "(int 1)")
    test_cpp_expr_optim("0 || 0", "(int 0)")
    test_cpp_expr_optim("A", "(ident A)")
    test_cpp_expr_optim("A", "(int 1)", {"A": 1})
    test_cpp_expr_optim("A || B", "(int 1)", {"A": 1})
    test_cpp_expr_optim("A || B", "(int 1)", {"B": 1})
    test_cpp_expr_optim("A && B", "(ident B)", {"A": 1})
    test_cpp_expr_optim("A && B", "(ident A)", {"B": 1})
    test_cpp_expr_optim("A && B", "(&& (ident A) (ident B))")
    test_cpp_expr_optim("EXAMPLE", "(ident EXAMPLE)")
    test_cpp_expr_optim("EXAMPLE - 3", "(- (ident EXAMPLE) (int 3))")
    test_cpp_expr_optim("defined(EXAMPLE)", "(defined EXAMPLE)")
    test_cpp_expr_optim("defined(EXAMPLE)", "(defined XOWOE)",
                        {"EXAMPLE": "XOWOE"})
    test_cpp_expr_optim("defined(EXAMPLE)", "(int 0)",
                        {"EXAMPLE": kCppUndefinedMacro})
    test_cpp_expr_optim("!defined(EXAMPLE)", "(! (defined EXAMPLE))")
    test_cpp_expr_optim("!defined(EXAMPLE)", "(! (defined XOWOE))",
                        {"EXAMPLE": "XOWOE"})
    test_cpp_expr_optim("!defined(EXAMPLE)", "(int 1)",
                        {"EXAMPLE": kCppUndefinedMacro})
    test_cpp_expr_optim("defined(A) || defined(B)",
                        "(|| (defined A) (defined B))")
    test_cpp_expr_optim("defined(A) || defined(B)", "(int 1)", {"A": "1"})
    test_cpp_expr_optim("defined(A) || defined(B)", "(int 1)", {"B": "1"})
    test_cpp_expr_optim("defined(A) || defined(B)", "(defined A)",
                        {"B": kCppUndefinedMacro})
    test_cpp_expr_optim("defined(A) || defined(B)", "(int 0)",
                        {"A": kCppUndefinedMacro, "B": kCppUndefinedMacro})
    test_cpp_expr_optim("defined(A) && defined(B)",
                        "(&& (defined A) (defined B))")
    test_cpp_expr_optim("defined(A) && defined(B)",
                        "(defined B)", {"A": "1"})
    test_cpp_expr_optim("defined(A) && defined(B)",
                        "(defined A)", {"B": "1"})
    test_cpp_expr_optim("defined(A) && defined(B)", "(int 0)",
                        {"B": kCppUndefinedMacro})
    test_cpp_expr_optim("defined(A) && defined(B)",
                        "(int 0)", {"A": kCppUndefinedMacro})
    test_cpp_expr_optim("A == 1 || defined(B)",
                        "(|| (== (ident A) (int 1)) (defined B))")
    test_cpp_expr_optim(
        "defined(__KERNEL__) || !defined(__GLIBC__) || (__GLIBC__ < 2)",
        "(|| (! (defined __GLIBC__)) (< (ident __GLIBC__) (int 2)))",
        {"__KERNEL__": kCppUndefinedMacro})

test_cpp_expr_source("0", "0")
    test_cpp_expr_source("1", "1")
    test_cpp_expr_source("1 && 1", "1 && 1")
    test_cpp_expr_source("1 && 0", "1 && 0")
    test_cpp_expr_source("0 && 1", "0 && 1")
    test_cpp_expr_source("0 && 0", "0 && 0")
    test_cpp_expr_source("1 || 1", "1 || 1")
    test_cpp_expr_source("1 || 0", "1 || 0")
    test_cpp_expr_source("0 || 1", "0 || 1")
    test_cpp_expr_source("0 || 0", "0 || 0")
    test_cpp_expr_source("EXAMPLE", "EXAMPLE")
    test_cpp_expr_source("EXAMPLE - 3", "EXAMPLE - 3")
    test_cpp_expr_source("defined(EXAMPLE)", "defined(EXAMPLE)")
    test_cpp_expr_source("defined EXAMPLE", "defined(EXAMPLE)")
    test_cpp_expr_source("A == 1 || defined(B)", "A == 1 || defined(B)")

################################################################################
################################################################################
#####                                                                      #####
#####          C P P   B L O C K                                           #####
#####                                                                      #####
################################################################################
################################################################################

class Block(object):
    """A class used to model a block of input source text.

There are two block types:
      - directive blocks: contain the tokens of a single pre-processor
        directive (e.g. #if)
      - text blocks, contain the tokens of non-directive blocks

The cpp parser class below will transform an input source file into a list
    of Block objects (grouped in a BlockList object for convenience)
    """

def __init__(self, tokens, directive=None, lineno=0, identifier=None):
        """Initialize a new block, if 'directive' is None, it is a text block.

NOTE: This automatically converts '#ifdef MACRO' into
        '#if defined(MACRO)' and '#ifndef MACRO' into '#if !defined(MACRO)'.
        """

if directive == "ifdef":
            tok = Token()
            tok.id = tokDEFINED
            tokens = [tok] + tokens
            directive = "if"

elif directive == "ifndef":
            tok1 = Token()
            tok2 = Token()
            tok1.id = tokNOT
            tok2.id = tokDEFINED
            tokens = [tok1, tok2] + tokens
            directive = "if"

self.tokens = tokens
        self.directive = directive
        self.define_id = identifier
        if lineno > 0:
            self.lineno = lineno
        else:
            self.lineno = self.tokens[0].location.line

if self.isIf():
            self.expr = CppExpr(self.tokens)

def isDirective(self):
        """Return True iff this is a directive block."""
        return self.directive is not None

def isConditional(self):
        """Return True iff this is a conditional directive block."""
        return self.directive in ["if", "ifdef", "ifndef", "else", "elif",
                                  "endif"]

def isDefine(self):
        """Return the macro name in a #define directive, or None otherwise."""
        if self.directive != "define":
            return None
        return self.define_id

def isIf(self):
        """Return True iff this is an #if-like directive block."""
        return self.directive in ["if", "ifdef", "ifndef", "elif"]

def isEndif(self):
        """Return True iff this is an #endif directive block."""
        return self.directive == "endif"

def isInclude(self):
        """Check whether this is a #include directive.

If true, returns the corresponding file name (with brackets or
        double-qoutes). None otherwise.
        """

if self.directive != "include":
            return None
        return ''.join([str(x) for x in self.tokens])

@staticmethod
    def format_blocks(tokens, indent=0):
        """Return the formatted lines of strings with proper indentation."""
        newline = True
        result = []
        buf = ''
        i = 0
        while i < len(tokens):
            t = tokens[i]
            if t.id == '{':
                buf += ' {'
                result.append(strip_space(buf))
                indent += 2
                buf = ''
                newline = True
            elif t.id == '}':
                indent -= 2
                if not newline:
                    result.append(strip_space(buf))
                # Look ahead to determine if it's the end of line.
                if (i + 1 < len(tokens) and
                    (tokens[i+1].id == ';' or
                     tokens[i+1].id in ['else', '__attribute__',
                                        '__attribute', '__packed'] or
                     tokens[i+1].kind == TokenKind.IDENTIFIER)):
                    buf = ' ' * indent + '}'
                    newline = False
                else:
                    result.append(' ' * indent + '}')
                    buf = ''
                    newline = True
            elif t.id == ';':
                result.append(strip_space(buf) + ';')
                buf = ''
                newline = True
            # We prefer a new line for each constant in enum.
            elif t.id == ',' and t.cursor.kind == CursorKind.ENUM_DECL:
                result.append(strip_space(buf) + ',')
                buf = ''
                newline = True
            else:
                if newline:
                    buf += ' ' * indent + str(t)
                else:
                    buf += ' ' + str(t)
                newline = False
            i += 1

if buf:
            result.append(strip_space(buf))

return result, indent

def writeWithWarning(self, out, warning, left_count, repeat_count, indent):
        """Dump the current block with warnings."""
        # removeWhiteSpace() will sometimes creates non-directive blocks
        # without any tokens. These come from blocks that only contained
        # empty lines and spaces. They should not be printed in the final
        # output, and then should not be counted for this operation.
        #
        if self.directive is None and not self.tokens:
            return left_count, indent

if self.directive:
            out.write(str(self) + '\n')
            left_count -= 1
            if left_count == 0:
                out.write(warning)
                left_count = repeat_count

else:
            lines, indent = self.format_blocks(self.tokens, indent)
            for line in lines:
                out.write(line + '\n')
                left_count -= 1
                if left_count == 0:
                    out.write(warning)
                    left_count = repeat_count

return left_count, indent

def __repr__(self):
        """Generate the representation of a given block."""
        if self.directive:
            result = "#%s " % self.directive
            if self.isIf():
                result += repr(self.expr)
            else:
                for tok in self.tokens:
                    result += repr(tok)
        else:
            result = ""
            for tok in self.tokens:
                result += repr(tok)

return result

def __str__(self):
        """Generate the string representation of a given block."""
        if self.directive:
            # "#if"
            if self.directive == "if":
                # small optimization to re-generate #ifdef and #ifndef
                e = self.expr.expr
                op = e[0]
                if op == "defined":
                    result = "#ifdef %s" % e[1]
                elif op == "!" and e[1][0] == "defined":
                    result = "#ifndef %s" % e[1][1]
                else:
                    result = "#if " + str(self.expr)

# "#define"
            elif self.isDefine():
                result = "#%s %s" % (self.directive, self.define_id)
                if self.tokens:
                    result += " "
                expr = strip_space(' '.join([tok.id for tok in self.tokens]))
                # remove the space between name and '(' in function call
                result += re.sub(r'(\w+) \(', r'\1(', expr)

# "#error"
            # Concatenating tokens with a space separator, because they may
            # not be quoted and broken into several tokens
            elif self.directive == "error":
                result = "#error %s" % ' '.join([tok.id for tok in self.tokens])

else:
                result = "#%s" % self.directive
                if self.tokens:
                    result += " "
                result += ''.join([tok.id for tok in self.tokens])
        else:
            lines, _ = self.format_blocks(self.tokens)
            result = '\n'.join(lines)

return result

class BlockList(object):
    """A convenience class used to hold and process a list of blocks.

It calls the cpp parser to get the blocks.
    """

def __init__(self, blocks):
        self.blocks = blocks

def __len__(self):
        return len(self.blocks)

def __getitem__(self, n):
        return self.blocks[n]

def __repr__(self):
        return repr(self.blocks)

def __str__(self):
        result = '\n'.join([str(b) for b in self.blocks])
        return result

def dump(self):
        """Dump all the blocks in current BlockList."""
        print '##### BEGIN #####'
        for i, b in enumerate(self.blocks):
            print '### BLOCK %d ###' % i
            print b
        print '##### END #####'

def optimizeIf01(self):
        """Remove the code between #if 0 .. #endif in a BlockList."""
        self.blocks = optimize_if01(self.blocks)

def optimizeMacros(self, macros):
        """Remove known defined and undefined macros from a BlockList."""
        for b in self.blocks:
            if b.isIf():
                b.expr.optimize(macros)

def removeMacroDefines(self, macros):
        """Remove known macro definitions from a BlockList."""
        self.blocks = remove_macro_defines(self.blocks, macros)

def optimizeAll(self, macros):
        self.optimizeMacros(macros)
        self.optimizeIf01()
        return

def findIncludes(self):
        """Return the list of included files in a BlockList."""
        result = []
        for b in self.blocks:
            i = b.isInclude()
            if i:
                result.append(i)
        return result

def write(self, out):
        out.write(str(self))

def writeWithWarning(self, out, warning, repeat_count):
        left_count = repeat_count
        indent = 0
        for b in self.blocks:
            left_count, indent = b.writeWithWarning(out, warning, left_count,
                                                    repeat_count, indent)

def removeVarsAndFuncs(self, knownStatics=None):
        """Remove variable and function declarations.

All extern and static declarations corresponding to variable and
        function declarations are removed. We only accept typedefs and
        enum/structs/union declarations.

However, we keep the definitions corresponding to the set of known
        static inline functions in the set 'knownStatics', which is useful
        for optimized byteorder swap functions and stuff like that.
        """

# NOTE: It's also removing function-like macros, such as __SYSCALL(...)
        # in uapi/asm-generic/unistd.h, or KEY_FIELD(...) in linux/bcache.h.
        # It could be problematic when we have function-like macros but without
        # '}' following them. It will skip all the tokens/blocks until seeing a
        # '}' as the function end. Fortunately we don't have such cases in the
        # current kernel headers.

# state = 0 => normal (i.e. LN + spaces)
        # state = 1 => typedef/struct encountered, ends with ";"
        # state = 2 => var declaration encountered, ends with ";"
        # state = 3 => func declaration encountered, ends with "}"

if knownStatics is None:
            knownStatics = set()
        state = 0
        depth = 0
        blocks2 = []
        skipTokens = False
        for b in self.blocks:
            if b.isDirective():
                blocks2.append(b)
            else:
                n = len(b.tokens)
                i = 0
                if skipTokens:
                    first = n
                else:
                    first = 0
                while i < n:
                    tok = b.tokens[i]
                    tokid = tok.id
                    # If we are not looking for the start of a new
                    # type/var/func, then skip over tokens until
                    # we find our terminator, managing the depth of
                    # accolades as we go.
                    if state > 0:
                        terminator = False
                        if tokid == '{':
                            depth += 1
                        elif tokid == '}':
                            if depth > 0:
                                depth -= 1
                            if (depth == 0) and (state == 3):
                                terminator = True
                        elif tokid == ';' and depth == 0:
                            terminator = True

if terminator:
                            # we found the terminator
                            state = 0
                            if skipTokens:
                                skipTokens = False
                                first = i + 1

i += 1
                        continue

# Is it a new type definition, then start recording it
                    if tok.id in ['struct', 'typedef', 'enum', 'union',
                                  '__extension__']:
                        state = 1
                        i += 1
                        continue

# Is it a variable or function definition. If so, first
                    # try to determine which type it is, and also extract
                    # its name.
                    #
                    # We're going to parse the next tokens of the same block
                    # until we find a semi-column or a left parenthesis.
                    #
                    # The semi-column corresponds to a variable definition,
                    # the left-parenthesis to a function definition.
                    #
                    # We also assume that the var/func name is the last
                    # identifier before the terminator.
                    #
                    j = i + 1
                    ident = ""
                    while j < n:
                        tokid = b.tokens[j].id
                        if tokid == '(':  # a function declaration
                            state = 3
                            break
                        elif tokid == ';':  # a variable declaration
                            state = 2
                            break
                        if b.tokens[j].kind == TokenKind.IDENTIFIER:
                            ident = b.tokens[j].id
                        j += 1

if j >= n:
                        # This can only happen when the declaration
                        # does not end on the current block (e.g. with
                        # a directive mixed inside it.
                        #
                        # We will treat it as malformed because
                        # it's very hard to recover from this case
                        # without making our parser much more
                        # complex.
                        #
                        logging.debug("### skip unterminated static '%s'",
                                      ident)
                        break

if ident in knownStatics:
                        logging.debug("### keep var/func '%s': %s", ident,
                                      repr(b.tokens[i:j]))
                    else:
                        # We're going to skip the tokens for this declaration
                        logging.debug("### skip var/func '%s': %s", ident,
                                      repr(b.tokens[i:j]))
                        if i > first:
                            blocks2.append(Block(b.tokens[first:i]))
                        skipTokens = True
                        first = n

i += 1

if i > first:
                    # print "### final '%s'" % repr(b.tokens[first:i])
                    blocks2.append(Block(b.tokens[first:i]))

self.blocks = blocks2

def replaceTokens(self, replacements):
        """Replace tokens according to the given dict."""
        for b in self.blocks:
            made_change = False
            if b.isInclude() is None:
                for tok in b.tokens:
                    if tok.kind == TokenKind.IDENTIFIER:
                        if tok.id in replacements:
                            tok.id = replacements[tok.id]
                            made_change = True

if b.isDefine() and b.define_id in replacements:
                    b.define_id = replacements[b.define_id]
                    made_change = True

if made_change and b.isIf():
                # Keep 'expr' in sync with 'tokens'.
                b.expr = CppExpr(b.tokens)

def strip_space(s):
    """Strip out redundant space in a given string."""

# NOTE: It ought to be more clever to not destroy spaces in string tokens.
    replacements = {' . ': '.',
                    ' [': '[',
                    '[ ': '[',
                    ' ]': ']',
                    '( ': '(',
                    ' )': ')',
                    ' ,': ',',
                    '# ': '#',
                    ' ;': ';',
                    '~ ': '~',
                    ' -> ': '->'}
    result = s
    for r in replacements:
        result = result.replace(r, replacements[r])

# Remove the space between function name and the parenthesis.
    result = re.sub(r'(\w+) \(', r'\1(', result)
    return result

class BlockParser(object):
    """A class that converts an input source file into a BlockList object."""

def __init__(self, tokzer=None):
        """Initialize a block parser.

The input source is provided through a Tokenizer object.
        """
        self._tokzer = tokzer
        self._parsed = False

@property
    def parsed(self):
        return self._parsed

@staticmethod
    def _short_extent(extent):
        return '%d:%d - %d:%d' % (extent.start.line, extent.start.column,
                                  extent.end.line, extent.end.column)

def getBlocks(self, tokzer=None):
        """Return all the blocks parsed."""

def consume_extent(i, tokens, extent=None, detect_change=False):
            """Return tokens that belong to the given extent.

It parses all the tokens that follow tokens[i], until getting out
            of the extent. When detect_change is True, it may terminate early
            when detecting preprocessing directives inside the extent.
            """

result = []
            if extent is None:
                extent = tokens[i].cursor.extent

while i < len(tokens) and tokens[i].location in extent:
                t = tokens[i]
                if debugBlockParser:
                    print ' ' * 2, t.id, t.kind, t.cursor.kind
                if (detect_change and t.cursor.extent != extent and
                    t.cursor.kind == CursorKind.PREPROCESSING_DIRECTIVE):
                    break
                result.append(t)
                i += 1
            return (i, result)

def consume_line(i, tokens):
            """Return tokens that follow tokens[i] in the same line."""
            result = []
            line = tokens[i].location.line
            while i < len(tokens) and tokens[i].location.line == line:
                if tokens[i].cursor.kind == CursorKind.PREPROCESSING_DIRECTIVE:
                    break
                result.append(tokens[i])
                i += 1
            return (i, result)

if tokzer is None:
            tokzer = self._tokzer
        tokens = tokzer.tokens

blocks = []
        buf = []
        i = 0

while i < len(tokens):
            t = tokens[i]
            cursor = t.cursor

if debugBlockParser:
                print ("%d: Processing [%s], kind=[%s], cursor=[%s], "
                       "extent=[%s]" % (t.location.line, t.spelling, t.kind,
                                        cursor.kind,
                                        self._short_extent(cursor.extent)))

if cursor.kind == CursorKind.PREPROCESSING_DIRECTIVE:
                if buf:
                    blocks.append(Block(buf))
                    buf = []

j = i
                if j + 1 >= len(tokens):
                    raise BadExpectedToken("### BAD TOKEN at %s" % (t.location))
                directive = tokens[j+1].id

if directive == 'define':
                    if i+2 >= len(tokens):
                        raise BadExpectedToken("### BAD TOKEN at %s" %
                                               (tokens[i].location))

# Skip '#' and 'define'.
                    extent = tokens[i].cursor.extent
                    i += 2
                    id = ''
                    # We need to separate the id from the remaining of
                    # the line, especially for the function-like macro.
                    if (i + 1 < len(tokens) and tokens[i+1].id == '(' and
                        (tokens[i].location.column + len(tokens[i].spelling) ==
                         tokens[i+1].location.column)):
                        while i < len(tokens):
                            id += tokens[i].id
                            if tokens[i].spelling == ')':
                                i += 1
                                break
                            i += 1
                    else:
                        id += tokens[i].id
                        # Advance to the next token that follows the macro id
                        i += 1

(i, ret) = consume_extent(i, tokens, extent=extent)
                    blocks.append(Block(ret, directive=directive,
                                        lineno=t.location.line, identifier=id))

else:
                    (i, ret) = consume_extent(i, tokens)
                    blocks.append(Block(ret[2:], directive=directive,
                                        lineno=t.location.line))

elif cursor.kind == CursorKind.INCLUSION_DIRECTIVE:
                if buf:
                    blocks.append(Block(buf))
                    buf = []
                directive = tokens[i+1].id
                (i, ret) = consume_extent(i, tokens)

blocks.append(Block(ret[2:], directive=directive,
                                    lineno=t.location.line))

elif cursor.kind == CursorKind.VAR_DECL:
                if buf:
                    blocks.append(Block(buf))
                    buf = []

(i, ret) = consume_extent(i, tokens, detect_change=True)
                buf += ret

elif cursor.kind == CursorKind.FUNCTION_DECL:
                if buf:
                    blocks.append(Block(buf))
                    buf = []

(i, ret) = consume_extent(i, tokens, detect_change=True)
                buf += ret

else:
                (i, ret) = consume_line(i, tokens)
                buf += ret

if buf:
            blocks.append(Block(buf))

# _parsed=True indicates a successful parsing, although may result an
        # empty BlockList.
        self._parsed = True

return BlockList(blocks)

def parse(self, tokzer):
        return self.getBlocks(tokzer)

def parseFile(self, path):
        return self.getBlocks(CppFileTokenizer(path))

def test_block_parsing(lines, expected):
    """Helper method to test the correctness of BlockParser.parse."""
    blocks = BlockParser().parse(CppStringTokenizer('\n'.join(lines)))
    if len(blocks) != len(expected):
        raise BadExpectedToken("BlockParser.parse() returned '%s' expecting "
                               "'%s'" % (str(blocks), repr(expected)))
    for n in range(len(blocks)):
        if str(blocks[n]) != expected[n]:
            raise BadExpectedToken("BlockParser.parse()[%d] is '%s', "
                                   "expecting '%s'" % (n, str(blocks[n]),
                                                       expected[n]))

def test_BlockParser():
    test_block_parsing(["#error hello"], ["#error hello"])
    test_block_parsing(["foo", "", "bar"], ["foo bar"])

# We currently cannot handle the following case with libclang properly.
    # Fortunately it doesn't appear in current headers.
    # test_block_parsing(["foo", "  #  ", "bar"], ["foo", "bar"])

test_block_parsing(["foo",
                        "  #  /* ahah */ if defined(__KERNEL__) /* more */",
                        "bar", "#endif"],
                       ["foo", "#ifdef __KERNEL__", "bar", "#endif"])

################################################################################
################################################################################
#####                                                                      #####
#####        B L O C K   L I S T   O P T I M I Z A T I O N                 #####
#####                                                                      #####
################################################################################
################################################################################

def remove_macro_defines(blocks, excludedMacros=None):
    """Remove macro definitions like #define <macroName>  ...."""
    if excludedMacros is None:
        excludedMacros = set()
    result = []
    for b in blocks:
        macroName = b.isDefine()
        if macroName is None or macroName not in excludedMacros:
            result.append(b)

return result

def find_matching_endif(blocks, i):
    """Traverse the blocks to find out the matching #endif."""
    n = len(blocks)
    depth = 1
    while i < n:
        if blocks[i].isDirective():
            dir_ = blocks[i].directive
            if dir_ in ["if", "ifndef", "ifdef"]:
                depth += 1
            elif depth == 1 and dir_ in ["else", "elif"]:
                return i
            elif dir_ == "endif":
                depth -= 1
                if depth == 0:
                    return i
        i += 1
    return i

def optimize_if01(blocks):
    """Remove the code between #if 0 .. #endif in a list of CppBlocks."""
    i = 0
    n = len(blocks)
    result = []
    while i < n:
        j = i
        while j < n and not blocks[j].isIf():
            j += 1
        if j > i:
            logging.debug("appending lines %d to %d", blocks[i].lineno,
                          blocks[j-1].lineno)
            result += blocks[i:j]
        if j >= n:
            break
        expr = blocks[j].expr
        r = expr.toInt()
        if r is None:
            result.append(blocks[j])
            i = j + 1
            continue

if r == 0:
            # if 0 => skip everything until the corresponding #endif
            j = find_matching_endif(blocks, j + 1)
            if j >= n:
                # unterminated #if 0, finish here
                break
            dir_ = blocks[j].directive
            if dir_ == "endif":
                logging.debug("remove 'if 0' .. 'endif' (lines %d to %d)",
                              blocks[i].lineno, blocks[j].lineno)
                i = j + 1
            elif dir_ == "else":
                # convert 'else' into 'if 1'
                logging.debug("convert 'if 0' .. 'else' into 'if 1' (lines %d "
                              "to %d)", blocks[i].lineno, blocks[j-1].lineno)
                blocks[j].directive = "if"
                blocks[j].expr = CppExpr(CppStringTokenizer("1").tokens)
                i = j
            elif dir_ == "elif":
                # convert 'elif' into 'if'
                logging.debug("convert 'if 0' .. 'elif' into 'if'")
                blocks[j].directive = "if"
                i = j
            continue

# if 1 => find corresponding endif and remove/transform them
        k = find_matching_endif(blocks, j + 1)
        if k >= n:
            # unterminated #if 1, finish here
            logging.debug("unterminated 'if 1'")
            result += blocks[j+1:k]
            break

dir_ = blocks[k].directive
        if dir_ == "endif":
            logging.debug("convert 'if 1' .. 'endif' (lines %d to %d)",
                          blocks[j].lineno, blocks[k].lineno)
            result += optimize_if01(blocks[j+1:k])
            i = k + 1
        elif dir_ == "else":
            # convert 'else' into 'if 0'
            logging.debug("convert 'if 1' .. 'else' (lines %d to %d)",
                          blocks[j].lineno, blocks[k].lineno)
            result += optimize_if01(blocks[j+1:k])
            blocks[k].directive = "if"
            blocks[k].expr = CppExpr(CppStringTokenizer("0").tokens)
            i = k
        elif dir_ == "elif":
            # convert 'elif' into 'if 0'
            logging.debug("convert 'if 1' .. 'elif' (lines %d to %d)",
                          blocks[j].lineno, blocks[k].lineno)
            result += optimize_if01(blocks[j+1:k])
            blocks[k].expr = CppExpr(CppStringTokenizer("0").tokens)
            i = k
    return result

def test_optimizeAll():
    text = """\
#if 1
#define  GOOD_1
#endif
#if 0
#define  BAD_2
#define  BAD_3
#endif

#if 1
#define  GOOD_2
#else
#define  BAD_4
#endif

#if 0
#define  BAD_5
#else
#define  GOOD_3
#endif

#if defined(__KERNEL__)
#define BAD_KERNEL
#endif

#if defined(__KERNEL__) || !defined(__GLIBC__) || (__GLIBC__ < 2)
#define X
#endif

#ifndef SIGRTMAX
#define SIGRTMAX 123
#endif /* SIGRTMAX */

#if 0
#if 1
#define  BAD_6
#endif
#endif\
"""

expected = """\
#define GOOD_1
#define GOOD_2
#define GOOD_3
#if !defined(__GLIBC__) || __GLIBC__ < 2
#define X
#endif
#ifndef __SIGRTMAX
#define __SIGRTMAX 123
#endif\
"""

out = utils.StringOutput()
    blocks = BlockParser().parse(CppStringTokenizer(text))
    blocks.replaceTokens(kernel_token_replacements)
    blocks.optimizeAll({"__KERNEL__": kCppUndefinedMacro})
    blocks.write(out)
    if out.get() != expected:
        print "[FAIL]: macro optimization failed\n"
        print "<<<< expecting '",
        print expected,
        print "'\n>>>> result '",
        print out.get(),
        print "'\n----"
        global failure_count
        failure_count += 1

def runUnitTests():
    """Always run all unit tests for this program."""
    test_CppTokenizer()
    test_CppExpr()
    test_optimizeAll()
    test_BlockParser()

failure_count = 0
runUnitTests()
if failure_count != 0:
    utils.panic("Unit tests failed in cpp.py.\n")

普通文本 | 1829行 | 61.23 KB

#!/usr/bin/python
"""A glorified C pre-processor parser."""

import ctypes
import logging
import os
import re
import site
import utils

top = os.getenv('ANDROID_BUILD_TOP')
if top is None:
    utils.panic('ANDROID_BUILD_TOP not set.\n')

# Set up the env vars for libclang.
site.addsitedir(os.path.join(top, 'external/clang/bindings/python'))

import clang.cindex
from clang.cindex import conf
from clang.cindex import Cursor
from clang.cindex import CursorKind
from clang.cindex import SourceLocation
from clang.cindex import SourceRange
from clang.cindex import TokenGroup
from clang.cindex import TokenKind
from clang.cindex import TranslationUnit

# Set up LD_LIBRARY_PATH to include libclang.so, libLLVM.so, and etc.
# Note that setting LD_LIBRARY_PATH with os.putenv() sometimes doesn't help.
clang.cindex.Config.set_library_path(os.path.join(top, 'prebuilts/sdk/tools/linux/lib64'))

from defaults import kCppUndefinedMacro
from defaults import kernel_remove_config_macros
from defaults import kernel_token_replacements


debugBlockParser = False
debugCppExpr = False
debugOptimIf01 = False

###############################################################################
###############################################################################
#####                                                                     #####
#####           C P P   T O K E N S                                       #####
#####                                                                     #####
###############################################################################
###############################################################################

# the list of supported C-preprocessor tokens
# plus a couple of C tokens as well
tokEOF = "\0"
tokLN = "\n"
tokSTRINGIFY = "#"
tokCONCAT = "##"
tokLOGICAND = "&&"
tokLOGICOR = "||"
tokSHL = "<<"
tokSHR = ">>"
tokEQUAL = "=="
tokNEQUAL = "!="
tokLT = "<"
tokLTE = "<="
tokGT = ">"
tokGTE = ">="
tokELLIPSIS = "..."
tokSPACE = " "
tokDEFINED = "defined"
tokLPAREN = "("
tokRPAREN = ")"
tokNOT = "!"
tokPLUS = "+"
tokMINUS = "-"
tokMULTIPLY = "*"
tokDIVIDE = "/"
tokMODULUS = "%"
tokBINAND = "&"
tokBINOR = "|"
tokBINXOR = "^"
tokCOMMA = ","
tokLBRACE = "{"
tokRBRACE = "}"
tokARROW = "->"
tokINCREMENT = "++"
tokDECREMENT = "--"
tokNUMBER = "<number>"
tokIDENT = "<ident>"
tokSTRING = "<string>"


class Token(clang.cindex.Token):
    """A class that represents one token after parsing.

    It inherits the class in libclang, with an extra id property to hold the
    new spelling of the token. The spelling property in the base class is
    defined as read-only. New names after macro instantiation are saved in
    their ids now. It also facilitates the renaming of directive optimizations
    like replacing 'ifndef X' with 'if !defined(X)'.

    It also overrides the cursor property of the base class. Because the one
    in libclang always queries based on a single token, which usually doesn't
    hold useful information. The cursor in this class can be set by calling
    CppTokenizer.getTokensWithCursors(). Otherwise it returns the one in the
    base class.
    """

    def __init__(self, tu=None, group=None, int_data=None, ptr_data=None,
                 cursor=None):
        clang.cindex.Token.__init__(self)
        self._id = None
        self._tu = tu
        self._group = group
        self._cursor = cursor
        # self.int_data and self.ptr_data are from the base class. But
        # self.int_data doesn't accept a None value.
        if int_data is not None:
            self.int_data = int_data
        self.ptr_data = ptr_data

    @property
    def id(self):
        """Name of the token."""
        if self._id is None:
            return self.spelling
        else:
            return self._id

    @id.setter
    def id(self, new_id):
        """Setting name of the token."""
        self._id = new_id

    @property
    def cursor(self):
        if self._cursor is None:
            self._cursor = clang.cindex.Token.cursor
        return self._cursor

    @cursor.setter
    def cursor(self, new_cursor):
        self._cursor = new_cursor

    def __repr__(self):
        if self.id == 'defined':
            return self.id
        elif self.kind == TokenKind.IDENTIFIER:
            return "(ident %s)" % self.id

        return self.id

    def __str__(self):
        return self.id


class BadExpectedToken(Exception):
    """An exception that will be raised for unexpected tokens."""
    pass


# The __contains__ function in libclang SourceRange class contains a bug. It
# gives wrong result when dealing with single line range.
# Bug filed with upstream:
# http://llvm.org/bugs/show_bug.cgi?id=22243, http://reviews.llvm.org/D7277
def SourceRange__contains__(self, other):
    """Determine if a given location is inside the range."""
    if not isinstance(other, SourceLocation):
        return False
    if other.file is None and self.start.file is None:
        pass
    elif (self.start.file.name != other.file.name or
          other.file.name != self.end.file.name):
        # same file name
        return False
    # same file, in between lines
    if self.start.line < other.line < self.end.line:
        return True
    # same file, same line
    elif self.start.line == other.line == self.end.line:
        if self.start.column <= other.column <= self.end.column:
            return True
    elif self.start.line == other.line:
        # same file first line
        if self.start.column <= other.column:
            return True
    elif other.line == self.end.line:
        # same file last line
        if other.column <= self.end.column:
            return True
    return False


SourceRange.__contains__ = SourceRange__contains__


################################################################################
################################################################################
#####                                                                      #####
#####           C P P   T O K E N I Z E R                                  #####
#####                                                                      #####
################################################################################
################################################################################


class CppTokenizer(object):
    """A tokenizer that converts some input text into a list of tokens.

    It calls libclang's tokenizer to get the parsed tokens. In addition, it
    updates the cursor property in each token after parsing, by calling
    getTokensWithCursors().
    """

    clang_flags = ['-E', '-x', 'c']
    options = TranslationUnit.PARSE_DETAILED_PROCESSING_RECORD

    def __init__(self):
        """Initialize a new CppTokenizer object."""
        self._indexer = clang.cindex.Index.create()
        self._tu = None
        self._index = 0
        self.tokens = None

    def _getTokensWithCursors(self):
        """Helper method to return all tokens with their cursors.

        The cursor property in a clang Token doesn't provide enough
        information. Because it is queried based on single token each time
        without any context, i.e. via calling conf.lib.clang_annotateTokens()
        with only one token given. So we often see 'INVALID_FILE' in one
        token's cursor. In this function it passes all the available tokens
        to get more informative cursors.
        """

        tokens_memory = ctypes.POINTER(clang.cindex.Token)()
        tokens_count = ctypes.c_uint()

        conf.lib.clang_tokenize(self._tu, self._tu.cursor.extent,
                                ctypes.byref(tokens_memory),
                                ctypes.byref(tokens_count))

        count = int(tokens_count.value)

        # If we get no tokens, no memory was allocated. Be sure not to return
        # anything and potentially call a destructor on nothing.
        if count < 1:
            return

        cursors = (Cursor * count)()
        cursors_memory = ctypes.cast(cursors, ctypes.POINTER(Cursor))

        conf.lib.clang_annotateTokens(self._tu, tokens_memory, count,
                                      cursors_memory)

        tokens_array = ctypes.cast(
            tokens_memory,
            ctypes.POINTER(clang.cindex.Token * count)).contents
        token_group = TokenGroup(self._tu, tokens_memory, tokens_count)

        tokens = []
        for i in xrange(0, count):
            token = Token(self._tu, token_group,
                          int_data=tokens_array[i].int_data,
                          ptr_data=tokens_array[i].ptr_data,
                          cursor=cursors[i])
            # We only want non-comment tokens.
            if token.kind != TokenKind.COMMENT:
                tokens.append(token)

        return tokens

    def parseString(self, lines):
        """Parse a list of text lines into a BlockList object."""
        file_ = 'dummy.c'
        self._tu = self._indexer.parse(file_, self.clang_flags,
                                       unsaved_files=[(file_, lines)],
                                       options=self.options)
        self.tokens = self._getTokensWithCursors()

    def parseFile(self, file_):
        """Parse a file into a BlockList object."""
        self._tu = self._indexer.parse(file_, self.clang_flags,
                                       options=self.options)
        self.tokens = self._getTokensWithCursors()

    def nextToken(self):
        """Return next token from the list."""
        if self._index < len(self.tokens):
            t = self.tokens[self._index]
            self._index += 1
            return t
        else:
            return None


class CppStringTokenizer(CppTokenizer):
    """A CppTokenizer derived class that accepts a string of text as input."""

    def __init__(self, line):
        CppTokenizer.__init__(self)
        self.parseString(line)


class CppFileTokenizer(CppTokenizer):
    """A CppTokenizer derived class that accepts a file as input."""

    def __init__(self, file_):
        CppTokenizer.__init__(self)
        self.parseFile(file_)


# Unit testing
#
class CppTokenizerTester(object):
    """A class used to test CppTokenizer classes."""

    def __init__(self, tokenizer=None):
        self._tokenizer = tokenizer
        self._token = None

    def setTokenizer(self, tokenizer):
        self._tokenizer = tokenizer

    def expect(self, id):
        self._token = self._tokenizer.nextToken()
        if self._token is None:
            tokid = ''
        else:
            tokid = self._token.id
        if tokid == id:
            return
        raise BadExpectedToken("###  BAD TOKEN: '%s' expecting '%s'" % (
            tokid, id))

    def expectToken(self, id, line, col):
        self.expect(id)
        if self._token.location.line != line:
            raise BadExpectedToken(
                "###  BAD LINENO: token '%s' got '%d' expecting '%d'" % (
                    id, self._token.lineno, line))
        if self._token.location.column != col:
            raise BadExpectedToken("###  BAD COLNO: '%d' expecting '%d'" % (
                self._token.colno, col))

    def expectTokens(self, tokens):
        for id, line, col in tokens:
            self.expectToken(id, line, col)

    def expectList(self, list_):
        for item in list_:
            self.expect(item)


def test_CppTokenizer():
    tester = CppTokenizerTester()

    tester.setTokenizer(CppStringTokenizer("#an/example  && (01923_xy)"))
    tester.expectList(["#", "an", "/", "example", tokLOGICAND, tokLPAREN,
                       "01923_xy", tokRPAREN])

    tester.setTokenizer(CppStringTokenizer("FOO(BAR) && defined(BAZ)"))
    tester.expectList(["FOO", tokLPAREN, "BAR", tokRPAREN, tokLOGICAND,
                       "defined", tokLPAREN, "BAZ", tokRPAREN])

    tester.setTokenizer(CppStringTokenizer("/*\n#\n*/"))
    tester.expectList([])

    tester.setTokenizer(CppStringTokenizer("first\nsecond"))
    tester.expectList(["first", "second"])

    tester.setTokenizer(CppStringTokenizer("first second\n  third"))
    tester.expectTokens([("first", 1, 1),
                         ("second", 1, 7),
                         ("third", 2, 3)])

    tester.setTokenizer(CppStringTokenizer("boo /* what the\nhell */"))
    tester.expectTokens([("boo", 1, 1)])

    tester.setTokenizer(CppStringTokenizer("an \\\n example"))
    tester.expectTokens([("an", 1, 1),
                         ("example", 2, 2)])
    return True


################################################################################
################################################################################
#####                                                                      #####
#####           C P P   E X P R E S S I O N S                              #####
#####                                                                      #####
################################################################################
################################################################################


class CppExpr(object):
    """A class that models the condition of #if directives into an expr tree.

    Each node in the tree is of the form (op, arg) or (op, arg1, arg2) where
    "op" is a string describing the operation
    """

    unaries = ["!", "~"]
    binaries = ["+", "-", "<", "<=", ">=", ">", "&&", "||", "*", "/", "%",
                "&", "|", "^", "<<", ">>", "==", "!=", "?", ":"]
    precedences = {
        "?": 1, ":": 1,
        "||": 2,
        "&&": 3,
        "|": 4,
        "^": 5,
        "&": 6,
        "==": 7, "!=": 7,
        "<": 8, "<=": 8, ">": 8, ">=": 8,
        "<<": 9, ">>": 9,
        "+": 10, "-": 10,
        "*": 11, "/": 11, "%": 11,
        "!": 12, "~": 12
    }

    def __init__(self, tokens):
        """Initialize a CppExpr. 'tokens' must be a CppToken list."""
        self.tokens = tokens
        self._num_tokens = len(tokens)
        self._index = 0

        if debugCppExpr:
            print "CppExpr: trying to parse %s" % repr(tokens)
        self.expr = self.parseExpression(0)
        if debugCppExpr:
            print "CppExpr: got " + repr(self.expr)
        if self._index != self._num_tokens:
            self.throw(BadExpectedToken, "crap at end of input (%d != %d): %s"
                       % (self._index, self._num_tokens, repr(tokens)))

    def throw(self, exception, msg):
        if self._index < self._num_tokens:
            tok = self.tokens[self._index]
            print "%d:%d: %s" % (tok.location.line, tok.location.column, msg)
        else:
            print "EOF: %s" % msg
        raise exception(msg)

    def expectId(self, id):
        """Check that a given token id is at the current position."""
        token = self.tokens[self._index]
        if self._index >= self._num_tokens or token.id != id:
            self.throw(BadExpectedToken,
                       "### expecting '%s' in expression, got '%s'" % (
                           id, token.id))
        self._index += 1

    def is_decimal(self):
        token = self.tokens[self._index].id
        if token[-1] in "ULul":
            token = token[:-1]
        try:
            val = int(token, 10)
            self._index += 1
            return ('int', val)
        except ValueError:
            return None

    def is_octal(self):
        token = self.tokens[self._index].id
        if token[-1] in "ULul":
            token = token[:-1]
        if len(token) < 2 or token[0] != '0':
            return None
        try:
            val = int(token, 8)
            self._index += 1
            return ('oct', val)
        except ValueError:
            return None

    def is_hexadecimal(self):
        token = self.tokens[self._index].id
        if token[-1] in "ULul":
            token = token[:-1]
        if len(token) < 3 or (token[:2] != '0x' and token[:2] != '0X'):
            return None
        try:
            val = int(token, 16)
            self._index += 1
            return ('hex', val)
        except ValueError:
            return None

    def is_integer(self):
        if self.tokens[self._index].kind != TokenKind.LITERAL:
            return None

        c = self.is_hexadecimal()
        if c:
            return c

        c = self.is_octal()
        if c:
            return c

        c = self.is_decimal()
        if c:
            return c

        return None

    def is_number(self):
        t = self.tokens[self._index]
        if t.id == tokMINUS and self._index + 1 < self._num_tokens:
            self._index += 1
            c = self.is_integer()
            if c:
                op, val = c
                return (op, -val)
        if t.id == tokPLUS and self._index + 1 < self._num_tokens:
            self._index += 1
            c = self.is_integer()
            if c:
                return c

        return self.is_integer()

    def is_defined(self):
        t = self.tokens[self._index]
        if t.id != tokDEFINED:
            return None

        # We have the defined keyword, check the rest.
        self._index += 1
        used_parens = False
        if (self._index < self._num_tokens and
            self.tokens[self._index].id == tokLPAREN):
            used_parens = True
            self._index += 1

        if self._index >= self._num_tokens:
            self.throw(BadExpectedToken,
                       "### 'defined' must be followed by macro name or left "
                       "paren")

        t = self.tokens[self._index]
        if t.kind != TokenKind.IDENTIFIER:
            self.throw(BadExpectedToken,
                       "### 'defined' must be followed by macro name")

        self._index += 1
        if used_parens:
            self.expectId(tokRPAREN)

        return ("defined", t.id)

    def is_call_or_ident(self):
        if self._index >= self._num_tokens:
            return None

        t = self.tokens[self._index]
        if t.kind != TokenKind.IDENTIFIER:
            return None

        name = t.id

        self._index += 1
        if (self._index >= self._num_tokens or
            self.tokens[self._index].id != tokLPAREN):
            return ("ident", name)

        params = []
        depth = 1
        self._index += 1
        j = self._index
        while self._index < self._num_tokens:
            id = self.tokens[self._index].id
            if id == tokLPAREN:
                depth += 1
            elif depth == 1 and (id == tokCOMMA or id == tokRPAREN):
                k = self._index
                param = self.tokens[j:k]
                params.append(param)
                if id == tokRPAREN:
                    break
                j = self._index + 1
            elif id == tokRPAREN:
                depth -= 1
            self._index += 1

        if self._index >= self._num_tokens:
            return None

        self._index += 1
        return ("call", (name, params))

    # Implements the "precedence climbing" algorithm from
    # http://www.engr.mun.ca/~theo/Misc/exp_parsing.htm.
    # The "classic" algorithm would be fine if we were using a tool to
    # generate the parser, but we're not. Dijkstra's "shunting yard"
    # algorithm hasn't been necessary yet.

    def parseExpression(self, minPrecedence):
        if self._index >= self._num_tokens:
            return None

        node = self.parsePrimary()
        while (self.token() and self.isBinary(self.token()) and
               self.precedence(self.token()) >= minPrecedence):
            op = self.token()
            self.nextToken()
            rhs = self.parseExpression(self.precedence(op) + 1)
            node = (op.id, node, rhs)

        return node

    def parsePrimary(self):
        op = self.token()
        if self.isUnary(op):
            self.nextToken()
            return (op.id, self.parseExpression(self.precedence(op)))

        primary = None
        if op.id == tokLPAREN:
            self.nextToken()
            primary = self.parseExpression(0)
            self.expectId(tokRPAREN)
        elif op.id == "?":
            self.nextToken()
            primary = self.parseExpression(0)
            self.expectId(":")
        elif op.id == '+' or op.id == '-' or op.kind == TokenKind.LITERAL:
            primary = self.is_number()
        # Checking for 'defined' needs to come first now because 'defined' is
        # recognized as IDENTIFIER.
        elif op.id == tokDEFINED:
            primary = self.is_defined()
        elif op.kind == TokenKind.IDENTIFIER:
            primary = self.is_call_or_ident()
        else:
            self.throw(BadExpectedToken,
                       "didn't expect to see a %s in factor" % (
                           self.tokens[self._index].id))
        return primary

    def isBinary(self, token):
        return token.id in self.binaries

    def isUnary(self, token):
        return token.id in self.unaries

    def precedence(self, token):
        return self.precedences.get(token.id)

    def token(self):
        if self._index >= self._num_tokens:
            return None
        return self.tokens[self._index]

    def nextToken(self):
        self._index += 1
        if self._index >= self._num_tokens:
            return None
        return self.tokens[self._index]

    def dump_node(self, e):
        op = e[0]
        line = "(" + op
        if op == "int":
            line += " %d)" % e[1]
        elif op == "oct":
            line += " 0%o)" % e[1]
        elif op == "hex":
            line += " 0x%x)" % e[1]
        elif op == "ident":
            line += " %s)" % e[1]
        elif op == "defined":
            line += " %s)" % e[1]
        elif op == "call":
            arg = e[1]
            line += " %s [" % arg[0]
            prefix = ""
            for param in arg[1]:
                par = ""
                for tok in param:
                    par += str(tok)
                line += "%s%s" % (prefix, par)
                prefix = ","
            line += "])"
        elif op in CppExpr.unaries:
            line += " %s)" % self.dump_node(e[1])
        elif op in CppExpr.binaries:
            line += " %s %s)" % (self.dump_node(e[1]), self.dump_node(e[2]))
        else:
            line += " ?%s)" % repr(e[1])

        return line

    def __repr__(self):
        return self.dump_node(self.expr)

    def source_node(self, e):
        op = e[0]
        if op == "int":
            return "%d" % e[1]
        if op == "hex":
            return "0x%x" % e[1]
        if op == "oct":
            return "0%o" % e[1]
        if op == "ident":
            # XXX: should try to expand
            return e[1]
        if op == "defined":
            return "defined(%s)" % e[1]

        prec = CppExpr.precedences.get(op, 1000)
        arg = e[1]
        if op in CppExpr.unaries:
            arg_src = self.source_node(arg)
            arg_op = arg[0]
            arg_prec = CppExpr.precedences.get(arg_op, 1000)
            if arg_prec < prec:
                return "!(" + arg_src + ")"
            else:
                return "!" + arg_src
        if op in CppExpr.binaries:
            arg2 = e[2]
            arg1_op = arg[0]
            arg2_op = arg2[0]
            arg1_src = self.source_node(arg)
            arg2_src = self.source_node(arg2)
            if CppExpr.precedences.get(arg1_op, 1000) < prec:
                arg1_src = "(%s)" % arg1_src
            if CppExpr.precedences.get(arg2_op, 1000) < prec:
                arg2_src = "(%s)" % arg2_src

            return "%s %s %s" % (arg1_src, op, arg2_src)
        return "???"

    def __str__(self):
        return self.source_node(self.expr)

    @staticmethod
    def int_node(e):
        if e[0] in ["int", "oct", "hex"]:
            return e[1]
        else:
            return None

    def toInt(self):
        return self.int_node(self.expr)

    def optimize_node(self, e, macros=None):
        if macros is None:
            macros = {}
        op = e[0]

        if op == "defined":
            op, name = e
            if macros.has_key(name):
                if macros[name] == kCppUndefinedMacro:
                    return ("int", 0)
                else:
                    try:
                        value = int(macros[name])
                        return ("int", value)
                    except ValueError:
                        return ("defined", macros[name])

            if kernel_remove_config_macros and name.startswith("CONFIG_"):
                return ("int", 0)

            return e

        elif op == "ident":
            op, name = e
            if macros.has_key(name):
                try:
                    value = int(macros[name])
                    expanded = ("int", value)
                except ValueError:
                    expanded = ("ident", macros[name])
                return self.optimize_node(expanded, macros)
            return e

        elif op == "!":
            op, v = e
            v = self.optimize_node(v, macros)
            if v[0] == "int":
                if v[1] == 0:
                    return ("int", 1)
                else:
                    return ("int", 0)
            return ('!', v)

        elif op == "&&":
            op, l, r = e
            l = self.optimize_node(l, macros)
            r = self.optimize_node(r, macros)
            li = self.int_node(l)
            ri = self.int_node(r)
            if li is not None:
                if li == 0:
                    return ("int", 0)
                else:
                    return r
            elif ri is not None:
                if ri == 0:
                    return ("int", 0)
                else:
                    return l
            return (op, l, r)

        elif op == "||":
            op, l, r = e
            l = self.optimize_node(l, macros)
            r = self.optimize_node(r, macros)
            li = self.int_node(l)
            ri = self.int_node(r)
            if li is not None:
                if li == 0:
                    return r
                else:
                    return ("int", 1)
            elif ri is not None:
                if ri == 0:
                    return l
                else:
                    return ("int", 1)
            return (op, l, r)

        else:
            return e

    def optimize(self, macros=None):
        if macros is None:
            macros = {}
        self.expr = self.optimize_node(self.expr, macros)


def test_cpp_expr(expr, expected):
    e = CppExpr(CppStringTokenizer(expr).tokens)
    s1 = repr(e)
    if s1 != expected:
        print ("[FAIL]: expression '%s' generates '%s', should be "
               "'%s'" % (expr, s1, expected))
        global failure_count
        failure_count += 1


def test_cpp_expr_optim(expr, expected, macros=None):
    if macros is None:
        macros = {}
    e = CppExpr(CppStringTokenizer(expr).tokens)
    e.optimize(macros)
    s1 = repr(e)
    if s1 != expected:
        print ("[FAIL]: optimized expression '%s' generates '%s' with "
               "macros %s, should be '%s'" % (expr, s1, macros, expected))
        global failure_count
        failure_count += 1


def test_cpp_expr_source(expr, expected):
    e = CppExpr(CppStringTokenizer(expr).tokens)
    s1 = str(e)
    if s1 != expected:
        print ("[FAIL]: source expression '%s' generates '%s', should "
               "be '%s'" % (expr, s1, expected))
        global failure_count
        failure_count += 1


def test_CppExpr():
    test_cpp_expr("0", "(int 0)")
    test_cpp_expr("1", "(int 1)")
    test_cpp_expr("-5", "(int -5)")
    test_cpp_expr("+1", "(int 1)")
    test_cpp_expr("0U", "(int 0)")
    test_cpp_expr("015", "(oct 015)")
    test_cpp_expr("015l", "(oct 015)")
    test_cpp_expr("0x3e", "(hex 0x3e)")
    test_cpp_expr("(0)", "(int 0)")
    test_cpp_expr("1 && 1", "(&& (int 1) (int 1))")
    test_cpp_expr("1 && 0", "(&& (int 1) (int 0))")
    test_cpp_expr("EXAMPLE", "(ident EXAMPLE)")
    test_cpp_expr("EXAMPLE - 3", "(- (ident EXAMPLE) (int 3))")
    test_cpp_expr("defined(EXAMPLE)", "(defined EXAMPLE)")
    test_cpp_expr("defined ( EXAMPLE ) ", "(defined EXAMPLE)")
    test_cpp_expr("!defined(EXAMPLE)", "(! (defined EXAMPLE))")
    test_cpp_expr("defined(ABC) || defined(BINGO)",
                  "(|| (defined ABC) (defined BINGO))")
    test_cpp_expr("FOO(BAR,5)", "(call FOO [BAR,5])")
    test_cpp_expr("A == 1 || defined(B)",
                  "(|| (== (ident A) (int 1)) (defined B))")

    test_cpp_expr_optim("0", "(int 0)")
    test_cpp_expr_optim("1", "(int 1)")
    test_cpp_expr_optim("1 && 1", "(int 1)")
    test_cpp_expr_optim("1 && +1", "(int 1)")
    test_cpp_expr_optim("0x1 && 01", "(oct 01)")
    test_cpp_expr_optim("1 && 0", "(int 0)")
    test_cpp_expr_optim("0 && 1", "(int 0)")
    test_cpp_expr_optim("0 && 0", "(int 0)")
    test_cpp_expr_optim("1 || 1", "(int 1)")
    test_cpp_expr_optim("1 || 0", "(int 1)")
    test_cpp_expr_optim("0 || 1", "(int 1)")
    test_cpp_expr_optim("0 || 0", "(int 0)")
    test_cpp_expr_optim("A", "(ident A)")
    test_cpp_expr_optim("A", "(int 1)", {"A": 1})
    test_cpp_expr_optim("A || B", "(int 1)", {"A": 1})
    test_cpp_expr_optim("A || B", "(int 1)", {"B": 1})
    test_cpp_expr_optim("A && B", "(ident B)", {"A": 1})
    test_cpp_expr_optim("A && B", "(ident A)", {"B": 1})
    test_cpp_expr_optim("A && B", "(&& (ident A) (ident B))")
    test_cpp_expr_optim("EXAMPLE", "(ident EXAMPLE)")
    test_cpp_expr_optim("EXAMPLE - 3", "(- (ident EXAMPLE) (int 3))")
    test_cpp_expr_optim("defined(EXAMPLE)", "(defined EXAMPLE)")
    test_cpp_expr_optim("defined(EXAMPLE)", "(defined XOWOE)",
                        {"EXAMPLE": "XOWOE"})
    test_cpp_expr_optim("defined(EXAMPLE)", "(int 0)",
                        {"EXAMPLE": kCppUndefinedMacro})
    test_cpp_expr_optim("!defined(EXAMPLE)", "(! (defined EXAMPLE))")
    test_cpp_expr_optim("!defined(EXAMPLE)", "(! (defined XOWOE))",
                        {"EXAMPLE": "XOWOE"})
    test_cpp_expr_optim("!defined(EXAMPLE)", "(int 1)",
                        {"EXAMPLE": kCppUndefinedMacro})
    test_cpp_expr_optim("defined(A) || defined(B)",
                        "(|| (defined A) (defined B))")
    test_cpp_expr_optim("defined(A) || defined(B)", "(int 1)", {"A": "1"})
    test_cpp_expr_optim("defined(A) || defined(B)", "(int 1)", {"B": "1"})
    test_cpp_expr_optim("defined(A) || defined(B)", "(defined A)",
                        {"B": kCppUndefinedMacro})
    test_cpp_expr_optim("defined(A) || defined(B)", "(int 0)",
                        {"A": kCppUndefinedMacro, "B": kCppUndefinedMacro})
    test_cpp_expr_optim("defined(A) && defined(B)",
                        "(&& (defined A) (defined B))")
    test_cpp_expr_optim("defined(A) && defined(B)",
                        "(defined B)", {"A": "1"})
    test_cpp_expr_optim("defined(A) && defined(B)",
                        "(defined A)", {"B": "1"})
    test_cpp_expr_optim("defined(A) && defined(B)", "(int 0)",
                        {"B": kCppUndefinedMacro})
    test_cpp_expr_optim("defined(A) && defined(B)",
                        "(int 0)", {"A": kCppUndefinedMacro})
    test_cpp_expr_optim("A == 1 || defined(B)",
                        "(|| (== (ident A) (int 1)) (defined B))")
    test_cpp_expr_optim(
        "defined(__KERNEL__) || !defined(__GLIBC__) || (__GLIBC__ < 2)",
        "(|| (! (defined __GLIBC__)) (< (ident __GLIBC__) (int 2)))",
        {"__KERNEL__": kCppUndefinedMacro})

    test_cpp_expr_source("0", "0")
    test_cpp_expr_source("1", "1")
    test_cpp_expr_source("1 && 1", "1 && 1")
    test_cpp_expr_source("1 && 0", "1 && 0")
    test_cpp_expr_source("0 && 1", "0 && 1")
    test_cpp_expr_source("0 && 0", "0 && 0")
    test_cpp_expr_source("1 || 1", "1 || 1")
    test_cpp_expr_source("1 || 0", "1 || 0")
    test_cpp_expr_source("0 || 1", "0 || 1")
    test_cpp_expr_source("0 || 0", "0 || 0")
    test_cpp_expr_source("EXAMPLE", "EXAMPLE")
    test_cpp_expr_source("EXAMPLE - 3", "EXAMPLE - 3")
    test_cpp_expr_source("defined(EXAMPLE)", "defined(EXAMPLE)")
    test_cpp_expr_source("defined EXAMPLE", "defined(EXAMPLE)")
    test_cpp_expr_source("A == 1 || defined(B)", "A == 1 || defined(B)")


################################################################################
################################################################################
#####                                                                      #####
#####          C P P   B L O C K                                           #####
#####                                                                      #####
################################################################################
################################################################################


class Block(object):
    """A class used to model a block of input source text.

    There are two block types:
      - directive blocks: contain the tokens of a single pre-processor
        directive (e.g. #if)
      - text blocks, contain the tokens of non-directive blocks

    The cpp parser class below will transform an input source file into a list
    of Block objects (grouped in a BlockList object for convenience)
    """

    def __init__(self, tokens, directive=None, lineno=0, identifier=None):
        """Initialize a new block, if 'directive' is None, it is a text block.

        NOTE: This automatically converts '#ifdef MACRO' into
        '#if defined(MACRO)' and '#ifndef MACRO' into '#if !defined(MACRO)'.
        """

        if directive == "ifdef":
            tok = Token()
            tok.id = tokDEFINED
            tokens = [tok] + tokens
            directive = "if"

        elif directive == "ifndef":
            tok1 = Token()
            tok2 = Token()
            tok1.id = tokNOT
            tok2.id = tokDEFINED
            tokens = [tok1, tok2] + tokens
            directive = "if"

        self.tokens = tokens
        self.directive = directive
        self.define_id = identifier
        if lineno > 0:
            self.lineno = lineno
        else:
            self.lineno = self.tokens[0].location.line

        if self.isIf():
            self.expr = CppExpr(self.tokens)

    def isDirective(self):
        """Return True iff this is a directive block."""
        return self.directive is not None

    def isConditional(self):
        """Return True iff this is a conditional directive block."""
        return self.directive in ["if", "ifdef", "ifndef", "else", "elif",
                                  "endif"]

    def isDefine(self):
        """Return the macro name in a #define directive, or None otherwise."""
        if self.directive != "define":
            return None
        return self.define_id

    def isIf(self):
        """Return True iff this is an #if-like directive block."""
        return self.directive in ["if", "ifdef", "ifndef", "elif"]

    def isEndif(self):
        """Return True iff this is an #endif directive block."""
        return self.directive == "endif"

    def isInclude(self):
        """Check whether this is a #include directive.

        If true, returns the corresponding file name (with brackets or
        double-qoutes). None otherwise.
        """

        if self.directive != "include":
            return None
        return ''.join([str(x) for x in self.tokens])

    @staticmethod
    def format_blocks(tokens, indent=0):
        """Return the formatted lines of strings with proper indentation."""
        newline = True
        result = []
        buf = ''
        i = 0
        while i < len(tokens):
            t = tokens[i]
            if t.id == '{':
                buf += ' {'
                result.append(strip_space(buf))
                indent += 2
                buf = ''
                newline = True
            elif t.id == '}':
                indent -= 2
                if not newline:
                    result.append(strip_space(buf))
                # Look ahead to determine if it's the end of line.
                if (i + 1 < len(tokens) and
                    (tokens[i+1].id == ';' or
                     tokens[i+1].id in ['else', '__attribute__',
                                        '__attribute', '__packed'] or
                     tokens[i+1].kind == TokenKind.IDENTIFIER)):
                    buf = ' ' * indent + '}'
                    newline = False
                else:
                    result.append(' ' * indent + '}')
                    buf = ''
                    newline = True
            elif t.id == ';':
                result.append(strip_space(buf) + ';')
                buf = ''
                newline = True
            # We prefer a new line for each constant in enum.
            elif t.id == ',' and t.cursor.kind == CursorKind.ENUM_DECL:
                result.append(strip_space(buf) + ',')
                buf = ''
                newline = True
            else:
                if newline:
                    buf += ' ' * indent + str(t)
                else:
                    buf += ' ' + str(t)
                newline = False
            i += 1

        if buf:
            result.append(strip_space(buf))

        return result, indent

    def writeWithWarning(self, out, warning, left_count, repeat_count, indent):
        """Dump the current block with warnings."""
        # removeWhiteSpace() will sometimes creates non-directive blocks
        # without any tokens. These come from blocks that only contained
        # empty lines and spaces. They should not be printed in the final
        # output, and then should not be counted for this operation.
        #
        if self.directive is None and not self.tokens:
            return left_count, indent

        if self.directive:
            out.write(str(self) + '\n')
            left_count -= 1
            if left_count == 0:
                out.write(warning)
                left_count = repeat_count

        else:
            lines, indent = self.format_blocks(self.tokens, indent)
            for line in lines:
                out.write(line + '\n')
                left_count -= 1
                if left_count == 0:
                    out.write(warning)
                    left_count = repeat_count

        return left_count, indent

    def __repr__(self):
        """Generate the representation of a given block."""
        if self.directive:
            result = "#%s " % self.directive
            if self.isIf():
                result += repr(self.expr)
            else:
                for tok in self.tokens:
                    result += repr(tok)
        else:
            result = ""
            for tok in self.tokens:
                result += repr(tok)

        return result

    def __str__(self):
        """Generate the string representation of a given block."""
        if self.directive:
            # "#if"
            if self.directive == "if":
                # small optimization to re-generate #ifdef and #ifndef
                e = self.expr.expr
                op = e[0]
                if op == "defined":
                    result = "#ifdef %s" % e[1]
                elif op == "!" and e[1][0] == "defined":
                    result = "#ifndef %s" % e[1][1]
                else:
                    result = "#if " + str(self.expr)

            # "#define"
            elif self.isDefine():
                result = "#%s %s" % (self.directive, self.define_id)
                if self.tokens:
                    result += " "
                expr = strip_space(' '.join([tok.id for tok in self.tokens]))
                # remove the space between name and '(' in function call
                result += re.sub(r'(\w+) \(', r'\1(', expr)

            # "#error"
            # Concatenating tokens with a space separator, because they may
            # not be quoted and broken into several tokens
            elif self.directive == "error":
                result = "#error %s" % ' '.join([tok.id for tok in self.tokens])

            else:
                result = "#%s" % self.directive
                if self.tokens:
                    result += " "
                result += ''.join([tok.id for tok in self.tokens])
        else:
            lines, _ = self.format_blocks(self.tokens)
            result = '\n'.join(lines)

        return result


class BlockList(object):
    """A convenience class used to hold and process a list of blocks.

    It calls the cpp parser to get the blocks.
    """

    def __init__(self, blocks):
        self.blocks = blocks

    def __len__(self):
        return len(self.blocks)

    def __getitem__(self, n):
        return self.blocks[n]

    def __repr__(self):
        return repr(self.blocks)

    def __str__(self):
        result = '\n'.join([str(b) for b in self.blocks])
        return result

    def dump(self):
        """Dump all the blocks in current BlockList."""
        print '##### BEGIN #####'
        for i, b in enumerate(self.blocks):
            print '### BLOCK %d ###' % i
            print b
        print '##### END #####'

    def optimizeIf01(self):
        """Remove the code between #if 0 .. #endif in a BlockList."""
        self.blocks = optimize_if01(self.blocks)

    def optimizeMacros(self, macros):
        """Remove known defined and undefined macros from a BlockList."""
        for b in self.blocks:
            if b.isIf():
                b.expr.optimize(macros)

    def removeMacroDefines(self, macros):
        """Remove known macro definitions from a BlockList."""
        self.blocks = remove_macro_defines(self.blocks, macros)

    def optimizeAll(self, macros):
        self.optimizeMacros(macros)
        self.optimizeIf01()
        return

    def findIncludes(self):
        """Return the list of included files in a BlockList."""
        result = []
        for b in self.blocks:
            i = b.isInclude()
            if i:
                result.append(i)
        return result

    def write(self, out):
        out.write(str(self))

    def writeWithWarning(self, out, warning, repeat_count):
        left_count = repeat_count
        indent = 0
        for b in self.blocks:
            left_count, indent = b.writeWithWarning(out, warning, left_count,
                                                    repeat_count, indent)

    def removeVarsAndFuncs(self, knownStatics=None):
        """Remove variable and function declarations.

        All extern and static declarations corresponding to variable and
        function declarations are removed. We only accept typedefs and
        enum/structs/union declarations.

        However, we keep the definitions corresponding to the set of known
        static inline functions in the set 'knownStatics', which is useful
        for optimized byteorder swap functions and stuff like that.
        """

        # NOTE: It's also removing function-like macros, such as __SYSCALL(...)
        # in uapi/asm-generic/unistd.h, or KEY_FIELD(...) in linux/bcache.h.
        # It could be problematic when we have function-like macros but without
        # '}' following them. It will skip all the tokens/blocks until seeing a
        # '}' as the function end. Fortunately we don't have such cases in the
        # current kernel headers.

        # state = 0 => normal (i.e. LN + spaces)
        # state = 1 => typedef/struct encountered, ends with ";"
        # state = 2 => var declaration encountered, ends with ";"
        # state = 3 => func declaration encountered, ends with "}"

        if knownStatics is None:
            knownStatics = set()
        state = 0
        depth = 0
        blocks2 = []
        skipTokens = False
        for b in self.blocks:
            if b.isDirective():
                blocks2.append(b)
            else:
                n = len(b.tokens)
                i = 0
                if skipTokens:
                    first = n
                else:
                    first = 0
                while i < n:
                    tok = b.tokens[i]
                    tokid = tok.id
                    # If we are not looking for the start of a new
                    # type/var/func, then skip over tokens until
                    # we find our terminator, managing the depth of
                    # accolades as we go.
                    if state > 0:
                        terminator = False
                        if tokid == '{':
                            depth += 1
                        elif tokid == '}':
                            if depth > 0:
                                depth -= 1
                            if (depth == 0) and (state == 3):
                                terminator = True
                        elif tokid == ';' and depth == 0:
                            terminator = True

                        if terminator:
                            # we found the terminator
                            state = 0
                            if skipTokens:
                                skipTokens = False
                                first = i + 1

                        i += 1
                        continue

                    # Is it a new type definition, then start recording it
                    if tok.id in ['struct', 'typedef', 'enum', 'union',
                                  '__extension__']:
                        state = 1
                        i += 1
                        continue

                    # Is it a variable or function definition. If so, first
                    # try to determine which type it is, and also extract
                    # its name.
                    #
                    # We're going to parse the next tokens of the same block
                    # until we find a semi-column or a left parenthesis.
                    #
                    # The semi-column corresponds to a variable definition,
                    # the left-parenthesis to a function definition.
                    #
                    # We also assume that the var/func name is the last
                    # identifier before the terminator.
                    #
                    j = i + 1
                    ident = ""
                    while j < n:
                        tokid = b.tokens[j].id
                        if tokid == '(':  # a function declaration
                            state = 3
                            break
                        elif tokid == ';':  # a variable declaration
                            state = 2
                            break
                        if b.tokens[j].kind == TokenKind.IDENTIFIER:
                            ident = b.tokens[j].id
                        j += 1

                    if j >= n:
                        # This can only happen when the declaration
                        # does not end on the current block (e.g. with
                        # a directive mixed inside it.
                        #
                        # We will treat it as malformed because
                        # it's very hard to recover from this case
                        # without making our parser much more
                        # complex.
                        #
                        logging.debug("### skip unterminated static '%s'",
                                      ident)
                        break

                    if ident in knownStatics:
                        logging.debug("### keep var/func '%s': %s", ident,
                                      repr(b.tokens[i:j]))
                    else:
                        # We're going to skip the tokens for this declaration
                        logging.debug("### skip var/func '%s': %s", ident,
                                      repr(b.tokens[i:j]))
                        if i > first:
                            blocks2.append(Block(b.tokens[first:i]))
                        skipTokens = True
                        first = n

                    i += 1

                if i > first:
                    # print "### final '%s'" % repr(b.tokens[first:i])
                    blocks2.append(Block(b.tokens[first:i]))

        self.blocks = blocks2

    def replaceTokens(self, replacements):
        """Replace tokens according to the given dict."""
        for b in self.blocks:
            made_change = False
            if b.isInclude() is None:
                for tok in b.tokens:
                    if tok.kind == TokenKind.IDENTIFIER:
                        if tok.id in replacements:
                            tok.id = replacements[tok.id]
                            made_change = True

                if b.isDefine() and b.define_id in replacements:
                    b.define_id = replacements[b.define_id]
                    made_change = True

            if made_change and b.isIf():
                # Keep 'expr' in sync with 'tokens'.
                b.expr = CppExpr(b.tokens)


def strip_space(s):
    """Strip out redundant space in a given string."""

    # NOTE: It ought to be more clever to not destroy spaces in string tokens.
    replacements = {' . ': '.',
                    ' [': '[',
                    '[ ': '[',
                    ' ]': ']',
                    '( ': '(',
                    ' )': ')',
                    ' ,': ',',
                    '# ': '#',
                    ' ;': ';',
                    '~ ': '~',
                    ' -> ': '->'}
    result = s
    for r in replacements:
        result = result.replace(r, replacements[r])

    # Remove the space between function name and the parenthesis.
    result = re.sub(r'(\w+) \(', r'\1(', result)
    return result


class BlockParser(object):
    """A class that converts an input source file into a BlockList object."""

    def __init__(self, tokzer=None):
        """Initialize a block parser.

        The input source is provided through a Tokenizer object.
        """
        self._tokzer = tokzer
        self._parsed = False

    @property
    def parsed(self):
        return self._parsed

    @staticmethod
    def _short_extent(extent):
        return '%d:%d - %d:%d' % (extent.start.line, extent.start.column,
                                  extent.end.line, extent.end.column)

    def getBlocks(self, tokzer=None):
        """Return all the blocks parsed."""

        def consume_extent(i, tokens, extent=None, detect_change=False):
            """Return tokens that belong to the given extent.

            It parses all the tokens that follow tokens[i], until getting out
            of the extent. When detect_change is True, it may terminate early
            when detecting preprocessing directives inside the extent.
            """

            result = []
            if extent is None:
                extent = tokens[i].cursor.extent

            while i < len(tokens) and tokens[i].location in extent:
                t = tokens[i]
                if debugBlockParser:
                    print ' ' * 2, t.id, t.kind, t.cursor.kind
                if (detect_change and t.cursor.extent != extent and
                    t.cursor.kind == CursorKind.PREPROCESSING_DIRECTIVE):
                    break
                result.append(t)
                i += 1
            return (i, result)

        def consume_line(i, tokens):
            """Return tokens that follow tokens[i] in the same line."""
            result = []
            line = tokens[i].location.line
            while i < len(tokens) and tokens[i].location.line == line:
                if tokens[i].cursor.kind == CursorKind.PREPROCESSING_DIRECTIVE:
                    break
                result.append(tokens[i])
                i += 1
            return (i, result)

        if tokzer is None:
            tokzer = self._tokzer
        tokens = tokzer.tokens

        blocks = []
        buf = []
        i = 0

        while i < len(tokens):
            t = tokens[i]
            cursor = t.cursor

            if debugBlockParser:
                print ("%d: Processing [%s], kind=[%s], cursor=[%s], "
                       "extent=[%s]" % (t.location.line, t.spelling, t.kind,
                                        cursor.kind,
                                        self._short_extent(cursor.extent)))

            if cursor.kind == CursorKind.PREPROCESSING_DIRECTIVE:
                if buf:
                    blocks.append(Block(buf))
                    buf = []

                j = i
                if j + 1 >= len(tokens):
                    raise BadExpectedToken("### BAD TOKEN at %s" % (t.location))
                directive = tokens[j+1].id

                if directive == 'define':
                    if i+2 >= len(tokens):
                        raise BadExpectedToken("### BAD TOKEN at %s" %
                                               (tokens[i].location))

                    # Skip '#' and 'define'.
                    extent = tokens[i].cursor.extent
                    i += 2
                    id = ''
                    # We need to separate the id from the remaining of
                    # the line, especially for the function-like macro.
                    if (i + 1 < len(tokens) and tokens[i+1].id == '(' and
                        (tokens[i].location.column + len(tokens[i].spelling) ==
                         tokens[i+1].location.column)):
                        while i < len(tokens):
                            id += tokens[i].id
                            if tokens[i].spelling == ')':
                                i += 1
                                break
                            i += 1
                    else:
                        id += tokens[i].id
                        # Advance to the next token that follows the macro id
                        i += 1

                    (i, ret) = consume_extent(i, tokens, extent=extent)
                    blocks.append(Block(ret, directive=directive,
                                        lineno=t.location.line, identifier=id))

                else:
                    (i, ret) = consume_extent(i, tokens)
                    blocks.append(Block(ret[2:], directive=directive,
                                        lineno=t.location.line))

            elif cursor.kind == CursorKind.INCLUSION_DIRECTIVE:
                if buf:
                    blocks.append(Block(buf))
                    buf = []
                directive = tokens[i+1].id
                (i, ret) = consume_extent(i, tokens)

                blocks.append(Block(ret[2:], directive=directive,
                                    lineno=t.location.line))

            elif cursor.kind == CursorKind.VAR_DECL:
                if buf:
                    blocks.append(Block(buf))
                    buf = []

                (i, ret) = consume_extent(i, tokens, detect_change=True)
                buf += ret

            elif cursor.kind == CursorKind.FUNCTION_DECL:
                if buf:
                    blocks.append(Block(buf))
                    buf = []

                (i, ret) = consume_extent(i, tokens, detect_change=True)
                buf += ret

            else:
                (i, ret) = consume_line(i, tokens)
                buf += ret

        if buf:
            blocks.append(Block(buf))

        # _parsed=True indicates a successful parsing, although may result an
        # empty BlockList.
        self._parsed = True

        return BlockList(blocks)

    def parse(self, tokzer):
        return self.getBlocks(tokzer)

    def parseFile(self, path):
        return self.getBlocks(CppFileTokenizer(path))


def test_block_parsing(lines, expected):
    """Helper method to test the correctness of BlockParser.parse."""
    blocks = BlockParser().parse(CppStringTokenizer('\n'.join(lines)))
    if len(blocks) != len(expected):
        raise BadExpectedToken("BlockParser.parse() returned '%s' expecting "
                               "'%s'" % (str(blocks), repr(expected)))
    for n in range(len(blocks)):
        if str(blocks[n]) != expected[n]:
            raise BadExpectedToken("BlockParser.parse()[%d] is '%s', "
                                   "expecting '%s'" % (n, str(blocks[n]),
                                                       expected[n]))


def test_BlockParser():
    test_block_parsing(["#error hello"], ["#error hello"])
    test_block_parsing(["foo", "", "bar"], ["foo bar"])

    # We currently cannot handle the following case with libclang properly.
    # Fortunately it doesn't appear in current headers.
    # test_block_parsing(["foo", "  #  ", "bar"], ["foo", "bar"])

    test_block_parsing(["foo",
                        "  #  /* ahah */ if defined(__KERNEL__) /* more */",
                        "bar", "#endif"],
                       ["foo", "#ifdef __KERNEL__", "bar", "#endif"])


################################################################################
################################################################################
#####                                                                      #####
#####        B L O C K   L I S T   O P T I M I Z A T I O N                 #####
#####                                                                      #####
################################################################################
################################################################################


def remove_macro_defines(blocks, excludedMacros=None):
    """Remove macro definitions like #define <macroName>  ...."""
    if excludedMacros is None:
        excludedMacros = set()
    result = []
    for b in blocks:
        macroName = b.isDefine()
        if macroName is None or macroName not in excludedMacros:
            result.append(b)

    return result


def find_matching_endif(blocks, i):
    """Traverse the blocks to find out the matching #endif."""
    n = len(blocks)
    depth = 1
    while i < n:
        if blocks[i].isDirective():
            dir_ = blocks[i].directive
            if dir_ in ["if", "ifndef", "ifdef"]:
                depth += 1
            elif depth == 1 and dir_ in ["else", "elif"]:
                return i
            elif dir_ == "endif":
                depth -= 1
                if depth == 0:
                    return i
        i += 1
    return i


def optimize_if01(blocks):
    """Remove the code between #if 0 .. #endif in a list of CppBlocks."""
    i = 0
    n = len(blocks)
    result = []
    while i < n:
        j = i
        while j < n and not blocks[j].isIf():
            j += 1
        if j > i:
            logging.debug("appending lines %d to %d", blocks[i].lineno,
                          blocks[j-1].lineno)
            result += blocks[i:j]
        if j >= n:
            break
        expr = blocks[j].expr
        r = expr.toInt()
        if r is None:
            result.append(blocks[j])
            i = j + 1
            continue

        if r == 0:
            # if 0 => skip everything until the corresponding #endif
            j = find_matching_endif(blocks, j + 1)
            if j >= n:
                # unterminated #if 0, finish here
                break
            dir_ = blocks[j].directive
            if dir_ == "endif":
                logging.debug("remove 'if 0' .. 'endif' (lines %d to %d)",
                              blocks[i].lineno, blocks[j].lineno)
                i = j + 1
            elif dir_ == "else":
                # convert 'else' into 'if 1'
                logging.debug("convert 'if 0' .. 'else' into 'if 1' (lines %d "
                              "to %d)", blocks[i].lineno, blocks[j-1].lineno)
                blocks[j].directive = "if"
                blocks[j].expr = CppExpr(CppStringTokenizer("1").tokens)
                i = j
            elif dir_ == "elif":
                # convert 'elif' into 'if'
                logging.debug("convert 'if 0' .. 'elif' into 'if'")
                blocks[j].directive = "if"
                i = j
            continue

        # if 1 => find corresponding endif and remove/transform them
        k = find_matching_endif(blocks, j + 1)
        if k >= n:
            # unterminated #if 1, finish here
            logging.debug("unterminated 'if 1'")
            result += blocks[j+1:k]
            break

        dir_ = blocks[k].directive
        if dir_ == "endif":
            logging.debug("convert 'if 1' .. 'endif' (lines %d to %d)",
                          blocks[j].lineno, blocks[k].lineno)
            result += optimize_if01(blocks[j+1:k])
            i = k + 1
        elif dir_ == "else":
            # convert 'else' into 'if 0'
            logging.debug("convert 'if 1' .. 'else' (lines %d to %d)",
                          blocks[j].lineno, blocks[k].lineno)
            result += optimize_if01(blocks[j+1:k])
            blocks[k].directive = "if"
            blocks[k].expr = CppExpr(CppStringTokenizer("0").tokens)
            i = k
        elif dir_ == "elif":
            # convert 'elif' into 'if 0'
            logging.debug("convert 'if 1' .. 'elif' (lines %d to %d)",
                          blocks[j].lineno, blocks[k].lineno)
            result += optimize_if01(blocks[j+1:k])
            blocks[k].expr = CppExpr(CppStringTokenizer("0").tokens)
            i = k
    return result


def test_optimizeAll():
    text = """\
#if 1
#define  GOOD_1
#endif
#if 0
#define  BAD_2
#define  BAD_3
#endif

#if 1
#define  GOOD_2
#else
#define  BAD_4
#endif

#if 0
#define  BAD_5
#else
#define  GOOD_3
#endif

#if defined(__KERNEL__)
#define BAD_KERNEL
#endif

#if defined(__KERNEL__) || !defined(__GLIBC__) || (__GLIBC__ < 2)
#define X
#endif

#ifndef SIGRTMAX
#define SIGRTMAX 123
#endif /* SIGRTMAX */

#if 0
#if 1
#define  BAD_6
#endif
#endif\
"""

    expected = """\
#define GOOD_1
#define GOOD_2
#define GOOD_3
#if !defined(__GLIBC__) || __GLIBC__ < 2
#define X
#endif
#ifndef __SIGRTMAX
#define __SIGRTMAX 123
#endif\
"""

    out = utils.StringOutput()
    blocks = BlockParser().parse(CppStringTokenizer(text))
    blocks.replaceTokens(kernel_token_replacements)
    blocks.optimizeAll({"__KERNEL__": kCppUndefinedMacro})
    blocks.write(out)
    if out.get() != expected:
        print "[FAIL]: macro optimization failed\n"
        print "<<<< expecting '",
        print expected,
        print "'\n>>>> result '",
        print out.get(),
        print "'\n----"
        global failure_count
        failure_count += 1


def runUnitTests():
    """Always run all unit tests for this program."""
    test_CppTokenizer()
    test_CppExpr()
    test_optimizeAll()
    test_BlockParser()


failure_count = 0
runUnitTests()
if failure_count != 0:
    utils.panic("Unit tests failed in cpp.py.\n")

登录后可以享受更多权益