普通文本  |  192行  |  5.84 KB

import re
import sys

def negate(condition):
    """
    Returns a CPP conditional that is the opposite of the conditional passed in.
    """
    if condition.startswith('!'):
        return condition[1:]
    return "!" + condition

class Monitor:
    """
    A simple C preprocessor that scans C source and computes, line by line,
    what the current C preprocessor #if state is.

    Doesn't handle everything--for example, if you have /* inside a C string,
    without a matching */ (also inside a C string), or with a */ inside a C
    string but on another line and with preprocessor macros in between...
    the parser will get lost.

    Anyway this implementation seems to work well enough for the CPython sources.
    """

    is_a_simple_defined = re.compile(r'^defined\s*\(\s*[A-Za-z0-9_]+\s*\)$').match

    def __init__(self, filename=None, *, verbose=False):
        self.stack = []
        self.in_comment = False
        self.continuation = None
        self.line_number = 0
        self.filename = filename
        self.verbose = verbose

    def __repr__(self):
        return ''.join((
            '<Monitor ',
            str(id(self)),
            " line=", str(self.line_number),
            " condition=", repr(self.condition()),
            ">"))

    def status(self):
        return str(self.line_number).rjust(4) + ": " + self.condition()

    def condition(self):
        """
        Returns the current preprocessor state, as a single #if condition.
        """
        return " && ".join(condition for token, condition in self.stack)

    def fail(self, *a):
        if self.filename:
            filename = " " + self.filename
        else:
            filename = ''
        print("Error at" + filename, "line", self.line_number, ":")
        print("   ", ' '.join(str(x) for x in a))
        sys.exit(-1)

    def close(self):
        if self.stack:
            self.fail("Ended file while still in a preprocessor conditional block!")

    def write(self, s):
        for line in s.split("\n"):
            self.writeline(line)

    def writeline(self, line):
        self.line_number += 1
        line = line.strip()

        def pop_stack():
            if not self.stack:
                self.fail("#" + token + " without matching #if / #ifdef / #ifndef!")
            return self.stack.pop()

        if self.continuation:
            line = self.continuation + line
            self.continuation = None

        if not line:
            return

        if line.endswith('\\'):
            self.continuation = line[:-1].rstrip() + " "
            return

        # we have to ignore preprocessor commands inside comments
        #
        # we also have to handle this:
        #     /* start
        #     ...
        #     */   /*    <-- tricky!
        #     ...
        #     */
        # and this:
        #     /* start
        #     ...
        #     */   /* also tricky! */
        if self.in_comment:
            if '*/' in line:
                # snip out the comment and continue
                #
                # GCC allows
                #    /* comment
                #    */ #include <stdio.h>
                # maybe other compilers too?
                _, _, line = line.partition('*/')
                self.in_comment = False

        while True:
            if '/*' in line:
                if self.in_comment:
                    self.fail("Nested block comment!")

                before, _, remainder = line.partition('/*')
                comment, comment_ends, after = remainder.partition('*/')
                if comment_ends:
                    # snip out the comment
                    line = before.rstrip() + ' ' + after.lstrip()
                    continue
                # comment continues to eol
                self.in_comment = True
                line = before.rstrip()
            break

        # we actually have some // comments
        # (but block comments take precedence)
        before, line_comment, comment = line.partition('//')
        if line_comment:
            line = before.rstrip()

        if not line.startswith('#'):
            return

        line = line[1:].lstrip()
        assert line

        fields = line.split()
        token = fields[0].lower()
        condition = ' '.join(fields[1:]).strip()

        if_tokens = {'if', 'ifdef', 'ifndef'}
        all_tokens = if_tokens | {'elif', 'else', 'endif'}

        if token not in all_tokens:
            return

        # cheat a little here, to reuse the implementation of if
        if token == 'elif':
            pop_stack()
            token = 'if'

        if token in if_tokens:
            if not condition:
                self.fail("Invalid format for #" + token + " line: no argument!")
            if token == 'if':
                if not self.is_a_simple_defined(condition):
                    condition = "(" + condition + ")"
            else:
                fields = condition.split()
                if len(fields) != 1:
                    self.fail("Invalid format for #" + token + " line: should be exactly one argument!")
                symbol = fields[0]
                condition = 'defined(' + symbol + ')'
                if token == 'ifndef':
                    condition = '!' + condition

            self.stack.append(("if", condition))
            if self.verbose:
                print(self.status())
            return

        previous_token, previous_condition = pop_stack()

        if token == 'else':
            self.stack.append(('else', negate(previous_condition)))
        elif token == 'endif':
            pass
        if self.verbose:
            print(self.status())

if __name__ == '__main__':
    for filename in sys.argv[1:]:
        with open(filename, "rt") as f:
            cpp = Monitor(filename, verbose=True)
            print()
            print(filename)
            for line_number, line in enumerate(f.read().split('\n'), 1):
                cpp.writeline(line)