#!/usr/bin/env python
#
# Copyright 2007 Neal Norwitz
# Portions Copyright 2007 Google Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""Generate an Abstract Syntax Tree (AST) for C++."""

__author__ = 'nnorwitz@google.com (Neal Norwitz)'


# TODO:
#  * Tokens should never be exported, need to convert to Nodes
#    (return types, parameters, etc.)
#  * Handle static class data for templatized classes
#  * Handle casts (both C++ and C-style)
#  * Handle conditions and loops (if/else, switch, for, while/do)
#
# TODO much, much later:
#  * Handle #define
#  * exceptions


try:
    # Python 3.x
    import builtins
except ImportError:
    # Python 2.x
    import __builtin__ as builtins

import sys
import traceback

from cpp import keywords
from cpp import tokenize
from cpp import utils


if not hasattr(builtins, 'reversed'):
    # Fallback for interpreters that lack a built-in reversed()
    # (Python 2.3 and earlier).
    def reversed(seq):
        """Yield the items of the indexable sequence seq, last to first."""
        index = len(seq)
        while index > 0:
            index -= 1
            yield seq[index]

if not hasattr(builtins, 'next'):
    # Fallback for interpreters that lack a built-in next()
    # (Python 2.5 and earlier).
    def next(obj):
        """Return the following item of obj by calling its next() method."""
        advance = obj.next
        return advance()


# Visibility levels, numbered consecutively from zero.
VISIBILITY_PUBLIC = 0
VISIBILITY_PROTECTED = 1
VISIBILITY_PRIVATE = 2

# Function modifier flags.  Each flag occupies its own bit so that several
# of them can be combined in a single integer.
FUNCTION_NONE = 0
FUNCTION_CONST = 1 << 0
FUNCTION_VIRTUAL = 1 << 1
FUNCTION_PURE_VIRTUAL = 1 << 2
FUNCTION_CTOR = 1 << 3
FUNCTION_DTOR = 1 << 4
FUNCTION_ATTRIBUTE = 1 << 5
FUNCTION_UNKNOWN_ANNOTATION = 1 << 6
FUNCTION_THROW = 1 << 7

"""
These are currently unused.  Should really handle these properly at some point.

TYPE_MODIFIER_INLINE   = 0x010000
TYPE_MODIFIER_EXTERN   = 0x020000
TYPE_MODIFIER_STATIC   = 0x040000
TYPE_MODIFIER_CONST    = 0x080000
TYPE_MODIFIER_REGISTER = 0x100000
TYPE_MODIFIER_VOLATILE = 0x200000
TYPE_MODIFIER_MUTABLE  = 0x400000

TYPE_MODIFIER_MAP = {
    'inline': TYPE_MODIFIER_INLINE,
    'extern': TYPE_MODIFIER_EXTERN,
    'static': TYPE_MODIFIER_STATIC,
    'const': TYPE_MODIFIER_CONST,
    'register': TYPE_MODIFIER_REGISTER,
    'volatile': TYPE_MODIFIER_VOLATILE,
    'mutable': TYPE_MODIFIER_MUTABLE,
    }
"""

# token_type value marking tokens meant for the builder itself;
# AstBuilder.Generate consumes such tokens without producing a node.
_INTERNAL_TOKEN = 'internal'
# Name of the internal token on which AstBuilder.Generate pops the last
# entry off its namespace stack.
_NAMESPACE_POP = 'ns-pop'


# TODO(nnorwitz): use this as a singleton for templated_types, etc
# where we don't want to create a new empty dict each time.  It is also const.
class _NullDict(object):
    """Stateless, always-empty stand-in for a dict.

    Every membership test is False and every key/value/item accessor
    returns an empty tuple.
    """

    # `key in obj` invokes __contains__(self, key), so the function has to
    # accept the key operand; with only `self` every `in` test would raise
    # TypeError.
    __contains__ = lambda self, key: False
    keys = values = items = iterkeys = itervalues = iteritems = lambda self: ()


# TODO(nnorwitz): move AST nodes into a separate module.
class Node(object):
    """Base AST node.

    Attributes:
      start: where the node begins, as supplied by the creator (formatted
        as an integer in debug output).
      end: where the node ends, as supplied by the creator (formatted as an
        integer in debug output).
    """

    def __init__(self, start, end):
        self.start = start
        self.end = end

    def IsDeclaration(self):
        """Returns bool if this node is a declaration."""
        return False

    def IsDefinition(self):
        """Returns bool if this node is a definition."""
        return False

    def IsExportable(self):
        """Returns bool if this node exportable from a header file."""
        return False

    def Requires(self, node):
        """Does this AST node require the definition of the node passed in?"""
        return False

    def __str__(self):
        """Returns 'ClassName(...)' with an empty description."""
        return self._StringHelper(self.__class__.__name__, '')

    # __repr__ delegates to str(); without a real __str__ the inherited
    # object.__str__ would call repr() again and recurse without end for
    # any node class that does not define its own __str__.  The old
    # spelling is kept as an alias for existing callers.
    XXX__str__ = __str__

    def _StringHelper(self, name, suffix):
        """Formats 'name(suffix)', adding start/end when utils.DEBUG is set."""
        if not utils.DEBUG:
            return '%s(%s)' % (name, suffix)
        return '%s(%d, %d, %s)' % (name, self.start, self.end, suffix)

    def __repr__(self):
        return str(self)


class Define(Node):
    """Node for a #define: the macro name and its replacement text."""

    def __init__(self, start, end, name, definition):
        super(Define, self).__init__(start, end)
        self.definition = definition
        self.name = name

    def __str__(self):
        return self._StringHelper(self.__class__.__name__,
                                  '%s %s' % (self.name, self.definition))


class Include(Node):
    """Node for an #include.

    Attributes:
      filename: the included path, without its surrounding delimiters.
      system: true when printed in the <...> form, false for "...".
    """

    def __init__(self, start, end, filename, system):
        super(Include, self).__init__(start, end)
        self.system = system
        self.filename = filename

    def __str__(self):
        if self.system:
            rendered = '<%s>' % self.filename
        else:
            rendered = '"%s"' % self.filename
        return self._StringHelper(self.__class__.__name__, rendered)


class Goto(Node):
    """Node that carries a single label."""

    def __init__(self, start, end, label):
        super(Goto, self).__init__(start, end)
        self.label = label

    def __str__(self):
        label_text = str(self.label)
        return self._StringHelper(self.__class__.__name__, label_text)


class Expr(Node):
    """Node wrapping an expression payload stored in `expr`."""

    def __init__(self, start, end, expr):
        super(Expr, self).__init__(start, end)
        self.expr = expr

    def __str__(self):
        expr_text = str(self.expr)
        return self._StringHelper(self.__class__.__name__, expr_text)

    def Requires(self, node):
        # TODO(nnorwitz): impl.
        return False


class Return(Expr):
    """Marker subclass of Expr; it only contributes its class name."""


class Delete(Expr):
    """Marker subclass of Expr; it only contributes its class name."""


class Friend(Expr):
    """Expr that additionally remembers the namespace it was seen in."""

    def __init__(self, start, end, expr, namespace):
        super(Friend, self).__init__(start, end, expr)
        # Slice-copy so that later changes to the caller's sequence do not
        # show through on this node.
        self.namespace = namespace[:]


class Using(Node):
    """Node that stores the `names` it was given."""

    def __init__(self, start, end, names):
        super(Using, self).__init__(start, end)
        self.names = names

    def __str__(self):
        names_text = str(self.names)
        return self._StringHelper(self.__class__.__name__, names_text)


class Parameter(Node):
    """One parameter: its name, its type node and optional default tokens."""

    def __init__(self, start, end, name, parameter_type, default):
        super(Parameter, self).__init__(start, end)
        self.name = name
        self.type = parameter_type
        self.default = default

    def Requires(self, node):
        # TODO(nnorwitz): handle namespaces, etc.
        own_type_name = self.type.name
        return own_type_name == node.name

    def __str__(self):
        pieces = ['%s %s' % (str(self.type), self.name)]
        if self.default:
            # The default is a token sequence; its names are concatenated
            # with no separator.
            pieces.append(' = ')
            pieces.extend([d.name for d in self.default])
        return self._StringHelper(self.__class__.__name__, ''.join(pieces))


class _GenericDeclaration(Node):
    """Common base for named nodes that live inside a namespace stack."""

    def __init__(self, start, end, name, namespace):
        super(_GenericDeclaration, self).__init__(start, end)
        self.name = name
        # Keep a private copy of the namespace sequence.
        self.namespace = namespace[:]

    def FullName(self):
        """Returns the name, '::'-qualified when the last namespace is named."""
        qualifier = ''
        namespace = self.namespace
        if namespace and namespace[-1]:
            qualifier = '%s::' % '::'.join(namespace)
        return qualifier + self.name

    def _TypeStringHelper(self, suffix):
        """Formats suffix for display, appending ' in <namespaces>' if any."""
        if self.namespace:
            shown = []
            for entry in self.namespace:
                # Falsy entries are printed as anonymous namespaces.
                shown.append(entry or '<anonymous>')
            suffix = suffix + ' in ' + '::'.join(shown)
        return self._StringHelper(self.__class__.__name__, suffix)


# TODO(nnorwitz): merge with Parameter in some way?
class VariableDeclaration(_GenericDeclaration):
    """A variable: name, type node and optional initial value text."""

    def __init__(self, start, end, name, var_type, initial_value, namespace):
        super(VariableDeclaration, self).__init__(start, end, name, namespace)
        self.initial_value = initial_value
        self.type = var_type

    def Requires(self, node):
        # TODO(nnorwitz): handle namespaces, etc.
        own_type_name = self.type.name
        return own_type_name == node.name

    def ToString(self):
        """Return a string that tries to reconstitute the variable decl."""
        declaration = '%s %s' % (self.type, self.name)
        if not self.initial_value:
            return declaration
        return declaration + ' = ' + self.initial_value

    def __str__(self):
        description = self.ToString()
        return self._StringHelper(self.__class__.__name__, description)

class Typedef(_GenericDeclaration):
    """A typedef: the new name plus the sequence it aliases."""

    def __init__(self, start, end, name, alias, namespace):
        super(Typedef, self).__init__(start, end, name, namespace)
        self.alias = alias

    def IsDefinition(self):
        return True

    def IsExportable(self):
        return True

    def Requires(self, node):
        # TODO(nnorwitz): handle namespaces, etc.
        wanted = node.name
        for token in self.alias:
            # The alias sequence may contain None entries; skip them.
            if token is None:
                continue
            if wanted == token.name:
                return True
        return False

    def __str__(self):
        return self._TypeStringHelper('%s, %s' % (self.name, self.alias))


class _NestedType(_GenericDeclaration):
    """Base for named types that carry a `fields` payload."""

    def __init__(self, start, end, name, fields, namespace):
        super(_NestedType, self).__init__(start, end, name, namespace)
        self.fields = fields

    def IsDefinition(self):
        return True

    def IsExportable(self):
        return True

    def __str__(self):
        return self._TypeStringHelper('%s, {%s}' % (self.name, self.fields))


class Union(_NestedType):
    """A _NestedType distinguished only by its class name."""


class Enum(_NestedType):
    """A _NestedType distinguished only by its class name."""


class Class(_GenericDeclaration):
    """A class: name, base-class token lists, template types and body.

    A node whose bases and body are both None is a forward declaration;
    anything else counts as a definition.
    """

    def __init__(self, start, end, name, bases, templated_types, body, namespace):
        super(Class, self).__init__(start, end, name, namespace)
        self.templated_types = templated_types
        self.bases = bases
        self.body = body

    def IsDeclaration(self):
        return not (self.bases is not None or self.body is not None)

    def IsDefinition(self):
        return not self.IsDeclaration()

    def IsExportable(self):
        return not self.IsDeclaration()

    def Requires(self, node):
        # TODO(nnorwitz): handle namespaces, etc.
        # TODO(nnorwitz): bases are tokens, do name comparision.
        for base_tokens in self.bases or ():
            for token in base_tokens:
                if token.name == node.name:
                    return True
        # TODO(nnorwitz): search in body too.
        return False

    def __str__(self):
        display_name = self.name
        if self.templated_types:
            display_name = display_name + '<%s>' % self.templated_types
        return self._TypeStringHelper(
            '%s, %s, %s' % (display_name, self.bases, self.body))


class Struct(Class):
    """A Class distinguished only by its class name."""


class Function(_GenericDeclaration):
    """A function declaration (body is None) or definition (body given).

    The return type and parameters arrive as token sequences and are
    converted to a Type node and Parameter nodes on construction.
    """

    def __init__(self, start, end, name, return_type, parameters,
                 modifiers, templated_types, body, namespace):
        super(Function, self).__init__(start, end, name, namespace)
        to_nodes = TypeConverter(namespace)
        self.return_type = to_nodes.CreateReturnType(return_type)
        self.parameters = to_nodes.ToParameters(parameters)
        self.templated_types = templated_types
        self.modifiers = modifiers
        self.body = body

    def IsDeclaration(self):
        return self.body is None

    def IsDefinition(self):
        return self.body is not None

    def IsExportable(self):
        # Functions with a static return type are never exportable; others
        # are exportable unless an anonymous (None) namespace encloses them.
        result_type = self.return_type
        if result_type and 'static' in result_type.modifiers:
            return False
        return not (None in self.namespace)

    def Requires(self, node):
        # TODO(nnorwitz): parameters are tokens, do name comparision.
        wanted = node.name
        for parameter in self.parameters or ():
            if parameter.name == wanted:
                return True
        # TODO(nnorwitz): search in body too.
        return False

    def __str__(self):
        # TODO(nnorwitz): add templated_types.
        fields = (self.return_type, self.name, self.parameters,
                  self.modifiers, self.body)
        return self._TypeStringHelper('%s %s(%s), 0x%02x, %s' % fields)


class Method(Function):
    """A Function that also records the class it belongs to (`in_class`)."""

    def __init__(self, start, end, name, in_class, return_type, parameters,
                 modifiers, templated_types, body, namespace):
        super(Method, self).__init__(start, end, name, return_type,
                                     parameters, modifiers, templated_types,
                                     body, namespace)
        # TODO(nnorwitz): in_class could also be a namespace which can
        # mess up finding functions properly.
        self.in_class = in_class


class Type(_GenericDeclaration):
    """Type used for any variable (eg class, primitive, struct, etc)."""

    def __init__(self, start, end, name, templated_types, modifiers,
                 reference, pointer, array):
        """
        Args:
          name: str name of main type
          templated_types: [Class (Type?)] template type info between <>
          modifiers: [str] type modifiers (keywords) eg, const, mutable, etc.
          reference, pointer, array: bools
        """
        super(Type, self).__init__(start, end, name, [])
        self.templated_types = templated_types
        self.reference = reference
        self.pointer = pointer
        self.array = array
        # With no explicit name, the last modifier is promoted to be the
        # name; this removes it from the caller's modifiers list.
        if not name and modifiers:
            self.name = modifiers.pop()
        self.modifiers = modifiers

    # By definition, Is* are always False.  A Type can only exist in
    # some sort of variable declaration, parameter, or return value.
    def IsDeclaration(self):
        return False

    def IsDefinition(self):
        return False

    def IsExportable(self):
        return False

    def __str__(self):
        pieces = []
        if self.modifiers:
            pieces.append(' '.join(self.modifiers) + ' ')
        pieces.append(str(self.name))
        if self.templated_types:
            pieces.append('<%s>' % self.templated_types)
        if self.reference:
            pieces.append('&')
        if self.pointer:
            pieces.append('*')
        if self.array:
            pieces.append('[]')
        return self._TypeStringHelper(''.join(pieces))


class TypeConverter(object):
    """Converts flat token sequences into Type and Parameter nodes."""

    def __init__(self, namespace_stack):
        # Stored as given (not copied).  None of the methods of this class
        # read it.
        self.namespace_stack = namespace_stack

    def _GetTemplateEnd(self, tokens, start):
        """Find the '>' that closes an already opened template list.

        Args:
          tokens: sequence of tokens.
          start: index of the first token after the opening '<'.

        Returns:
          (tokens strictly between the brackets, index just past the
          matching '>').

        Raises:
          IndexError: if the sequence ends before the matching '>'.
        """
        # One '<' has already been consumed by the caller.
        count = 1
        end = start
        while 1:
            token = tokens[end]
            end += 1
            if token.name == '<':
                count += 1
            elif token.name == '>':
                count -= 1
                if count == 0:
                    break
        # end now points one past the closing '>', which is left out of the
        # returned slice.
        return tokens[start:end-1], end

    def ToType(self, tokens):
        """Convert [Token,...] to [Type(...), ] useful for base classes.
        For example, code like class Foo : public Bar<x, y> { ... };
        the "Bar<x, y>" portion gets converted to an AST.

        Template arguments are converted recursively and attached to the
        Type they follow.

        Returns:
          [Type(...), ...]
        """
        result = []
        name_tokens = []
        reference = pointer = array = False

        def AddType(templated_types):
            # Builds one Type from the tokens gathered so far.  Reads the
            # enclosing reference/pointer/array flags as they stand at call
            # time and indexes name_tokens[0], so it needs at least one
            # gathered token.
            # Partition tokens into name and modifier tokens.
            names = []
            modifiers = []
            for t in name_tokens:
                if keywords.IsKeyword(t.name):
                    modifiers.append(t.name)
                else:
                    names.append(t.name)
            name = ''.join(names)
            result.append(Type(name_tokens[0].start, name_tokens[-1].end,
                               name, templated_types, modifiers,
                               reference, pointer, array))
            # Clear in place so the closure keeps seeing the same list.
            del name_tokens[:]

        i = 0
        end = len(tokens)
        while i < end:
            token = tokens[i]
            if token.name == '<':
                new_tokens, new_end = self._GetTemplateEnd(tokens, i+1)
                AddType(self.ToType(new_tokens))
                # If there is a comma after the template, we need to consume
                # that here otherwise it becomes part of the name.
                # (new_end is one past the '>', and the i += 1 below then
                # skips whichever token sits at new_end.)
                i = new_end
                reference = pointer = array = False
            elif token.name == ',':
                AddType([])
                reference = pointer = array = False
            elif token.name == '*':
                pointer = True
            elif token.name == '&':
                reference = True
            elif token.name == '[':
               # NOTE(review): '[' sets pointer here, while ToParameters sets
               # array for the same token; array is never set in this
               # method -- confirm this is intended.
               pointer = True
            elif token.name == ']':
                pass
            else:
                name_tokens.append(token)
            i += 1

        if name_tokens:
            # No '<' in the tokens, just a simple name and no template.
            AddType([])
        return result

    def DeclarationToParts(self, parts, needs_name_removed):
        """Split the tokens of a declaration into its components.

        Args:
          parts: [Token] making up the declaration.  When needs_name_removed
            is true and there is no '=', a trailing NAME token is popped off
            this list in place.
          needs_name_removed: true when parts still contains the declared
            name (and possibly '= default'), which must be separated from
            the type.

        Returns:
          (name, type_name, templated_types, modifiers, default,
          other_tokens) where name is a str (None when needs_name_removed is
          false, '???' when no name could be found), type_name is a str,
          templated_types is a list of Type, modifiers is a list of keyword
          strings, default is the list of tokens after '=' and other_tokens
          holds the '*', '&', '>', '[', ']' and '=' tokens that were seen.
        """
        name = None
        default = []
        if needs_name_removed:
            # Handle default (initial) values properly.
            for i, t in enumerate(parts):
                if t.name == '=':
                    default = parts[i+1:]
                    name = parts[i-1].name
                    if name == ']' and parts[i-2].name == '[':
                        # 'x[] = ...': the name sits before the brackets.
                        # NOTE(review): after this adjustment the slice
                        # below still keeps the name token in parts --
                        # confirm this is intended.
                        name = parts[i-3].name
                        i -= 1
                    parts = parts[:i-1]
                    break
            else:
                # No '=' found: the name, if any, is the last token.
                if parts[-1].token_type == tokenize.NAME:
                    name = parts.pop().name
                else:
                    # TODO(nnorwitz): this is a hack that happens for code like
                    # Register(Foo<T>); where it thinks this is a function call
                    # but it's actually a declaration.
                    name = '???'
        modifiers = []
        type_name = []
        other_tokens = []
        templated_types = []
        i = 0
        end = len(parts)
        while i < end:
            p = parts[i]
            if keywords.IsKeyword(p.name):
                modifiers.append(p.name)
            elif p.name == '<':
                templated_tokens, new_end = self._GetTemplateEnd(parts, i+1)
                templated_types = self.ToType(templated_tokens)
                # Leave i on the closing '>'; the i += 1 below steps past it.
                i = new_end - 1
                # Don't add a spurious :: to data members being initialized.
                next_index = i + 1
                if next_index < end and parts[next_index].name == '::':
                    i += 1
            elif p.name in ('[', ']', '='):
                # These are handled elsewhere.
                other_tokens.append(p)
            elif p.name not in ('*', '&', '>'):
                # Ensure that names have a space between them.
                if (type_name and type_name[-1].token_type == tokenize.NAME and
                    p.token_type == tokenize.NAME):
                    type_name.append(tokenize.Token(tokenize.SYNTAX, ' ', 0, 0))
                type_name.append(p)
            else:
                other_tokens.append(p)
            i += 1
        type_name = ''.join([t.name for t in type_name])
        return name, type_name, templated_types, modifiers, default, other_tokens

    def ToParameters(self, tokens):
        """Convert the tokens of a parameter list into Parameter nodes.

        Args:
          tokens: [Token] of the parameter list; may be empty or None.

        Returns:
          [Parameter(...), ...], empty when tokens is empty.
        """
        if not tokens:
            return []

        result = []
        # name and type_name are not read again in this method; AddParameter
        # binds its own locals of the same names.
        name = type_name = ''
        type_modifiers = []
        pointer = reference = array = False
        first_token = None
        default = []

        def AddParameter():
            # Builds one Parameter from the state gathered so far; reads the
            # enclosing variables as they stand at call time.
            if default:
                del default[0]  # Remove flag.
            end = type_modifiers[-1].end
            parts = self.DeclarationToParts(type_modifiers, True)
            (name, type_name, templated_types, modifiers,
             unused_default, unused_other_tokens) = parts
            parameter_type = Type(first_token.start, first_token.end,
                                  type_name, templated_types, modifiers,
                                  reference, pointer, array)
            p = Parameter(first_token.start, end, name,
                          parameter_type, default)
            result.append(p)

        # Nesting depth of '<'...'>'; while positive, tokens (commas
        # included) are collected as part of the current parameter's type.
        template_count = 0
        for s in tokens:
            if not first_token:
                first_token = s
            if s.name == '<':
                template_count += 1
            elif s.name == '>':
                template_count -= 1
            if template_count > 0:
                type_modifiers.append(s)
                continue

            if s.name == ',':
                # End of one parameter: emit it and reset the state.
                AddParameter()
                name = type_name = ''
                type_modifiers = []
                pointer = reference = array = False
                first_token = None
                default = []
            elif s.name == '*':
                pointer = True
            elif s.name == '&':
                reference = True
            elif s.name == '[':
                array = True
            elif s.name == ']':
                pass  # Just don't add to type_modifiers.
            elif s.name == '=':
                # Got a default value.  Add any value (None) as a flag.
                default.append(None)
            elif default:
                # Everything after '=' belongs to the default value.
                default.append(s)
            else:
                type_modifiers.append(s)
        # Emit the final parameter, which has no trailing comma.
        AddParameter()
        return result

    def CreateReturnType(self, return_type_seq):
        """Build the Type node for a function's return type.

        Args:
          return_type_seq: [Token] of the return type; may be empty or None.

        Returns:
          A Type, or None when return_type_seq is empty.
        """
        if not return_type_seq:
            return None
        start = return_type_seq[0].start
        end = return_type_seq[-1].end
        _, name, templated_types, modifiers, default, other_tokens = \
           self.DeclarationToParts(return_type_seq, False)
        # The reference/pointer/array flags come from the punctuation that
        # DeclarationToParts set aside.
        names = [n.name for n in other_tokens]
        reference = '&' in names
        pointer = '*' in names
        array = '[' in names
        return Type(start, end, name, templated_types, modifiers,
                    reference, pointer, array)

    def GetTemplateIndices(self, names):
        """Locate the template portion in a list of token names.

        Args:
          names: [str]; must contain '<'.

        Returns:
          (index of the first '<', index just past the last '>').  When no
          '>' is found the second value is 1.

        Raises:
          ValueError: if names contains no '<'.
        """
        # names is a list of strings.
        start = names.index('<')
        # Scan backwards for the last '>'.
        end = len(names) - 1
        while end > 0:
            if names[end] == '>':
                break
            end -= 1
        return start, end+1

class AstBuilder(object):
    def __init__(self, token_stream, filename, in_class='', visibility=None,
                 namespace_stack=[]):
        """Set up a builder that reads tokens from token_stream.

        Args:
          token_stream: iterator producing the tokens to parse.
          filename: name reported in error messages.
          in_class: '::'-separated name of the enclosing class, '' or None.
          visibility: stored as the initial visibility.
          namespace_stack: enclosing namespaces; copied, never modified.
        """
        self.filename = filename
        self.tokens = token_stream
        # TODO(nnorwitz): use a better data structure (deque) for the queue.
        # Switching directions of the "queue" improved perf by about 25%.
        # Using a deque should be even better since we access from both sides.
        self.token_queue = []
        self.namespace_stack = namespace_stack[:]
        self.visibility = visibility
        self.in_function = False
        self.current_token = None
        # Keep the state whether we are currently handling a typedef or not.
        self._handling_typedef = False

        # Only the last component of a qualified class name is kept here.
        self.in_class = in_class
        self.in_class_name_only = None
        if in_class is not None:
            self.in_class_name_only = in_class.split('::')[-1]

        self.converter = TypeConverter(self.namespace_stack)

    def HandleError(self, msg, token):
        printable_queue = list(reversed(self.token_queue[-20:]))
        sys.stderr.write('Got %s in %s @ %s %s\n' %
                         (msg, self.filename, token, printable_queue))

    def Generate(self):
        """Yield the AST nodes parsed from the token stream, one at a time.

        Internal tokens are consumed without producing a node; a
        namespace-pop token removes the last entry of the namespace stack.
        Iteration ends when the token source is exhausted or hands back a
        falsy token.

        Raises:
          Whatever _GenerateOne raises; the failure is first reported via
          HandleError.
        """
        while 1:
            try:
                token = self._GetNextToken()
            except StopIteration:
                # Normal end of input.  It has to be caught here: a
                # StopIteration escaping from a generator body is turned
                # into RuntimeError (PEP 479).
                break
            if not token:
                break

            # Get the next token.
            self.current_token = token

            # Dispatch on the next token type.
            if token.token_type == _INTERNAL_TOKEN:
                if token.name == _NAMESPACE_POP:
                    self.namespace_stack.pop()
                continue

            # Only the parsing step is guarded.  The yield stays outside so
            # that closing the generator (GeneratorExit raised at the
            # yield) is not reported as a parse error.
            try:
                result = self._GenerateOne(token)
            except Exception:
                self.HandleError('exception', token)
                raise
            if result is not None:
                yield result

    def _CreateVariable(self, pos_token, name, type_name, type_modifiers,
                        ref_pointer_name_seq, templated_types, value=None):
        """Build a VariableDeclaration positioned at pos_token.

        The reference, pointer and array flags of the variable's Type are
        set when ref_pointer_name_seq contains '&', '*' and '['
        respectively.
        """
        markers = ref_pointer_name_seq
        is_reference = '&' in markers
        is_pointer = '*' in markers
        is_array = '[' in markers
        begin, finish = pos_token.start, pos_token.end
        declared_type = Type(begin, finish, type_name, templated_types,
                             type_modifiers, is_reference, is_pointer,
                             is_array)
        return VariableDeclaration(begin, finish, name, declared_type, value,
                                   self.namespace_stack)

    def _GenerateOne(self, token):
        """Parse the construct that starts at token.

        NAME tokens are dispatched to a handle_<keyword> method, recognised
        as a constructor, or parsed as a data/function declaration.  A '~'
        inside a class is tried as a destructor.  Preprocessor tokens yield
        Include/Define nodes, and an '#if 0' block is skipped.

        Args:
          token: the token just read from the stream.

        Returns:
          The node that was built, or None when nothing was produced.
        """
        if token.token_type == tokenize.NAME:
            if (keywords.IsKeyword(token.name) and
                not keywords.IsBuiltinType(token.name)):
                # Keywords other than builtin types have dedicated handlers.
                method = getattr(self, 'handle_' + token.name)
                return method()
            elif token.name == self.in_class_name_only:
                # The token name is the same as the class, must be a ctor if
                # there is a paren.  Otherwise, it's the return type.
                # Peek ahead to get the next token to figure out which.
                next = self._GetNextToken()
                self._AddBackToken(next)
                if next.token_type == tokenize.SYNTAX and next.name == '(':
                    return self._GetMethod([token], FUNCTION_CTOR, None, True)
                # Fall through--handle like any other method.

            # Handle data or function declaration/definition.
            syntax = tokenize.SYNTAX
            temp_tokens, last_token = \
                self._GetVarTokensUpTo(syntax, '(', ';', '{', '[')
            temp_tokens.insert(0, token)
            if last_token.name == '(':
                # If there is an assignment before the paren,
                # this is an expression, not a method.
                expr = bool([e for e in temp_tokens if e.name == '='])
                if expr:
                    new_temp = self._GetTokensUpTo(tokenize.SYNTAX, ';')
                    temp_tokens.append(last_token)
                    temp_tokens.extend(new_temp)
                    # Synthetic ';' so the data branch below takes over.
                    last_token = tokenize.Token(tokenize.SYNTAX, ';', 0, 0)

            if last_token.name == '[':
                # Handle array, this isn't a method, unless it's an operator.
                # TODO(nnorwitz): keep the size somewhere.
                # unused_size = self._GetTokensUpTo(tokenize.SYNTAX, ']')
                temp_tokens.append(last_token)
                if temp_tokens[-2].name == 'operator':
                    temp_tokens.append(self._GetNextToken())
                else:
                    temp_tokens2, last_token = \
                        self._GetVarTokensUpTo(tokenize.SYNTAX, ';')
                    temp_tokens.extend(temp_tokens2)

            if last_token.name == ';':
                # Handle data, this isn't a method.
                parts = self.converter.DeclarationToParts(temp_tokens, True)
                (name, type_name, templated_types, modifiers, default,
                 unused_other_tokens) = parts

                t0 = temp_tokens[0]
                names = [t.name for t in temp_tokens]
                if templated_types:
                    # Cut the template portion out of the names that are
                    # checked for '&', '*' and '['.
                    start, end = self.converter.GetTemplateIndices(names)
                    names = names[:start] + names[end:]
                default = ''.join([t.name for t in default])
                return self._CreateVariable(t0, name, type_name, modifiers,
                                            names, templated_types, default)
            if last_token.name == '{':
                # Push back everything after the first token and retry via
                # the handler named after that first token, if one exists.
                self._AddBackTokens(temp_tokens[1:])
                self._AddBackToken(last_token)
                method_name = temp_tokens[0].name
                method = getattr(self, 'handle_' + method_name, None)
                if not method:
                    # Must be declaring a variable.
                    # TODO(nnorwitz): handle the declaration.
                    return None
                return method()
            return self._GetMethod(temp_tokens, 0, None, False)
        elif token.token_type == tokenize.SYNTAX:
            if token.name == '~' and self.in_class:
                # Must be a dtor (probably not in method body).
                token = self._GetNextToken()
                # self.in_class can contain A::Name, but the dtor will only
                # be Name.  Make sure to compare against the right value.
                if (token.token_type == tokenize.NAME and
                    token.name == self.in_class_name_only):
                    return self._GetMethod([token], FUNCTION_DTOR, None, True)
            # TODO(nnorwitz): handle a lot more syntax.
        elif token.token_type == tokenize.PREPROCESSOR:
            # TODO(nnorwitz): handle more preprocessor directives.
            # token starts with a #, so remove it and strip whitespace.
            name = token.name[1:].lstrip()
            if name.startswith('include'):
                # Remove "include".
                name = name[7:].strip()
                assert name
                # Handle #include \<newline> "header-on-second-line.h".
                if name.startswith('\\'):
                    name = name[1:].strip()
                assert name[0] in '<"', token
                assert name[-1] in '>"', token
                system = name[0] == '<'
                filename = name[1:-1]
                return Include(token.start, token.end, filename, system)
            if name.startswith('define'):
                # Remove "define".
                name = name[6:].strip()
                assert name
                # The macro name ends at the first whitespace character; the
                # rest of the line is the definition.
                value = ''
                for i, c in enumerate(name):
                    if c.isspace():
                        value = name[i:].lstrip()
                        name = name[:i]
                        break
                return Define(token.start, token.end, name, value)
            if name.startswith('if') and name[2:3].isspace():
                # A plain '#if': skip the block when the condition starts
                # with 0 or (0).
                condition = name[3:].strip()
                if condition.startswith('0') or condition.startswith('(0)'):
                    self._SkipIf0Blocks()
        return None

    def _GetTokensUpTo(self, expected_token_type, expected_token):
        return self._GetVarTokensUpTo(expected_token_type, expected_token)[0]

    def _GetVarTokensUpTo(self, expected_token_type, *expected_tokens):
        last_token = self._GetNextToken()
        tokens = []
        while (last_token.token_type != expected_token_type or
               last_token.name not in expected_tokens):
            tokens.append(last_token)
            last_token = self._GetNextToken()
        return tokens, last_token

    # TODO(nnorwitz): remove _IgnoreUpTo() it shouldn't be necesary.
    def _IgnoreUpTo(self, token_type, token):
        unused_tokens = self._GetTokensUpTo(token_type, token)

    def _SkipIf0Blocks(self):
        """Consume tokens through the #endif that closes the current #if.

        Directives starting with 'if' open a nested level and directives
        starting with 'endif' close one; all other tokens are dropped.
        """
        depth = 1
        while depth:
            token = self._GetNextToken()
            if token.token_type == tokenize.PREPROCESSOR:
                # Drop the leading '#' and any whitespace after it.
                directive = token.name[1:].lstrip()
                if directive.startswith('endif'):
                    depth -= 1
                elif directive.startswith('if'):
                    depth += 1

    def _GetMatchingChar(self, open_paren, close_paren, GetNextToken=None):
        """Yield tokens up to and including the matching close_paren.

        Assumes the opening token has already been consumed.  Nested
        open/close pairs are tracked so that only the balancing closer,
        which is the last token yielded, ends the iteration.

        Args:
          open_paren: name of the opening SYNTAX token.
          close_paren: name of the closing SYNTAX token.
          GetNextToken: callable returning the next token; defaults to
            reading from this builder.
        """
        fetch = GetNextToken
        if fetch is None:
            fetch = self._GetNextToken
        depth = 1
        while True:
            token = fetch()
            if token.token_type == tokenize.SYNTAX:
                if token.name == open_paren:
                    depth += 1
                elif token.name == close_paren:
                    depth -= 1
                    if depth == 0:
                        yield token
                        return
            yield token

    def _GetParameters(self):
        return self._GetMatchingChar('(', ')')

    def GetScope(self):
        return self._GetMatchingChar('{', '}')

    def _GetNextToken(self):
        """Return the next token, preferring pushed-back ones.

        Pushed-back tokens are taken from the end of token_queue; the
        underlying token iterator is only advanced when the queue is empty.
        """
        pending = self.token_queue
        if not pending:
            return next(self.tokens)
        return pending.pop()

    def _AddBackToken(self, token):
        """Push token back so that it is returned by a later read.

        A token that came from the stream is re-tagged as queued and placed
        at the front of token_queue (read last); a token that was already
        queued goes to the end (read next).
        """
        if token.whence != tokenize.WHENCE_STREAM:
            assert token.whence == tokenize.WHENCE_QUEUE, token
            self.token_queue.append(token)
            return
        token.whence = tokenize.WHENCE_QUEUE
        self.token_queue.insert(0, token)

    def _AddBackTokens(self, tokens):
        """Push a run of tokens back, keeping their relative read order.

        The origin of the last token decides where the run goes: runs from
        the stream are re-tagged as queued and placed at the front of
        token_queue, already queued runs go to the end.  An empty run is
        ignored.
        """
        if not tokens:
            return
        newest = tokens[-1]
        if newest.whence != tokenize.WHENCE_STREAM:
            assert newest.whence == tokenize.WHENCE_QUEUE, tokens
            self.token_queue.extend(reversed(tokens))
            return
        for token in tokens:
            token.whence = tokenize.WHENCE_QUEUE
        self.token_queue[:0] = reversed(tokens)

    def GetName(self, seq=None):
        """Reads a (possibly qualified/templated) name.

        Args:
          seq: optional sequence of tokens to read from instead of the
            builder's own token source.

        Returns:
          ([tokens], next_token): the tokens forming the name and the first
          token that is not part of it.
        """
        if seq is None:
            GetNextToken = self._GetNextToken
        else:
            it = iter(seq)
            GetNextToken = lambda: next(it)

        collected = []
        previous_was_name = False
        current = GetNextToken()
        while True:
            is_name = current.token_type == tokenize.NAME
            is_joiner = (current.token_type == tokenize.SYNTAX and
                         current.name in ('::', '<'))
            if not (is_name or is_joiner):
                break
            # Two NAMEs in a row means the identifier should terminate.
            # It's probably some sort of variable declaration.
            if previous_was_name and is_name:
                break
            previous_was_name = is_name
            collected.append(current)
            if current.name == '<':
                # Handle templated names: swallow everything through '>'.
                collected.extend(
                    self._GetMatchingChar('<', '>', GetNextToken))
                previous_was_name = True
            current = GetNextToken()
        return collected, current

    def GetMethod(self, modifiers, templated_types):
        """Parses a function/method whose leading tokens are still unread.

        Collects the tokens returned by _GetTokensUpTo(SYNTAX, '(') as the
        return type and name, then delegates to _GetMethod with
        get_paren=False.
        """
        tokens_before_paren = self._GetTokensUpTo(tokenize.SYNTAX, '(')
        assert len(tokens_before_paren) >= 1
        return self._GetMethod(tokens_before_paren, modifiers,
                               templated_types, False)

    def _GetMethod(self, return_type_and_name, modifiers, templated_types,
                   get_paren):
        """Parses a function-like declaration/definition into an AST node.

        Args:
          return_type_and_name: list of tokens preceding the parameter
            list; the last one is taken as the name.  The list is mutated.
          modifiers: bitmask of FUNCTION_* flags known so far.
          templated_types: passed through unchanged to Method/Function.
          get_paren: when true, first read the opening '(' (optionally
            preceded by a '<...>' template portion) from the token source.

        Returns:
          A Method when more than two return-type tokens remain and the
          last is '::', otherwise a Function.  For declarations that turn
          out to be function-pointer data, the result of
          self._CreateVariable() is returned instead.
        """
        template_portion = None
        if get_paren:
            token = self._GetNextToken()
            assert token.token_type == tokenize.SYNTAX, token
            if token.name == '<':
                # Handle templatized dtors.
                template_portion = [token]
                template_portion.extend(self._GetMatchingChar('<', '>'))
                token = self._GetNextToken()
            assert token.token_type == tokenize.SYNTAX, token
            assert token.name == '(', token

        name = return_type_and_name.pop()
        # Handle templatized ctors.
        if name.name == '>':
            # Find the first '<' (searching from index 1) and split the
            # template portion off; the token before it is the real name.
            index = 1
            while return_type_and_name[index].name != '<':
                index += 1
            template_portion = return_type_and_name[index:] + [name]
            del return_type_and_name[index:]
            name = return_type_and_name.pop()
        elif name.name == ']':
            # The tokens were 'operator' '[' ']': merge them into a single
            # synthetic NAME token called 'operator[]'.
            rt = return_type_and_name
            assert rt[-1].name == '[', return_type_and_name
            assert rt[-2].name == 'operator', return_type_and_name
            name_seq = return_type_and_name[-2:]
            del return_type_and_name[-2:]
            name = tokenize.Token(tokenize.NAME, 'operator[]',
                                  name_seq[0].start, name.end)
            # Get the open paren so _GetParameters() below works.
            unused_open_paren = self._GetNextToken()

        # TODO(nnorwitz): store template_portion.
        return_type = return_type_and_name
        # 'indices' is the token whose start/end position is reported for
        # the node: the first return-type token if any, else the name.
        indices = name
        if return_type:
            indices = return_type[0]

        # Force ctor for templatized ctors.
        if name.name == self.in_class and not modifiers:
            modifiers |= FUNCTION_CTOR
        parameters = list(self._GetParameters())
        del parameters[-1]              # Remove trailing ')'.

        # Handling operator() is especially weird.
        if name.name == 'operator' and not parameters:
            token = self._GetNextToken()
            assert token.name == '(', token
            parameters = list(self._GetParameters())
            del parameters[-1]          # Remove trailing ')'.

        # Consume trailing NAME tokens after the parameter list (const,
        # __attribute__(...), throw(...), or upper-case macros).
        token = self._GetNextToken()
        while token.token_type == tokenize.NAME:
            modifier_token = token
            token = self._GetNextToken()
            if modifier_token.name == 'const':
                modifiers |= FUNCTION_CONST
            elif modifier_token.name == '__attribute__':
                # TODO(nnorwitz): handle more __attribute__ details.
                modifiers |= FUNCTION_ATTRIBUTE
                assert token.name == '(', token
                # Consume everything between the (parens).
                unused_tokens = list(self._GetMatchingChar('(', ')'))
                token = self._GetNextToken()
            elif modifier_token.name == 'throw':
                modifiers |= FUNCTION_THROW
                assert token.name == '(', token
                # Consume everything between the (parens).
                unused_tokens = list(self._GetMatchingChar('(', ')'))
                token = self._GetNextToken()
            elif modifier_token.name == modifier_token.name.upper():
                # HACK(nnorwitz):  assume that all upper-case names
                # are some macro we aren't expanding.
                modifiers |= FUNCTION_UNKNOWN_ANNOTATION
            else:
                self.HandleError('unexpected token', modifier_token)

        assert token.token_type == tokenize.SYNTAX, token
        # Handle ctor initializers.
        if token.name == ':':
            # TODO(nnorwitz): anything else to handle for initializer list?
            # Skip the whole initializer list up to the body or the ';'.
            while token.name != ';' and token.name != '{':
                token = self._GetNextToken()

        # Handle pointer to functions that are really data but look
        # like method declarations.
        if token.name == '(':
            if parameters[0].name == '*':
                # name contains the return type.
                name = parameters.pop()
                # parameters contains the name of the data.
                # NOTE: from here on 'modifiers' is a list of names, not
                # the FUNCTION_* bitmask.
                modifiers = [p.name for p in parameters]
                # Already at the ( to open the parameter list.
                function_parameters = list(self._GetMatchingChar('(', ')'))
                del function_parameters[-1]  # Remove trailing ')'.
                # TODO(nnorwitz): store the function_parameters.
                token = self._GetNextToken()
                assert token.token_type == tokenize.SYNTAX, token
                assert token.name == ';', token
                return self._CreateVariable(indices, name.name, indices.name,
                                            modifiers, '', None)
            # At this point, we got something like:
            #  return_type (type::*name_)(params);
            # This is a data member called name_ that is a function pointer.
            # With this code: void (sq_type::*field_)(string&);
            # We get: name=void return_type=[] parameters=sq_type ... field_
            # TODO(nnorwitz): is return_type always empty?
            # TODO(nnorwitz): this isn't even close to being correct.
            # Just put in something so we don't crash and can move on.
            real_name = parameters[-1]
            modifiers = [p.name for p in self._GetParameters()]
            del modifiers[-1]           # Remove trailing ')'.
            return self._CreateVariable(indices, real_name.name, indices.name,
                                        modifiers, '', None)

        if token.name == '{':
            # Definition: capture the body tokens.
            body = list(self.GetScope())
            del body[-1]                # Remove trailing '}'.
        else:
            # Declaration only: no body.
            body = None
            if token.name == '=':
                # Only '= 0' (pure virtual) is accepted here.
                token = self._GetNextToken()
                assert token.token_type == tokenize.CONSTANT, token
                assert token.name == '0', token
                modifiers |= FUNCTION_PURE_VIRTUAL
                token = self._GetNextToken()

            if token.name == '[':
                # TODO(nnorwitz): store tokens and improve parsing.
                # template <typename T, size_t N> char (&ASH(T (&seq)[N]))[N];
                tokens = list(self._GetMatchingChar('[', ']'))
                token = self._GetNextToken()

            assert token.name == ';', (token, return_type_and_name, parameters)

        # Looks like we got a method, not a function.
        if len(return_type) > 2 and return_type[-1].name == '::':
            return_type, in_class = \
                         self._GetReturnTypeAndClassName(return_type)
            return Method(indices.start, indices.end, name.name, in_class,
                          return_type, parameters, modifiers, templated_types,
                          body, self.namespace_stack)
        return Function(indices.start, indices.end, name.name, return_type,
                        parameters, modifiers, templated_types, body,
                        self.namespace_stack)

    def _GetReturnTypeAndClassName(self, token_seq):
        """Splits the tokens before a method name into type and class.

        Splitting the return type from the class name in a method can be
        tricky.  For example, Return::Type::Is::Hard::To::Find().  Where is
        the return type and where is the class name?  The heuristic used is
        to pull the last name as the class name.  This includes all the
        templated type info.

        Args:
          token_seq: list of tokens preceding the method name; the caller
            passes a sequence whose last token is '::'.

        Returns:
          (return_type, class_name): two lists of tokens.
        """
        # TODO(nnorwitz): if there is only One name like in the
        # example above, punt and assume the last bit is the class name.

        # Ignore a :: prefix, if exists so we can find the first real name.
        start = 0
        if token_seq[0].name == '::':
            start = 1
        # Ignore a :: suffix, if exists.
        end = len(token_seq) - 1
        if token_seq[end-1].name == '::':
            end -= 1

        # Make a copy of the sequence so we can append a sentinel value.
        # GetName() needs a terminating token beyond the last name.
        seq_copy = token_seq[start:end]
        real_count = len(seq_copy)
        seq_copy.append(tokenize.Token(tokenize.SYNTAX, '', 0, 0))
        names = []
        # 'pos' indexes seq_copy, which already excludes the '::' prefix.
        # Re-using the token_seq offset here would skip the first real
        # token whenever a prefix was dropped.
        pos = 0
        while pos < real_count:
            # Iterate through the sequence parsing out each name.
            new_name, next_token = self.GetName(seq_copy[pos:])
            assert new_name, 'Got empty new_name, next=%s' % next_token
            # We got a pointer or ref.  Add it to the name.
            if next_token and next_token.token_type == tokenize.SYNTAX:
                new_name.append(next_token)
            names.append(new_name)
            pos += len(new_name)

        # Now that we have the names, it's time to undo what we did.

        # Remove the sentinel value.
        names[-1].pop()
        # Flatten the token sequence for the return type.
        return_type = [e for seq in names[:-1] for e in seq]
        # The class name is the last name.
        class_name = names[-1]
        return return_type, class_name

    def handle_bool(self):
        """Handler for the `bool` keyword; currently a no-op."""
        return None

    def handle_char(self):
        """Handler for the `char` keyword; currently a no-op."""
        return None

    def handle_int(self):
        """Handler for the `int` keyword; currently a no-op."""
        return None

    def handle_long(self):
        """Handler for the `long` keyword; currently a no-op."""
        return None

    def handle_short(self):
        """Handler for the `short` keyword; currently a no-op."""
        return None

    def handle_double(self):
        """Handler for the `double` keyword; currently a no-op."""
        return None

    def handle_float(self):
        """Handler for the `float` keyword; currently a no-op."""
        return None

    def handle_void(self):
        """Handler for the `void` keyword; currently a no-op."""
        return None

    def handle_wchar_t(self):
        """Handler for the `wchar_t` keyword; currently a no-op."""
        return None

    def handle_unsigned(self):
        """Handler for the `unsigned` keyword; currently a no-op."""
        return None

    def handle_signed(self):
        """Handler for the `signed` keyword; currently a no-op."""
        return None

    def _GetNestedType(self, ctor):
        """Parses what follows a nested-type keyword (used for union/enum).

        Args:
          ctor: node class to instantiate; called as
            ctor(start, end, name, fields, namespace_stack).

        Returns:
          A ctor instance for a forward declaration, a typedef'd reference
          or a full '{...}' declaration, or the result of
          self._CreateVariable() when the type is immediately used to
          declare a variable.
        """
        name = None
        name_tokens, token = self.GetName()
        if name_tokens:
            name = ''.join([t.name for t in name_tokens])

        # Handle forward declarations.
        if token.token_type == tokenize.SYNTAX and token.name == ';':
            return ctor(token.start, token.end, name, None,
                        self.namespace_stack)

        if token.token_type == tokenize.NAME and self._handling_typedef:
            # Inside a typedef the NAME belongs to the typedef itself:
            # hand it back for handle_typedef to consume.
            self._AddBackToken(token)
            return ctor(token.start, token.end, name, None,
                        self.namespace_stack)

        # Must be the type declaration.
        if token.token_type == tokenize.SYNTAX and token.name == '{':
            # Only consume the body when a '{' was actually seen.  Doing
            # this unconditionally would swallow unrelated tokens up to
            # the next unmatched '}' for input such as "enum Foo x;".
            fields = list(self._GetMatchingChar('{', '}'))
            del fields[-1]              # Remove trailing '}'.
            next_token = self._GetNextToken()
            new_type = ctor(token.start, token.end, name, fields,
                            self.namespace_stack)
            # A name means this is an anonymous type and the name
            # is the variable declaration.
            if next_token.token_type != tokenize.NAME:
                return new_type
            name = new_type
            token = next_token

        # Must be variable declaration using the type prefixed with keyword.
        assert token.token_type == tokenize.NAME, token
        return self._CreateVariable(token, token.name, name, [], '', None)

    def handle_struct(self):
        """Handles the `struct` keyword.

        Looks ahead to distinguish three uses of `struct Name ...`:
        a variable declared with a struct type, a function returning a
        struct pointer/reference, and an actual struct declaration.  In
        the last case the tokens read for lookahead are pushed back and
        parsing is delegated to _GetClass().

        Returns:
          The result of _CreateVariable(), _GetMethod() or _GetClass().
        """
        # Special case the handling typedef/aliasing of structs here.
        # It would be a pain to handle in the class code.
        name_tokens, var_token = self.GetName()
        if name_tokens:
            next_token = self._GetNextToken()
            # 'struct Name *x' / 'struct Name &x': var_token is the */&.
            is_syntax = (var_token.token_type == tokenize.SYNTAX and
                         var_token.name[0] in '*&')
            # 'struct Name x;': var_token is the variable name.
            is_variable = (var_token.token_type == tokenize.NAME and
                           next_token.name == ';')
            variable = var_token
            if is_syntax and not is_variable:
                # The variable (or function) name follows the */&.
                variable = next_token
                temp = self._GetNextToken()
                if temp.token_type == tokenize.SYNTAX and temp.name == '(':
                    # Handle methods declared to return a struct.
                    t0 = name_tokens[0]
                    # Synthesize the 'struct' keyword token with positions
                    # derived from the first name token.
                    struct = tokenize.Token(tokenize.NAME, 'struct',
                                            t0.start-7, t0.start-2)
                    type_and_name = [struct]
                    type_and_name.extend(name_tokens)
                    type_and_name.extend((var_token, next_token))
                    return self._GetMethod(type_and_name, 0, None, False)
                assert temp.name == ';', (temp, name_tokens, var_token)
            if is_syntax or (is_variable and not self._handling_typedef):
                modifiers = ['struct']
                type_name = ''.join([t.name for t in name_tokens])
                position = name_tokens[0]
                return self._CreateVariable(position, variable.name, type_name,
                                            modifiers, var_token.name, None)
            # Not a variable: restore everything consumed for lookahead.
            name_tokens.extend((var_token, next_token))
            self._AddBackTokens(name_tokens)
        else:
            self._AddBackToken(var_token)
        return self._GetClass(Struct, VISIBILITY_PUBLIC, None)

    def handle_union(self):
        """Handles the `union` keyword by parsing a nested Union type."""
        union_node = self._GetNestedType(Union)
        return union_node

    def handle_enum(self):
        """Handles the `enum` keyword by parsing a nested Enum type."""
        enum_node = self._GetNestedType(Enum)
        return enum_node

    def handle_auto(self):
        """Handler for the `auto` keyword; currently a no-op."""
        # TODO(nnorwitz): warn about using auto?  Probably not since it
        # will be reclaimed and useful for C++0x.
        return None

    def handle_register(self):
        """Handler for the `register` keyword; currently a no-op."""
        return None

    def handle_const(self):
        """Handler for the `const` keyword; currently a no-op."""
        return None

    def handle_inline(self):
        """Handler for the `inline` keyword; currently a no-op."""
        return None

    def handle_extern(self):
        """Handler for the `extern` keyword; currently a no-op."""
        return None

    def handle_static(self):
        """Handler for the `static` keyword; currently a no-op."""
        return None

    def handle_virtual(self):
        """Handles the `virtual` keyword; what follows must be a method.

        A '~' (optionally after `inline`) marks a virtual destructor and
        is parsed via GetMethod().  Otherwise the token(s) already read
        are put in front of the tokens returned by
        _GetTokensUpTo(SYNTAX, '(') and handed to _GetMethod().
        """
        first = self._GetNextToken()
        second = first
        if first.name == 'inline':
            # HACK(nnorwitz): handle inline dtors by ignoring 'inline'.
            second = self._GetNextToken()
        if second.token_type == tokenize.SYNTAX and second.name == '~':
            return self.GetMethod(FUNCTION_VIRTUAL + FUNCTION_DTOR, None)
        assert first.token_type == tokenize.NAME or first.name == '::', first

        # Tokens already consumed that belong in front of the rest.
        leading = [first]
        if second is not first:
            leading.append(second)
        return_type_and_name = self._GetTokensUpTo(tokenize.SYNTAX, '(')
        return_type_and_name[:0] = leading
        return self._GetMethod(return_type_and_name, FUNCTION_VIRTUAL,
                               None, False)

    def handle_volatile(self):
        """Handler for the `volatile` keyword; currently a no-op."""
        return None

    def handle_mutable(self):
        """Handler for the `mutable` keyword; currently a no-op."""
        return None

    def handle_public(self):
        """Handles `public`: sets the current visibility to public.

        Asserts that the builder is parsing a class body (self.in_class).
        """
        assert self.in_class
        self.visibility = VISIBILITY_PUBLIC

    def handle_protected(self):
        """Handles `protected`: sets the current visibility to protected.

        Asserts that the builder is parsing a class body (self.in_class).
        """
        assert self.in_class
        self.visibility = VISIBILITY_PROTECTED

    def handle_private(self):
        """Handles `private`: sets the current visibility to private.

        Asserts that the builder is parsing a class body (self.in_class).
        """
        assert self.in_class
        self.visibility = VISIBILITY_PRIVATE

    def handle_friend(self):
        """Handles `friend`: wraps the declaration's tokens in a Friend node.

        The node is positioned at the first token returned by
        _GetTokensUpTo(SYNTAX, ';').
        """
        friend_tokens = self._GetTokensUpTo(tokenize.SYNTAX, ';')
        assert friend_tokens
        first = friend_tokens[0]
        return Friend(first.start, first.end, friend_tokens,
                      self.namespace_stack)

    def handle_static_cast(self):
        """Handler for the `static_cast` keyword; currently a no-op."""
        return None

    def handle_const_cast(self):
        """Handler for the `const_cast` keyword; currently a no-op."""
        return None

    def handle_dynamic_cast(self):
        """Handler for the `dynamic_cast` keyword; currently a no-op."""
        return None

    def handle_reinterpret_cast(self):
        """Handler for the `reinterpret_cast` keyword; currently a no-op."""
        return None

    def handle_new(self):
        """Handler for the `new` keyword; currently a no-op."""
        return None

    def handle_delete(self):
        """Handles `delete`: wraps the statement's tokens in a Delete node.

        The node is positioned at the first token returned by
        _GetTokensUpTo(SYNTAX, ';').
        """
        deleted = self._GetTokensUpTo(tokenize.SYNTAX, ';')
        assert deleted
        first = deleted[0]
        return Delete(first.start, first.end, deleted)

    def handle_typedef(self):
        """Handles `typedef` and returns a Typedef node.

        If the token after `typedef` is a keyword, the matching
        handle_<keyword>() method is run with self._handling_typedef set,
        and its result becomes the first element of the aliased type.  The
        remaining tokens returned by _GetTokensUpTo(SYNTAX, ';') follow;
        the last of them is normally the new type name.
        """
        token = self._GetNextToken()
        if (token.token_type == tokenize.NAME and
            keywords.IsKeyword(token.name)):
            # Token must be struct/enum/union/class.
            method = getattr(self, 'handle_' + token.name)
            self._handling_typedef = True
            try:
                tokens = [method()]
            finally:
                # Always clear the flag, even when the handler raises, so
                # the builder is never left stuck in typedef mode.
                self._handling_typedef = False
        else:
            tokens = [token]

        # Get the remainder of the typedef up to the semi-colon.
        tokens.extend(self._GetTokensUpTo(tokenize.SYNTAX, ';'))

        # TODO(nnorwitz): clean all this up.
        assert tokens
        name = tokens.pop()
        # 'indices' supplies the start/end position reported for the node.
        indices = name
        if tokens:
            indices = tokens[0]
        if not indices:
            # The keyword handler returned nothing usable; fall back to
            # the token that followed 'typedef'.
            indices = token
        if name.name == ')':
            # HACK(nnorwitz): Handle pointers to functions "properly".
            if (len(tokens) >= 4 and
                tokens[1].name == '(' and tokens[2].name == '*'):
                tokens.append(name)
                name = tokens[3]
        elif name.name == ']':
            # HACK(nnorwitz): Handle arrays properly.
            if len(tokens) >= 2:
                tokens.append(name)
                name = tokens[1]
        new_type = tokens
        if tokens and isinstance(tokens[0], tokenize.Token):
            new_type = self.converter.ToType(tokens)[0]
        return Typedef(indices.start, indices.end, name.name,
                       new_type, self.namespace_stack)

    def handle_typeid(self):
        """Handler for the `typeid` keyword; not needed yet, so a no-op."""
        return None

    def handle_typename(self):
        """Handler for the `typename` keyword; not needed yet, so a no-op."""
        return None

    def _GetTemplatedTypes(self):
        """Parses template parameters after an already-consumed '<'.

        Returns:
          dict mapping each parameter name to (type_name, default), where
          type_name is the token preceding the name in a "Type variable"
          pair (else None) and default is the list of tokens forming the
          default after '=' (else None).
        """
        template_tokens = list(self._GetMatchingChar('<', '>'))
        limit = len(template_tokens) - 1    # Ignore trailing '>'.
        templated = {}
        pos = 0
        while pos < limit:
            current = template_tokens[pos]
            pos += 1
            key = current.name
            if keywords.IsKeyword(key) or key == ',':
                continue
            type_name = default = None
            if pos < limit:
                following = template_tokens[pos]
                pos += 1
                if following.name == '=':
                    assert pos < limit, '%s %s' % (pos, template_tokens)
                    default, unused_next_token = self.GetName(
                        template_tokens[pos:])
                    pos += len(default)
                elif following.name != ',':
                    # We got something like: Type variable.
                    # Re-adjust the key (variable) and type_name (Type).
                    key = following.name
                    type_name = current

            templated[key] = (type_name, default)
        return templated

    def handle_template(self):
        """Handles `template <...>` and dispatches on what it introduces.

        Returns:
          The node for a templated class, struct, friend declaration or
          function/method; None when the declaration ends in ';' before
          any '(' (a variable definition).
        """
        opener = self._GetNextToken()
        assert opener.token_type == tokenize.SYNTAX, opener
        assert opener.name == '<', opener
        templated_types = self._GetTemplatedTypes()
        # TODO(nnorwitz): for now, just ignore the template params.
        following = self._GetNextToken()
        if following.token_type == tokenize.NAME:
            keyword = following.name
            if keyword == 'class':
                return self._GetClass(Class, VISIBILITY_PRIVATE,
                                      templated_types)
            if keyword == 'struct':
                return self._GetClass(Struct, VISIBILITY_PUBLIC,
                                      templated_types)
            if keyword == 'friend':
                return self.handle_friend()
        # Look ahead for '(' or ';', then restore everything that was read
        # so the declaration can be parsed from its beginning.
        self._AddBackToken(following)
        pending, terminator = self._GetVarTokensUpTo(tokenize.SYNTAX,
                                                     '(', ';')
        pending.append(terminator)
        self._AddBackTokens(pending)
        if terminator.name != '(':
            # Must be a variable definition.
            return None
        return self.GetMethod(FUNCTION_NONE, templated_types)

    def handle_true(self):
        """Handler for the `true` keyword; nothing to do."""
        return None

    def handle_false(self):
        """Handler for the `false` keyword; nothing to do."""
        return None

    def handle_asm(self):
        """Handler for the `asm` keyword; not needed yet, so a no-op."""
        return None

    def handle_class(self):
        """Handles `class`: parses a Class with private default visibility."""
        class_node = self._GetClass(Class, VISIBILITY_PRIVATE, None)
        return class_node

    def _GetBases(self):
        """Parses the base-class list that follows ':' in a class header.

        Returns:
          (bases, token): the list of converted base types and the '{'
          token that ended the list.
        """
        collected = []
        while True:
            leading = self._GetNextToken()
            assert leading.token_type == tokenize.NAME, leading
            # TODO(nnorwitz): store kind of inheritance...maybe.
            if leading.name in ('public', 'protected', 'private'):
                # Check for virtual inheritance.
                maybe_virtual = self._GetNextToken()
                if maybe_virtual.name != 'virtual':
                    self._AddBackToken(maybe_virtual)
                # TODO(nnorwitz): store that we got virtual for this base.
            else:
                # If inheritance type is not specified, it is private.
                # Just put the token back so we can form a name.
                # TODO(nnorwitz): it would be good to warn about this.
                self._AddBackToken(leading)
            base_tokens, terminator = self.GetName()
            converted = self.converter.ToType(base_tokens)
            assert len(converted) == 1, converted
            collected.append(converted[0])
            assert terminator.token_type == tokenize.SYNTAX, terminator
            if terminator.name == '{':
                return collected, terminator
            # Support multiple inheritance.
            assert terminator.name == ',', terminator

    def _GetClass(self, class_type, visibility, templated_types):
        """Parses a class/struct after its keyword has been consumed.

        Args:
          class_type: node class to instantiate (callers here pass Class
            or Struct).
          visibility: default member visibility handed to the nested
            AstBuilder that parses the body.
          templated_types: template parameters; only stored on forward
            declarations here, None is passed for full declarations.

        Returns:
          A class_type node for forward or full declarations, or the
          result of _CreateVariable()/GetMethod() when the class name is
          only being used as the type of a variable or a return type.
        """
        class_name = None
        class_token = self._GetNextToken()
        if class_token.token_type != tokenize.NAME:
            # Anonymous class/struct: the token is already the syntax that
            # follows the keyword.
            assert class_token.token_type == tokenize.SYNTAX, class_token
            token = class_token
        else:
            # Skip any macro (e.g. storage class specifiers) after the
            # 'class' keyword.
            next_token = self._GetNextToken()
            if next_token.token_type == tokenize.NAME:
                self._AddBackToken(next_token)
            else:
                self._AddBackTokens([class_token, next_token])
            name_tokens, token = self.GetName()
            class_name = ''.join([t.name for t in name_tokens])
        bases = None
        if token.token_type == tokenize.SYNTAX:
            if token.name == ';':
                # Forward declaration.
                return class_type(class_token.start, class_token.end,
                                  class_name, None, templated_types, None,
                                  self.namespace_stack)
            if token.name in '*&':
                # Inline forward declaration.  Could be method or data.
                name_token = self._GetNextToken()
                next_token = self._GetNextToken()
                if next_token.name == ';':
                    # Handle data
                    modifiers = ['class']
                    return self._CreateVariable(class_token, name_token.name,
                                                class_name,
                                                modifiers, token.name, None)
                else:
                    # Assume this is a method.
                    tokens = (class_token, token, name_token, next_token)
                    self._AddBackTokens(tokens)
                    return self.GetMethod(FUNCTION_NONE, None)
            if token.name == ':':
                bases, token = self._GetBases()

        body = None
        if token.token_type == tokenize.SYNTAX and token.name == '{':
            assert token.token_type == tokenize.SYNTAX, token
            assert token.name == '{', token

            # Parse the body with a nested builder scoped to this class.
            ast = AstBuilder(self.GetScope(), self.filename, class_name,
                             visibility, self.namespace_stack)
            body = list(ast.Generate())

            if not self._handling_typedef:
                token = self._GetNextToken()
                if token.token_type != tokenize.NAME:
                    assert token.token_type == tokenize.SYNTAX, token
                    assert token.name == ';', token
                else:
                    # A NAME after the closing brace declares a variable
                    # of the class type that was just defined.
                    new_class = class_type(class_token.start, class_token.end,
                                           class_name, bases, None,
                                           body, self.namespace_stack)

                    modifiers = []
                    return self._CreateVariable(class_token,
                                                token.name, new_class,
                                                modifiers, token.name, None)
        else:
            # No body: only expected while handling a typedef.
            if not self._handling_typedef:
                self.HandleError('non-typedef token', token)
            self._AddBackToken(token)

        return class_type(class_token.start, class_token.end, class_name,
                          bases, None, body, self.namespace_stack)

    def handle_namespace(self):
        """Handles `namespace`: pushes its name and schedules the pop.

        The namespace name (None for an anonymous namespace) is appended
        to self.namespace_stack.  An internal _NAMESPACE_POP token is
        queued in place of the closing '}' (or right away for an alias) so
        the end of the namespace can be detected later.  Always returns
        None.
        """
        token = self._GetNextToken()
        # Support anonymous namespaces.
        name = None
        if token.token_type == tokenize.NAME:
            name = token.name
            token = self._GetNextToken()
        self.namespace_stack.append(name)
        assert token.token_type == tokenize.SYNTAX, token
        # Create an internal token that denotes when the namespace is complete.
        pop_marker = tokenize.Token(_INTERNAL_TOKEN, _NAMESPACE_POP,
                                    None, None)
        pop_marker.whence = token.whence

        if token.name == '=':
            # TODO(nnorwitz): handle aliasing namespaces.
            name, terminator = self.GetName()
            assert terminator.name == ';', terminator
            self._AddBackToken(pop_marker)
            return None

        assert token.name == '{', token
        scope_tokens = list(self.GetScope())
        # Replace the trailing } with the internal namespace pop token.
        # This also covers a namespace with nothing in it.
        scope_tokens[-1] = pop_marker
        self._AddBackTokens(scope_tokens)
        return None

    def handle_using(self):
        """Handles `using`: wraps the statement's tokens in a Using node.

        The node is positioned at the first token returned by
        _GetTokensUpTo(SYNTAX, ';').
        """
        using_tokens = self._GetTokensUpTo(tokenize.SYNTAX, ';')
        assert using_tokens
        first = using_tokens[0]
        return Using(first.start, first.end, using_tokens)

    def handle_explicit(self):
        """Handles `explicit`: parses the constructor that must follow.

        Asserts that the builder is parsing a class body (self.in_class).
        """
        assert self.in_class
        # Nothing much to do.
        # TODO(nnorwitz): maybe verify the method name == class name.
        # This must be a ctor.
        ctor_node = self.GetMethod(FUNCTION_CTOR, None)
        return ctor_node

    def handle_this(self):
        """Handler for the `this` keyword; nothing to do."""
        return None

    def handle_operator(self):
        """Handler for the `operator` keyword; currently a no-op."""
        # Pull off the next token(s?) and make that part of the method name.
        return None

    def handle_sizeof(self):
        """Handler for the `sizeof` keyword; currently a no-op."""
        return None

    def handle_case(self):
        """Handler for the `case` keyword; currently a no-op."""
        return None

    def handle_switch(self):
        """Handler for the `switch` keyword; currently a no-op."""
        return None

    def handle_default(self):
        """Handles `default`: consumes the ':' that must follow it."""
        colon = self._GetNextToken()
        assert colon.token_type == tokenize.SYNTAX
        assert colon.name == ':'

    def handle_if(self):
        """Handler for the `if` keyword; currently a no-op."""
        return None

    def handle_else(self):
        """Handler for the `else` keyword; currently a no-op."""
        return None

    def handle_return(self):
        """Handles `return`: builds a Return node from the expression tokens.

        With no expression tokens, the node is positioned at
        self.current_token and carries None as its expression.
        """
        expr_tokens = self._GetTokensUpTo(tokenize.SYNTAX, ';')
        if expr_tokens:
            first = expr_tokens[0]
            return Return(first.start, first.end, expr_tokens)
        return Return(self.current_token.start, self.current_token.end, None)

    def handle_goto(self):
        """Handles `goto`: builds a Goto node from the single label token."""
        label_tokens = self._GetTokensUpTo(tokenize.SYNTAX, ';')
        assert len(label_tokens) == 1, str(label_tokens)
        label = label_tokens[0]
        return Goto(label.start, label.end, label.name)

    def handle_try(self):
        """Handler for the `try` keyword; not needed yet, so a no-op."""
        return None

    def handle_catch(self):
        """Handler for the `catch` keyword; not needed yet, so a no-op."""
        return None

    def handle_throw(self):
        """Handler for the `throw` keyword; not needed yet, so a no-op."""
        return None

    def handle_while(self):
        """Handler for the `while` keyword; currently a no-op."""
        return None

    def handle_do(self):
        """Handler for the `do` keyword; currently a no-op."""
        return None

    def handle_for(self):
        """Handler for the `for` keyword; currently a no-op."""
        return None

    def handle_break(self):
        """Handles `break` by calling _IgnoreUpTo(SYNTAX, ';').

        Presumably this discards the rest of the statement; confirm
        against _IgnoreUpTo.
        """
        terminator = ';'
        self._IgnoreUpTo(tokenize.SYNTAX, terminator)

    def handle_continue(self):
        """Handles `continue` by calling _IgnoreUpTo(SYNTAX, ';').

        Presumably this discards the rest of the statement; confirm
        against _IgnoreUpTo.
        """
        terminator = ';'
        self._IgnoreUpTo(tokenize.SYNTAX, terminator)


def BuilderFromSource(source, filename):
    """Creates an AstBuilder over the tokens of the given source text.

    Args:
      source: C++ source code, handed to tokenize.GetTokens().
      filename: name of the file the source came from, e.g. 'file1'.

    Returns:
      AstBuilder
    """
    token_stream = tokenize.GetTokens(source)
    return AstBuilder(token_stream, filename)


def PrintIndentifiers(filename, should_print):
    """Prints all identifiers for a C++ source file.

    Writes a message to stderr and returns if the file cannot be read.
    Parsing is best effort: an error while generating the AST stops the
    output for this file without raising.

    Args:
      filename: 'file1'
      should_print: predicate with signature: bool Function(token)
    """
    source = utils.ReadFile(filename, False)
    if source is None:
        sys.stderr.write('Unable to find: %s\n' % filename)
        return

    builder = BuilderFromSource(source, filename)
    try:
        for node in builder.Generate():
            if should_print(node):
                print(node.name)
    except KeyboardInterrupt:
        return
    except Exception:
        # Deliberately best effort: give up on this file but keep going.
        # Only Exception is caught so SystemExit and friends still
        # propagate instead of being silently swallowed.
        pass


def PrintAllIndentifiers(filenames, should_print):
    """Prints the identifiers of every C++ source file in filenames.

    Each file is processed in order by PrintIndentifiers().

    Args:
      filenames: ['file1', 'file2', ...]
      should_print: predicate with signature: bool Function(token)
    """
    for source_path in filenames:
        PrintIndentifiers(source_path, should_print)


def main(argv):
    """Parses every file named in argv[1:], printing the AST when debugging.

    Files that cannot be read are skipped.  A parse failure prints a
    traceback and moves on to the next file; KeyboardInterrupt stops the
    whole run.

    Args:
      argv: command-line arguments; argv[0] is ignored.
    """
    for filename in argv[1:]:
        source = utils.ReadFile(filename)
        if source is None:
            continue

        print('Processing %s' % filename)
        builder = BuilderFromSource(source, filename)
        try:
            # Materialize the nodes here.  On Python 3 filter() is lazy,
            # so the file would not be parsed inside this try block (or
            # at all unless utils.DEBUG is set).
            entire_ast = [node for node in builder.Generate() if node]
        except KeyboardInterrupt:
            return
        except Exception:
            # Already printed a warning, print the traceback and continue.
            traceback.print_exc()
        else:
            if utils.DEBUG:
                for ast in entire_ast:
                    print(ast)


# When run as a script, parse the files named on the command line.
if __name__ == '__main__':
    main(sys.argv)