#! /usr/bin/env python

# Fix Python source files to use the new equality test operator, i.e.,
#       if x = y: ...
# is changed to
#       if x == y: ...
# The script correctly tokenizes the Python program to reliably
# distinguish between assignments and equality tests.
#
# Command line arguments are files or directories to be processed.
# Directories are searched recursively for files whose name looks
# like a python module.
# Symbolic links are always ignored (except as explicit directory
# arguments).  Of course, the original file is kept as a back-up
# (with a "~" attached to its name).
# It complains about binaries (files containing null bytes)
# and about files that are ostensibly not Python files: if the first
# line starts with '#!' and does not contain the string 'python'.
#
# Changes made are reported to stdout in a diff-like format.
#
# Undoubtedly you can do this using find and sed or perl, but this is
# a nice example of Python code that recurses down a directory tree
# and uses regular expressions.  Also note several subtleties like
# preserving the file's mode and avoiding to even write a temp file
# when no changes are needed for a file.
#
# NB: by changing only the function fixline() you can turn this
# into a program for a different change to Python programs...

import sys
import re
import os
from stat import *
import string

err = sys.stderr.write
dbg = err
rep = sys.stdout.write

def main():
    bad = 0
    if not sys.argv[1:]: # No arguments
        err('usage: ' + sys.argv[0] + ' file-or-directory ...\n')
        sys.exit(2)
    for arg in sys.argv[1:]:
        if os.path.isdir(arg):
            if recursedown(arg): bad = 1
        elif os.path.islink(arg):
            err(arg + ': will not process symbolic links\n')
            bad = 1
        else:
            if fix(arg): bad = 1
    sys.exit(bad)

ispythonprog = re.compile('^[a-zA-Z0-9_]+\.py$')
def ispython(name):
    return ispythonprog.match(name) >= 0

def recursedown(dirname):
    dbg('recursedown(%r)\n' % (dirname,))
    bad = 0
    try:
        names = os.listdir(dirname)
    except os.error, msg:
        err('%s: cannot list directory: %r\n' % (dirname, msg))
        return 1
    names.sort()
    subdirs = []
    for name in names:
        if name in (os.curdir, os.pardir): continue
        fullname = os.path.join(dirname, name)
        if os.path.islink(fullname): pass
        elif os.path.isdir(fullname):
            subdirs.append(fullname)
        elif ispython(name):
            if fix(fullname): bad = 1
    for fullname in subdirs:
        if recursedown(fullname): bad = 1
    return bad

def fix(filename):
##      dbg('fix(%r)\n' % (dirname,))
    try:
        f = open(filename, 'r')
    except IOError, msg:
        err('%s: cannot open: %r\n' % (filename, msg))
        return 1
    head, tail = os.path.split(filename)
    tempname = os.path.join(head, '@' + tail)
    g = None
    # If we find a match, we rewind the file and start over but
    # now copy everything to a temp file.
    lineno = 0
    while 1:
        line = f.readline()
        if not line: break
        lineno = lineno + 1
        if g is None and '\0' in line:
            # Check for binary files
            err(filename + ': contains null bytes; not fixed\n')
            f.close()
            return 1
        if lineno == 1 and g is None and line[:2] == '#!':
            # Check for non-Python scripts
            words = string.split(line[2:])
            if words and re.search('[pP]ython', words[0]) < 0:
                msg = filename + ': ' + words[0]
                msg = msg + ' script; not fixed\n'
                err(msg)
                f.close()
                return 1
        while line[-2:] == '\\\n':
            nextline = f.readline()
            if not nextline: break
            line = line + nextline
            lineno = lineno + 1
        newline = fixline(line)
        if newline != line:
            if g is None:
                try:
                    g = open(tempname, 'w')
                except IOError, msg:
                    f.close()
                    err('%s: cannot create: %r\n' % (tempname, msg))
                    return 1
                f.seek(0)
                lineno = 0
                rep(filename + ':\n')
                continue # restart from the beginning
            rep(repr(lineno) + '\n')
            rep('< ' + line)
            rep('> ' + newline)
        if g is not None:
            g.write(newline)

    # End of file
    f.close()
    if not g: return 0 # No changes

    # Finishing touch -- move files

    # First copy the file's mode to the temp file
    try:
        statbuf = os.stat(filename)
        os.chmod(tempname, statbuf[ST_MODE] & 07777)
    except os.error, msg:
        err('%s: warning: chmod failed (%r)\n' % (tempname, msg))
    # Then make a backup of the original file as filename~
    try:
        os.rename(filename, filename + '~')
    except os.error, msg:
        err('%s: warning: backup failed (%r)\n' % (filename, msg))
    # Now move the temp file to the original file
    try:
        os.rename(tempname, filename)
    except os.error, msg:
        err('%s: rename failed (%r)\n' % (filename, msg))
        return 1
    # Return succes
    return 0


from tokenize import tokenprog

match = {'if':':', 'elif':':', 'while':':', 'return':'\n', \
         '(':')', '[':']', '{':'}', '`':'`'}

def fixline(line):
    # Quick check for easy case
    if '=' not in line: return line

    i, n = 0, len(line)
    stack = []
    while i < n:
        j = tokenprog.match(line, i)
        if j < 0:
            # A bad token; forget about the rest of this line
            print '(Syntax error:)'
            print line,
            return line
        a, b = tokenprog.regs[3] # Location of the token proper
        token = line[a:b]
        i = i+j
        if stack and token == stack[-1]:
            del stack[-1]
        elif match.has_key(token):
            stack.append(match[token])
        elif token == '=' and stack:
            line = line[:a] + '==' + line[b:]
            i, n = a + len('=='), len(line)
        elif token == '==' and not stack:
            print '(Warning: \'==\' at top level:)'
            print line,
    return line

if __name__ == "__main__":
    main()