#!/usr/bin/ruby
# encoding: utf-8

=begin LICENSE

[The "BSD licence"]
Copyright (c) 2009-2010 Kyle Yetter
All rights reserved.

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:

 1. Redistributions of source code must retain the above copyright
    notice, this list of conditions and the following disclaimer.
 2. Redistributions in binary form must reproduce the above copyright
    notice, this list of conditions and the following disclaimer in the
    documentation and/or other materials provided with the distribution.
 3. The name of the author may not be used to endorse or promote products
    derived from this software without specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

=end

require 'antlr3'

module ANTLR3
module AST

=begin rdoc ANTLR3::AST::Wizard

AST::Wizard is an extra utility class that allows quick creation of AST objects
using definitions writing in ANTLR-style tree definition syntax. It can also
define <i>tree patterns</i>, objects that are conceptually similar to regular
expressions. Patterns allow a simple method for recursively searching through an
AST for a particular node structure. These features make tree wizards useful
while testing and debugging AST constructing parsers and tree parsers. This
library has been ported to Ruby directly from the ANTLR Python runtime API.

See
http://www.antlr.org/wiki/display/~admin/2007/07/02/Exploring+Concept+of+TreeWizard
for more background on the concept of a tree wizard.

== Usage

  # setting up and creating a tree wizard
  token_names = Array.new(4, '') + %w(VAR NUMBER EQ PLUS MINUS MULT DIV)
  adaptor     = ANTLR3::AST::CommonTreeAdaptor.new
  wizard      = ANTLR3::AST::Wizard.new(adaptor, token_names)
  
  # building trees
  lone_node = wizard.create "VAR[x]"   # => x
  lone_node.type                       # => 4  # = VAR
  lone_node.text                       # => "x"
  
  expression_node = wizard.create "(MINUS VAR NUMBER)"
    # => (MINUS VAR NUMBER)
  statement_node = wizard.create "(EQ[=] VAR[x] (PLUS[+] NUMBER[1] NUMBER[2]))" 
    # => (= x (+ 1 2))
  deep_node = wizard.create(<<-TREE)
    (MULT[*] NUMBER[1] 
      (MINUS[-] 
        (MULT[*] NUMBER[3]    VAR[x])
        (DIV[/]  VAR[y] NUMBER[3.14])
        (MULT[*] NUMBER[4]    VAR[z])
      )
    )
  TREE
    # => (* 1 (- (* 3 x) (/ y 3.14) (* 4 z))
  
  bad_tree_syntax = wizard.create "(+ 1 2)"
    # => nil - invalid node names
  
  # test whether a tree matches a pattern
  wizard.match(expression_node, '(MINUS VAR .)') # => true
  wizard.match(lone_node, 'NUMBER NUMBER')       # => false
  
  # extract nodes matching a pattern
  wizard.find(statement_node, '(PLUS . .)')
  # => [(+ 1 2)]
  wizard.find(deep_node, 4)  # where 4 is the value of type VAR
  # => [x, y, z]
  
  # iterate through the tree and extract nodes with pattern labels
  wizard.visit(deep_node, '(MULT %n:NUMBER %v:.)') do |node, parent, local_index, labels|
    printf "n = %p\n, v = %p\n", labels['n'], labels['v']
  end
    # => prints out:
    # n = 3, v = x
    # n = 4, v = z
  
== Tree Construction Syntax
  
  Simple Token Node:     TK
  Token Node With Text:  TK[text]
  Flat Node List:        (nil TK1 TK2)
  General Node:          (RT TK1 TK2)
  Complex Nested Node:   (RT (SUB1[sometext] TK1) TK2 (SUB2 TK3 TK4[moretext]))

=== Additional Syntax for Tree Matching Patterns

  Match Any Token Node:  .
  Label a Node:          %name:TK

=end

class Wizard
  
  include Constants
  include Util

=begin rdoc ANTLR3::AST::Wizard::PatternLexer

A class that is used internally by AST::Wizard to tokenize tree patterns

=end

  class PatternLexer
    include ANTLR3::Constants
    
    autoload :StringScanner, 'strscan'
    
    PATTERNS = [ 
      [ :space, /\s+/ ],
      [ :identifier, /[a-z_]\w*/i ],
      [ :open, /\(/ ],
      [ :close, /\)/ ],
      [ :percent, /%/ ],
      [ :colon, /:/ ],
      [ :dot, /\./ ],
      [ :argument, /\[((?:[^\[\]\\]|\\\[|\\\]|\\.)*?)\]/ ]
    ]
    
    attr_reader :text, :error, :pattern
    def initialize( pattern )
      @pattern = pattern.to_s
      @scanner = StringScanner.new( pattern )
      @text = ''
      @error = false
    end
    
    def next_token
      begin
        @scanner.eos? and return EOF
        
        type, = PATTERNS.find do |type, pattern|
          @scanner.scan( pattern )
        end
        
        case type
        when nil
          type, @text, @error = EOF, '', true
          break
        when :identifier then @text = @scanner.matched
        when :argument
          # remove escapes from \] sequences in the text argument
          ( @text = @scanner[ 1 ] ).gsub!( /\\(?=[\[\]])/, '' )
        end
      end while type == :space
      
      return type
    end
    
    alias error? error
  end
  

=begin rdoc ANTLR3::AST::Wizard::Pattern

A class that is used internally by AST::Wizard to construct AST tree objects
from a tokenized tree pattern

=end

  class PatternParser
    def self.parse( pattern, token_scheme, adaptor )
      lexer = PatternLexer.new( pattern )
      new( lexer, token_scheme, adaptor ).pattern
    end
    
    include ANTLR3::Constants
    
    def initialize( tokenizer, token_scheme, adaptor )
      @tokenizer = tokenizer
      @token_scheme = token_scheme
      @adaptor   = adaptor
      @token_type = tokenizer.next_token
    end
    
    def pattern
      case @token_type
      when :open then return parse_tree
      when :identifier
        node = parse_node
        @token_type == EOF and return node
        return nil
      end
      return nil
    end
    
    CONTINUE_TYPES = [ :open, :identifier, :percent, :dot ]
    
    def parse_tree
      @token_type != :open and return nil
      @token_type = @tokenizer.next_token
      root = parse_node or return nil
      
      loop do
        case @token_type
        when :open
          subtree = parse_tree
          @adaptor.add_child( root, subtree )
        when :identifier, :percent, :dot
          child = parse_node or return nil
          @adaptor.add_child( root, child )
        else break
        end
      end
      @token_type == :close or return nil
      @token_type = @tokenizer.next_token
      return root
    end
    
    def parse_node
      label = nil
      if @token_type == :percent
        ( @token_type = @tokenizer.next_token ) == :identifier or return nil
        label = @tokenizer.text
        ( @token_type = @tokenizer.next_token ) == :colon or return nil
        @token_type = @tokenizer.next_token
      end
      
      if @token_type == :dot
        @token_type = @tokenizer.next_token
        wildcard_payload = CommonToken.create( :type => 0, :text => '.' )
        node = WildcardPattern.new( wildcard_payload )
        label and node.label = label
        return node
      end
      
      @token_type == :identifier or return nil
      token_name = @tokenizer.text
      @token_type = @tokenizer.next_token
      token_name == 'nil' and return @adaptor.create_flat_list
      
      text = token_name
      arg = nil
      if @token_type == :argument
        arg = @tokenizer.text
        text = arg
        @token_type = @tokenizer.next_token
      end
      
      node_type = @token_scheme[ token_name ] || INVALID_TOKEN_TYPE
      node = @adaptor.create_from_type( node_type, text )
      
      if Pattern === node
        node.label, node.has_text_arg = label, arg
      end
      return node
    end
  end
  

=begin rdoc ANTLR3::AST::Wizard::Pattern

A simple tree class that represents the skeletal structure of tree. It is used
to validate tree structures as well as to extract nodes that match the pattern.

=end

  class Pattern < CommonTree
    def self.parse( pattern_str, scheme )
      PatternParser.parse( 
        pattern_str, scheme, PatternAdaptor.new( scheme.token_class )
      )
    end
    
    attr_accessor :label, :has_text_arg
    alias :has_text_arg? :has_text_arg
    
    def initialize( payload )
      super( payload )
      @label = nil
      @has_text_arg = nil
    end
    
    def to_s
      prefix = @label ? '%' << @label << ':' : ''
      return( prefix << super )
    end
  end
  
=begin rdoc ANTLR3::AST::Wizard::WildcardPattern

A simple tree node used to represent the operation "match any tree node type" in
a tree pattern. They are represented by '.' in tree pattern specifications.

=end
  
  class WildcardPattern < Pattern; end
  

=begin rdoc ANTLR3::AST::Wizard::PatternAdaptor

A customized TreeAdaptor used by AST::Wizards to build tree patterns.

=end

  class PatternAdaptor < CommonTreeAdaptor
    def create_with_payload( payload )
      return Pattern.new( payload )
    end
  end

  attr_accessor :token_scheme, :adaptor

  def initialize( options = {} )
    @token_scheme = options.fetch( :token_scheme ) do
      TokenScheme.build( options[ :token_class ], options[ :tokens ] )
    end
    @adaptor = options.fetch( :adaptor ) do
      CommonTreeAdaptor.new( @token_scheme.token_class )
    end
  end
  
  def create( pattern )
    PatternParser.parse( pattern, @token_scheme, @adaptor )
  end
  
  def index( tree, map = {} )
    tree or return( map )
    type = @adaptor.type_of( tree )
    elements = map[ type ] ||= []
    elements << tree
    @adaptor.each_child( tree ) { | child | index( child, map ) }
    return( map )
  end
  
  def find( tree, what )
    case what
    when Integer then find_token_type( tree, what )
    when String  then find_pattern( tree, what )
    when Symbol  then find_token_type( tree, @token_scheme[ what ] )
    else raise ArgumentError, "search subject must be a token type (integer) or a string"
    end
  end
  
  def find_token_type( tree, type )
    nodes = []
    visit( tree, type ) { | t, | nodes << t }
    return nodes
  end
  
  def find_pattern( tree, pattern )
    subtrees = []
    visit_pattern( tree, pattern ) { | t, | subtrees << t }
    return( subtrees )
  end
  
  def visit( tree, what = nil, &block )
    block_given? or return enum_for( :visit, tree, what )
    Symbol === what and what = @token_scheme[ what ]
    case what
    when nil then visit_all( tree, &block )
    when Integer then visit_type( tree, nil, what, &block )
    when String  then visit_pattern( tree, what, &block )
    else raise( ArgumentError, tidy( <<-'END', true ) )
      | The 'what' filter argument must be a tree
      | pattern (String) or a token type (Integer)
      | -- got #{ what.inspect }
      END
    end
  end
  
  def visit_all( tree, parent = nil, &block )
    index = @adaptor.child_index( tree )
    yield( tree, parent, index, nil )
    @adaptor.each_child( tree ) do | child |
      visit_all( child, tree, &block )
    end
  end
  
  def visit_type( tree, parent, type, &block )
    tree.nil? and return( nil )
    index = @adaptor.child_index( tree )
    @adaptor.type_of( tree ) == type and yield( tree, parent, index, nil )
    @adaptor.each_child( tree ) do | child |
      visit_type( child, tree, type, &block )
    end
  end
  
  def visit_pattern( tree, pattern, &block )
    pattern = Pattern.parse( pattern, @token_scheme )
    
    if pattern.nil? or pattern.flat_list? or pattern.is_a?( WildcardPattern )
      return( nil )
    end
    
    visit( tree, pattern.type ) do | tree, parent, child_index, labels |
      labels = match!( tree, pattern ) and
        yield( tree, parent, child_index, labels )
    end
  end
  
  def match( tree, pattern )
    pattern = Pattern.parse( pattern, @token_scheme )
    
    return( match!( tree, pattern ) )
  end
  
  def match!( tree, pattern, labels = {} )
    tree.nil? || pattern.nil? and return false
    unless pattern.is_a? WildcardPattern
      @adaptor.type_of( tree ) == pattern.type or return false
      pattern.has_text_arg && ( @adaptor.text_of( tree ) != pattern.text ) and
        return false
    end
    labels[ pattern.label ] = tree if labels && pattern.label
    
    number_of_children = @adaptor.child_count( tree )
    return false unless number_of_children == pattern.child_count
    
    number_of_children.times do |index|
      actual_child = @adaptor.child_of( tree, index )
      pattern_child = pattern.child( index )
      
      return( false ) unless match!( actual_child, pattern_child, labels )
    end
    
    return labels
  end
  
  def equals( tree_a, tree_b, adaptor = @adaptor )
    tree_a && tree_b or return( false )
    
    adaptor.type_of( tree_a ) == adaptor.type_of( tree_b ) or return false
    adaptor.text_of( tree_a ) == adaptor.text_of( tree_b ) or return false
    
    child_count_a = adaptor.child_count( tree_a )
    child_count_b = adaptor.child_count( tree_b )
    child_count_a == child_count_b or return false
    
    child_count_a.times do | i |
      child_a = adaptor.child_of( tree_a, i )
      child_b = adaptor.child_of( tree_b, i )
      equals( child_a, child_b, adaptor ) or return false
    end
    return true
  end
  
  
  DOT_DOT_PATTERN = /.*[^\.]\\.{2}[^\.].*/
  DOUBLE_ETC_PATTERN = /.*\.{3}\s+\.{3}.*/
  
  def in_context?( tree, context )
    case context
    when DOT_DOT_PATTERN then raise ArgumentError, "invalid syntax: .."
    when DOUBLE_ETC_PATTERN then raise ArgumentError, "invalid syntax: ... ..."
    end
    
    context = context.gsub( /([^\.\s])\.{3}([^\.])/, '\1 ... \2' )
    context.strip!
    nodes = context.split( /\s+/ )
    
    while tree = @adaptor.parent( tree ) and node = nodes.pop
      if node == '...'
        node = nodes.pop or return( true )
        tree = @adaptor.each_ancestor( tree ).find do | t |
          @adaptor.type_name( t ) == node
        end or return( false )
      end
      @adaptor.type_name( tree ) == node or return( false )
    end
    
    return( false ) if tree.nil? and not nodes.empty?
    return true
  end
  
  private :match!
end
end
end