#!/usr/bin/ruby # encoding: utf-8 =begin LICENSE [The "BSD licence"] Copyright (c) 2009-2010 Kyle Yetter All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: 1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. 3. The name of the author may not be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. =end require 'antlr3' module ANTLR3 module AST =begin rdoc ANTLR3::AST::Wizard AST::Wizard is an extra utility class that allows quick creation of AST objects using definitions writing in ANTLR-style tree definition syntax. It can also define <i>tree patterns</i>, objects that are conceptually similar to regular expressions. Patterns allow a simple method for recursively searching through an AST for a particular node structure. These features make tree wizards useful while testing and debugging AST constructing parsers and tree parsers. This library has been ported to Ruby directly from the ANTLR Python runtime API. See http://www.antlr.org/wiki/display/~admin/2007/07/02/Exploring+Concept+of+TreeWizard for more background on the concept of a tree wizard. == Usage # setting up and creating a tree wizard token_names = Array.new(4, '') + %w(VAR NUMBER EQ PLUS MINUS MULT DIV) adaptor = ANTLR3::AST::CommonTreeAdaptor.new wizard = ANTLR3::AST::Wizard.new(adaptor, token_names) # building trees lone_node = wizard.create "VAR[x]" # => x lone_node.type # => 4 # = VAR lone_node.text # => "x" expression_node = wizard.create "(MINUS VAR NUMBER)" # => (MINUS VAR NUMBER) statement_node = wizard.create "(EQ[=] VAR[x] (PLUS[+] NUMBER[1] NUMBER[2]))" # => (= x (+ 1 2)) deep_node = wizard.create(<<-TREE) (MULT[*] NUMBER[1] (MINUS[-] (MULT[*] NUMBER[3] VAR[x]) (DIV[/] VAR[y] NUMBER[3.14]) (MULT[*] NUMBER[4] VAR[z]) ) ) TREE # => (* 1 (- (* 3 x) (/ y 3.14) (* 4 z)) bad_tree_syntax = wizard.create "(+ 1 2)" # => nil - invalid node names # test whether a tree matches a pattern wizard.match(expression_node, '(MINUS VAR .)') # => true wizard.match(lone_node, 'NUMBER NUMBER') # => false # extract nodes matching a pattern wizard.find(statement_node, '(PLUS . .)') # => [(+ 1 2)] wizard.find(deep_node, 4) # where 4 is the value of type VAR # => [x, y, z] # iterate through the tree and extract nodes with pattern labels wizard.visit(deep_node, '(MULT %n:NUMBER %v:.)') do |node, parent, local_index, labels| printf "n = %p\n, v = %p\n", labels['n'], labels['v'] end # => prints out: # n = 3, v = x # n = 4, v = z == Tree Construction Syntax Simple Token Node: TK Token Node With Text: TK[text] Flat Node List: (nil TK1 TK2) General Node: (RT TK1 TK2) Complex Nested Node: (RT (SUB1[sometext] TK1) TK2 (SUB2 TK3 TK4[moretext])) === Additional Syntax for Tree Matching Patterns Match Any Token Node: . Label a Node: %name:TK =end class Wizard include Constants include Util =begin rdoc ANTLR3::AST::Wizard::PatternLexer A class that is used internally by AST::Wizard to tokenize tree patterns =end class PatternLexer include ANTLR3::Constants autoload :StringScanner, 'strscan' PATTERNS = [ [ :space, /\s+/ ], [ :identifier, /[a-z_]\w*/i ], [ :open, /\(/ ], [ :close, /\)/ ], [ :percent, /%/ ], [ :colon, /:/ ], [ :dot, /\./ ], [ :argument, /\[((?:[^\[\]\\]|\\\[|\\\]|\\.)*?)\]/ ] ] attr_reader :text, :error, :pattern def initialize( pattern ) @pattern = pattern.to_s @scanner = StringScanner.new( pattern ) @text = '' @error = false end def next_token begin @scanner.eos? and return EOF type, = PATTERNS.find do |type, pattern| @scanner.scan( pattern ) end case type when nil type, @text, @error = EOF, '', true break when :identifier then @text = @scanner.matched when :argument # remove escapes from \] sequences in the text argument ( @text = @scanner[ 1 ] ).gsub!( /\\(?=[\[\]])/, '' ) end end while type == :space return type end alias error? error end =begin rdoc ANTLR3::AST::Wizard::Pattern A class that is used internally by AST::Wizard to construct AST tree objects from a tokenized tree pattern =end class PatternParser def self.parse( pattern, token_scheme, adaptor ) lexer = PatternLexer.new( pattern ) new( lexer, token_scheme, adaptor ).pattern end include ANTLR3::Constants def initialize( tokenizer, token_scheme, adaptor ) @tokenizer = tokenizer @token_scheme = token_scheme @adaptor = adaptor @token_type = tokenizer.next_token end def pattern case @token_type when :open then return parse_tree when :identifier node = parse_node @token_type == EOF and return node return nil end return nil end CONTINUE_TYPES = [ :open, :identifier, :percent, :dot ] def parse_tree @token_type != :open and return nil @token_type = @tokenizer.next_token root = parse_node or return nil loop do case @token_type when :open subtree = parse_tree @adaptor.add_child( root, subtree ) when :identifier, :percent, :dot child = parse_node or return nil @adaptor.add_child( root, child ) else break end end @token_type == :close or return nil @token_type = @tokenizer.next_token return root end def parse_node label = nil if @token_type == :percent ( @token_type = @tokenizer.next_token ) == :identifier or return nil label = @tokenizer.text ( @token_type = @tokenizer.next_token ) == :colon or return nil @token_type = @tokenizer.next_token end if @token_type == :dot @token_type = @tokenizer.next_token wildcard_payload = CommonToken.create( :type => 0, :text => '.' ) node = WildcardPattern.new( wildcard_payload ) label and node.label = label return node end @token_type == :identifier or return nil token_name = @tokenizer.text @token_type = @tokenizer.next_token token_name == 'nil' and return @adaptor.create_flat_list text = token_name arg = nil if @token_type == :argument arg = @tokenizer.text text = arg @token_type = @tokenizer.next_token end node_type = @token_scheme[ token_name ] || INVALID_TOKEN_TYPE node = @adaptor.create_from_type( node_type, text ) if Pattern === node node.label, node.has_text_arg = label, arg end return node end end =begin rdoc ANTLR3::AST::Wizard::Pattern A simple tree class that represents the skeletal structure of tree. It is used to validate tree structures as well as to extract nodes that match the pattern. =end class Pattern < CommonTree def self.parse( pattern_str, scheme ) PatternParser.parse( pattern_str, scheme, PatternAdaptor.new( scheme.token_class ) ) end attr_accessor :label, :has_text_arg alias :has_text_arg? :has_text_arg def initialize( payload ) super( payload ) @label = nil @has_text_arg = nil end def to_s prefix = @label ? '%' << @label << ':' : '' return( prefix << super ) end end =begin rdoc ANTLR3::AST::Wizard::WildcardPattern A simple tree node used to represent the operation "match any tree node type" in a tree pattern. They are represented by '.' in tree pattern specifications. =end class WildcardPattern < Pattern; end =begin rdoc ANTLR3::AST::Wizard::PatternAdaptor A customized TreeAdaptor used by AST::Wizards to build tree patterns. =end class PatternAdaptor < CommonTreeAdaptor def create_with_payload( payload ) return Pattern.new( payload ) end end attr_accessor :token_scheme, :adaptor def initialize( options = {} ) @token_scheme = options.fetch( :token_scheme ) do TokenScheme.build( options[ :token_class ], options[ :tokens ] ) end @adaptor = options.fetch( :adaptor ) do CommonTreeAdaptor.new( @token_scheme.token_class ) end end def create( pattern ) PatternParser.parse( pattern, @token_scheme, @adaptor ) end def index( tree, map = {} ) tree or return( map ) type = @adaptor.type_of( tree ) elements = map[ type ] ||= [] elements << tree @adaptor.each_child( tree ) { | child | index( child, map ) } return( map ) end def find( tree, what ) case what when Integer then find_token_type( tree, what ) when String then find_pattern( tree, what ) when Symbol then find_token_type( tree, @token_scheme[ what ] ) else raise ArgumentError, "search subject must be a token type (integer) or a string" end end def find_token_type( tree, type ) nodes = [] visit( tree, type ) { | t, | nodes << t } return nodes end def find_pattern( tree, pattern ) subtrees = [] visit_pattern( tree, pattern ) { | t, | subtrees << t } return( subtrees ) end def visit( tree, what = nil, &block ) block_given? or return enum_for( :visit, tree, what ) Symbol === what and what = @token_scheme[ what ] case what when nil then visit_all( tree, &block ) when Integer then visit_type( tree, nil, what, &block ) when String then visit_pattern( tree, what, &block ) else raise( ArgumentError, tidy( <<-'END', true ) ) | The 'what' filter argument must be a tree | pattern (String) or a token type (Integer) | -- got #{ what.inspect } END end end def visit_all( tree, parent = nil, &block ) index = @adaptor.child_index( tree ) yield( tree, parent, index, nil ) @adaptor.each_child( tree ) do | child | visit_all( child, tree, &block ) end end def visit_type( tree, parent, type, &block ) tree.nil? and return( nil ) index = @adaptor.child_index( tree ) @adaptor.type_of( tree ) == type and yield( tree, parent, index, nil ) @adaptor.each_child( tree ) do | child | visit_type( child, tree, type, &block ) end end def visit_pattern( tree, pattern, &block ) pattern = Pattern.parse( pattern, @token_scheme ) if pattern.nil? or pattern.flat_list? or pattern.is_a?( WildcardPattern ) return( nil ) end visit( tree, pattern.type ) do | tree, parent, child_index, labels | labels = match!( tree, pattern ) and yield( tree, parent, child_index, labels ) end end def match( tree, pattern ) pattern = Pattern.parse( pattern, @token_scheme ) return( match!( tree, pattern ) ) end def match!( tree, pattern, labels = {} ) tree.nil? || pattern.nil? and return false unless pattern.is_a? WildcardPattern @adaptor.type_of( tree ) == pattern.type or return false pattern.has_text_arg && ( @adaptor.text_of( tree ) != pattern.text ) and return false end labels[ pattern.label ] = tree if labels && pattern.label number_of_children = @adaptor.child_count( tree ) return false unless number_of_children == pattern.child_count number_of_children.times do |index| actual_child = @adaptor.child_of( tree, index ) pattern_child = pattern.child( index ) return( false ) unless match!( actual_child, pattern_child, labels ) end return labels end def equals( tree_a, tree_b, adaptor = @adaptor ) tree_a && tree_b or return( false ) adaptor.type_of( tree_a ) == adaptor.type_of( tree_b ) or return false adaptor.text_of( tree_a ) == adaptor.text_of( tree_b ) or return false child_count_a = adaptor.child_count( tree_a ) child_count_b = adaptor.child_count( tree_b ) child_count_a == child_count_b or return false child_count_a.times do | i | child_a = adaptor.child_of( tree_a, i ) child_b = adaptor.child_of( tree_b, i ) equals( child_a, child_b, adaptor ) or return false end return true end DOT_DOT_PATTERN = /.*[^\.]\\.{2}[^\.].*/ DOUBLE_ETC_PATTERN = /.*\.{3}\s+\.{3}.*/ def in_context?( tree, context ) case context when DOT_DOT_PATTERN then raise ArgumentError, "invalid syntax: .." when DOUBLE_ETC_PATTERN then raise ArgumentError, "invalid syntax: ... ..." end context = context.gsub( /([^\.\s])\.{3}([^\.])/, '\1 ... \2' ) context.strip! nodes = context.split( /\s+/ ) while tree = @adaptor.parent( tree ) and node = nodes.pop if node == '...' node = nodes.pop or return( true ) tree = @adaptor.each_ancestor( tree ).find do | t | @adaptor.type_name( t ) == node end or return( false ) end @adaptor.type_name( tree ) == node or return( false ) end return( false ) if tree.nil? and not nodes.empty? return true end private :match! end end end