#!/usr/bin/perl -w
#
# This is JavaScriptCore's variant of the PCRE library. While this library
# started out as a copy of PCRE, many of the features of PCRE have been
# removed. This library now supports only the regular expression features
# required by the JavaScript language specification, and has only the functions
# needed by JavaScriptCore and the rest of WebKit.
#
# Originally written by Philip Hazel
# Copyright (c) 1997-2006 University of Cambridge
# Copyright (C) 2002, 2004, 2006, 2007, 2008, 2009 Apple Inc. All rights reserved.
#
# -----------------------------------------------------------------------------
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#
# * Redistributions of source code must retain the above copyright notice,
#   this list of conditions and the following disclaimer.
#
# * Redistributions in binary form must reproduce the above copyright
#   notice, this list of conditions and the following disclaimer in the
#   documentation and/or other materials provided with the distribution.
#
# * Neither the name of the University of Cambridge nor the names of its
#   contributors may be used to endorse or promote products derived from
#   this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
# POSSIBILITY OF SUCH DAMAGE.
# -----------------------------------------------------------------------------

# This is a freestanding support program to generate a file containing
# character tables. The tables are built according to the default C
# locale.
#
# Usage: dftables [--preprocessor=program] output-file
#
# The table layout constants (offsets, lengths, flag bits) are not hard-coded
# here: they are extracted from pcre_internal.h, which must live next to this
# script, by running it through the C preprocessor (see readHeaderValues).

use strict;
use warnings;

use File::Basename;
use File::Spec;
use File::Temp qw(tempfile);
use Getopt::Long;

sub readHeaderValues();

# Layout constants read from pcre_internal.h, keyed by macro name.
my %pcre_internal;

if (scalar(@ARGV) < 1) {
    print STDERR "Usage: ", basename($0), " [--preprocessor=program] output-file\n";
    exit 1;
}

my $outputFile;
my $preprocessor;
GetOptions('preprocessor=s' => \$preprocessor);
if (not $preprocessor) {
    $preprocessor = "cpp";
}

$outputFile = $ARGV[0];
die('Must specify output file.') unless defined($outputFile);

readHeaderValues();

# Validate the whole layout before touching the output file, so that a failed
# check can never leave a truncated file behind.
checkTableLayout();

open(my $out, ">", $outputFile) or die "Cannot open $outputFile for writing: $!\n";
binmode($out);

print {$out}
    "/*************************************************\n" .
    "* Perl-Compatible Regular Expressions *\n" .
    "*************************************************/\n\n" .
    "/* This file is automatically written by the dftables auxiliary \n" .
    "program. If you edit it by hand, you might like to edit the Makefile to \n" .
    "prevent its ever being regenerated.\n\n";

printf {$out}
    "This file contains the default tables for characters with codes less than\n" .
    "128 (ASCII characters). These tables are used when no external tables are\n" .
    "passed to PCRE. */\n\n" .
    "const unsigned char jsc_pcre_default_tables[%d] = {\n\n" .
    "/* This table is a lower casing table. */\n\n",
    $pcre_internal{tables_length};

# Lower casing table: each code maps to its lower-case equivalent.
my @lowerCase = map { ord(lc(chr($_))) } 0 .. 127;
print {$out} formatByteTable(\@lowerCase, 0);

print {$out} "/* This table is a case flipping table. */\n\n";

# Case flipping table: lower-case letters map to upper case and vice versa;
# everything else maps to itself.
my @flippedCase = map {
    my $c = chr($_);
    $c =~ /[[:lower:]]/ ? ord(uc($c)) : ord(lc($c));
} 0 .. 127;
print {$out} formatByteTable(\@flippedCase, 0);

print {$out}
    "/* This table contains bit maps for various character classes.\n" .
    "Each map is 32 bytes long and the bits run from the least\n" .
    "significant end of each byte. The classes are: space, digit, word. */\n\n";

# Character class bit maps. Within the map that starts at byte offset
# cbit_<class>, character $i is represented by bit ($i & 7) of byte ($i >> 3).
my @cbitTable = (0) x $pcre_internal{cbit_length};
for my $i (ord('0') .. ord('9')) {
    $cbitTable[$pcre_internal{cbit_digit} + ($i >> 3)] |= 1 << ($i & 7);
}
$cbitTable[$pcre_internal{cbit_word} + (ord('_') >> 3)] |= 1 << (ord('_') & 7);
for my $i (0 .. 127) {
    my $c = chr($i);
    if ($c =~ /[[:alnum:]]/) {
        $cbitTable[$pcre_internal{cbit_word} + ($i >> 3)] |= 1 << ($i & 7);
    }
    if ($c =~ /[[:space:]]/) {
        $cbitTable[$pcre_internal{cbit_space} + ($i >> 3)] |= 1 << ($i & 7);
    }
}
# Only the first cbit_length bytes belong to the table.
my @cbitBytes = @cbitTable[0 .. $pcre_internal{cbit_length} - 1];
print {$out} formatByteTable(\@cbitBytes, 1);

printf {$out}
    "/* This table identifies various classes of character by individual bits:\n" .
    " 0x%02x white space character\n" .
    " 0x%02x hexadecimal digit\n" .
    " 0x%02x alphanumeric or '_'\n*/\n\n",
    $pcre_internal{ctype_space},
    $pcre_internal{ctype_xdigit},
    $pcre_internal{ctype_word};

# Character type table: one flag byte per character, eight per output row,
# each row followed by a comment naming the first and last character in it.
print {$out} " ";
for my $i (0 .. 127) {
    my $c = chr($i);
    my $flags = 0;
    if ($c =~ /[[:space:]]/) {
        $flags += $pcre_internal{ctype_space};
    }
    if ($c =~ /[[:xdigit:]]/) {
        $flags += $pcre_internal{ctype_xdigit};
    }
    if ($c =~ /[[:alnum:]_]/) {
        $flags += $pcre_internal{ctype_word};
    }

    printf {$out} "0x%02X", $flags;
    # The last entry also closes the C array initializer.
    print {$out} ($i != 127 ? ", " : "};");

    next unless ($i & 7) == 7;

    # End of a row: printable characters are shown literally, others by code.
    my $rowStart = $i - 7;
    my $from = chr($rowStart) =~ /[[:print:]]/
        ? sprintf(" %c -", $rowStart)
        : sprintf("%3d-", $rowStart);
    my $to = $c =~ /[[:print:]]/
        ? sprintf(" %c ", $i)
        : sprintf("%3d", $i);
    print {$out} " /* ", $from, $to, " */\n";
    print {$out} " " if $i != 127;
}

print {$out} "\n\n/* End of chartables.c */\n";

# Buffered write errors (e.g. a full disk) only surface when the handle is
# closed, so the result of close must be checked.
close($out) or die "Cannot write $outputFile: $!\n";

exit 0;

# Dies unless the layout constants read from pcre_internal.h describe the
# table order this script emits: lower casing table, case flipping table,
# class bit maps, character types.
sub checkTableLayout
{
    if ($pcre_internal{lcc_offset} != 0) {
        die "lcc_offset != 0";
    }
    if ($pcre_internal{fcc_offset} != 128) {
        die "fcc_offset != 128";
    }
    if ($pcre_internal{cbits_offset} != $pcre_internal{fcc_offset} + 128) {
        die "cbits_offset != fcc_offset + 128";
    }
    if ($pcre_internal{ctypes_offset} != $pcre_internal{cbits_offset} + $pcre_internal{cbit_length}) {
        die "ctypes_offset != cbits_offset + cbit_length";
    }
    if ($pcre_internal{tables_length} != $pcre_internal{ctypes_offset} + 128) {
        die "tables_length != ctypes_offset + 128";
    }
}

# Formats a list of byte values as rows of C initializers.
#
#   $values       - reference to an array of integers
#   $separateMaps - when true, an empty line is inserted before every 32nd
#                   entry (used to separate the 32-byte class bit maps)
#
# Returns the text for the whole table: eight "0x%02X" entries per row,
# followed by the comma and blank line that separate it from the next table.
sub formatByteTable
{
    my ($values, $separateMaps) = @_;

    my $last = $#{$values};
    my $text = " ";
    for my $i (0 .. $last) {
        if ($i != 0 && ($i & 7) == 0) {
            $text .= "\n" if $separateMaps && ($i & 31) == 0;
            $text .= "\n ";
        }
        $text .= sprintf("0x%02X", $values->[$i]);
        $text .= ", " if $i != $last;
    }
    return $text . ",\n\n";
}

# Populates %pcre_internal with the layout constants defined in
# pcre_internal.h (expected in the same directory as this script).
#
# The header is copied into a temporary file, followed by one Perl assignment
# per constant whose right-hand side is the macro name. Running that file
# through the preprocessor expands the macros, and evaluating the result
# stores the values. Dies if the header cannot be read, if the preprocessor
# produces nothing, or if any constant is still undefined afterwards.
sub readHeaderValues()
{
    my @variables = qw(
        cbit_digit
        cbit_length
        cbit_space
        cbit_word
        cbits_offset
        ctype_space
        ctype_word
        ctype_xdigit
        ctypes_offset
        fcc_offset
        lcc_offset
        tables_length
    );

    my $headerPath = File::Spec->catfile(dirname($0), "pcre_internal.h");
    my $tempFile = writePreprocessorInput($headerPath, @variables);

    # Remove the temporary file whether or not the preprocessor succeeded.
    my $content = eval { runPreprocessor($tempFile) };
    my $error = $@;
    unlink $tempFile;
    die $error if $error;

    # String eval is intentional: the preprocessor output is Perl code built
    # by writePreprocessorInput from a header that ships with this script.
    eval $content;
    die "$@" if $@;

    my @missing = grep { !defined($pcre_internal{$_}) } @variables;
    if (@missing) {
        die "Could not determine " . join(", ", @missing) . " from $headerPath\n";
    }
}

# Creates the temporary preprocessor input: a DFTABLES define, the contents
# of $headerPath, and one "$pcre_internal{"name"} = name;" line per variable.
# Returns the path of the temporary file; the caller must remove it.
sub writePreprocessorInput
{
    my ($headerPath, @variables) = @_;

    # Read the header first so that a missing header cannot leak a temp file.
    open(my $header, "<", $headerPath) or die "Cannot open $headerPath: $!\n";
    my $headerText = do { local $/; <$header> };
    close($header);
    $headerText = "" unless defined($headerText);

    my ($fh, $tempFile) = tempfile(
        basename($0) . "-XXXXXXXX",
        DIR => File::Spec->tmpdir(),
        SUFFIX => ".in",
        UNLINK => 0,
    );

    my $written = print {$fh} "#define DFTABLES\n\n", $headerText, "\n\n",
        map { "\$pcre_internal{\"$_\"} = $_;\n" } @variables;
    my $closed = close($fh);
    unless ($written && $closed) {
        my $reason = $!;
        unlink $tempFile;
        die "Cannot write $tempFile: $reason\n";
    }

    return $tempFile;
}

# Runs the configured preprocessor on $inputFile and returns everything it
# wrote to standard output. Dies if the command cannot be started or produces
# no output; a failure status accompanied by output is only reported as a
# warning, and the caller validates the values it extracts.
sub runPreprocessor
{
    my ($inputFile) = @_;

    # $preprocessor may carry its own arguments (e.g. "gcc -E"), so the
    # command is deliberately passed to the shell as a single string.
    my $command = "$preprocessor \"$inputFile\"";
    open(my $cpp, "-|", $command) or die "Cannot run '$command': $!\n";
    my $content = do { local $/; <$cpp> };

    # Closing a pipe waits for the child; it fails with $! unset when the
    # command exited with a non-zero status, which is then found in $?.
    my $problem = "";
    unless (close($cpp)) {
        $problem = $! ? "pipe error: $!" : "wait status $?";
    }

    if (!defined($content) || $content eq "") {
        die "Preprocessor command '$command' produced no output"
            . ($problem ? " ($problem)" : "") . "\n";
    }
    warn "Warning: preprocessor command '$command' reported $problem\n" if $problem;

    return $content;
}