and tags in your source with class=prettyprint. // You can also use the (html deprecated) tag, but the pretty printer // needs to do more substantial DOM manipulations to support that, so some // css styles may not be preserved. // Change log: // cbeust, 2006/08/22 // Java annotations (start with "@") are now captured as literals ("lit") // var PR_keywords = new Object(); /** initialize the keyword list for our target languages. */ (function () { var CPP_KEYWORDS = ( "bool break case catch char class const const_cast continue default " + "delete deprecated dllexport dllimport do double dynamic_cast else enum " + "explicit extern false float for friend goto if inline int long mutable " + "naked namespace new noinline noreturn nothrow novtable operator private " + "property protected public register reinterpret_cast return selectany " + "short signed sizeof static static_cast struct switch template this " + "thread throw true try typedef typeid typename union unsigned using " + "declaration, using directive uuid virtual void volatile while typeof"); var JAVA_KEYWORDS = ( "abstract default goto package synchronized boolean do if private this " + "break double implements protected throw byte else import public throws " + "case enum instanceof return transient catch extends int short try char " + "final interface static void class finally long strictfp volatile const " + "float native super while continue for new switch"); var PYTHON_KEYWORDS = ( "and assert break class continue def del elif else except exec finally " + "for from global if import in is lambda not or pass print raise return " + "try while yield"); var JSCRIPT_KEYWORDS = ( "abstract boolean break byte case catch char class const continue " + "debugger default delete do double else enum export extends false final " + "finally float for function goto if implements import in instanceof int " + "interface long native new null package private protected public return " + "short static super switch synchronized this throw throws transient " + "true try typeof var void volatile while with NaN Infinity"); var PERL_KEYWORDS = ( "foreach require sub unless until use elsif BEGIN END"); var SH_KEYWORDS = ( "if then do else fi end"); var KEYWORDS = [CPP_KEYWORDS, JAVA_KEYWORDS, PYTHON_KEYWORDS, JSCRIPT_KEYWORDS, PERL_KEYWORDS, SH_KEYWORDS]; for (var k = 0; k < KEYWORDS.length; k++) { var kw = KEYWORDS[k].split(' '); for (var i = 0; i < kw.length; i++) { if (kw[i]) { PR_keywords[kw[i]] = true; } } } }).call(this); // token style names. correspond to css classes /** token style for a string literal */ var PR_STRING = 'str'; /** token style for a keyword */ var PR_KEYWORD = 'kwd'; /** token style for a comment */ var PR_COMMENT = 'com'; /** token style for a type */ var PR_TYPE = 'typ'; /** token style for a literal value. e.g. 1, null, true. */ var PR_LITERAL = 'lit'; /** token style for a punctuation string. */ var PR_PUNCTUATION = 'pun'; /** token style for a punctuation string. */ var PR_PLAIN = 'pln'; /** token style for an sgml tag. */ var PR_TAG = 'tag'; /** token style for a markup declaration such as a DOCTYPE. */ var PR_DECLARATION = 'dec'; /** token style for embedded source. */ var PR_SOURCE = 'src'; /** token style for an sgml attribute name. */ var PR_ATTRIB_NAME = 'atn'; /** token style for an sgml attribute value. */ var PR_ATTRIB_VALUE = 'atv'; /** the position of the end of a token during. A division of a string into * n tokens can be represented as a series n - 1 token ends, as long as * runs of whitespace warrant their own token. * @private */ function PR_TokenEnd(end, style) { if (undefined === style) { throw new Error('BAD'); } if ('number' != typeof(end)) { throw new Error('BAD'); } this.end = end; this.style = style; } PR_TokenEnd.prototype.toString = function () { return '[PR_TokenEnd ' + this.end + (this.style ? ':' + this.style : '') + ']'; }; /** a chunk of text with a style. These are used to represent both the output * from the lexing functions as well as intermediate results. * @constructor * @param token the token text * @param style one of the token styles defined in designdoc-template, or null * for a styleless token, such as an embedded html tag. * @private */ function PR_Token(token, style) { if (undefined === style) { throw new Error('BAD'); } this.token = token; this.style = style; } PR_Token.prototype.toString = function () { return '[PR_Token ' + this.token + (this.style ? ':' + this.style : '') + ']'; }; /** a helper class that decodes common html entities used to escape source and * markup punctuation characters in html. * @constructor * @private */ function PR_DecodeHelper() { this.next = 0; this.ch = '\0'; } PR_DecodeHelper.prototype.decode = function (s, i) { var next = i + 1; var ch = s.charAt(i); if ('&' == ch) { var semi = s.indexOf(';', next); if (semi >= 0 && semi < next + 4) { var entityName = s.substring(next, semi).toLowerCase(); next = semi + 1; if ('lt' == entityName) { ch = '<'; } else if ('gt' == entityName) { ch = '>'; } else if ('quot' == entityName) { ch = '"'; } else if ('apos' == entityName) { ch = '\''; } else if ('amp' == entityName) { ch = '&'; } else { next = i + 1; } } } this.next = next; this.ch = ch; return this.ch; } // some string utilities function PR_isWordChar(ch) { return (ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z'); } function PR_isIdentifierStart(ch) { return PR_isWordChar(ch) || ch == '_' || ch == '$' || ch == '@'; } function PR_isIdentifierPart(ch) { return PR_isIdentifierStart(ch) || PR_isDigitChar(ch); } function PR_isSpaceChar(ch) { return "\t \r\n".indexOf(ch) >= 0; } function PR_isDigitChar(ch) { return ch >= '0' && ch <= '9'; } function PR_trim(s) { var i = 0, j = s.length - 1; while (i <= j && PR_isSpaceChar(s.charAt(i))) { ++i; } while (j > i && PR_isSpaceChar(s.charAt(j))) { --j; } return s.substring(i, j + 1); } function PR_startsWith(s, prefix) { return s.length >= prefix.length && prefix == s.substring(0, prefix.length); } function PR_endsWith(s, suffix) { return s.length >= suffix.length && suffix == s.substring(s.length - suffix.length, s.length); } /** true iff prefix matches the first prefix characters in chars[0:len]. * @private */ function PR_prefixMatch(chars, len, prefix) { if (len < prefix.length) { return false; } for (var i = 0, n = prefix.length; i < n; ++i) { if (prefix.charAt(i) != chars[i]) { return false; } } return true; } /** used to convert html special characters embedded in XMP tags into html. */ function PR_textToHtml(str) { return str.replace(/&/g, '&').replace(//g, '>'); } /** split markup into chunks of html tags (style null) and * plain text (style {@link #PR_PLAIN}). * * @param s a String of html. * @return an Array of PR_Tokens of style PR_PLAIN and null. * @private */ function PR_chunkify(s) { var chunks = new Array(); var state = 0; var start = 0; var pos = -1; for (var i = 0, n = s.length; i < n; ++i) { var ch = s.charAt(i); switch (state) { case 0: if ('<' == ch) { state = 1; } break; case 1: pos = i - 1; if ('/' == ch) { state = 2; } else if (PR_isWordChar(ch)) { state = 3; } else if ('<' == ch) { state = 1; } else { state = 0; } break; case 2: if (PR_isWordChar(ch)) { state = 3; } else if ('<' == ch) { state = 1; } else { state = 0; } break; case 3: if ('>' == ch) { if (pos > start) { chunks.push(new PR_Token(s.substring(start, pos), PR_PLAIN)); } chunks.push(new PR_Token(s.substring(pos, i + 1), null)); start = i + 1; pos = -1; state = 0; } break; } } if (s.length > start) { chunks.push(new PR_Token(s.substring(start, s.length), PR_PLAIN)); } return chunks; } /** splits chunks around entities. * @private */ function PR_splitEntities(chunks) { var chunksOut = new Array(); var state = 0; for (var ci = 0, nc = chunks.length; ci < nc; ++ci) { var chunk = chunks[ci]; if (PR_PLAIN != chunk.style) { chunksOut.push(chunk); continue; } var s = chunk.token; var pos = 0; var start; for (var i = 0; i < s.length; ++i) { var ch = s.charAt(i); switch (state) { case 0: if ('&' == ch) { state = 1; } break; case 1: if ('#' == ch || PR_isWordChar(ch)) { start = i - 1; state = 2; } else { state = 0; } break; case 2: if (';' == ch) { if (start > pos) { chunksOut.push( new PR_Token(s.substring(pos, start), chunk.style)); } chunksOut.push(new PR_Token(s.substring(start, i + 1), null)); pos = i + 1; state = 0; } break; } } if (s.length > pos) { chunksOut.push(pos ? new PR_Token(s.substring(pos, s.length), chunk.style) : chunk); } } return chunksOut; } /** walk the tokenEnds list and the chunk list in parallel to generate a list * of split tokens. * @private */ function PR_splitChunks(chunks, tokenEnds) { var tokens = new Array(); // the output var ci = 0; // index into chunks // position of beginning of amount written so far in absolute space. var posAbs = 0; // position of amount written so far in chunk space var posChunk = 0; // current chunk var chunk = new PR_Token('', null); for (var ei = 0, ne = tokenEnds.length; ei < ne; ++ei) { var tokenEnd = tokenEnds[ei]; var end = tokenEnd.end; var tokLen = end - posAbs; var remainingInChunk = chunk.token.length - posChunk; while (remainingInChunk <= tokLen) { if (remainingInChunk > 0) { tokens.push( new PR_Token(chunk.token.substring(posChunk, chunk.token.length), null == chunk.style ? null : tokenEnd.style)); } posAbs += remainingInChunk; posChunk = 0; if (ci < chunks.length) { chunk = chunks[ci++]; } tokLen = end - posAbs; remainingInChunk = chunk.token.length - posChunk; } if (tokLen) { tokens.push( new PR_Token(chunk.token.substring(posChunk, posChunk + tokLen), tokenEnd.style)); posAbs += tokLen; posChunk += tokLen; } } return tokens; } /** splits markup tokens into declarations, tags, and source chunks. * @private */ function PR_splitMarkup(chunks) { // A state machine to split out declarations, tags, etc. // This state machine deals with absolute space in the text, indexed by k, // and position in the current chunk, indexed by pos and tokenStart to // generate a list of the ends of tokens. // Absolute space is calculated by considering the chunks as appended into // one big string, as they were before being split. // Known failure cases // Server side scripting sections such as ...?> in attributes. // i.e. // Handling this would require a stack, and we don't use PHP. // The output: a list of pairs of PR_TokenEnd instances var tokenEnds = new Array(); var state = 0; // FSM state variable var k = 0; // position in absolute space of the start of the current chunk var tokenStart = -1; // the start of the current token // Try to find a closing tag for any open
tags in your source with class=prettyprint. // You can also use the (html deprecated) tag, but the pretty printer // needs to do more substantial DOM manipulations to support that, so some // css styles may not be preserved. // Change log: // cbeust, 2006/08/22 // Java annotations (start with "@") are now captured as literals ("lit") // var PR_keywords = new Object(); /** initialize the keyword list for our target languages. */ (function () { var CPP_KEYWORDS = ( "bool break case catch char class const const_cast continue default " + "delete deprecated dllexport dllimport do double dynamic_cast else enum " + "explicit extern false float for friend goto if inline int long mutable " + "naked namespace new noinline noreturn nothrow novtable operator private " + "property protected public register reinterpret_cast return selectany " + "short signed sizeof static static_cast struct switch template this " + "thread throw true try typedef typeid typename union unsigned using " + "declaration, using directive uuid virtual void volatile while typeof"); var JAVA_KEYWORDS = ( "abstract default goto package synchronized boolean do if private this " + "break double implements protected throw byte else import public throws " + "case enum instanceof return transient catch extends int short try char " + "final interface static void class finally long strictfp volatile const " + "float native super while continue for new switch"); var PYTHON_KEYWORDS = ( "and assert break class continue def del elif else except exec finally " + "for from global if import in is lambda not or pass print raise return " + "try while yield"); var JSCRIPT_KEYWORDS = ( "abstract boolean break byte case catch char class const continue " + "debugger default delete do double else enum export extends false final " + "finally float for function goto if implements import in instanceof int " + "interface long native new null package private protected public return " + "short static super switch synchronized this throw throws transient " + "true try typeof var void volatile while with NaN Infinity"); var PERL_KEYWORDS = ( "foreach require sub unless until use elsif BEGIN END"); var SH_KEYWORDS = ( "if then do else fi end"); var KEYWORDS = [CPP_KEYWORDS, JAVA_KEYWORDS, PYTHON_KEYWORDS, JSCRIPT_KEYWORDS, PERL_KEYWORDS, SH_KEYWORDS]; for (var k = 0; k < KEYWORDS.length; k++) { var kw = KEYWORDS[k].split(' '); for (var i = 0; i < kw.length; i++) { if (kw[i]) { PR_keywords[kw[i]] = true; } } } }).call(this); // token style names. correspond to css classes /** token style for a string literal */ var PR_STRING = 'str'; /** token style for a keyword */ var PR_KEYWORD = 'kwd'; /** token style for a comment */ var PR_COMMENT = 'com'; /** token style for a type */ var PR_TYPE = 'typ'; /** token style for a literal value. e.g. 1, null, true. */ var PR_LITERAL = 'lit'; /** token style for a punctuation string. */ var PR_PUNCTUATION = 'pun'; /** token style for a punctuation string. */ var PR_PLAIN = 'pln'; /** token style for an sgml tag. */ var PR_TAG = 'tag'; /** token style for a markup declaration such as a DOCTYPE. */ var PR_DECLARATION = 'dec'; /** token style for embedded source. */ var PR_SOURCE = 'src'; /** token style for an sgml attribute name. */ var PR_ATTRIB_NAME = 'atn'; /** token style for an sgml attribute value. */ var PR_ATTRIB_VALUE = 'atv'; /** the position of the end of a token during. A division of a string into * n tokens can be represented as a series n - 1 token ends, as long as * runs of whitespace warrant their own token. * @private */ function PR_TokenEnd(end, style) { if (undefined === style) { throw new Error('BAD'); } if ('number' != typeof(end)) { throw new Error('BAD'); } this.end = end; this.style = style; } PR_TokenEnd.prototype.toString = function () { return '[PR_TokenEnd ' + this.end + (this.style ? ':' + this.style : '') + ']'; }; /** a chunk of text with a style. These are used to represent both the output * from the lexing functions as well as intermediate results. * @constructor * @param token the token text * @param style one of the token styles defined in designdoc-template, or null * for a styleless token, such as an embedded html tag. * @private */ function PR_Token(token, style) { if (undefined === style) { throw new Error('BAD'); } this.token = token; this.style = style; } PR_Token.prototype.toString = function () { return '[PR_Token ' + this.token + (this.style ? ':' + this.style : '') + ']'; }; /** a helper class that decodes common html entities used to escape source and * markup punctuation characters in html. * @constructor * @private */ function PR_DecodeHelper() { this.next = 0; this.ch = '\0'; } PR_DecodeHelper.prototype.decode = function (s, i) { var next = i + 1; var ch = s.charAt(i); if ('&' == ch) { var semi = s.indexOf(';', next); if (semi >= 0 && semi < next + 4) { var entityName = s.substring(next, semi).toLowerCase(); next = semi + 1; if ('lt' == entityName) { ch = '<'; } else if ('gt' == entityName) { ch = '>'; } else if ('quot' == entityName) { ch = '"'; } else if ('apos' == entityName) { ch = '\''; } else if ('amp' == entityName) { ch = '&'; } else { next = i + 1; } } } this.next = next; this.ch = ch; return this.ch; } // some string utilities function PR_isWordChar(ch) { return (ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z'); } function PR_isIdentifierStart(ch) { return PR_isWordChar(ch) || ch == '_' || ch == '$' || ch == '@'; } function PR_isIdentifierPart(ch) { return PR_isIdentifierStart(ch) || PR_isDigitChar(ch); } function PR_isSpaceChar(ch) { return "\t \r\n".indexOf(ch) >= 0; } function PR_isDigitChar(ch) { return ch >= '0' && ch <= '9'; } function PR_trim(s) { var i = 0, j = s.length - 1; while (i <= j && PR_isSpaceChar(s.charAt(i))) { ++i; } while (j > i && PR_isSpaceChar(s.charAt(j))) { --j; } return s.substring(i, j + 1); } function PR_startsWith(s, prefix) { return s.length >= prefix.length && prefix == s.substring(0, prefix.length); } function PR_endsWith(s, suffix) { return s.length >= suffix.length && suffix == s.substring(s.length - suffix.length, s.length); } /** true iff prefix matches the first prefix characters in chars[0:len]. * @private */ function PR_prefixMatch(chars, len, prefix) { if (len < prefix.length) { return false; } for (var i = 0, n = prefix.length; i < n; ++i) { if (prefix.charAt(i) != chars[i]) { return false; } } return true; } /** used to convert html special characters embedded in XMP tags into html. */ function PR_textToHtml(str) { return str.replace(/&/g, '&').replace(//g, '>'); } /** split markup into chunks of html tags (style null) and * plain text (style {@link #PR_PLAIN}). * * @param s a String of html. * @return an Array of PR_Tokens of style PR_PLAIN and null. * @private */ function PR_chunkify(s) { var chunks = new Array(); var state = 0; var start = 0; var pos = -1; for (var i = 0, n = s.length; i < n; ++i) { var ch = s.charAt(i); switch (state) { case 0: if ('<' == ch) { state = 1; } break; case 1: pos = i - 1; if ('/' == ch) { state = 2; } else if (PR_isWordChar(ch)) { state = 3; } else if ('<' == ch) { state = 1; } else { state = 0; } break; case 2: if (PR_isWordChar(ch)) { state = 3; } else if ('<' == ch) { state = 1; } else { state = 0; } break; case 3: if ('>' == ch) { if (pos > start) { chunks.push(new PR_Token(s.substring(start, pos), PR_PLAIN)); } chunks.push(new PR_Token(s.substring(pos, i + 1), null)); start = i + 1; pos = -1; state = 0; } break; } } if (s.length > start) { chunks.push(new PR_Token(s.substring(start, s.length), PR_PLAIN)); } return chunks; } /** splits chunks around entities. * @private */ function PR_splitEntities(chunks) { var chunksOut = new Array(); var state = 0; for (var ci = 0, nc = chunks.length; ci < nc; ++ci) { var chunk = chunks[ci]; if (PR_PLAIN != chunk.style) { chunksOut.push(chunk); continue; } var s = chunk.token; var pos = 0; var start; for (var i = 0; i < s.length; ++i) { var ch = s.charAt(i); switch (state) { case 0: if ('&' == ch) { state = 1; } break; case 1: if ('#' == ch || PR_isWordChar(ch)) { start = i - 1; state = 2; } else { state = 0; } break; case 2: if (';' == ch) { if (start > pos) { chunksOut.push( new PR_Token(s.substring(pos, start), chunk.style)); } chunksOut.push(new PR_Token(s.substring(start, i + 1), null)); pos = i + 1; state = 0; } break; } } if (s.length > pos) { chunksOut.push(pos ? new PR_Token(s.substring(pos, s.length), chunk.style) : chunk); } } return chunksOut; } /** walk the tokenEnds list and the chunk list in parallel to generate a list * of split tokens. * @private */ function PR_splitChunks(chunks, tokenEnds) { var tokens = new Array(); // the output var ci = 0; // index into chunks // position of beginning of amount written so far in absolute space. var posAbs = 0; // position of amount written so far in chunk space var posChunk = 0; // current chunk var chunk = new PR_Token('', null); for (var ei = 0, ne = tokenEnds.length; ei < ne; ++ei) { var tokenEnd = tokenEnds[ei]; var end = tokenEnd.end; var tokLen = end - posAbs; var remainingInChunk = chunk.token.length - posChunk; while (remainingInChunk <= tokLen) { if (remainingInChunk > 0) { tokens.push( new PR_Token(chunk.token.substring(posChunk, chunk.token.length), null == chunk.style ? null : tokenEnd.style)); } posAbs += remainingInChunk; posChunk = 0; if (ci < chunks.length) { chunk = chunks[ci++]; } tokLen = end - posAbs; remainingInChunk = chunk.token.length - posChunk; } if (tokLen) { tokens.push( new PR_Token(chunk.token.substring(posChunk, posChunk + tokLen), tokenEnd.style)); posAbs += tokLen; posChunk += tokLen; } } return tokens; } /** splits markup tokens into declarations, tags, and source chunks. * @private */ function PR_splitMarkup(chunks) { // A state machine to split out declarations, tags, etc. // This state machine deals with absolute space in the text, indexed by k, // and position in the current chunk, indexed by pos and tokenStart to // generate a list of the ends of tokens. // Absolute space is calculated by considering the chunks as appended into // one big string, as they were before being split. // Known failure cases // Server side scripting sections such as ...?> in attributes. // i.e. // Handling this would require a stack, and we don't use PHP. // The output: a list of pairs of PR_TokenEnd instances var tokenEnds = new Array(); var state = 0; // FSM state variable var k = 0; // position in absolute space of the start of the current chunk var tokenStart = -1; // the start of the current token // Try to find a closing tag for any open