// Copyright 2005 Google Inc.
// All Rights Reserved.
//
// msamuel@google.com

// Usage:
// 1) include this source file in an html page via
//    <script type=text/javascript src=prettify.js></script>
// 2) define style rules.  See the example page for examples.
// 3) mark the <pre> and <code> tags in your source with class=prettyprint.
//    You can also use the (html deprecated) <xmp> tag, but the pretty printer
//    needs to do more substantial DOM manipulations to support that, so some
//    css styles may not be preserved.

// Change log:
// cbeust, 2006/08/22
//   Java annotations (start with "@") are now captured as literals ("lit")

/** set of keywords for all target languages: maps keyword text to true. */
var PR_keywords = {};

/** initialize the keyword list for our target languages. */
(function () {
  var CPP_KEYWORDS = (
    "bool break case catch char class const const_cast continue default " +
    "delete deprecated dllexport dllimport do double dynamic_cast else enum " +
    "explicit extern false float for friend goto if inline int long mutable " +
    "naked namespace new noinline noreturn nothrow novtable operator private " +
    "property protected public register reinterpret_cast return selectany " +
    "short signed sizeof static static_cast struct switch template this " +
    "thread throw true try typedef typeid typename union unsigned using " +
    "uuid virtual void volatile while typeof");
  var JAVA_KEYWORDS = (
    "abstract default goto package synchronized boolean do if private this " +
    "break double implements protected throw byte else import public throws " +
    "case enum instanceof return transient catch extends int short try char " +
    "final interface static void class finally long strictfp volatile const " +
    "float native super while continue for new switch");
  var PYTHON_KEYWORDS = (
    "and assert break class continue def del elif else except exec finally " +
    "for from global if import in is lambda not or pass print raise return " +
    "try while yield");
  var JSCRIPT_KEYWORDS = (
    "abstract boolean break byte case catch char class const continue " +
    "debugger default delete do double else enum export extends false final " +
    "finally float for function goto if implements import in instanceof int " +
    "interface long native new null package private protected public return " +
    "short static super switch synchronized this throw throws transient " +
    "true try typeof var void volatile while with NaN Infinity");
  var PERL_KEYWORDS = (
    "foreach require sub unless until use elsif BEGIN END");
  var SH_KEYWORDS = (
    "if then do else fi end");
  var KEYWORDS = [CPP_KEYWORDS, JAVA_KEYWORDS, PYTHON_KEYWORDS,
                  JSCRIPT_KEYWORDS, PERL_KEYWORDS, SH_KEYWORDS];
  for (var k = 0; k < KEYWORDS.length; k++) {
    var kw = KEYWORDS[k].split(' ');
    for (var i = 0; i < kw.length; i++) {
      // skip the empty strings produced by consecutive spaces
      if (kw[i]) { PR_keywords[kw[i]] = true; }
    }
  }
})();

// token style names.  correspond to css classes
/** token style for a string literal */
var PR_STRING = 'str';
/** token style for a keyword */
var PR_KEYWORD = 'kwd';
/** token style for a comment */
var PR_COMMENT = 'com';
/** token style for a type */
var PR_TYPE = 'typ';
/** token style for a literal value.  e.g. 1, null, true. */
var PR_LITERAL = 'lit';
/** token style for a punctuation string. */
var PR_PUNCTUATION = 'pun';
/** token style for plain text that has no more specific style. */
var PR_PLAIN = 'pln';

/** token style for an sgml tag. */
var PR_TAG = 'tag';
/** token style for a markup declaration such as a DOCTYPE. */
var PR_DECLARATION = 'dec';
/** token style for embedded source. */
var PR_SOURCE = 'src';
/** token style for an sgml attribute name. */
var PR_ATTRIB_NAME = 'atn';
/** token style for an sgml attribute value. */
var PR_ATTRIB_VALUE = 'atv';

/** the position of the end of a token.  A division of a string into
  * n tokens can be represented as a series of token ends, as long as
  * runs of whitespace warrant their own token.
  * @constructor
  * @param end the index just past the last character of the token, measured
  *   over the concatenation of all chunk texts.  Must be a number.
  * @param style the style of the token that ends at end.  May be null but
  *   not undefined.
  * @private
  */
function PR_TokenEnd(end, style) {
  if (undefined === style) { throw new Error('BAD'); }
  if ('number' != typeof(end)) { throw new Error('BAD'); }
  this.end = end;
  this.style = style;
}
PR_TokenEnd.prototype.toString = function () {
  return '[PR_TokenEnd ' + this.end +
    (this.style ? ':' + this.style : '') + ']';
};

/** a chunk of text with a style.  These are used to represent both the output
  * from the lexing functions as well as intermediate results.
  * @constructor
  * @param token the token text
  * @param style one of the token styles defined in designdoc-template, or null
  *   for a styleless token, such as an embedded html tag.
  * @private
  */
function PR_Token(token, style) {
  if (undefined === style) { throw new Error('BAD'); }
  this.token = token;
  this.style = style;
}
PR_Token.prototype.toString = function () {
  return '[PR_Token ' + this.token +
    (this.style ? ':' + this.style : '') + ']';
};

/** a helper class that decodes common html entities used to escape source and
  * markup punctuation characters in html.
  * After a call to decode, ch holds the decoded character and next holds the
  * index of the first character after it.
  * @constructor
  * @private
  */
function PR_DecodeHelper() {
  this.next = 0;
  this.ch = '\0';
}

/** decodes the character or entity that starts at index i of s.
  * Recognizes &lt; &gt; &quot; &apos; and &amp; (case-insensitively); any
  * other '&' is returned as a literal ampersand.
  * @param s the string to read from
  * @param i the index of the character to decode
  * @return the decoded character, which is also stored in this.ch
  */
PR_DecodeHelper.prototype.decode = function (s, i) {
  var next = i + 1;
  var ch = s.charAt(i);
  if ('&' == ch) {
    var semi = s.indexOf(';', next);
    // The longest recognized names (quot, apos) are 4 characters long, so the
    // semicolon may be up to 4 characters past the start of the name.
    if (semi >= 0 && semi <= next + 4) {
      var entityName = s.substring(next, semi).toLowerCase();
      next = semi + 1;
      if ('lt' == entityName) {
        ch = '<';
      } else if ('gt' == entityName) {
        ch = '>';
      } else if ('quot' == entityName) {
        ch = '"';
      } else if ('apos' == entityName) {
        ch = '\'';
      } else if ('amp' == entityName) {
        ch = '&';
      } else {
        // not an entity we know.  consume only the ampersand.
        next = i + 1;
      }
    }
  }
  this.next = next;
  this.ch = ch;
  return this.ch;
};

// some string utilities

/** true iff ch is an ascii letter. */
function PR_isWordChar(ch) {
  return (ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z');
}

/** true iff ch can start an identifier: a letter, '_', '$', or '@'. */
function PR_isIdentifierStart(ch) {
  return PR_isWordChar(ch) || ch == '_' || ch == '$' || ch == '@';
}

/** true iff ch can appear inside an identifier. */
function PR_isIdentifierPart(ch) {
  return PR_isIdentifierStart(ch) || PR_isDigitChar(ch);
}

/** true iff ch is a tab, space, carriage return or newline.
  * Note that the empty string is also reported as a space since indexOf('')
  * is 0, so callers must bounds check before calling charAt.
  */
function PR_isSpaceChar(ch) {
  return "\t \r\n".indexOf(ch) >= 0;
}

/** true iff ch is an ascii digit. */
function PR_isDigitChar(ch) {
  return ch >= '0' && ch <= '9';
}

/** returns s without leading and trailing space characters. */
function PR_trim(s) {
  var i = 0, j = s.length - 1;
  while (i <= j && PR_isSpaceChar(s.charAt(i))) { ++i; }
  while (j > i && PR_isSpaceChar(s.charAt(j))) { --j; }
  return s.substring(i, j + 1);
}

/** true iff s begins with prefix. */
function PR_startsWith(s, prefix) {
  return s.length >= prefix.length && prefix == s.substring(0, prefix.length);
}

/** true iff s ends with suffix. */
function PR_endsWith(s, suffix) {
  return s.length >= suffix.length &&
         suffix == s.substring(s.length - suffix.length, s.length);
}

/** true iff prefix matches the first prefix.length characters in
  * chars[0:len].
  * @param chars an array of single character strings
  * @param len the number of valid entries in chars
  * @param prefix the string to look for
  * @private
  */
function PR_prefixMatch(chars, len, prefix) {
  if (len < prefix.length) { return false; }
  for (var i = 0, n = prefix.length; i < n; ++i) {
    if (prefix.charAt(i) != chars[i]) { return false; }
  }
  return true;
}

/** used to convert html special characters embedded in XMP tags into html. */
function PR_textToHtml(str) {
  // the ampersand must be escaped first so that the entities produced by the
  // later replacements are not escaped again.
  return str.replace(/&/g, '&amp;').replace(/</g, '&lt;').replace(/>/g, '&gt;');
}

/** split markup into chunks of html tags (style null) and
  * plain text (style {@link #PR_PLAIN}).
  *
  * @param s a String of html.
  * @return an Array of PR_Tokens of style PR_PLAIN and null.
  * @private
  */
function PR_chunkify(s) {
  var chunks = [];
  // 0: in text, 1: seen '<', 2: seen '</', 3: inside a tag
  var state = 0;
  var start = 0;  // start of the text that has not been emitted yet
  var pos = -1;   // position of the '<' of the candidate tag
  for (var i = 0, n = s.length; i < n; ++i) {
    var ch = s.charAt(i);
    switch (state) {
      case 0:
        if ('<' == ch) { state = 1; }
        break;
      case 1:
        pos = i - 1;
        if ('/' == ch) {
          state = 2;
        } else if (PR_isWordChar(ch)) {
          state = 3;
        } else if ('<' == ch) {
          state = 1;
        } else {
          state = 0;
        }
        break;
      case 2:
        if (PR_isWordChar(ch)) {
          state = 3;
        } else if ('<' == ch) {
          state = 1;
        } else {
          state = 0;
        }
        break;
      case 3:
        if ('>' == ch) {
          if (pos > start) {
            chunks.push(new PR_Token(s.substring(start, pos), PR_PLAIN));
          }
          chunks.push(new PR_Token(s.substring(pos, i + 1), null));
          start = i + 1;
          pos = -1;
          state = 0;
        }
        break;
    }
  }
  if (s.length > start) {
    chunks.push(new PR_Token(s.substring(start, s.length), PR_PLAIN));
  }
  return chunks;
}

/** splits chunks around entities.
  * Entities found in PR_PLAIN chunks are emitted as tokens of style null so
  * that later passes treat them as single units.  Chunks of any other style
  * are passed through unchanged.
  * @private
  */
function PR_splitEntities(chunks) {
  var chunksOut = [];
  for (var ci = 0, nc = chunks.length; ci < nc; ++ci) {
    var chunk = chunks[ci];
    if (PR_PLAIN != chunk.style) {
      chunksOut.push(chunk);
      continue;
    }
    var s = chunk.token;
    var pos = 0;    // end of the text emitted so far
    var start;      // position of the '&' of the candidate entity
    // 0: in text, 1: seen '&', 2: inside an entity name.
    // The state is per chunk since start indexes into the current chunk.
    var state = 0;
    for (var i = 0; i < s.length; ++i) {
      var ch = s.charAt(i);
      switch (state) {
        case 0:
          if ('&' == ch) { state = 1; }
          break;
        case 1:
          if ('#' == ch || PR_isWordChar(ch)) {
            start = i - 1;
            state = 2;
          } else {
            state = 0;
          }
          break;
        case 2:
          if (';' == ch) {
            if (start > pos) {
              chunksOut.push(
                  new PR_Token(s.substring(pos, start), chunk.style));
            }
            chunksOut.push(new PR_Token(s.substring(start, i + 1), null));
            pos = i + 1;
            state = 0;
          } else if ('&' == ch) {
            // the previous ampersand did not start an entity, but this one
            // might.
            state = 1;
          } else if (!('#' == ch || PR_isWordChar(ch) || PR_isDigitChar(ch))) {
            // not a character that can appear in an entity, so give up
            // instead of swallowing text up to some unrelated semicolon.
            state = 0;
          }
          break;
      }
    }
    if (s.length > pos) {
      chunksOut.push(pos ?
                     new PR_Token(s.substring(pos, s.length), chunk.style) :
                     chunk);
    }
  }
  return chunksOut;
}

/** walk the tokenEnds list and the chunk list in parallel to generate a list
  * of split tokens.
  * Text that comes from a chunk of style null keeps style null when the whole
  * remainder of the chunk is emitted; all other text takes the style of the
  * token end that covers it.
  * @param chunks an Array of PR_Tokens
  * @param tokenEnds an Array of PR_TokenEnds in absolute space, i.e. indexing
  *   into the concatenation of all the chunk texts
  * @return an Array of PR_Tokens
  * @private
  */
function PR_splitChunks(chunks, tokenEnds) {
  var tokens = [];  // the output
  var ci = 0;  // index into chunks
  // position of beginning of amount written so far in absolute space.
  var posAbs = 0;
  // position of amount written so far in chunk space
  var posChunk = 0;
  // current chunk
  var chunk = new PR_Token('', null);

  for (var ei = 0, ne = tokenEnds.length; ei < ne; ++ei) {
    var tokenEnd = tokenEnds[ei];
    var end = tokenEnd.end;

    var tokLen = end - posAbs;
    var remainingInChunk = chunk.token.length - posChunk;
    while (remainingInChunk <= tokLen) {
      if (remainingInChunk > 0) {
        tokens.push(
            new PR_Token(chunk.token.substring(posChunk, chunk.token.length),
                         null == chunk.style ? null : tokenEnd.style));
      }
      posAbs += remainingInChunk;
      posChunk = 0;
      // All the input has been consumed so there is nothing left to emit.
      // Without this check the loop never terminates when the remainder and
      // the token length are both zero, e.g. for an empty chunk list.
      if (ci >= chunks.length) { return tokens; }
      chunk = chunks[ci++];

      tokLen = end - posAbs;
      remainingInChunk = chunk.token.length - posChunk;
    }

    if (tokLen > 0) {
      tokens.push(
          new PR_Token(chunk.token.substring(posChunk, posChunk + tokLen),
                       tokenEnd.style));
      posAbs += tokLen;
      posChunk += tokLen;
    }
  }

  return tokens;
}

/** splits markup tokens into declarations, tags, and source chunks.
  * @param chunks an Array of PR_Tokens of style PR_PLAIN and null
  * @return an Array of PR_TokenEnds in absolute space
  * @private
  */
function PR_splitMarkup(chunks) {
  // A state machine to split out declarations, tags, etc.
  // This state machine deals with absolute space in the text, indexed by k,
  // and position in the current chunk, indexed by i and tokenStart, to
  // generate a list of the ends of tokens.
  // Absolute space is calculated by considering the chunks as appended into
  // one big string, as they were before being split.

  // Known failure cases
  // Server side scripting sections such as <?...?> in attributes.
  // i.e. <span class="<? foo ?>">
  // Handling this would require a stack, and we don't use PHP.

  // The output: a list of pairs of PR_TokenEnd instances
  var tokenEnds = [];

  var state = 0;  // FSM state variable
  var k = 0;  // position in absolute space of the start of the current chunk
  var tokenStart = -1;  // the start of the current token

  // Try to find a closing tag for any open <style> or <script> tags
  // We can't do this at a later stage because then the following case
  // would fail:
  // <script>document.writeln('<!--');</script>

  // We use tokenChars[:tokenCharsI] to accumulate the tag name so that we
  // can check whether to enter into a no scripting section when the tag ends.
  var tokenChars = new Array(12);
  var tokenCharsI = 0;
  // if non null, the tag prefix that we need to see to break out.
  var endScriptTag = null;
  var decodeHelper = new PR_DecodeHelper();

  for (var ci = 0, nc = chunks.length; ci < nc; ++ci) {
    var chunk = chunks[ci];
    if (PR_PLAIN != chunk.style) {
      k += chunk.token.length;
      continue;
    }

    var s = chunk.token;
    for (var i = 0, n = s.length; i < n; /* i = next at bottom */) {
      decodeHelper.decode(s, i);
      var ch = decodeHelper.ch;
      var next = decodeHelper.next;

      var tokenStyle = null;
      switch (state) {
        case 0:  // in text
          if ('<' == ch) { state = 1; }
          break;
        case 1:  // seen '<'
          tokenCharsI = 0;
          if ('/' == ch) {  // only consider close tags if we're in script/style
            state = 7;
          } else if (null == endScriptTag) {
            if ('!' == ch) {
              state = 2;
            } else if (PR_isWordChar(ch)) {
              state = 8;
            } else if ('?' == ch) {
              state = 9;
            } else if ('%' == ch) {
              state = 11;
            } else if ('<' != ch) {
              state = 0;
            }
          } else if ('<' != ch) {
            state = 0;
          }
          break;
        case 2:  // seen '<!'
          if ('-' == ch) {
            state = 4;
          } else if (PR_isWordChar(ch)) {
            state = 3;
          } else if ('<' == ch) {
            state = 1;
          } else {
            state = 0;
          }
          break;
        case 3:  // in a declaration
          if ('>' == ch) {
            state = 0;
            tokenStyle = PR_DECLARATION;
          }
          break;
        case 4:  // in a comment
          if ('-' == ch) { state = 5; }
          break;
        case 5:  // in a comment, after one '-'
          if ('-' == ch) { state = 6; }
          break;
        case 6:  // in a comment, after '--'
          if ('>' == ch) {
            state = 0;
            tokenStyle = PR_COMMENT;
          } else if ('-' == ch) {
            state = 6;
          } else {
            state = 4;
          }
          break;
        case 7:  // seen '</'
          if (PR_isWordChar(ch)) {
            state = 8;
          } else if ('<' == ch) {
            state = 1;
          } else {
            state = 0;
          }
          break;
        case 8:  // in a tag
          if ('>' == ch) {
            state = 0;
            tokenStyle = PR_TAG;
          }
          break;
        case 9:  // in a <? ... ?> section
          if ('?' == ch) { state = 10; }
          break;
        case 10:  // in a <? ... ?> section, after '?'
          if ('>' == ch) {
            state = 0;
            tokenStyle = PR_SOURCE;
          } else if ('?' != ch) {
            state = 9;
          }
          break;
        case 11:  // in a <% ... %> section
          if ('%' == ch) { state = 12; }
          break;
        case 12:  // in a <% ... %> section, after '%'
          if ('>' == ch) {
            state = 0;
            tokenStyle = PR_SOURCE;
          } else if ('%' != ch) {
            state = 11;
          }
          break;
      }

      if (tokenCharsI < tokenChars.length) {
        tokenChars[tokenCharsI++] = ch.toLowerCase();
      }
      if (1 == state) { tokenStart = k + i; }
      i = next;
      if (null != tokenStyle) {
        if (endScriptTag) {
          if (PR_prefixMatch(tokenChars, tokenCharsI, endScriptTag)) {
            endScriptTag = null;
          }
        } else {
          if (PR_prefixMatch(tokenChars, tokenCharsI, 'script')) {
            endScriptTag = '/script';
          } else if (PR_prefixMatch(tokenChars, tokenCharsI, 'style')) {
            endScriptTag = '/style';
          } else if (PR_prefixMatch(tokenChars, tokenCharsI, 'xmp')) {
            endScriptTag = '/xmp';
          }
        }
        // disallow the tag if endScriptTag is set and this was not an open
        // tag.
        if (endScriptTag && tokenCharsI && '/' == tokenChars[0]) {
          tokenStyle = null;
        }
        if (null != tokenStyle) {
          tokenEnds.push(new PR_TokenEnd(tokenStart, PR_PLAIN));
          tokenEnds.push(new PR_TokenEnd(k + next, tokenStyle));
        }
      }
    }
    k += chunk.token.length;
  }
  tokenEnds.push(new PR_TokenEnd(k, PR_PLAIN));

  return tokenEnds;
}

/** splits the given string into comment, string, and "other" tokens.
  * @param chunks an Array of PR_Tokens.  Only chunks of style PR_PLAIN are
  *   scanned.
  * @return an array of PR_Tokens with style in
  *   (PR_STRING, PR_COMMENT, PR_PLAIN, null)
  *   The result array may contain spurious zero length tokens.  Ignore them.
  *
  * @private
  */
function PR_splitStringAndCommentTokens(chunks) {
  // a state machine to split out comments, strings, and other stuff
  var tokenEnds = [];  // positions of ends of tokens in absolute space
  // FSM state variable.
  // 0: other, 1: in string, 2: after a backslash in a string, 3: seen '/',
  // 4: in a line comment, 5: in a block comment, 6: in a block comment
  // after '*'
  var state = 0;
  var delim = -1;  // string delimiter
  var k = 0;  // absolute position of beginning of current chunk
  for (var ci = 0, nc = chunks.length; ci < nc; ++ci) {
    var chunk = chunks[ci];
    var s = chunk.token;
    if (PR_PLAIN == chunk.style) {
      for (var i = 0, n = s.length; i < n; ++i) {
        var ch = s.charAt(i);
        if (0 == state) {
          if (ch == '"' || ch == '\'' || ch == '`') {
            tokenEnds.push(new PR_TokenEnd(k + i, PR_PLAIN));
            state = 1;
            delim = ch;
          } else if (ch == '/') {
            state = 3;
          } else if (ch == '#') {
            tokenEnds.push(new PR_TokenEnd(k + i, PR_PLAIN));
            state = 4;
          }
        } else if (1 == state) {
          if (ch == delim) {
            state = 0;
            tokenEnds.push(new PR_TokenEnd(k + i + 1, PR_STRING));
          } else if (ch == '\\') {
            state = 2;
          }
        } else if (2 == state) {
          state = 1;
        } else if (3 == state) {
          if (ch == '/') {
            state = 4;
            tokenEnds.push(new PR_TokenEnd(k + i - 1, PR_PLAIN));
          } else if (ch == '*') {
            state = 5;
            tokenEnds.push(new PR_TokenEnd(k + i - 1, PR_PLAIN));
          } else {
            state = 0;
            // next loop will reenter state 0 with the same value of i, so
            // ch will be reconsidered as start of new token.
            --i;
          }
        } else if (4 == state) {
          if (ch == '\r' || ch == '\n') {
            state = 0;
            tokenEnds.push(new PR_TokenEnd(k + i, PR_COMMENT));
          }
        } else if (5 == state) {
          if (ch == '*') {
            state = 6;
          }
        } else if (6 == state) {
          if (ch == '/') {
            state = 0;
            tokenEnds.push(new PR_TokenEnd(k + i + 1, PR_COMMENT));
          } else if (ch != '*') {
            state = 5;
          }
        }
      }
    }
    k += s.length;
  }
  // a token ends at the end.  If the input stops inside a comment, e.g. a
  // line comment on a last line that has no trailing newline, the tail is
  // still a comment.
  tokenEnds.push(new PR_TokenEnd(k, state >= 4 ? PR_COMMENT : PR_PLAIN));

  return PR_splitChunks(chunks, tokenEnds);
}

/** used by lexSource to split a non string, non comment token.
  * @param s the text to split
  * @param outlist an Array to which the resulting PR_Tokens are appended
  * @private
  */
function PR_splitNonStringNonCommentToken(s, outlist) {
  var pos = 0;
  // 0: whitespace, 1: identifier, 2: number literal, 3: punctuation
  var state = 0;
  for (var i = 0; i <= s.length; i++) {
    var ch = s.charAt(i);
    // the next state.
    // if set to -1 then it will cause a reentry to state 0 without consuming
    // another character.
    var nstate = state;

    if (i == s.length) {
      // nstate will not be equal to state, so it will append the token
      nstate = -2;
    } else {
      switch (state) {
        case 0:  // whitespace state
          if (PR_isIdentifierStart(ch)) {
            nstate = 1;
          } else if (PR_isDigitChar(ch)) {
            nstate = 2;
          } else if (!PR_isSpaceChar(ch)) {
            nstate = 3;
          }
          if (nstate && pos < i) {
            var t = s.substring(pos, i);
            outlist.push(new PR_Token(t, PR_PLAIN));
            pos = i;
          }
          break;
        case 1:  // identifier state
          if (!PR_isIdentifierPart(ch)) {
            nstate = -1;
          }
          break;
        case 2:  // number literal state
          // handle numeric literals like
          // 0x7f 300UL 100_000

          // this does not treat floating point values as a single literal
          //   0.1 and 3e-6
          // are each split into multiple tokens
          if (!(PR_isDigitChar(ch) || PR_isWordChar(ch) || ch == '_')) {
            nstate = -1;
          }
          break;
        case 3:  // punctuation state
          if (PR_isIdentifierStart(ch) || PR_isDigitChar(ch) ||
              PR_isSpaceChar(ch)) {
            nstate = -1;
          }
          break;
      }
    }

    if (nstate != state) {
      if (nstate < 0) {
        if (i > pos) {
          var t = s.substring(pos, i);
          var ch0 = t.charAt(0);
          var style;
          if (PR_isIdentifierStart(ch0)) {
            // Only consider own properties so that identifiers such as
            // toString or constructor, which every object inherits, are not
            // mistaken for keywords.
            if (Object.prototype.hasOwnProperty.call(PR_keywords, t)) {
              style = PR_KEYWORD;
            } else if (ch0 == '@') {
              style = PR_LITERAL;
            } else {
              // Treat any word that starts with an uppercase character and
              // contains at least one lowercase character as a type, or
              // ends with _t.
              // This works perfectly for Java, pretty well for C++, and
              // passably for Python.  The _t catches C structs.
              var isType = false;
              if (ch0 >= 'A' && ch0 <= 'Z') {
                for (var j = 1; j < t.length; j++) {
                  var ch1 = t.charAt(j);
                  if (ch1 >= 'a' && ch1 <= 'z') {
                    isType = true;
                    break;
                  }
                }
                if (!isType && t.length >= 2 &&
                    t.substring(t.length - 2) == '_t') {
                  isType = true;
                }
              }
              style = isType ? PR_TYPE : PR_PLAIN;
            }
          } else if (PR_isDigitChar(ch0)) {
            style = PR_LITERAL;
          } else if (!PR_isSpaceChar(ch0)) {
            style = PR_PUNCTUATION;
          } else {
            style = PR_PLAIN;
          }
          pos = i;
          outlist.push(new PR_Token(t, style));
        }

        state = 0;
        if (nstate == -1) {
          // don't increment.  This allows us to use state 0 to redispatch based
          // on the current character.
          i--;
          continue;
        }
      }
      state = nstate;
    }
  }
}

/** split a group of chunks of markup.
  * @param chunks an Array of PR_Tokens of style PR_PLAIN and null
  * @return an Array of PR_Tokens, or chunks itself if it is empty or null
  * @private
  */
function PR_tokenizeMarkup(chunks) {
  if (!(chunks && chunks.length)) { return chunks; }

  var tokenEnds = PR_splitMarkup(chunks);
  return PR_splitChunks(chunks, tokenEnds);
}

/** split tags attributes and their values out from the tag name.
  * Tokens whose style is not PR_TAG are passed through unchanged.
  * @private
  */
function PR_splitTagAttributes(tokens) {
  var tokensOut = [];
  // 0: before the tag, 1: in the tag name, 2: between attributes,
  // 3: in an attribute name, 4: after an attribute name, 5: after '=',
  // 6: in a quoted value, 7: in an unquoted value
  var state = 0;
  var stateStyle = PR_TAG;
  var delim = null;  // attribute delimiter for quoted value state.
  var decodeHelper = new PR_DecodeHelper();
  for (var ci = 0; ci < tokens.length; ++ci) {
    var tok = tokens[ci];
    if (PR_TAG == tok.style) {
      var s = tok.token;
      var start = 0;
      for (var i = 0; i < s.length; /* i = next at bottom */) {
        decodeHelper.decode(s, i);
        var ch = decodeHelper.ch;
        var next = decodeHelper.next;

        var emitEnd = null;  // null or position of end of chunk to emit.
        var nextStyle = null;  // null or next value of stateStyle
        if (ch == '>') {
          if (PR_TAG != stateStyle) {
            emitEnd = i;
            nextStyle = PR_TAG;
          }
          // the tag is done, so make sure that attribute state does not leak
          // into a tag that immediately follows this one.
          state = 0;
        } else {
          switch (state) {
            case 0:
              if ('<' == ch) { state = 1; }
              break;
            case 1:
              if (PR_isSpaceChar(ch)) { state = 2; }
              break;
            case 2:
              if (!PR_isSpaceChar(ch)) {
                nextStyle = PR_ATTRIB_NAME;
                emitEnd = i;
                state = 3;
              }
              break;
            case 3:
              if ('=' == ch) {
                emitEnd = i;
                nextStyle = PR_TAG;
                state = 5;
              } else if (PR_isSpaceChar(ch)) {
                emitEnd = i;
                nextStyle = PR_TAG;
                state = 4;
              }
              break;
            case 4:
              if ('=' == ch) {
                state = 5;
              } else if (!PR_isSpaceChar(ch)) {
                emitEnd = i;
                nextStyle = PR_ATTRIB_NAME;
                state = 3;
              }
              break;
            case 5:
              if ('"' == ch || '\'' == ch) {
                emitEnd = i;
                nextStyle = PR_ATTRIB_VALUE;
                state = 6;
                delim = ch;
              } else if (!PR_isSpaceChar(ch)) {
                emitEnd = i;
                nextStyle = PR_ATTRIB_VALUE;
                state = 7;
              }
              break;
            case 6:
              if (ch == delim) {
                // the closing delimiter is part of the value
                emitEnd = next;
                nextStyle = PR_TAG;
                state = 2;
              }
              break;
            case 7:
              if (PR_isSpaceChar(ch)) {
                emitEnd = i;
                nextStyle = PR_TAG;
                state = 2;
              }
              break;
          }
        }
        if (emitEnd) {
          if (emitEnd > start) {
            tokensOut.push(
                new PR_Token(s.substring(start, emitEnd), stateStyle));
            start = emitEnd;
          }
          stateStyle = nextStyle;
        }
        i = next;
      }
      if (s.length > start) {
        tokensOut.push(new PR_Token(s.substring(start, s.length), stateStyle));
      }
    } else {
      // a styled token that is not a tag ends any tag in progress.  Tokens
      // of style null leave the state alone.
      if (tok.style) {
        state = 0;
        stateStyle = PR_TAG;
      }
      tokensOut.push(tok);
    }
  }
  return tokensOut;
}

/** identify regions of markup that are really source code, and recursively
  * lex them.
  * The lexed source is wrapped in a <span class=embsrc> element.
  * @private
  */
function PR_splitSourceNodes(tokens) {
  var tokensOut = [];
  // when we see a <script> tag, store '/' here so that we know to end the
  // source processing.  For <? and <% sections this holds '?' or '%'.
  var endScriptTag = null;
  var decodeHelper = new PR_DecodeHelper();

  // the chunks of the source section being collected, or null
  var sourceChunks = null;

  /** lexes any collected source chunks and appends the result to tokensOut. */
  function flushSource() {
    if (!sourceChunks) { return; }
    var sourceTokens = PR_lexSource(sourceChunks);
    tokensOut.push(new PR_Token('<span class=embsrc>', null));
    for (var si = 0, ns = sourceTokens.length; si < ns; ++si) {
      tokensOut.push(sourceTokens[si]);
    }
    tokensOut.push(new PR_Token('</span>', null));
    sourceChunks = null;
  }

  for (var ci = 0, nc = tokens.length; ci < nc; ++ci) {
    var tok = tokens[ci];
    if (null == tok.style) {
      // keep styleless tokens in document order: inside the source section
      // if one is being collected, otherwise directly in the output.
      (sourceChunks || tokensOut).push(tok);
      continue;
    }

    var s = tok.token;

    if (null == endScriptTag) {
      if (PR_SOURCE == tok.style) {
        // split off any starting and trailing <?, <%
        if ('<' == decodeHelper.decode(s, 0)) {
          decodeHelper.decode(s, decodeHelper.next);
          if ('%' == decodeHelper.ch || '?' == decodeHelper.ch) {
            endScriptTag = decodeHelper.ch;
            tokensOut.push(new PR_Token(s.substring(0, decodeHelper.next),
                                        PR_TAG));
            s = s.substring(decodeHelper.next, s.length);
          }
        }
      } else if (PR_TAG == tok.style) {
        if ('<' == decodeHelper.decode(s, 0) &&
            '/' != s.charAt(decodeHelper.next)) {
          var tagContent = s.substring(decodeHelper.next).toLowerCase();
          // FIXME(msamuel): this does not mirror exactly the code in
          // in PR_splitMarkup that defers splitting tags inside script and
          // style blocks.
          if (PR_startsWith(tagContent, 'script') ||
              PR_startsWith(tagContent, 'style') ||
              PR_startsWith(tagContent, 'xmp')) {
            endScriptTag = '/';
          }
        }
      }
    }

    if (null != endScriptTag) {
      var endTok = null;
      if (PR_SOURCE == tok.style) {
        if (endScriptTag == '%' || endScriptTag == '?') {
          var pos = s.lastIndexOf(endScriptTag);
          if (pos >= 0 && '>' == decodeHelper.decode(s, pos + 1) &&
              s.length == decodeHelper.next) {
            endTok = new PR_Token(s.substring(pos, s.length), PR_TAG);
            s = s.substring(0, pos);
          }
        }
        if (null == sourceChunks) { sourceChunks = []; }
        sourceChunks.push(new PR_Token(s, PR_PLAIN));
      } else if (PR_PLAIN == tok.style) {
        if (null == sourceChunks) { sourceChunks = []; }
        sourceChunks.push(tok);
      } else if (PR_TAG == tok.style) {
        // if it starts with </ then it must be the end tag.
        if ('<' == decodeHelper.decode(tok.token, 0) &&
            tok.token.length > decodeHelper.next &&
            '/' == decodeHelper.decode(tok.token, decodeHelper.next)) {
          endTok = tok;
        } else {
          tokensOut.push(tok);
        }
      } else {
        if (sourceChunks) {
          sourceChunks.push(tok);
        } else {
          // push remaining tag and attribute tokens from the opening tag
          tokensOut.push(tok);
        }
      }
      if (endTok) {
        flushSource();
        tokensOut.push(endTok);
        endScriptTag = null;
      }
    } else {
      tokensOut.push(tok);
    }
  }
  // don't lose the content of a source section that is never closed
  flushSource();
  return tokensOut;
}

/** splits the quotes from an attribute value.
  * ['"foo"'] -> ['"', 'foo', '"']
  * The quotes are given style PR_ATTRIB_VALUE and the content between them
  * keeps style PR_PLAIN.  The closing quote may be written as an entity.
  * @param tokens an Array of PR_Tokens
  * @return an Array of PR_Tokens, or tokens itself if there is no plain token
  *   or the first plain token does not start with a quote.
  * @private
  */
function PR_splitAttributeQuotes(tokens) {
  var firstPlain = null, lastPlain = null;
  for (var i = 0; i < tokens.length; ++i) {
    if (PR_PLAIN == tokens[i].style) {
      firstPlain = i;
      break;
    }
  }
  for (var i = tokens.length; --i >= 0;) {
    if (PR_PLAIN == tokens[i].style) {
      lastPlain = i;
      break;
    }
  }
  if (null == firstPlain) { return tokens; }

  var decodeHelper = new PR_DecodeHelper();
  var fs = tokens[firstPlain].token;
  var fc = decodeHelper.decode(fs, 0);
  if ('"' != fc && '\'' != fc) {
    return tokens;
  }
  var fpos = decodeHelper.next;

  // Look for a closing quote that matches the opening one and ends the last
  // plain token.  It is either an entity, which starts at the last ampersand,
  // or the last character.
  var ls = tokens[lastPlain].token;
  var lc = null;
  var lpos = ls.length;
  var candidates = [ls.lastIndexOf('&'), ls.length - 1];
  for (var c = 0; c < candidates.length && null == lc; ++c) {
    var cand = candidates[c];
    if (cand < 0) { continue; }
    // the opening quote cannot double as the closing quote
    if (lastPlain == firstPlain && cand < fpos) { continue; }
    if (fc == decodeHelper.decode(ls, cand) &&
        decodeHelper.next == ls.length) {
      lc = fc;
      lpos = cand;
    }
  }

  var tokensOut = [];
  for (var i = 0; i < firstPlain; ++i) {
    tokensOut.push(tokens[i]);
  }
  tokensOut.push(new PR_Token(fs.substring(0, fpos), PR_ATTRIB_VALUE));
  if (lastPlain == firstPlain) {
    tokensOut.push(new PR_Token(fs.substring(fpos, lpos), PR_PLAIN));
  } else {
    tokensOut.push(new PR_Token(fs.substring(fpos, fs.length), PR_PLAIN));
    for (var i = firstPlain + 1; i < lastPlain; ++i) {
      tokensOut.push(tokens[i]);
    }
    if (lc) {
      tokensOut.push(new PR_Token(ls.substring(0, lpos), PR_PLAIN));
    } else {
      tokensOut.push(tokens[lastPlain]);
    }
  }
  if (lc) {
    tokensOut.push(
        new PR_Token(ls.substring(lpos, ls.length), PR_ATTRIB_VALUE));
  }
  for (var i = lastPlain + 1; i < tokens.length; ++i) {
    tokensOut.push(tokens[i]);
  }
  return tokensOut;
}

/** identify attribute values that really contain source code and recursively
  * lex them.
  * The values of attributes whose name starts with "on" and of the style
  * attribute are treated as source.
  * @private
  */
function PR_splitSourceAttributes(tokens) {
  var tokensOut = [];

  // the chunks of the attribute value being collected, or null
  var sourceChunks = null;
  var inSource = false;
  var name = '';  // the name of the current attribute

  /** lexes any collected source chunks and appends the result to tokensOut. */
  function flushSource() {
    if (!sourceChunks) { return; }
    tokensOut.push(new PR_Token('<span class=embsrc>', null));
    var sourceTokens =
        PR_lexSource(PR_splitAttributeQuotes(sourceChunks));
    for (var si = 0, ns = sourceTokens.length; si < ns; ++si) {
      tokensOut.push(sourceTokens[si]);
    }
    tokensOut.push(new PR_Token('</span>', null));
    sourceChunks = null;
  }

  for (var ci = 0, nc = tokens.length; ci < nc; ++ci) {
    var tok = tokens[ci];
    var outList = tokensOut;
    if (PR_TAG == tok.style) {
      if (inSource) {
        // a tag token after the value ends the source section
        inSource = false;
        name = '';
        flushSource();
      } else if (name && tok.token.indexOf('=') >= 0) {
        var nameLower = name.toLowerCase();
        if (PR_startsWith(nameLower, 'on') || 'style' == nameLower) {
          inSource = true;
        }
      } else {
        name = '';
      }
    } else if (PR_ATTRIB_NAME == tok.style) {
      name += tok.token;
    } else if (PR_ATTRIB_VALUE == tok.style) {
      if (inSource) {
        if (null == sourceChunks) { sourceChunks = []; }
        outList = sourceChunks;
        tok = new PR_Token(tok.token, PR_PLAIN);
      }
    } else {
      if (sourceChunks) {
        outList = sourceChunks;
      }
    }
    outList.push(tok);
  }
  // don't lose a value that is not followed by a tag token
  flushSource();
  return tokensOut;
}

/** returns a list of PR_Token objects given chunks of source code.
  *
  * This code assumes that &lt; tokens are html escaped, but &quot; are not.
  * It will do a reasonable job with &lt;, but will not recognize an &quot;
  * as starting a string.
  *
  * This code treats ", ', and ` as string delimiters, and \ as a string escape.
  * It does not recognize double delimiter escapes, or perl's qq() style
  * strings.
  *
  * It recognizes C, C++, and shell style comments.
  *
  * @param chunks PR_Tokens with style in (null, PR_PLAIN)
  */
function PR_lexSource(chunks) {
  // positions of ends of tokens in order
  var tokensIn = PR_splitStringAndCommentTokens(chunks);

  // split entities out of so that we know to treat them as single units.
  tokensIn = PR_splitEntities(tokensIn);

  // split non comment|string tokens on whitespace and word boundaries
  var tokensOut = [];
  for (var i = 0; i < tokensIn.length; ++i) {
    var tok = tokensIn[i];
    var t = tok.token;
    var s = tok.style;

    if (PR_PLAIN == s) {
      PR_splitNonStringNonCommentToken(t, tokensOut);
      continue;
    }
    tokensOut.push(tok);
  }

  return tokensOut;
}

/** returns a list of PR_Token objects given a string of markup.
  *
  * This code assumes that &lt; tokens are html escaped, but &quot; are not.
  * It will do a reasonable job with &lt;, but will not recognize an &quot;
  * as starting a string.
  *
  * This code recognizes a number of constructs.
  * &lt;!-- ... --&gt; comment
  * &lt;!\w ... &gt;   declaration
  * &lt;\w ... &gt;    tag
  * &lt;/\w ... &gt;   tag
  * &lt;?...?&gt;      embedded source
  * &amp;[#\w]...;     entity
  *
  * It does not recognize %foo; entities.
  *
  * It will recurse into any &lt;style&gt;, &lt;script&gt;, and on* attributes
  * using PR_lexSource.
  */
function PR_lexMarkup(chunks) {
  // This function works as follows:
  // 1) Start by splitting the markup into text and tag chunks
  //    Input:  String s
  //    Output: List<PR_Token> where style in (PR_PLAIN, null)
  // 2) Then split the text chunks further into comments, declarations,
  //    tags, etc.
  //    After each split, consider whether the token is the start of an
  //    embedded source section, i.e. is an open <script> tag.  If it is,
  //    find the corresponding close token, and don't bother to lex in between.
  //    Input:  List<String>
  //    Output: List<PR_Token> with style in (PR_TAG, PR_PLAIN, PR_SOURCE, null)
  // 3) Finally go over each tag token and split out attribute names and values.
  //    Input:  List<PR_Token>
  //    Output: List<PR_Token> where style in
  //            (PR_TAG, PR_PLAIN, PR_SOURCE, NAME, VALUE, null)
  var tokensOut = PR_tokenizeMarkup(chunks);
  tokensOut = PR_splitTagAttributes(tokensOut);
  tokensOut = PR_splitSourceNodes(tokensOut);
  tokensOut = PR_splitSourceAttributes(tokensOut);
  return tokensOut;
}

/** classify the string as either source or markup and lex appropriately.
  * @param s code as html
  * @return an Array of PR_Tokens
  */
function PR_lexOne(s) {
  var chunks = PR_chunkify(s);
  // treat it as markup if the first non whitespace character is a < and the
  // last non-whitespace character is a >.  Since s is html, these normally
  // appear as the entities &lt; and &gt;.
  var isMarkup = false;
  for (var i = 0; i < chunks.length; ++i) {
    if (PR_PLAIN == chunks[i].style) {
      var first = PR_trim(chunks[i].token);
      if (PR_startsWith(first, '&lt;') || PR_startsWith(first, '<')) {
        for (var j = chunks.length; --j >= 0;) {
          if (PR_PLAIN == chunks[j].style) {
            var last = PR_trim(chunks[j].token);
            isMarkup = PR_endsWith(last, '&gt;') || PR_endsWith(last, '>');
            break;
          }
        }
      }
      break;
    }
  }
  return isMarkup ? PR_lexMarkup(chunks) : PR_lexSource(chunks);
}

/** pretty print a chunk of code.
  * If lexing throws, the input is returned unchanged.
  *
  * @param s code as html
  * @return code as html, but prettier
  */
function prettyPrintOne(s) {
  try {
    var tokens = PR_lexOne(s);
    var out = '';
    var lastStyle = null;
    for (var i = 0; i < tokens.length; i++) {
      var t = tokens[i];
      if (t.style != lastStyle) {
        if (lastStyle != null) {
          out += '</span>';
        }
        if (t.style != null) {
          out += '<span class=' + t.style + '>';
        }
        lastStyle = t.style;
      }
      var html = t.token;
      if (null != t.style) {
        // This interacts badly with the wiki which introduces paragraph tags
        // int pre blocks for some strange reason.
        // It's necessary for IE though which seems to lose the preformattedness
        // of <pre> tags when their innerHTML is assigned.
        // A space that follows a line break or another space is turned into
        // a non-breaking space so that runs of spaces are not collapsed.
        html = html
            .replace(/(\r\n?|\n| ) /g, '$1&nbsp;')
            .replace(/\r\n?|\n/g, '<br>');
      }
      out += html;
    }
    if (lastStyle != null) {
      out += '</span>';
    }
    return out;
  } catch (e) {
    //alert(e.stack);  // DISABLE in production
    return s;
  }
}

/** find all the < pre > and < code > tags in the DOM with class=prettyprint and
  * prettify them.
  */
function prettyPrint() {
  // fetch a list of nodes to rewrite
  var codeSegments = [
      document.getElementsByTagName('pre'),
      document.getElementsByTagName('code'),
      document.getElementsByTagName('xmp') ];
  var elements = [];
  for (var i = 0; i < codeSegments.length; ++i) {
    for (var j = 0; j < codeSegments[i].length; ++j) {
      elements.push(codeSegments[i][j]);
    }
  }
  codeSegments = null;

  // the loop is broken into a series of continuations to make sure that we
  // don't make the browser unresponsive when rewriting a large page.
  var k = 0;

  function doWork() {
    var endTime = new Date().getTime() + 250;
    for (; k < elements.length && new Date().getTime() < endTime; k++) {
      var cs = elements[k];
      if (cs.className && cs.className.indexOf('prettyprint') >= 0) {

        // make sure this is not nested in an already prettified element
        var nested = false;
        for (var p = cs.parentNode; p != null; p = p.parentNode) {
          // tagName is upper case in html documents, so normalize it before
          // comparing.  Nodes such as the document have no tagName.
          var tagName = p.tagName ? p.tagName.toLowerCase() : '';
          if ((tagName == 'pre' || tagName == 'code' || tagName == 'xmp') &&
              p.className && p.className.indexOf('prettyprint') >= 0) {
            nested = true;
            break;
          }
        }
        if (!nested) {
          // XMP tags contain unescaped entities so require special handling.
          var isRawContent = 'xmp' == cs.tagName.toLowerCase();

          // fetch the content as a snippet of properly escaped HTML
          var content = cs.innerHTML;
          if (isRawContent) {
            content = PR_textToHtml(content);
          }

          // do the pretty printing
          var newContent = prettyPrintOne(content);

          // push the prettified html back into the tag.
          if (!isRawContent) {
            // just replace the old html with the new
            cs.innerHTML = newContent;
          } else {
            // we need to change the tag to a <pre> since <xmp>s do not allow
            // embedded tags such as the span tags used to attach styles to
            // sections of source code.
            var pre = document.createElement('PRE');
            for (var i = 0; i < cs.attributes.length; ++i) {
              var a = cs.attributes[i];
              if (a.specified) {
                pre.setAttribute(a.name, a.value);
              }
            }
            pre.innerHTML = newContent;
            // remove the old
            cs.parentNode.replaceChild(pre, cs);
          }
        }
      }
    }
    if (k < elements.length) {
      // finish up in a continuation
      setTimeout(doWork, 250);
    }
  }

  doWork();
}