* * For a fairly comprehensive set of languages see the * README * file that came with this source. At a minimum, the lexer should work on a * number of languages including C and friends, Java, Python, Bash, SQL, HTML, * XML, CSS, Javascript, and Makefiles. It works passably on Ruby, PHP and Awk * and a subset of Perl, but, because of commenting conventions, doesn't work on * Smalltalk, Lisp-like, or CAML-like languages without an explicit lang class. *
* Usage:
} and {@code } tags in your source with * {@code class=prettyprint.} * You can also use the (html deprecated) {@code } tag, but the pretty * printer needs to do more substantial DOM manipulations to support that, so * some css styles may not be preserved. *
} tags in your source with * {@code class=prettyprint.} * You can also use the (html deprecated) {@code } tag, but the pretty * printer needs to do more substantial DOM manipulations to support that, so * some css styles may not be preserved. *
} or {@code } element to specify the * language, as in {@code }. Any class that * starts with "lang-" followed by a file extension, specifies the file type. * See the "lang-*.js" files in this directory for code that implements * per-language file handlers. * * Change log: * cbeust, 2006/08/22 * * Java annotations (start with "@") are now captured as literals ("lit") * * @requires console */ // JSLint declarations /*global console, document, navigator, setTimeout, window */ /** * Split {@code prettyPrint} into multiple timeouts so as not to interfere with * UI events. * If set to {@code false}, {@code prettyPrint()} is synchronous. */ window['PR_SHOULD_USE_CONTINUATION'] = true; /** the number of characters between tab columns */ window['PR_TAB_WIDTH'] = 8; /** Walks the DOM returning a properly escaped version of innerHTML. * @param {Node} node * @param {Array.} out output buffer that receives chunks of HTML. */ window['PR_normalizedHtml'] /** Contains functions for creating and registering new language handlers. * @type {Object} */ = window['PR'] /** Pretty print a chunk of code. * * @param {string} sourceCodeHtml code as html * @return {string} code as html, but prettier */ = window['prettyPrintOne'] /** Find all the {@code } and {@code } tags in the DOM with * {@code class=prettyprint} and prettify them. * @param {Function?} opt_whenDone if specified, called when the last entry * has been finished. */ = window['prettyPrint'] = void 0; /** browser detection. @extern @returns false if not IE, otherwise the major version. */ window['_pr_isIE6'] = function () { var ieVersion = navigator && navigator.userAgent && navigator.userAgent.match(/\bMSIE ([678])\./); ieVersion = ieVersion ? +ieVersion[1] : false; window['_pr_isIE6'] = function () { return ieVersion; }; return ieVersion; }; (function () { // Keyword lists for various languages. var FLOW_CONTROL_KEYWORDS = "break continue do else for if return while "; var C_KEYWORDS = FLOW_CONTROL_KEYWORDS + "auto case char const default " + "double enum extern float goto int long register short signed sizeof " + "static struct switch typedef union unsigned void volatile "; var COMMON_KEYWORDS = C_KEYWORDS + "catch class delete false import " + "new operator private protected public this throw true try typeof "; var CPP_KEYWORDS = COMMON_KEYWORDS + "alignof align_union asm axiom bool " + "concept concept_map const_cast constexpr decltype " + "dynamic_cast explicit export friend inline late_check " + "mutable namespace nullptr reinterpret_cast static_assert static_cast " + "template typeid typename using virtual wchar_t where "; var JAVA_KEYWORDS = COMMON_KEYWORDS + "abstract boolean byte extends final finally implements import " + "instanceof null native package strictfp super synchronized throws " + "transient "; var CSHARP_KEYWORDS = JAVA_KEYWORDS + "as base by checked decimal delegate descending dynamic event " + "fixed foreach from group implicit in interface internal into is lock " + "object out override orderby params partial readonly ref sbyte sealed " + "stackalloc string select uint ulong unchecked unsafe ushort var "; var COFFEE_KEYWORDS = "all and by catch class else extends false finally " + "for if in is isnt loop new no not null of off on or return super then " + "true try unless until when while yes "; var JSCRIPT_KEYWORDS = COMMON_KEYWORDS + "debugger eval export function get null set undefined var with " + "Infinity NaN "; var PERL_KEYWORDS = "caller delete die do dump elsif eval exit foreach for " + "goto if import last local my next no our print package redo require " + "sub undef unless until use wantarray while BEGIN END "; var PYTHON_KEYWORDS = FLOW_CONTROL_KEYWORDS + "and as assert class def del " + "elif except exec finally from global import in is lambda " + "nonlocal not or pass print raise try with yield " + "False True None "; var RUBY_KEYWORDS = FLOW_CONTROL_KEYWORDS + "alias and begin case class def" + " defined elsif end ensure false in module next nil not or redo rescue " + "retry self super then true undef unless until when yield BEGIN END "; var SH_KEYWORDS = FLOW_CONTROL_KEYWORDS + "case done elif esac eval fi " + "function in local set then until "; var ALL_KEYWORDS = ( CPP_KEYWORDS + CSHARP_KEYWORDS + JSCRIPT_KEYWORDS + PERL_KEYWORDS + PYTHON_KEYWORDS + RUBY_KEYWORDS + SH_KEYWORDS); // token style names. correspond to css classes /** token style for a string literal */ var PR_STRING = 'str'; /** token style for a keyword */ var PR_KEYWORD = 'kwd'; /** token style for a comment */ var PR_COMMENT = 'com'; /** token style for a type */ var PR_TYPE = 'typ'; /** token style for a literal value. e.g. 1, null, true. */ var PR_LITERAL = 'lit'; /** token style for a punctuation string. */ var PR_PUNCTUATION = 'pun'; /** token style for a punctuation string. */ var PR_PLAIN = 'pln'; /** token style for an sgml tag. */ var PR_TAG = 'tag'; /** token style for a markup declaration such as a DOCTYPE. */ var PR_DECLARATION = 'dec'; /** token style for embedded source. */ var PR_SOURCE = 'src'; /** token style for an sgml attribute name. */ var PR_ATTRIB_NAME = 'atn'; /** token style for an sgml attribute value. */ var PR_ATTRIB_VALUE = 'atv'; /** * A class that indicates a section of markup that is not code, e.g. to allow * embedding of line numbers within code listings. */ var PR_NOCODE = 'nocode'; /** A set of tokens that can precede a regular expression literal in * javascript. * http://www.mozilla.org/js/language/js20/rationale/syntax.html has the full * list, but I've removed ones that might be problematic when seen in * languages that don't support regular expression literals. * * Specifically, I've removed any keywords that can't precede a regexp * literal in a syntactically legal javascript program, and I've removed the * "in" keyword since it's not a keyword in many languages, and might be used * as a count of inches. * * The link a above does not accurately describe EcmaScript rules since * it fails to distinguish between (a=++/b/i) and (a++/b/i) but it works * very well in practice. * * @private */ var REGEXP_PRECEDER_PATTERN = function () { var preceders = [ "!", "!=", "!==", "#", "%", "%=", "&", "&&", "&&=", "&=", "(", "*", "*=", /* "+", */ "+=", ",", /* "-", */ "-=", "->", /*".", "..", "...", handled below */ "/", "/=", ":", "::", ";", "<", "<<", "<<=", "<=", "=", "==", "===", ">", ">=", ">>", ">>=", ">>>", ">>>=", "?", "@", "[", "^", "^=", "^^", "^^=", "{", "|", "|=", "||", "||=", "~" /* handles =~ and !~ */, "break", "case", "continue", "delete", "do", "else", "finally", "instanceof", "return", "throw", "try", "typeof" ]; var pattern = '(?:^^|[+-]'; for (var i = 0; i < preceders.length; ++i) { pattern += '|' + preceders[i].replace(/([^=<>:&a-z])/g, '\\$1'); } pattern += ')\\s*'; // matches at end, and matches empty string return pattern; // CAVEAT: this does not properly handle the case where a regular // expression immediately follows another since a regular expression may // have flags for case-sensitivity and the like. Having regexp tokens // adjacent is not valid in any language I'm aware of, so I'm punting. // TODO: maybe style special characters inside a regexp as punctuation. }(); // Define regexps here so that the interpreter doesn't have to create an // object each time the function containing them is called. // The language spec requires a new object created even if you don't access // the $1 members. var pr_amp = /&/g; var pr_lt = //g; var pr_quot = /\"/g; /** like textToHtml but escapes double quotes to be attribute safe. */ function attribToHtml(str) { return str.replace(pr_amp, '&') .replace(pr_lt, '<') .replace(pr_gt, '>') .replace(pr_quot, '"'); } /** escapest html special characters to html. */ function textToHtml(str) { return str.replace(pr_amp, '&') .replace(pr_lt, '<') .replace(pr_gt, '>'); } var pr_ltEnt = /</g; var pr_gtEnt = />/g; var pr_aposEnt = /'/g; var pr_quotEnt = /"/g; var pr_ampEnt = /&/g; var pr_nbspEnt = / /g; /** unescapes html to plain text. */ function htmlToText(html) { var pos = html.indexOf('&'); if (pos < 0) { return html; } // Handle numeric entities specially. We can't use functional substitution // since that doesn't work in older versions of Safari. // These should be rare since most browsers convert them to normal chars. for (--pos; (pos = html.indexOf('', pos + 1)) >= 0;) { var end = html.indexOf(';', pos); if (end >= 0) { var num = html.substring(pos + 3, end); var radix = 10; if (num && num.charAt(0) === 'x') { num = num.substring(1); radix = 16; } var codePoint = parseInt(num, radix); if (!isNaN(codePoint)) { html = (html.substring(0, pos) + String.fromCharCode(codePoint) + html.substring(end + 1)); } } } return html.replace(pr_ltEnt, '<') .replace(pr_gtEnt, '>') .replace(pr_aposEnt, "'") .replace(pr_quotEnt, '"') .replace(pr_nbspEnt, ' ') .replace(pr_ampEnt, '&'); } /** is the given node's innerHTML normally unescaped? */ function isRawContent(node) { return 'XMP' === node.tagName; } var newlineRe = /[\r\n]/g; /** * Are newlines and adjacent spaces significant in the given node's innerHTML? */ function isPreformatted(node, content) { // PRE means preformatted, and is a very common case, so don't create // unnecessary computed style objects. if ('PRE' === node.tagName) { return true; } if (!newlineRe.test(content)) { return true; } // Don't care var whitespace = ''; // For disconnected nodes, IE has no currentStyle. if (node.currentStyle) { whitespace = node.currentStyle.whiteSpace; } else if (window.getComputedStyle) { // Firefox makes a best guess if node is disconnected whereas Safari // returns the empty string. whitespace = window.getComputedStyle(node, null).whiteSpace; } return !whitespace || whitespace === 'pre'; } function normalizedHtml(node, out, opt_sortAttrs) { switch (node.nodeType) { case 1: // an element var name = node.tagName.toLowerCase(); out.push('<', name); var attrs = node.attributes; var n = attrs.length; if (n) { if (opt_sortAttrs) { var sortedAttrs = []; for (var i = n; --i >= 0;) { sortedAttrs[i] = attrs[i]; } sortedAttrs.sort(function (a, b) { return (a.name < b.name) ? -1 : a.name === b.name ? 0 : 1; }); attrs = sortedAttrs; } for (var i = 0; i < n; ++i) { var attr = attrs[i]; if (!attr.specified) { continue; } out.push(' ', attr.name.toLowerCase(), '="', attribToHtml(attr.value), '"'); } } out.push('>'); for (var child = node.firstChild; child; child = child.nextSibling) { normalizedHtml(child, out, opt_sortAttrs); } if (node.firstChild || !/^(?:br|link|img)$/.test(name)) { out.push('<\/', name, '>'); } break; case 3: case 4: // text out.push(textToHtml(node.nodeValue)); break; } } /** * Given a group of {@link RegExp}s, returns a {@code RegExp} that globally * matches the union o the sets o strings matched d by the input RegExp. * Since it matches globally, if the input strings have a start-of-input * anchor (/^.../), it is ignored for the purposes of unioning. * @param {Array.} regexs non multiline, non-global regexs. * @return {RegExp} a global regex. */ function combinePrefixPatterns(regexs) { var capturedGroupIndex = 0; var needToFoldCase = false; var ignoreCase = false; for (var i = 0, n = regexs.length; i < n; ++i) { var regex = regexs[i]; if (regex.ignoreCase) { ignoreCase = true; } else if (/[a-z]/i.test(regex.source.replace( /\\u[0-9a-f]{4}|\\x[0-9a-f]{2}|\\[^ux]/gi, ''))) { needToFoldCase = true; ignoreCase = false; break; } } function decodeEscape(charsetPart) { if (charsetPart.charAt(0) !== '\\') { return charsetPart.charCodeAt(0); } switch (charsetPart.charAt(1)) { case 'b': return 8; case 't': return 9; case 'n': return 0xa; case 'v': return 0xb; case 'f': return 0xc; case 'r': return 0xd; case 'u': case 'x': return parseInt(charsetPart.substring(2), 16) || charsetPart.charCodeAt(1); case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': return parseInt(charsetPart.substring(1), 8); default: return charsetPart.charCodeAt(1); } } function encodeEscape(charCode) { if (charCode < 0x20) { return (charCode < 0x10 ? '\\x0' : '\\x') + charCode.toString(16); } var ch = String.fromCharCode(charCode); if (ch === '\\' || ch === '-' || ch === '[' || ch === ']') { ch = '\\' + ch; } return ch; } function caseFoldCharset(charSet) { var charsetParts = charSet.substring(1, charSet.length - 1).match( new RegExp( '\\\\u[0-9A-Fa-f]{4}' + '|\\\\x[0-9A-Fa-f]{2}' + '|\\\\[0-3][0-7]{0,2}' + '|\\\\[0-7]{1,2}' + '|\\\\[\\s\\S]' + '|-' + '|[^-\\\\]', 'g')); var groups = []; var ranges = []; var inverse = charsetParts[0] === '^'; for (var i = inverse ? 1 : 0, n = charsetParts.length; i < n; ++i) { var p = charsetParts[i]; switch (p) { case '\\B': case '\\b': case '\\D': case '\\d': case '\\S': case '\\s': case '\\W': case '\\w': groups.push(p); continue; } var start = decodeEscape(p); var end; if (i + 2 < n && '-' === charsetParts[i + 1]) { end = decodeEscape(charsetParts[i + 2]); i += 2; } else { end = start; } ranges.push([start, end]); // If the range might intersect letters, then expand it. if (!(end < 65 || start > 122)) { if (!(end < 65 || start > 90)) { ranges.push([Math.max(65, start) | 32, Math.min(end, 90) | 32]); } if (!(end < 97 || start > 122)) { ranges.push([Math.max(97, start) & ~32, Math.min(end, 122) & ~32]); } } } // [[1, 10], [3, 4], [8, 12], [14, 14], [16, 16], [17, 17]] // -> [[1, 12], [14, 14], [16, 17]] ranges.sort(function (a, b) { return (a[0] - b[0]) || (b[1] - a[1]); }); var consolidatedRanges = []; var lastRange = [NaN, NaN]; for (var i = 0; i < ranges.length; ++i) { var range = ranges[i]; if (range[0] <= lastRange[1] + 1) { lastRange[1] = Math.max(lastRange[1], range[1]); } else { consolidatedRanges.push(lastRange = range); } } var out = ['[']; if (inverse) { out.push('^'); } out.push.apply(out, groups); for (var i = 0; i < consolidatedRanges.length; ++i) { var range = consolidatedRanges[i]; out.push(encodeEscape(range[0])); if (range[1] > range[0]) { if (range[1] + 1 > range[0]) { out.push('-'); } out.push(encodeEscape(range[1])); } } out.push(']'); return out.join(''); } function allowAnywhereFoldCaseAndRenumberGroups(regex) { // Split into character sets, escape sequences, punctuation strings // like ('(', '(?:', ')', '^'), and runs of characters that do not // include any of the above. var parts = regex.source.match( new RegExp( '(?:' + '\\[(?:[^\\x5C\\x5D]|\\\\[\\s\\S])*\\]' // a character set + '|\\\\u[A-Fa-f0-9]{4}' // a unicode escape + '|\\\\x[A-Fa-f0-9]{2}' // a hex escape + '|\\\\[0-9]+' // a back-reference or octal escape + '|\\\\[^ux0-9]' // other escape sequence + '|\\(\\?[:!=]' // start of a non-capturing group + '|[\\(\\)\\^]' // start/emd of a group, or line start + '|[^\\x5B\\x5C\\(\\)\\^]+' // run of other characters + ')', 'g')); var n = parts.length; // Maps captured group numbers to the number they will occupy in // the output or to -1 if that has not been determined, or to // undefined if they need not be capturing in the output. var capturedGroups = []; // Walk over and identify back references to build the capturedGroups // mapping. for (var i = 0, groupIndex = 0; i < n; ++i) { var p = parts[i]; if (p === '(') { // groups are 1-indexed, so max group index is count of '(' ++groupIndex; } else if ('\\' === p.charAt(0)) { var decimalValue = +p.substring(1); if (decimalValue && decimalValue <= groupIndex) { capturedGroups[decimalValue] = -1; } } } // Renumber groups and reduce capturing groups to non-capturing groups // where possible. for (var i = 1; i < capturedGroups.length; ++i) { if (-1 === capturedGroups[i]) { capturedGroups[i] = ++capturedGroupIndex; } } for (var i = 0, groupIndex = 0; i < n; ++i) { var p = parts[i]; if (p === '(') { ++groupIndex; if (capturedGroups[groupIndex] === undefined) { parts[i] = '(?:'; } } else if ('\\' === p.charAt(0)) { var decimalValue = +p.substring(1); if (decimalValue && decimalValue <= groupIndex) { parts[i] = '\\' + capturedGroups[groupIndex]; } } } // Remove any prefix anchors so that the output will match anywhere. // ^^ really does mean an anchored match though. for (var i = 0, groupIndex = 0; i < n; ++i) { if ('^' === parts[i] && '^' !== parts[i + 1]) { parts[i] = ''; } } // Expand letters to groupts to handle mixing of case-sensitive and // case-insensitive patterns if necessary. if (regex.ignoreCase && needToFoldCase) { for (var i = 0; i < n; ++i) { var p = parts[i]; var ch0 = p.charAt(0); if (p.length >= 2 && ch0 === '[') { parts[i] = caseFoldCharset(p); } else if (ch0 !== '\\') { // TODO: handle letters in numeric escapes. parts[i] = p.replace( /[a-zA-Z]/g, function (ch) { var cc = ch.charCodeAt(0); return '[' + String.fromCharCode(cc & ~32, cc | 32) + ']'; }); } } } return parts.join(''); } var rewritten = []; for (var i = 0, n = regexs.length; i < n; ++i) { var regex = regexs[i]; if (regex.global || regex.multiline) { throw new Error('' + regex); } rewritten.push( '(?:' + allowAnywhereFoldCaseAndRenumberGroups(regex) + ')'); } return new RegExp(rewritten.join('|'), ignoreCase ? 'gi' : 'g'); } var PR_innerHtmlWorks = null; function getInnerHtml(node) { // inner html is hopelessly broken in Safari 2.0.4 when the content is // an html description of well formed XML and the containing tag is a PRE // tag, so we detect that case and emulate innerHTML. if (null === PR_innerHtmlWorks) { var testNode = document.createElement('PRE'); testNode.appendChild( document.createTextNode('\n')); PR_innerHtmlWorks = !/)[\r\n]+/g, '$1') .replace(/(?:[\r\n]+[ \t]*)+/g, ' '); } return content; } var out = []; for (var child = node.firstChild; child; child = child.nextSibling) { normalizedHtml(child, out); } return out.join(''); } /** returns a function that expand tabs to spaces. This function can be fed * successive chunks of text, and will maintain its own internal state to * keep track of how tabs are expanded. * @return {function (string) : string} a function that takes * plain text and return the text with tabs expanded. * @private */ function makeTabExpander(tabWidth) { var SPACES = ' '; var charInLine = 0; return function (plainText) { // walk over each character looking for tabs and newlines. // On tabs, expand them. On newlines, reset charInLine. // Otherwise increment charInLine var out = null; var pos = 0; for (var i = 0, n = plainText.length; i < n; ++i) { var ch = plainText.charAt(i); switch (ch) { case '\t': if (!out) { out = []; } out.push(plainText.substring(pos, i)); // calculate how much space we need in front of this part // nSpaces is the amount of padding -- the number of spaces needed // to move us to the next column, where columns occur at factors of // tabWidth. var nSpaces = tabWidth - (charInLine % tabWidth); charInLine += nSpaces; for (; nSpaces >= 0; nSpaces -= SPACES.length) { out.push(SPACES.substring(0, nSpaces)); } pos = i + 1; break; case '\n': charInLine = 0; break; default: ++charInLine; } } if (!out) { return plainText; } out.push(plainText.substring(pos)); return out.join(''); }; } var pr_chunkPattern = new RegExp( '[^<]+' // A run of characters other than '<' + '|<\!--[\\s\\S]*?--\>' // an HTML comment + '|' // a CDATA section // a probable tag that should not be highlighted + '|<\/?[a-zA-Z](?:[^>\"\']|\'[^\']*\'|\"[^\"]*\")*>' + '|<', // A '<' that does not begin a larger chunk 'g'); var pr_commentPrefix = /^<\!--/; var pr_cdataPrefix = /^) into their textual equivalent. * * @param {string} s html where whitespace is considered significant. * @return {Object} source code and extracted tags. * @private */ function extractTags(s) { // since the pattern has the 'g' modifier and defines no capturing groups, // this will return a list of all chunks which we then classify and wrap as // PR_Tokens var matches = s.match(pr_chunkPattern); var sourceBuf = []; var sourceBufLen = 0; var extractedTags = []; if (matches) { for (var i = 0, n = matches.length; i < n; ++i) { var match = matches[i]; if (match.length > 1 && match.charAt(0) === '<') { if (pr_commentPrefix.test(match)) { continue; } if (pr_cdataPrefix.test(match)) { // strip CDATA prefix and suffix. Don't unescape since it's CDATA sourceBuf.push(match.substring(9, match.length - 3)); sourceBufLen += match.length - 12; } else if (pr_brPrefix.test(match)) { // tags are lexically significant so convert them to text. // This is undone later. sourceBuf.push('\n'); ++sourceBufLen; } else { if (match.indexOf(PR_NOCODE) >= 0 && isNoCodeTag(match)) { // A will start a section that should be // ignored. Continue walking the list until we see a matching end // tag. var name = match.match(pr_tagNameRe)[2]; var depth = 1; var j; end_tag_loop: for (j = i + 1; j < n; ++j) { var name2 = matches[j].match(pr_tagNameRe); if (name2 && name2[2] === name) { if (name2[1] === '/') { if (--depth === 0) { break end_tag_loop; } } else { ++depth; } } } if (j < n) { extractedTags.push( sourceBufLen, matches.slice(i, j + 1).join('')); i = j; } else { // Ignore unclosed sections. extractedTags.push(sourceBufLen, match); } } else { extractedTags.push(sourceBufLen, match); } } } else { var literalText = htmlToText(match); sourceBuf.push(literalText); sourceBufLen += literalText.length; } } } return { source: sourceBuf.join(''), tags: extractedTags }; } /** True if the given tag contains a class attribute with the nocode class. */ function isNoCodeTag(tag) { return !!tag // First canonicalize the representation of attributes .replace(/\s(\w+)\s*=\s*(?:\"([^\"]*)\"|'([^\']*)'|(\S+))/g, ' $1="$2$3$4"') // Then look for the attribute we want. .match(/[cC][lL][aA][sS][sS]=\"[^\"]*\bnocode\b/); } /** * Apply the given language handler to sourceCode and add the resulting * decorations to out. * @param {number} basePos the index of sourceCode within the chunk of source * whose decorations are already present on out. */ function appendDecorations(basePos, sourceCode, langHandler, out) { if (!sourceCode) { return; } var job = { source: sourceCode, basePos: basePos }; langHandler(job); out.push.apply(out, job.decorations); } /** Given triples of [style, pattern, context] returns a lexing function, * The lexing function interprets the patterns to find token boundaries and * returns a decoration list of the form * [index_0, style_0, index_1, style_1, ..., index_n, style_n] * where index_n is an index into the sourceCode, and style_n is a style * constant like PR_PLAIN. index_n-1 <= index_n, and style_n-1 applies to * all characters in sourceCode[index_n-1:index_n]. * * The stylePatterns is a list whose elements have the form * [style : string, pattern : RegExp, DEPRECATED, shortcut : string]. * * Style is a style constant like PR_PLAIN, or can be a string of the * form 'lang-FOO', where FOO is a language extension describing the * language of the portion of the token in $1 after pattern executes. * E.g., if style is 'lang-lisp', and group 1 contains the text * '(hello (world))', then that portion of the token will be passed to the * registered lisp handler for formatting. * The text before and after group 1 will be restyled using this decorator * so decorators should take care that this doesn't result in infinite * recursion. For example, the HTML lexer rule for SCRIPT elements looks * something like ['lang-js', /<[s]cript>(.+?)<\/script>/]. This may match * ' 登录后可以享受更多权益 您还没有登录,登录后您可以: 收藏Android系统代码 收藏喜欢的文章 多个平台共享账号 去登录 首次使用?从这里 注册
} element to specify the * language, as in {@code }. Any class that * starts with "lang-" followed by a file extension, specifies the file type. * See the "lang-*.js" files in this directory for code that implements * per-language file handlers. * * Change log: * cbeust, 2006/08/22 * * Java annotations (start with "@") are now captured as literals ("lit") * * @requires console */ // JSLint declarations /*global console, document, navigator, setTimeout, window */ /** * Split {@code prettyPrint} into multiple timeouts so as not to interfere with * UI events. * If set to {@code false}, {@code prettyPrint()} is synchronous. */ window['PR_SHOULD_USE_CONTINUATION'] = true; /** the number of characters between tab columns */ window['PR_TAB_WIDTH'] = 8; /** Walks the DOM returning a properly escaped version of innerHTML. * @param {Node} node * @param {Array.} out output buffer that receives chunks of HTML. */ window['PR_normalizedHtml'] /** Contains functions for creating and registering new language handlers. * @type {Object} */ = window['PR'] /** Pretty print a chunk of code. * * @param {string} sourceCodeHtml code as html * @return {string} code as html, but prettier */ = window['prettyPrintOne'] /** Find all the {@code } and {@code } tags in the DOM with * {@code class=prettyprint} and prettify them. * @param {Function?} opt_whenDone if specified, called when the last entry * has been finished. */ = window['prettyPrint'] = void 0; /** browser detection. @extern @returns false if not IE, otherwise the major version. */ window['_pr_isIE6'] = function () { var ieVersion = navigator && navigator.userAgent && navigator.userAgent.match(/\bMSIE ([678])\./); ieVersion = ieVersion ? +ieVersion[1] : false; window['_pr_isIE6'] = function () { return ieVersion; }; return ieVersion; }; (function () { // Keyword lists for various languages. var FLOW_CONTROL_KEYWORDS = "break continue do else for if return while "; var C_KEYWORDS = FLOW_CONTROL_KEYWORDS + "auto case char const default " + "double enum extern float goto int long register short signed sizeof " + "static struct switch typedef union unsigned void volatile "; var COMMON_KEYWORDS = C_KEYWORDS + "catch class delete false import " + "new operator private protected public this throw true try typeof "; var CPP_KEYWORDS = COMMON_KEYWORDS + "alignof align_union asm axiom bool " + "concept concept_map const_cast constexpr decltype " + "dynamic_cast explicit export friend inline late_check " + "mutable namespace nullptr reinterpret_cast static_assert static_cast " + "template typeid typename using virtual wchar_t where "; var JAVA_KEYWORDS = COMMON_KEYWORDS + "abstract boolean byte extends final finally implements import " + "instanceof null native package strictfp super synchronized throws " + "transient "; var CSHARP_KEYWORDS = JAVA_KEYWORDS + "as base by checked decimal delegate descending dynamic event " + "fixed foreach from group implicit in interface internal into is lock " + "object out override orderby params partial readonly ref sbyte sealed " + "stackalloc string select uint ulong unchecked unsafe ushort var "; var COFFEE_KEYWORDS = "all and by catch class else extends false finally " + "for if in is isnt loop new no not null of off on or return super then " + "true try unless until when while yes "; var JSCRIPT_KEYWORDS = COMMON_KEYWORDS + "debugger eval export function get null set undefined var with " + "Infinity NaN "; var PERL_KEYWORDS = "caller delete die do dump elsif eval exit foreach for " + "goto if import last local my next no our print package redo require " + "sub undef unless until use wantarray while BEGIN END "; var PYTHON_KEYWORDS = FLOW_CONTROL_KEYWORDS + "and as assert class def del " + "elif except exec finally from global import in is lambda " + "nonlocal not or pass print raise try with yield " + "False True None "; var RUBY_KEYWORDS = FLOW_CONTROL_KEYWORDS + "alias and begin case class def" + " defined elsif end ensure false in module next nil not or redo rescue " + "retry self super then true undef unless until when yield BEGIN END "; var SH_KEYWORDS = FLOW_CONTROL_KEYWORDS + "case done elif esac eval fi " + "function in local set then until "; var ALL_KEYWORDS = ( CPP_KEYWORDS + CSHARP_KEYWORDS + JSCRIPT_KEYWORDS + PERL_KEYWORDS + PYTHON_KEYWORDS + RUBY_KEYWORDS + SH_KEYWORDS); // token style names. correspond to css classes /** token style for a string literal */ var PR_STRING = 'str'; /** token style for a keyword */ var PR_KEYWORD = 'kwd'; /** token style for a comment */ var PR_COMMENT = 'com'; /** token style for a type */ var PR_TYPE = 'typ'; /** token style for a literal value. e.g. 1, null, true. */ var PR_LITERAL = 'lit'; /** token style for a punctuation string. */ var PR_PUNCTUATION = 'pun'; /** token style for a punctuation string. */ var PR_PLAIN = 'pln'; /** token style for an sgml tag. */ var PR_TAG = 'tag'; /** token style for a markup declaration such as a DOCTYPE. */ var PR_DECLARATION = 'dec'; /** token style for embedded source. */ var PR_SOURCE = 'src'; /** token style for an sgml attribute name. */ var PR_ATTRIB_NAME = 'atn'; /** token style for an sgml attribute value. */ var PR_ATTRIB_VALUE = 'atv'; /** * A class that indicates a section of markup that is not code, e.g. to allow * embedding of line numbers within code listings. */ var PR_NOCODE = 'nocode'; /** A set of tokens that can precede a regular expression literal in * javascript. * http://www.mozilla.org/js/language/js20/rationale/syntax.html has the full * list, but I've removed ones that might be problematic when seen in * languages that don't support regular expression literals. * * Specifically, I've removed any keywords that can't precede a regexp * literal in a syntactically legal javascript program, and I've removed the * "in" keyword since it's not a keyword in many languages, and might be used * as a count of inches. * * The link a above does not accurately describe EcmaScript rules since * it fails to distinguish between (a=++/b/i) and (a++/b/i) but it works * very well in practice. * * @private */ var REGEXP_PRECEDER_PATTERN = function () { var preceders = [ "!", "!=", "!==", "#", "%", "%=", "&", "&&", "&&=", "&=", "(", "*", "*=", /* "+", */ "+=", ",", /* "-", */ "-=", "->", /*".", "..", "...", handled below */ "/", "/=", ":", "::", ";", "<", "<<", "<<=", "<=", "=", "==", "===", ">", ">=", ">>", ">>=", ">>>", ">>>=", "?", "@", "[", "^", "^=", "^^", "^^=", "{", "|", "|=", "||", "||=", "~" /* handles =~ and !~ */, "break", "case", "continue", "delete", "do", "else", "finally", "instanceof", "return", "throw", "try", "typeof" ]; var pattern = '(?:^^|[+-]'; for (var i = 0; i < preceders.length; ++i) { pattern += '|' + preceders[i].replace(/([^=<>:&a-z])/g, '\\$1'); } pattern += ')\\s*'; // matches at end, and matches empty string return pattern; // CAVEAT: this does not properly handle the case where a regular // expression immediately follows another since a regular expression may // have flags for case-sensitivity and the like. Having regexp tokens // adjacent is not valid in any language I'm aware of, so I'm punting. // TODO: maybe style special characters inside a regexp as punctuation. }(); // Define regexps here so that the interpreter doesn't have to create an // object each time the function containing them is called. // The language spec requires a new object created even if you don't access // the $1 members. var pr_amp = /&/g; var pr_lt = //g; var pr_quot = /\"/g; /** like textToHtml but escapes double quotes to be attribute safe. */ function attribToHtml(str) { return str.replace(pr_amp, '&') .replace(pr_lt, '<') .replace(pr_gt, '>') .replace(pr_quot, '"'); } /** escapest html special characters to html. */ function textToHtml(str) { return str.replace(pr_amp, '&') .replace(pr_lt, '<') .replace(pr_gt, '>'); } var pr_ltEnt = /</g; var pr_gtEnt = />/g; var pr_aposEnt = /'/g; var pr_quotEnt = /"/g; var pr_ampEnt = /&/g; var pr_nbspEnt = / /g; /** unescapes html to plain text. */ function htmlToText(html) { var pos = html.indexOf('&'); if (pos < 0) { return html; } // Handle numeric entities specially. We can't use functional substitution // since that doesn't work in older versions of Safari. // These should be rare since most browsers convert them to normal chars. for (--pos; (pos = html.indexOf('', pos + 1)) >= 0;) { var end = html.indexOf(';', pos); if (end >= 0) { var num = html.substring(pos + 3, end); var radix = 10; if (num && num.charAt(0) === 'x') { num = num.substring(1); radix = 16; } var codePoint = parseInt(num, radix); if (!isNaN(codePoint)) { html = (html.substring(0, pos) + String.fromCharCode(codePoint) + html.substring(end + 1)); } } } return html.replace(pr_ltEnt, '<') .replace(pr_gtEnt, '>') .replace(pr_aposEnt, "'") .replace(pr_quotEnt, '"') .replace(pr_nbspEnt, ' ') .replace(pr_ampEnt, '&'); } /** is the given node's innerHTML normally unescaped? */ function isRawContent(node) { return 'XMP' === node.tagName; } var newlineRe = /[\r\n]/g; /** * Are newlines and adjacent spaces significant in the given node's innerHTML? */ function isPreformatted(node, content) { // PRE means preformatted, and is a very common case, so don't create // unnecessary computed style objects. if ('PRE' === node.tagName) { return true; } if (!newlineRe.test(content)) { return true; } // Don't care var whitespace = ''; // For disconnected nodes, IE has no currentStyle. if (node.currentStyle) { whitespace = node.currentStyle.whiteSpace; } else if (window.getComputedStyle) { // Firefox makes a best guess if node is disconnected whereas Safari // returns the empty string. whitespace = window.getComputedStyle(node, null).whiteSpace; } return !whitespace || whitespace === 'pre'; } function normalizedHtml(node, out, opt_sortAttrs) { switch (node.nodeType) { case 1: // an element var name = node.tagName.toLowerCase(); out.push('<', name); var attrs = node.attributes; var n = attrs.length; if (n) { if (opt_sortAttrs) { var sortedAttrs = []; for (var i = n; --i >= 0;) { sortedAttrs[i] = attrs[i]; } sortedAttrs.sort(function (a, b) { return (a.name < b.name) ? -1 : a.name === b.name ? 0 : 1; }); attrs = sortedAttrs; } for (var i = 0; i < n; ++i) { var attr = attrs[i]; if (!attr.specified) { continue; } out.push(' ', attr.name.toLowerCase(), '="', attribToHtml(attr.value), '"'); } } out.push('>'); for (var child = node.firstChild; child; child = child.nextSibling) { normalizedHtml(child, out, opt_sortAttrs); } if (node.firstChild || !/^(?:br|link|img)$/.test(name)) { out.push('<\/', name, '>'); } break; case 3: case 4: // text out.push(textToHtml(node.nodeValue)); break; } } /** * Given a group of {@link RegExp}s, returns a {@code RegExp} that globally * matches the union o the sets o strings matched d by the input RegExp. * Since it matches globally, if the input strings have a start-of-input * anchor (/^.../), it is ignored for the purposes of unioning. * @param {Array.} regexs non multiline, non-global regexs. * @return {RegExp} a global regex. */ function combinePrefixPatterns(regexs) { var capturedGroupIndex = 0; var needToFoldCase = false; var ignoreCase = false; for (var i = 0, n = regexs.length; i < n; ++i) { var regex = regexs[i]; if (regex.ignoreCase) { ignoreCase = true; } else if (/[a-z]/i.test(regex.source.replace( /\\u[0-9a-f]{4}|\\x[0-9a-f]{2}|\\[^ux]/gi, ''))) { needToFoldCase = true; ignoreCase = false; break; } } function decodeEscape(charsetPart) { if (charsetPart.charAt(0) !== '\\') { return charsetPart.charCodeAt(0); } switch (charsetPart.charAt(1)) { case 'b': return 8; case 't': return 9; case 'n': return 0xa; case 'v': return 0xb; case 'f': return 0xc; case 'r': return 0xd; case 'u': case 'x': return parseInt(charsetPart.substring(2), 16) || charsetPart.charCodeAt(1); case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': return parseInt(charsetPart.substring(1), 8); default: return charsetPart.charCodeAt(1); } } function encodeEscape(charCode) { if (charCode < 0x20) { return (charCode < 0x10 ? '\\x0' : '\\x') + charCode.toString(16); } var ch = String.fromCharCode(charCode); if (ch === '\\' || ch === '-' || ch === '[' || ch === ']') { ch = '\\' + ch; } return ch; } function caseFoldCharset(charSet) { var charsetParts = charSet.substring(1, charSet.length - 1).match( new RegExp( '\\\\u[0-9A-Fa-f]{4}' + '|\\\\x[0-9A-Fa-f]{2}' + '|\\\\[0-3][0-7]{0,2}' + '|\\\\[0-7]{1,2}' + '|\\\\[\\s\\S]' + '|-' + '|[^-\\\\]', 'g')); var groups = []; var ranges = []; var inverse = charsetParts[0] === '^'; for (var i = inverse ? 1 : 0, n = charsetParts.length; i < n; ++i) { var p = charsetParts[i]; switch (p) { case '\\B': case '\\b': case '\\D': case '\\d': case '\\S': case '\\s': case '\\W': case '\\w': groups.push(p); continue; } var start = decodeEscape(p); var end; if (i + 2 < n && '-' === charsetParts[i + 1]) { end = decodeEscape(charsetParts[i + 2]); i += 2; } else { end = start; } ranges.push([start, end]); // If the range might intersect letters, then expand it. if (!(end < 65 || start > 122)) { if (!(end < 65 || start > 90)) { ranges.push([Math.max(65, start) | 32, Math.min(end, 90) | 32]); } if (!(end < 97 || start > 122)) { ranges.push([Math.max(97, start) & ~32, Math.min(end, 122) & ~32]); } } } // [[1, 10], [3, 4], [8, 12], [14, 14], [16, 16], [17, 17]] // -> [[1, 12], [14, 14], [16, 17]] ranges.sort(function (a, b) { return (a[0] - b[0]) || (b[1] - a[1]); }); var consolidatedRanges = []; var lastRange = [NaN, NaN]; for (var i = 0; i < ranges.length; ++i) { var range = ranges[i]; if (range[0] <= lastRange[1] + 1) { lastRange[1] = Math.max(lastRange[1], range[1]); } else { consolidatedRanges.push(lastRange = range); } } var out = ['[']; if (inverse) { out.push('^'); } out.push.apply(out, groups); for (var i = 0; i < consolidatedRanges.length; ++i) { var range = consolidatedRanges[i]; out.push(encodeEscape(range[0])); if (range[1] > range[0]) { if (range[1] + 1 > range[0]) { out.push('-'); } out.push(encodeEscape(range[1])); } } out.push(']'); return out.join(''); } function allowAnywhereFoldCaseAndRenumberGroups(regex) { // Split into character sets, escape sequences, punctuation strings // like ('(', '(?:', ')', '^'), and runs of characters that do not // include any of the above. var parts = regex.source.match( new RegExp( '(?:' + '\\[(?:[^\\x5C\\x5D]|\\\\[\\s\\S])*\\]' // a character set + '|\\\\u[A-Fa-f0-9]{4}' // a unicode escape + '|\\\\x[A-Fa-f0-9]{2}' // a hex escape + '|\\\\[0-9]+' // a back-reference or octal escape + '|\\\\[^ux0-9]' // other escape sequence + '|\\(\\?[:!=]' // start of a non-capturing group + '|[\\(\\)\\^]' // start/emd of a group, or line start + '|[^\\x5B\\x5C\\(\\)\\^]+' // run of other characters + ')', 'g')); var n = parts.length; // Maps captured group numbers to the number they will occupy in // the output or to -1 if that has not been determined, or to // undefined if they need not be capturing in the output. var capturedGroups = []; // Walk over and identify back references to build the capturedGroups // mapping. for (var i = 0, groupIndex = 0; i < n; ++i) { var p = parts[i]; if (p === '(') { // groups are 1-indexed, so max group index is count of '(' ++groupIndex; } else if ('\\' === p.charAt(0)) { var decimalValue = +p.substring(1); if (decimalValue && decimalValue <= groupIndex) { capturedGroups[decimalValue] = -1; } } } // Renumber groups and reduce capturing groups to non-capturing groups // where possible. for (var i = 1; i < capturedGroups.length; ++i) { if (-1 === capturedGroups[i]) { capturedGroups[i] = ++capturedGroupIndex; } } for (var i = 0, groupIndex = 0; i < n; ++i) { var p = parts[i]; if (p === '(') { ++groupIndex; if (capturedGroups[groupIndex] === undefined) { parts[i] = '(?:'; } } else if ('\\' === p.charAt(0)) { var decimalValue = +p.substring(1); if (decimalValue && decimalValue <= groupIndex) { parts[i] = '\\' + capturedGroups[groupIndex]; } } } // Remove any prefix anchors so that the output will match anywhere. // ^^ really does mean an anchored match though. for (var i = 0, groupIndex = 0; i < n; ++i) { if ('^' === parts[i] && '^' !== parts[i + 1]) { parts[i] = ''; } } // Expand letters to groupts to handle mixing of case-sensitive and // case-insensitive patterns if necessary. if (regex.ignoreCase && needToFoldCase) { for (var i = 0; i < n; ++i) { var p = parts[i]; var ch0 = p.charAt(0); if (p.length >= 2 && ch0 === '[') { parts[i] = caseFoldCharset(p); } else if (ch0 !== '\\') { // TODO: handle letters in numeric escapes. parts[i] = p.replace( /[a-zA-Z]/g, function (ch) { var cc = ch.charCodeAt(0); return '[' + String.fromCharCode(cc & ~32, cc | 32) + ']'; }); } } } return parts.join(''); } var rewritten = []; for (var i = 0, n = regexs.length; i < n; ++i) { var regex = regexs[i]; if (regex.global || regex.multiline) { throw new Error('' + regex); } rewritten.push( '(?:' + allowAnywhereFoldCaseAndRenumberGroups(regex) + ')'); } return new RegExp(rewritten.join('|'), ignoreCase ? 'gi' : 'g'); } var PR_innerHtmlWorks = null; function getInnerHtml(node) { // inner html is hopelessly broken in Safari 2.0.4 when the content is // an html description of well formed XML and the containing tag is a PRE // tag, so we detect that case and emulate innerHTML. if (null === PR_innerHtmlWorks) { var testNode = document.createElement('PRE'); testNode.appendChild( document.createTextNode('\n')); PR_innerHtmlWorks = !/)[\r\n]+/g, '$1') .replace(/(?:[\r\n]+[ \t]*)+/g, ' '); } return content; } var out = []; for (var child = node.firstChild; child; child = child.nextSibling) { normalizedHtml(child, out); } return out.join(''); } /** returns a function that expand tabs to spaces. This function can be fed * successive chunks of text, and will maintain its own internal state to * keep track of how tabs are expanded. * @return {function (string) : string} a function that takes * plain text and return the text with tabs expanded. * @private */ function makeTabExpander(tabWidth) { var SPACES = ' '; var charInLine = 0; return function (plainText) { // walk over each character looking for tabs and newlines. // On tabs, expand them. On newlines, reset charInLine. // Otherwise increment charInLine var out = null; var pos = 0; for (var i = 0, n = plainText.length; i < n; ++i) { var ch = plainText.charAt(i); switch (ch) { case '\t': if (!out) { out = []; } out.push(plainText.substring(pos, i)); // calculate how much space we need in front of this part // nSpaces is the amount of padding -- the number of spaces needed // to move us to the next column, where columns occur at factors of // tabWidth. var nSpaces = tabWidth - (charInLine % tabWidth); charInLine += nSpaces; for (; nSpaces >= 0; nSpaces -= SPACES.length) { out.push(SPACES.substring(0, nSpaces)); } pos = i + 1; break; case '\n': charInLine = 0; break; default: ++charInLine; } } if (!out) { return plainText; } out.push(plainText.substring(pos)); return out.join(''); }; } var pr_chunkPattern = new RegExp( '[^<]+' // A run of characters other than '<' + '|<\!--[\\s\\S]*?--\>' // an HTML comment + '|' // a CDATA section // a probable tag that should not be highlighted + '|<\/?[a-zA-Z](?:[^>\"\']|\'[^\']*\'|\"[^\"]*\")*>' + '|<', // A '<' that does not begin a larger chunk 'g'); var pr_commentPrefix = /^<\!--/; var pr_cdataPrefix = /^) into their textual equivalent. * * @param {string} s html where whitespace is considered significant. * @return {Object} source code and extracted tags. * @private */ function extractTags(s) { // since the pattern has the 'g' modifier and defines no capturing groups, // this will return a list of all chunks which we then classify and wrap as // PR_Tokens var matches = s.match(pr_chunkPattern); var sourceBuf = []; var sourceBufLen = 0; var extractedTags = []; if (matches) { for (var i = 0, n = matches.length; i < n; ++i) { var match = matches[i]; if (match.length > 1 && match.charAt(0) === '<') { if (pr_commentPrefix.test(match)) { continue; } if (pr_cdataPrefix.test(match)) { // strip CDATA prefix and suffix. Don't unescape since it's CDATA sourceBuf.push(match.substring(9, match.length - 3)); sourceBufLen += match.length - 12; } else if (pr_brPrefix.test(match)) { // tags are lexically significant so convert them to text. // This is undone later. sourceBuf.push('\n'); ++sourceBufLen; } else { if (match.indexOf(PR_NOCODE) >= 0 && isNoCodeTag(match)) { // A will start a section that should be // ignored. Continue walking the list until we see a matching end // tag. var name = match.match(pr_tagNameRe)[2]; var depth = 1; var j; end_tag_loop: for (j = i + 1; j < n; ++j) { var name2 = matches[j].match(pr_tagNameRe); if (name2 && name2[2] === name) { if (name2[1] === '/') { if (--depth === 0) { break end_tag_loop; } } else { ++depth; } } } if (j < n) { extractedTags.push( sourceBufLen, matches.slice(i, j + 1).join('')); i = j; } else { // Ignore unclosed sections. extractedTags.push(sourceBufLen, match); } } else { extractedTags.push(sourceBufLen, match); } } } else { var literalText = htmlToText(match); sourceBuf.push(literalText); sourceBufLen += literalText.length; } } } return { source: sourceBuf.join(''), tags: extractedTags }; } /** True if the given tag contains a class attribute with the nocode class. */ function isNoCodeTag(tag) { return !!tag // First canonicalize the representation of attributes .replace(/\s(\w+)\s*=\s*(?:\"([^\"]*)\"|'([^\']*)'|(\S+))/g, ' $1="$2$3$4"') // Then look for the attribute we want. .match(/[cC][lL][aA][sS][sS]=\"[^\"]*\bnocode\b/); } /** * Apply the given language handler to sourceCode and add the resulting * decorations to out. * @param {number} basePos the index of sourceCode within the chunk of source * whose decorations are already present on out. */ function appendDecorations(basePos, sourceCode, langHandler, out) { if (!sourceCode) { return; } var job = { source: sourceCode, basePos: basePos }; langHandler(job); out.push.apply(out, job.decorations); } /** Given triples of [style, pattern, context] returns a lexing function, * The lexing function interprets the patterns to find token boundaries and * returns a decoration list of the form * [index_0, style_0, index_1, style_1, ..., index_n, style_n] * where index_n is an index into the sourceCode, and style_n is a style * constant like PR_PLAIN. index_n-1 <= index_n, and style_n-1 applies to * all characters in sourceCode[index_n-1:index_n]. * * The stylePatterns is a list whose elements have the form * [style : string, pattern : RegExp, DEPRECATED, shortcut : string]. * * Style is a style constant like PR_PLAIN, or can be a string of the * form 'lang-FOO', where FOO is a language extension describing the * language of the portion of the token in $1 after pattern executes. * E.g., if style is 'lang-lisp', and group 1 contains the text * '(hello (world))', then that portion of the token will be passed to the * registered lisp handler for formatting. * The text before and after group 1 will be restyled using this decorator * so decorators should take care that this doesn't result in infinite * recursion. For example, the HTML lexer rule for SCRIPT elements looks * something like ['lang-js', /<[s]cript>(.+?)<\/script>/]. This may match * ' 登录后可以享受更多权益 您还没有登录,登录后您可以: 收藏Android系统代码 收藏喜欢的文章 多个平台共享账号 去登录 首次使用?从这里 注册
}. Any class that * starts with "lang-" followed by a file extension, specifies the file type. * See the "lang-*.js" files in this directory for code that implements * per-language file handlers. * * Change log: * cbeust, 2006/08/22 * * Java annotations (start with "@") are now captured as literals ("lit") * * @requires console */ // JSLint declarations /*global console, document, navigator, setTimeout, window */ /** * Split {@code prettyPrint} into multiple timeouts so as not to interfere with * UI events. * If set to {@code false}, {@code prettyPrint()} is synchronous. */ window['PR_SHOULD_USE_CONTINUATION'] = true; /** the number of characters between tab columns */ window['PR_TAB_WIDTH'] = 8; /** Walks the DOM returning a properly escaped version of innerHTML. * @param {Node} node * @param {Array.} out output buffer that receives chunks of HTML. */ window['PR_normalizedHtml'] /** Contains functions for creating and registering new language handlers. * @type {Object} */ = window['PR'] /** Pretty print a chunk of code. * * @param {string} sourceCodeHtml code as html * @return {string} code as html, but prettier */ = window['prettyPrintOne'] /** Find all the {@code } and {@code } tags in the DOM with * {@code class=prettyprint} and prettify them. * @param {Function?} opt_whenDone if specified, called when the last entry * has been finished. */ = window['prettyPrint'] = void 0; /** browser detection. @extern @returns false if not IE, otherwise the major version. */ window['_pr_isIE6'] = function () { var ieVersion = navigator && navigator.userAgent && navigator.userAgent.match(/\bMSIE ([678])\./); ieVersion = ieVersion ? +ieVersion[1] : false; window['_pr_isIE6'] = function () { return ieVersion; }; return ieVersion; }; (function () { // Keyword lists for various languages. var FLOW_CONTROL_KEYWORDS = "break continue do else for if return while "; var C_KEYWORDS = FLOW_CONTROL_KEYWORDS + "auto case char const default " + "double enum extern float goto int long register short signed sizeof " + "static struct switch typedef union unsigned void volatile "; var COMMON_KEYWORDS = C_KEYWORDS + "catch class delete false import " + "new operator private protected public this throw true try typeof "; var CPP_KEYWORDS = COMMON_KEYWORDS + "alignof align_union asm axiom bool " + "concept concept_map const_cast constexpr decltype " + "dynamic_cast explicit export friend inline late_check " + "mutable namespace nullptr reinterpret_cast static_assert static_cast " + "template typeid typename using virtual wchar_t where "; var JAVA_KEYWORDS = COMMON_KEYWORDS + "abstract boolean byte extends final finally implements import " + "instanceof null native package strictfp super synchronized throws " + "transient "; var CSHARP_KEYWORDS = JAVA_KEYWORDS + "as base by checked decimal delegate descending dynamic event " + "fixed foreach from group implicit in interface internal into is lock " + "object out override orderby params partial readonly ref sbyte sealed " + "stackalloc string select uint ulong unchecked unsafe ushort var "; var COFFEE_KEYWORDS = "all and by catch class else extends false finally " + "for if in is isnt loop new no not null of off on or return super then " + "true try unless until when while yes "; var JSCRIPT_KEYWORDS = COMMON_KEYWORDS + "debugger eval export function get null set undefined var with " + "Infinity NaN "; var PERL_KEYWORDS = "caller delete die do dump elsif eval exit foreach for " + "goto if import last local my next no our print package redo require " + "sub undef unless until use wantarray while BEGIN END "; var PYTHON_KEYWORDS = FLOW_CONTROL_KEYWORDS + "and as assert class def del " + "elif except exec finally from global import in is lambda " + "nonlocal not or pass print raise try with yield " + "False True None "; var RUBY_KEYWORDS = FLOW_CONTROL_KEYWORDS + "alias and begin case class def" + " defined elsif end ensure false in module next nil not or redo rescue " + "retry self super then true undef unless until when yield BEGIN END "; var SH_KEYWORDS = FLOW_CONTROL_KEYWORDS + "case done elif esac eval fi " + "function in local set then until "; var ALL_KEYWORDS = ( CPP_KEYWORDS + CSHARP_KEYWORDS + JSCRIPT_KEYWORDS + PERL_KEYWORDS + PYTHON_KEYWORDS + RUBY_KEYWORDS + SH_KEYWORDS); // token style names. correspond to css classes /** token style for a string literal */ var PR_STRING = 'str'; /** token style for a keyword */ var PR_KEYWORD = 'kwd'; /** token style for a comment */ var PR_COMMENT = 'com'; /** token style for a type */ var PR_TYPE = 'typ'; /** token style for a literal value. e.g. 1, null, true. */ var PR_LITERAL = 'lit'; /** token style for a punctuation string. */ var PR_PUNCTUATION = 'pun'; /** token style for a punctuation string. */ var PR_PLAIN = 'pln'; /** token style for an sgml tag. */ var PR_TAG = 'tag'; /** token style for a markup declaration such as a DOCTYPE. */ var PR_DECLARATION = 'dec'; /** token style for embedded source. */ var PR_SOURCE = 'src'; /** token style for an sgml attribute name. */ var PR_ATTRIB_NAME = 'atn'; /** token style for an sgml attribute value. */ var PR_ATTRIB_VALUE = 'atv'; /** * A class that indicates a section of markup that is not code, e.g. to allow * embedding of line numbers within code listings. */ var PR_NOCODE = 'nocode'; /** A set of tokens that can precede a regular expression literal in * javascript. * http://www.mozilla.org/js/language/js20/rationale/syntax.html has the full * list, but I've removed ones that might be problematic when seen in * languages that don't support regular expression literals. * * Specifically, I've removed any keywords that can't precede a regexp * literal in a syntactically legal javascript program, and I've removed the * "in" keyword since it's not a keyword in many languages, and might be used * as a count of inches. * * The link a above does not accurately describe EcmaScript rules since * it fails to distinguish between (a=++/b/i) and (a++/b/i) but it works * very well in practice. * * @private */ var REGEXP_PRECEDER_PATTERN = function () { var preceders = [ "!", "!=", "!==", "#", "%", "%=", "&", "&&", "&&=", "&=", "(", "*", "*=", /* "+", */ "+=", ",", /* "-", */ "-=", "->", /*".", "..", "...", handled below */ "/", "/=", ":", "::", ";", "<", "<<", "<<=", "<=", "=", "==", "===", ">", ">=", ">>", ">>=", ">>>", ">>>=", "?", "@", "[", "^", "^=", "^^", "^^=", "{", "|", "|=", "||", "||=", "~" /* handles =~ and !~ */, "break", "case", "continue", "delete", "do", "else", "finally", "instanceof", "return", "throw", "try", "typeof" ]; var pattern = '(?:^^|[+-]'; for (var i = 0; i < preceders.length; ++i) { pattern += '|' + preceders[i].replace(/([^=<>:&a-z])/g, '\\$1'); } pattern += ')\\s*'; // matches at end, and matches empty string return pattern; // CAVEAT: this does not properly handle the case where a regular // expression immediately follows another since a regular expression may // have flags for case-sensitivity and the like. Having regexp tokens // adjacent is not valid in any language I'm aware of, so I'm punting. // TODO: maybe style special characters inside a regexp as punctuation. }(); // Define regexps here so that the interpreter doesn't have to create an // object each time the function containing them is called. // The language spec requires a new object created even if you don't access // the $1 members. var pr_amp = /&/g; var pr_lt = //g; var pr_quot = /\"/g; /** like textToHtml but escapes double quotes to be attribute safe. */ function attribToHtml(str) { return str.replace(pr_amp, '&') .replace(pr_lt, '<') .replace(pr_gt, '>') .replace(pr_quot, '"'); } /** escapest html special characters to html. */ function textToHtml(str) { return str.replace(pr_amp, '&') .replace(pr_lt, '<') .replace(pr_gt, '>'); } var pr_ltEnt = /</g; var pr_gtEnt = />/g; var pr_aposEnt = /'/g; var pr_quotEnt = /"/g; var pr_ampEnt = /&/g; var pr_nbspEnt = / /g; /** unescapes html to plain text. */ function htmlToText(html) { var pos = html.indexOf('&'); if (pos < 0) { return html; } // Handle numeric entities specially. We can't use functional substitution // since that doesn't work in older versions of Safari. // These should be rare since most browsers convert them to normal chars. for (--pos; (pos = html.indexOf('', pos + 1)) >= 0;) { var end = html.indexOf(';', pos); if (end >= 0) { var num = html.substring(pos + 3, end); var radix = 10; if (num && num.charAt(0) === 'x') { num = num.substring(1); radix = 16; } var codePoint = parseInt(num, radix); if (!isNaN(codePoint)) { html = (html.substring(0, pos) + String.fromCharCode(codePoint) + html.substring(end + 1)); } } } return html.replace(pr_ltEnt, '<') .replace(pr_gtEnt, '>') .replace(pr_aposEnt, "'") .replace(pr_quotEnt, '"') .replace(pr_nbspEnt, ' ') .replace(pr_ampEnt, '&'); } /** is the given node's innerHTML normally unescaped? */ function isRawContent(node) { return 'XMP' === node.tagName; } var newlineRe = /[\r\n]/g; /** * Are newlines and adjacent spaces significant in the given node's innerHTML? */ function isPreformatted(node, content) { // PRE means preformatted, and is a very common case, so don't create // unnecessary computed style objects. if ('PRE' === node.tagName) { return true; } if (!newlineRe.test(content)) { return true; } // Don't care var whitespace = ''; // For disconnected nodes, IE has no currentStyle. if (node.currentStyle) { whitespace = node.currentStyle.whiteSpace; } else if (window.getComputedStyle) { // Firefox makes a best guess if node is disconnected whereas Safari // returns the empty string. whitespace = window.getComputedStyle(node, null).whiteSpace; } return !whitespace || whitespace === 'pre'; } function normalizedHtml(node, out, opt_sortAttrs) { switch (node.nodeType) { case 1: // an element var name = node.tagName.toLowerCase(); out.push('<', name); var attrs = node.attributes; var n = attrs.length; if (n) { if (opt_sortAttrs) { var sortedAttrs = []; for (var i = n; --i >= 0;) { sortedAttrs[i] = attrs[i]; } sortedAttrs.sort(function (a, b) { return (a.name < b.name) ? -1 : a.name === b.name ? 0 : 1; }); attrs = sortedAttrs; } for (var i = 0; i < n; ++i) { var attr = attrs[i]; if (!attr.specified) { continue; } out.push(' ', attr.name.toLowerCase(), '="', attribToHtml(attr.value), '"'); } } out.push('>'); for (var child = node.firstChild; child; child = child.nextSibling) { normalizedHtml(child, out, opt_sortAttrs); } if (node.firstChild || !/^(?:br|link|img)$/.test(name)) { out.push('<\/', name, '>'); } break; case 3: case 4: // text out.push(textToHtml(node.nodeValue)); break; } } /** * Given a group of {@link RegExp}s, returns a {@code RegExp} that globally * matches the union o the sets o strings matched d by the input RegExp. * Since it matches globally, if the input strings have a start-of-input * anchor (/^.../), it is ignored for the purposes of unioning. * @param {Array.} regexs non multiline, non-global regexs. * @return {RegExp} a global regex. */ function combinePrefixPatterns(regexs) { var capturedGroupIndex = 0; var needToFoldCase = false; var ignoreCase = false; for (var i = 0, n = regexs.length; i < n; ++i) { var regex = regexs[i]; if (regex.ignoreCase) { ignoreCase = true; } else if (/[a-z]/i.test(regex.source.replace( /\\u[0-9a-f]{4}|\\x[0-9a-f]{2}|\\[^ux]/gi, ''))) { needToFoldCase = true; ignoreCase = false; break; } } function decodeEscape(charsetPart) { if (charsetPart.charAt(0) !== '\\') { return charsetPart.charCodeAt(0); } switch (charsetPart.charAt(1)) { case 'b': return 8; case 't': return 9; case 'n': return 0xa; case 'v': return 0xb; case 'f': return 0xc; case 'r': return 0xd; case 'u': case 'x': return parseInt(charsetPart.substring(2), 16) || charsetPart.charCodeAt(1); case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': return parseInt(charsetPart.substring(1), 8); default: return charsetPart.charCodeAt(1); } } function encodeEscape(charCode) { if (charCode < 0x20) { return (charCode < 0x10 ? '\\x0' : '\\x') + charCode.toString(16); } var ch = String.fromCharCode(charCode); if (ch === '\\' || ch === '-' || ch === '[' || ch === ']') { ch = '\\' + ch; } return ch; } function caseFoldCharset(charSet) { var charsetParts = charSet.substring(1, charSet.length - 1).match( new RegExp( '\\\\u[0-9A-Fa-f]{4}' + '|\\\\x[0-9A-Fa-f]{2}' + '|\\\\[0-3][0-7]{0,2}' + '|\\\\[0-7]{1,2}' + '|\\\\[\\s\\S]' + '|-' + '|[^-\\\\]', 'g')); var groups = []; var ranges = []; var inverse = charsetParts[0] === '^'; for (var i = inverse ? 1 : 0, n = charsetParts.length; i < n; ++i) { var p = charsetParts[i]; switch (p) { case '\\B': case '\\b': case '\\D': case '\\d': case '\\S': case '\\s': case '\\W': case '\\w': groups.push(p); continue; } var start = decodeEscape(p); var end; if (i + 2 < n && '-' === charsetParts[i + 1]) { end = decodeEscape(charsetParts[i + 2]); i += 2; } else { end = start; } ranges.push([start, end]); // If the range might intersect letters, then expand it. if (!(end < 65 || start > 122)) { if (!(end < 65 || start > 90)) { ranges.push([Math.max(65, start) | 32, Math.min(end, 90) | 32]); } if (!(end < 97 || start > 122)) { ranges.push([Math.max(97, start) & ~32, Math.min(end, 122) & ~32]); } } } // [[1, 10], [3, 4], [8, 12], [14, 14], [16, 16], [17, 17]] // -> [[1, 12], [14, 14], [16, 17]] ranges.sort(function (a, b) { return (a[0] - b[0]) || (b[1] - a[1]); }); var consolidatedRanges = []; var lastRange = [NaN, NaN]; for (var i = 0; i < ranges.length; ++i) { var range = ranges[i]; if (range[0] <= lastRange[1] + 1) { lastRange[1] = Math.max(lastRange[1], range[1]); } else { consolidatedRanges.push(lastRange = range); } } var out = ['[']; if (inverse) { out.push('^'); } out.push.apply(out, groups); for (var i = 0; i < consolidatedRanges.length; ++i) { var range = consolidatedRanges[i]; out.push(encodeEscape(range[0])); if (range[1] > range[0]) { if (range[1] + 1 > range[0]) { out.push('-'); } out.push(encodeEscape(range[1])); } } out.push(']'); return out.join(''); } function allowAnywhereFoldCaseAndRenumberGroups(regex) { // Split into character sets, escape sequences, punctuation strings // like ('(', '(?:', ')', '^'), and runs of characters that do not // include any of the above. var parts = regex.source.match( new RegExp( '(?:' + '\\[(?:[^\\x5C\\x5D]|\\\\[\\s\\S])*\\]' // a character set + '|\\\\u[A-Fa-f0-9]{4}' // a unicode escape + '|\\\\x[A-Fa-f0-9]{2}' // a hex escape + '|\\\\[0-9]+' // a back-reference or octal escape + '|\\\\[^ux0-9]' // other escape sequence + '|\\(\\?[:!=]' // start of a non-capturing group + '|[\\(\\)\\^]' // start/emd of a group, or line start + '|[^\\x5B\\x5C\\(\\)\\^]+' // run of other characters + ')', 'g')); var n = parts.length; // Maps captured group numbers to the number they will occupy in // the output or to -1 if that has not been determined, or to // undefined if they need not be capturing in the output. var capturedGroups = []; // Walk over and identify back references to build the capturedGroups // mapping. for (var i = 0, groupIndex = 0; i < n; ++i) { var p = parts[i]; if (p === '(') { // groups are 1-indexed, so max group index is count of '(' ++groupIndex; } else if ('\\' === p.charAt(0)) { var decimalValue = +p.substring(1); if (decimalValue && decimalValue <= groupIndex) { capturedGroups[decimalValue] = -1; } } } // Renumber groups and reduce capturing groups to non-capturing groups // where possible. for (var i = 1; i < capturedGroups.length; ++i) { if (-1 === capturedGroups[i]) { capturedGroups[i] = ++capturedGroupIndex; } } for (var i = 0, groupIndex = 0; i < n; ++i) { var p = parts[i]; if (p === '(') { ++groupIndex; if (capturedGroups[groupIndex] === undefined) { parts[i] = '(?:'; } } else if ('\\' === p.charAt(0)) { var decimalValue = +p.substring(1); if (decimalValue && decimalValue <= groupIndex) { parts[i] = '\\' + capturedGroups[groupIndex]; } } } // Remove any prefix anchors so that the output will match anywhere. // ^^ really does mean an anchored match though. for (var i = 0, groupIndex = 0; i < n; ++i) { if ('^' === parts[i] && '^' !== parts[i + 1]) { parts[i] = ''; } } // Expand letters to groupts to handle mixing of case-sensitive and // case-insensitive patterns if necessary. if (regex.ignoreCase && needToFoldCase) { for (var i = 0; i < n; ++i) { var p = parts[i]; var ch0 = p.charAt(0); if (p.length >= 2 && ch0 === '[') { parts[i] = caseFoldCharset(p); } else if (ch0 !== '\\') { // TODO: handle letters in numeric escapes. parts[i] = p.replace( /[a-zA-Z]/g, function (ch) { var cc = ch.charCodeAt(0); return '[' + String.fromCharCode(cc & ~32, cc | 32) + ']'; }); } } } return parts.join(''); } var rewritten = []; for (var i = 0, n = regexs.length; i < n; ++i) { var regex = regexs[i]; if (regex.global || regex.multiline) { throw new Error('' + regex); } rewritten.push( '(?:' + allowAnywhereFoldCaseAndRenumberGroups(regex) + ')'); } return new RegExp(rewritten.join('|'), ignoreCase ? 'gi' : 'g'); } var PR_innerHtmlWorks = null; function getInnerHtml(node) { // inner html is hopelessly broken in Safari 2.0.4 when the content is // an html description of well formed XML and the containing tag is a PRE // tag, so we detect that case and emulate innerHTML. if (null === PR_innerHtmlWorks) { var testNode = document.createElement('PRE'); testNode.appendChild( document.createTextNode('\n')); PR_innerHtmlWorks = !/)[\r\n]+/g, '$1') .replace(/(?:[\r\n]+[ \t]*)+/g, ' '); } return content; } var out = []; for (var child = node.firstChild; child; child = child.nextSibling) { normalizedHtml(child, out); } return out.join(''); } /** returns a function that expand tabs to spaces. This function can be fed * successive chunks of text, and will maintain its own internal state to * keep track of how tabs are expanded. * @return {function (string) : string} a function that takes * plain text and return the text with tabs expanded. * @private */ function makeTabExpander(tabWidth) { var SPACES = ' '; var charInLine = 0; return function (plainText) { // walk over each character looking for tabs and newlines. // On tabs, expand them. On newlines, reset charInLine. // Otherwise increment charInLine var out = null; var pos = 0; for (var i = 0, n = plainText.length; i < n; ++i) { var ch = plainText.charAt(i); switch (ch) { case '\t': if (!out) { out = []; } out.push(plainText.substring(pos, i)); // calculate how much space we need in front of this part // nSpaces is the amount of padding -- the number of spaces needed // to move us to the next column, where columns occur at factors of // tabWidth. var nSpaces = tabWidth - (charInLine % tabWidth); charInLine += nSpaces; for (; nSpaces >= 0; nSpaces -= SPACES.length) { out.push(SPACES.substring(0, nSpaces)); } pos = i + 1; break; case '\n': charInLine = 0; break; default: ++charInLine; } } if (!out) { return plainText; } out.push(plainText.substring(pos)); return out.join(''); }; } var pr_chunkPattern = new RegExp( '[^<]+' // A run of characters other than '<' + '|<\!--[\\s\\S]*?--\>' // an HTML comment + '|' // a CDATA section // a probable tag that should not be highlighted + '|<\/?[a-zA-Z](?:[^>\"\']|\'[^\']*\'|\"[^\"]*\")*>' + '|<', // A '<' that does not begin a larger chunk 'g'); var pr_commentPrefix = /^<\!--/; var pr_cdataPrefix = /^) into their textual equivalent. * * @param {string} s html where whitespace is considered significant. * @return {Object} source code and extracted tags. * @private */ function extractTags(s) { // since the pattern has the 'g' modifier and defines no capturing groups, // this will return a list of all chunks which we then classify and wrap as // PR_Tokens var matches = s.match(pr_chunkPattern); var sourceBuf = []; var sourceBufLen = 0; var extractedTags = []; if (matches) { for (var i = 0, n = matches.length; i < n; ++i) { var match = matches[i]; if (match.length > 1 && match.charAt(0) === '<') { if (pr_commentPrefix.test(match)) { continue; } if (pr_cdataPrefix.test(match)) { // strip CDATA prefix and suffix. Don't unescape since it's CDATA sourceBuf.push(match.substring(9, match.length - 3)); sourceBufLen += match.length - 12; } else if (pr_brPrefix.test(match)) { // tags are lexically significant so convert them to text. // This is undone later. sourceBuf.push('\n'); ++sourceBufLen; } else { if (match.indexOf(PR_NOCODE) >= 0 && isNoCodeTag(match)) { // A will start a section that should be // ignored. Continue walking the list until we see a matching end // tag. var name = match.match(pr_tagNameRe)[2]; var depth = 1; var j; end_tag_loop: for (j = i + 1; j < n; ++j) { var name2 = matches[j].match(pr_tagNameRe); if (name2 && name2[2] === name) { if (name2[1] === '/') { if (--depth === 0) { break end_tag_loop; } } else { ++depth; } } } if (j < n) { extractedTags.push( sourceBufLen, matches.slice(i, j + 1).join('')); i = j; } else { // Ignore unclosed sections. extractedTags.push(sourceBufLen, match); } } else { extractedTags.push(sourceBufLen, match); } } } else { var literalText = htmlToText(match); sourceBuf.push(literalText); sourceBufLen += literalText.length; } } } return { source: sourceBuf.join(''), tags: extractedTags }; } /** True if the given tag contains a class attribute with the nocode class. */ function isNoCodeTag(tag) { return !!tag // First canonicalize the representation of attributes .replace(/\s(\w+)\s*=\s*(?:\"([^\"]*)\"|'([^\']*)'|(\S+))/g, ' $1="$2$3$4"') // Then look for the attribute we want. .match(/[cC][lL][aA][sS][sS]=\"[^\"]*\bnocode\b/); } /** * Apply the given language handler to sourceCode and add the resulting * decorations to out. * @param {number} basePos the index of sourceCode within the chunk of source * whose decorations are already present on out. */ function appendDecorations(basePos, sourceCode, langHandler, out) { if (!sourceCode) { return; } var job = { source: sourceCode, basePos: basePos }; langHandler(job); out.push.apply(out, job.decorations); } /** Given triples of [style, pattern, context] returns a lexing function, * The lexing function interprets the patterns to find token boundaries and * returns a decoration list of the form * [index_0, style_0, index_1, style_1, ..., index_n, style_n] * where index_n is an index into the sourceCode, and style_n is a style * constant like PR_PLAIN. index_n-1 <= index_n, and style_n-1 applies to * all characters in sourceCode[index_n-1:index_n]. * * The stylePatterns is a list whose elements have the form * [style : string, pattern : RegExp, DEPRECATED, shortcut : string]. * * Style is a style constant like PR_PLAIN, or can be a string of the * form 'lang-FOO', where FOO is a language extension describing the * language of the portion of the token in $1 after pattern executes. * E.g., if style is 'lang-lisp', and group 1 contains the text * '(hello (world))', then that portion of the token will be passed to the * registered lisp handler for formatting. * The text before and after group 1 will be restyled using this decorator * so decorators should take care that this doesn't result in infinite * recursion. For example, the HTML lexer rule for SCRIPT elements looks * something like ['lang-js', /<[s]cript>(.+?)<\/script>/]. This may match * ' 登录后可以享受更多权益 您还没有登录,登录后您可以: 收藏Android系统代码 收藏喜欢的文章 多个平台共享账号 去登录 首次使用?从这里 注册
* Change log: * cbeust, 2006/08/22 *
* Java annotations (start with "@") are now captured as literals ("lit") *
} and {@code } tags in the DOM with * {@code class=prettyprint} and prettify them. * @param {Function?} opt_whenDone if specified, called when the last entry * has been finished. */ = window['prettyPrint'] = void 0; /** browser detection. @extern @returns false if not IE, otherwise the major version. */ window['_pr_isIE6'] = function () { var ieVersion = navigator && navigator.userAgent && navigator.userAgent.match(/\bMSIE ([678])\./); ieVersion = ieVersion ? +ieVersion[1] : false; window['_pr_isIE6'] = function () { return ieVersion; }; return ieVersion; }; (function () { // Keyword lists for various languages. var FLOW_CONTROL_KEYWORDS = "break continue do else for if return while "; var C_KEYWORDS = FLOW_CONTROL_KEYWORDS + "auto case char const default " + "double enum extern float goto int long register short signed sizeof " + "static struct switch typedef union unsigned void volatile "; var COMMON_KEYWORDS = C_KEYWORDS + "catch class delete false import " + "new operator private protected public this throw true try typeof "; var CPP_KEYWORDS = COMMON_KEYWORDS + "alignof align_union asm axiom bool " + "concept concept_map const_cast constexpr decltype " + "dynamic_cast explicit export friend inline late_check " + "mutable namespace nullptr reinterpret_cast static_assert static_cast " + "template typeid typename using virtual wchar_t where "; var JAVA_KEYWORDS = COMMON_KEYWORDS + "abstract boolean byte extends final finally implements import " + "instanceof null native package strictfp super synchronized throws " + "transient "; var CSHARP_KEYWORDS = JAVA_KEYWORDS + "as base by checked decimal delegate descending dynamic event " + "fixed foreach from group implicit in interface internal into is lock " + "object out override orderby params partial readonly ref sbyte sealed " + "stackalloc string select uint ulong unchecked unsafe ushort var "; var COFFEE_KEYWORDS = "all and by catch class else extends false finally " + "for if in is isnt loop new no not null of off on or return super then " + "true try unless until when while yes "; var JSCRIPT_KEYWORDS = COMMON_KEYWORDS + "debugger eval export function get null set undefined var with " + "Infinity NaN "; var PERL_KEYWORDS = "caller delete die do dump elsif eval exit foreach for " + "goto if import last local my next no our print package redo require " + "sub undef unless until use wantarray while BEGIN END "; var PYTHON_KEYWORDS = FLOW_CONTROL_KEYWORDS + "and as assert class def del " + "elif except exec finally from global import in is lambda " + "nonlocal not or pass print raise try with yield " + "False True None "; var RUBY_KEYWORDS = FLOW_CONTROL_KEYWORDS + "alias and begin case class def" + " defined elsif end ensure false in module next nil not or redo rescue " + "retry self super then true undef unless until when yield BEGIN END "; var SH_KEYWORDS = FLOW_CONTROL_KEYWORDS + "case done elif esac eval fi " + "function in local set then until "; var ALL_KEYWORDS = ( CPP_KEYWORDS + CSHARP_KEYWORDS + JSCRIPT_KEYWORDS + PERL_KEYWORDS + PYTHON_KEYWORDS + RUBY_KEYWORDS + SH_KEYWORDS); // token style names. correspond to css classes /** token style for a string literal */ var PR_STRING = 'str'; /** token style for a keyword */ var PR_KEYWORD = 'kwd'; /** token style for a comment */ var PR_COMMENT = 'com'; /** token style for a type */ var PR_TYPE = 'typ'; /** token style for a literal value. e.g. 1, null, true. */ var PR_LITERAL = 'lit'; /** token style for a punctuation string. */ var PR_PUNCTUATION = 'pun'; /** token style for a punctuation string. */ var PR_PLAIN = 'pln'; /** token style for an sgml tag. */ var PR_TAG = 'tag'; /** token style for a markup declaration such as a DOCTYPE. */ var PR_DECLARATION = 'dec'; /** token style for embedded source. */ var PR_SOURCE = 'src'; /** token style for an sgml attribute name. */ var PR_ATTRIB_NAME = 'atn'; /** token style for an sgml attribute value. */ var PR_ATTRIB_VALUE = 'atv'; /** * A class that indicates a section of markup that is not code, e.g. to allow * embedding of line numbers within code listings. */ var PR_NOCODE = 'nocode'; /** A set of tokens that can precede a regular expression literal in * javascript. * http://www.mozilla.org/js/language/js20/rationale/syntax.html has the full * list, but I've removed ones that might be problematic when seen in * languages that don't support regular expression literals. * * Specifically, I've removed any keywords that can't precede a regexp * literal in a syntactically legal javascript program, and I've removed the * "in" keyword since it's not a keyword in many languages, and might be used * as a count of inches. * * The link a above does not accurately describe EcmaScript rules since * it fails to distinguish between (a=++/b/i) and (a++/b/i) but it works * very well in practice. * * @private */ var REGEXP_PRECEDER_PATTERN = function () { var preceders = [ "!", "!=", "!==", "#", "%", "%=", "&", "&&", "&&=", "&=", "(", "*", "*=", /* "+", */ "+=", ",", /* "-", */ "-=", "->", /*".", "..", "...", handled below */ "/", "/=", ":", "::", ";", "<", "<<", "<<=", "<=", "=", "==", "===", ">", ">=", ">>", ">>=", ">>>", ">>>=", "?", "@", "[", "^", "^=", "^^", "^^=", "{", "|", "|=", "||", "||=", "~" /* handles =~ and !~ */, "break", "case", "continue", "delete", "do", "else", "finally", "instanceof", "return", "throw", "try", "typeof" ]; var pattern = '(?:^^|[+-]'; for (var i = 0; i < preceders.length; ++i) { pattern += '|' + preceders[i].replace(/([^=<>:&a-z])/g, '\\$1'); } pattern += ')\\s*'; // matches at end, and matches empty string return pattern; // CAVEAT: this does not properly handle the case where a regular // expression immediately follows another since a regular expression may // have flags for case-sensitivity and the like. Having regexp tokens // adjacent is not valid in any language I'm aware of, so I'm punting. // TODO: maybe style special characters inside a regexp as punctuation. }(); // Define regexps here so that the interpreter doesn't have to create an // object each time the function containing them is called. // The language spec requires a new object created even if you don't access // the $1 members. var pr_amp = /&/g; var pr_lt = //g; var pr_quot = /\"/g; /** like textToHtml but escapes double quotes to be attribute safe. */ function attribToHtml(str) { return str.replace(pr_amp, '&') .replace(pr_lt, '<') .replace(pr_gt, '>') .replace(pr_quot, '"'); } /** escapest html special characters to html. */ function textToHtml(str) { return str.replace(pr_amp, '&') .replace(pr_lt, '<') .replace(pr_gt, '>'); } var pr_ltEnt = /</g; var pr_gtEnt = />/g; var pr_aposEnt = /'/g; var pr_quotEnt = /"/g; var pr_ampEnt = /&/g; var pr_nbspEnt = / /g; /** unescapes html to plain text. */ function htmlToText(html) { var pos = html.indexOf('&'); if (pos < 0) { return html; } // Handle numeric entities specially. We can't use functional substitution // since that doesn't work in older versions of Safari. // These should be rare since most browsers convert them to normal chars. for (--pos; (pos = html.indexOf('', pos + 1)) >= 0;) { var end = html.indexOf(';', pos); if (end >= 0) { var num = html.substring(pos + 3, end); var radix = 10; if (num && num.charAt(0) === 'x') { num = num.substring(1); radix = 16; } var codePoint = parseInt(num, radix); if (!isNaN(codePoint)) { html = (html.substring(0, pos) + String.fromCharCode(codePoint) + html.substring(end + 1)); } } } return html.replace(pr_ltEnt, '<') .replace(pr_gtEnt, '>') .replace(pr_aposEnt, "'") .replace(pr_quotEnt, '"') .replace(pr_nbspEnt, ' ') .replace(pr_ampEnt, '&'); } /** is the given node's innerHTML normally unescaped? */ function isRawContent(node) { return 'XMP' === node.tagName; } var newlineRe = /[\r\n]/g; /** * Are newlines and adjacent spaces significant in the given node's innerHTML? */ function isPreformatted(node, content) { // PRE means preformatted, and is a very common case, so don't create // unnecessary computed style objects. if ('PRE' === node.tagName) { return true; } if (!newlineRe.test(content)) { return true; } // Don't care var whitespace = ''; // For disconnected nodes, IE has no currentStyle. if (node.currentStyle) { whitespace = node.currentStyle.whiteSpace; } else if (window.getComputedStyle) { // Firefox makes a best guess if node is disconnected whereas Safari // returns the empty string. whitespace = window.getComputedStyle(node, null).whiteSpace; } return !whitespace || whitespace === 'pre'; } function normalizedHtml(node, out, opt_sortAttrs) { switch (node.nodeType) { case 1: // an element var name = node.tagName.toLowerCase(); out.push('<', name); var attrs = node.attributes; var n = attrs.length; if (n) { if (opt_sortAttrs) { var sortedAttrs = []; for (var i = n; --i >= 0;) { sortedAttrs[i] = attrs[i]; } sortedAttrs.sort(function (a, b) { return (a.name < b.name) ? -1 : a.name === b.name ? 0 : 1; }); attrs = sortedAttrs; } for (var i = 0; i < n; ++i) { var attr = attrs[i]; if (!attr.specified) { continue; } out.push(' ', attr.name.toLowerCase(), '="', attribToHtml(attr.value), '"'); } } out.push('>'); for (var child = node.firstChild; child; child = child.nextSibling) { normalizedHtml(child, out, opt_sortAttrs); } if (node.firstChild || !/^(?:br|link|img)$/.test(name)) { out.push('<\/', name, '>'); } break; case 3: case 4: // text out.push(textToHtml(node.nodeValue)); break; } } /** * Given a group of {@link RegExp}s, returns a {@code RegExp} that globally * matches the union o the sets o strings matched d by the input RegExp. * Since it matches globally, if the input strings have a start-of-input * anchor (/^.../), it is ignored for the purposes of unioning. * @param {Array.} regexs non multiline, non-global regexs. * @return {RegExp} a global regex. */ function combinePrefixPatterns(regexs) { var capturedGroupIndex = 0; var needToFoldCase = false; var ignoreCase = false; for (var i = 0, n = regexs.length; i < n; ++i) { var regex = regexs[i]; if (regex.ignoreCase) { ignoreCase = true; } else if (/[a-z]/i.test(regex.source.replace( /\\u[0-9a-f]{4}|\\x[0-9a-f]{2}|\\[^ux]/gi, ''))) { needToFoldCase = true; ignoreCase = false; break; } } function decodeEscape(charsetPart) { if (charsetPart.charAt(0) !== '\\') { return charsetPart.charCodeAt(0); } switch (charsetPart.charAt(1)) { case 'b': return 8; case 't': return 9; case 'n': return 0xa; case 'v': return 0xb; case 'f': return 0xc; case 'r': return 0xd; case 'u': case 'x': return parseInt(charsetPart.substring(2), 16) || charsetPart.charCodeAt(1); case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': return parseInt(charsetPart.substring(1), 8); default: return charsetPart.charCodeAt(1); } } function encodeEscape(charCode) { if (charCode < 0x20) { return (charCode < 0x10 ? '\\x0' : '\\x') + charCode.toString(16); } var ch = String.fromCharCode(charCode); if (ch === '\\' || ch === '-' || ch === '[' || ch === ']') { ch = '\\' + ch; } return ch; } function caseFoldCharset(charSet) { var charsetParts = charSet.substring(1, charSet.length - 1).match( new RegExp( '\\\\u[0-9A-Fa-f]{4}' + '|\\\\x[0-9A-Fa-f]{2}' + '|\\\\[0-3][0-7]{0,2}' + '|\\\\[0-7]{1,2}' + '|\\\\[\\s\\S]' + '|-' + '|[^-\\\\]', 'g')); var groups = []; var ranges = []; var inverse = charsetParts[0] === '^'; for (var i = inverse ? 1 : 0, n = charsetParts.length; i < n; ++i) { var p = charsetParts[i]; switch (p) { case '\\B': case '\\b': case '\\D': case '\\d': case '\\S': case '\\s': case '\\W': case '\\w': groups.push(p); continue; } var start = decodeEscape(p); var end; if (i + 2 < n && '-' === charsetParts[i + 1]) { end = decodeEscape(charsetParts[i + 2]); i += 2; } else { end = start; } ranges.push([start, end]); // If the range might intersect letters, then expand it. if (!(end < 65 || start > 122)) { if (!(end < 65 || start > 90)) { ranges.push([Math.max(65, start) | 32, Math.min(end, 90) | 32]); } if (!(end < 97 || start > 122)) { ranges.push([Math.max(97, start) & ~32, Math.min(end, 122) & ~32]); } } } // [[1, 10], [3, 4], [8, 12], [14, 14], [16, 16], [17, 17]] // -> [[1, 12], [14, 14], [16, 17]] ranges.sort(function (a, b) { return (a[0] - b[0]) || (b[1] - a[1]); }); var consolidatedRanges = []; var lastRange = [NaN, NaN]; for (var i = 0; i < ranges.length; ++i) { var range = ranges[i]; if (range[0] <= lastRange[1] + 1) { lastRange[1] = Math.max(lastRange[1], range[1]); } else { consolidatedRanges.push(lastRange = range); } } var out = ['[']; if (inverse) { out.push('^'); } out.push.apply(out, groups); for (var i = 0; i < consolidatedRanges.length; ++i) { var range = consolidatedRanges[i]; out.push(encodeEscape(range[0])); if (range[1] > range[0]) { if (range[1] + 1 > range[0]) { out.push('-'); } out.push(encodeEscape(range[1])); } } out.push(']'); return out.join(''); } function allowAnywhereFoldCaseAndRenumberGroups(regex) { // Split into character sets, escape sequences, punctuation strings // like ('(', '(?:', ')', '^'), and runs of characters that do not // include any of the above. var parts = regex.source.match( new RegExp( '(?:' + '\\[(?:[^\\x5C\\x5D]|\\\\[\\s\\S])*\\]' // a character set + '|\\\\u[A-Fa-f0-9]{4}' // a unicode escape + '|\\\\x[A-Fa-f0-9]{2}' // a hex escape + '|\\\\[0-9]+' // a back-reference or octal escape + '|\\\\[^ux0-9]' // other escape sequence + '|\\(\\?[:!=]' // start of a non-capturing group + '|[\\(\\)\\^]' // start/emd of a group, or line start + '|[^\\x5B\\x5C\\(\\)\\^]+' // run of other characters + ')', 'g')); var n = parts.length; // Maps captured group numbers to the number they will occupy in // the output or to -1 if that has not been determined, or to // undefined if they need not be capturing in the output. var capturedGroups = []; // Walk over and identify back references to build the capturedGroups // mapping. for (var i = 0, groupIndex = 0; i < n; ++i) { var p = parts[i]; if (p === '(') { // groups are 1-indexed, so max group index is count of '(' ++groupIndex; } else if ('\\' === p.charAt(0)) { var decimalValue = +p.substring(1); if (decimalValue && decimalValue <= groupIndex) { capturedGroups[decimalValue] = -1; } } } // Renumber groups and reduce capturing groups to non-capturing groups // where possible. for (var i = 1; i < capturedGroups.length; ++i) { if (-1 === capturedGroups[i]) { capturedGroups[i] = ++capturedGroupIndex; } } for (var i = 0, groupIndex = 0; i < n; ++i) { var p = parts[i]; if (p === '(') { ++groupIndex; if (capturedGroups[groupIndex] === undefined) { parts[i] = '(?:'; } } else if ('\\' === p.charAt(0)) { var decimalValue = +p.substring(1); if (decimalValue && decimalValue <= groupIndex) { parts[i] = '\\' + capturedGroups[groupIndex]; } } } // Remove any prefix anchors so that the output will match anywhere. // ^^ really does mean an anchored match though. for (var i = 0, groupIndex = 0; i < n; ++i) { if ('^' === parts[i] && '^' !== parts[i + 1]) { parts[i] = ''; } } // Expand letters to groupts to handle mixing of case-sensitive and // case-insensitive patterns if necessary. if (regex.ignoreCase && needToFoldCase) { for (var i = 0; i < n; ++i) { var p = parts[i]; var ch0 = p.charAt(0); if (p.length >= 2 && ch0 === '[') { parts[i] = caseFoldCharset(p); } else if (ch0 !== '\\') { // TODO: handle letters in numeric escapes. parts[i] = p.replace( /[a-zA-Z]/g, function (ch) { var cc = ch.charCodeAt(0); return '[' + String.fromCharCode(cc & ~32, cc | 32) + ']'; }); } } } return parts.join(''); } var rewritten = []; for (var i = 0, n = regexs.length; i < n; ++i) { var regex = regexs[i]; if (regex.global || regex.multiline) { throw new Error('' + regex); } rewritten.push( '(?:' + allowAnywhereFoldCaseAndRenumberGroups(regex) + ')'); } return new RegExp(rewritten.join('|'), ignoreCase ? 'gi' : 'g'); } var PR_innerHtmlWorks = null; function getInnerHtml(node) { // inner html is hopelessly broken in Safari 2.0.4 when the content is // an html description of well formed XML and the containing tag is a PRE // tag, so we detect that case and emulate innerHTML. if (null === PR_innerHtmlWorks) { var testNode = document.createElement('PRE'); testNode.appendChild( document.createTextNode('\n')); PR_innerHtmlWorks = !/)[\r\n]+/g, '$1') .replace(/(?:[\r\n]+[ \t]*)+/g, ' '); } return content; } var out = []; for (var child = node.firstChild; child; child = child.nextSibling) { normalizedHtml(child, out); } return out.join(''); } /** returns a function that expand tabs to spaces. This function can be fed * successive chunks of text, and will maintain its own internal state to * keep track of how tabs are expanded. * @return {function (string) : string} a function that takes * plain text and return the text with tabs expanded. * @private */ function makeTabExpander(tabWidth) { var SPACES = ' '; var charInLine = 0; return function (plainText) { // walk over each character looking for tabs and newlines. // On tabs, expand them. On newlines, reset charInLine. // Otherwise increment charInLine var out = null; var pos = 0; for (var i = 0, n = plainText.length; i < n; ++i) { var ch = plainText.charAt(i); switch (ch) { case '\t': if (!out) { out = []; } out.push(plainText.substring(pos, i)); // calculate how much space we need in front of this part // nSpaces is the amount of padding -- the number of spaces needed // to move us to the next column, where columns occur at factors of // tabWidth. var nSpaces = tabWidth - (charInLine % tabWidth); charInLine += nSpaces; for (; nSpaces >= 0; nSpaces -= SPACES.length) { out.push(SPACES.substring(0, nSpaces)); } pos = i + 1; break; case '\n': charInLine = 0; break; default: ++charInLine; } } if (!out) { return plainText; } out.push(plainText.substring(pos)); return out.join(''); }; } var pr_chunkPattern = new RegExp( '[^<]+' // A run of characters other than '<' + '|<\!--[\\s\\S]*?--\>' // an HTML comment + '|' // a CDATA section // a probable tag that should not be highlighted + '|<\/?[a-zA-Z](?:[^>\"\']|\'[^\']*\'|\"[^\"]*\")*>' + '|<', // A '<' that does not begin a larger chunk 'g'); var pr_commentPrefix = /^<\!--/; var pr_cdataPrefix = /^) into their textual equivalent. * * @param {string} s html where whitespace is considered significant. * @return {Object} source code and extracted tags. * @private */ function extractTags(s) { // since the pattern has the 'g' modifier and defines no capturing groups, // this will return a list of all chunks which we then classify and wrap as // PR_Tokens var matches = s.match(pr_chunkPattern); var sourceBuf = []; var sourceBufLen = 0; var extractedTags = []; if (matches) { for (var i = 0, n = matches.length; i < n; ++i) { var match = matches[i]; if (match.length > 1 && match.charAt(0) === '<') { if (pr_commentPrefix.test(match)) { continue; } if (pr_cdataPrefix.test(match)) { // strip CDATA prefix and suffix. Don't unescape since it's CDATA sourceBuf.push(match.substring(9, match.length - 3)); sourceBufLen += match.length - 12; } else if (pr_brPrefix.test(match)) { // tags are lexically significant so convert them to text. // This is undone later. sourceBuf.push('\n'); ++sourceBufLen; } else { if (match.indexOf(PR_NOCODE) >= 0 && isNoCodeTag(match)) { // A will start a section that should be // ignored. Continue walking the list until we see a matching end // tag. var name = match.match(pr_tagNameRe)[2]; var depth = 1; var j; end_tag_loop: for (j = i + 1; j < n; ++j) { var name2 = matches[j].match(pr_tagNameRe); if (name2 && name2[2] === name) { if (name2[1] === '/') { if (--depth === 0) { break end_tag_loop; } } else { ++depth; } } } if (j < n) { extractedTags.push( sourceBufLen, matches.slice(i, j + 1).join('')); i = j; } else { // Ignore unclosed sections. extractedTags.push(sourceBufLen, match); } } else { extractedTags.push(sourceBufLen, match); } } } else { var literalText = htmlToText(match); sourceBuf.push(literalText); sourceBufLen += literalText.length; } } } return { source: sourceBuf.join(''), tags: extractedTags }; } /** True if the given tag contains a class attribute with the nocode class. */ function isNoCodeTag(tag) { return !!tag // First canonicalize the representation of attributes .replace(/\s(\w+)\s*=\s*(?:\"([^\"]*)\"|'([^\']*)'|(\S+))/g, ' $1="$2$3$4"') // Then look for the attribute we want. .match(/[cC][lL][aA][sS][sS]=\"[^\"]*\bnocode\b/); } /** * Apply the given language handler to sourceCode and add the resulting * decorations to out. * @param {number} basePos the index of sourceCode within the chunk of source * whose decorations are already present on out. */ function appendDecorations(basePos, sourceCode, langHandler, out) { if (!sourceCode) { return; } var job = { source: sourceCode, basePos: basePos }; langHandler(job); out.push.apply(out, job.decorations); } /** Given triples of [style, pattern, context] returns a lexing function, * The lexing function interprets the patterns to find token boundaries and * returns a decoration list of the form * [index_0, style_0, index_1, style_1, ..., index_n, style_n] * where index_n is an index into the sourceCode, and style_n is a style * constant like PR_PLAIN. index_n-1 <= index_n, and style_n-1 applies to * all characters in sourceCode[index_n-1:index_n]. * * The stylePatterns is a list whose elements have the form * [style : string, pattern : RegExp, DEPRECATED, shortcut : string]. * * Style is a style constant like PR_PLAIN, or can be a string of the * form 'lang-FOO', where FOO is a language extension describing the * language of the portion of the token in $1 after pattern executes. * E.g., if style is 'lang-lisp', and group 1 contains the text * '(hello (world))', then that portion of the token will be passed to the * registered lisp handler for formatting. * The text before and after group 1 will be restyled using this decorator * so decorators should take care that this doesn't result in infinite * recursion. For example, the HTML lexer rule for SCRIPT elements looks * something like ['lang-js', /<[s]cript>(.+?)<\/script>/]. This may match * ' 登录后可以享受更多权益 您还没有登录,登录后您可以: 收藏Android系统代码 收藏喜欢的文章 多个平台共享账号 去登录 首次使用?从这里 注册
} tags in the DOM with * {@code class=prettyprint} and prettify them. * @param {Function?} opt_whenDone if specified, called when the last entry * has been finished. */ = window['prettyPrint'] = void 0; /** browser detection. @extern @returns false if not IE, otherwise the major version. */ window['_pr_isIE6'] = function () { var ieVersion = navigator && navigator.userAgent && navigator.userAgent.match(/\bMSIE ([678])\./); ieVersion = ieVersion ? +ieVersion[1] : false; window['_pr_isIE6'] = function () { return ieVersion; }; return ieVersion; }; (function () { // Keyword lists for various languages. var FLOW_CONTROL_KEYWORDS = "break continue do else for if return while "; var C_KEYWORDS = FLOW_CONTROL_KEYWORDS + "auto case char const default " + "double enum extern float goto int long register short signed sizeof " + "static struct switch typedef union unsigned void volatile "; var COMMON_KEYWORDS = C_KEYWORDS + "catch class delete false import " + "new operator private protected public this throw true try typeof "; var CPP_KEYWORDS = COMMON_KEYWORDS + "alignof align_union asm axiom bool " + "concept concept_map const_cast constexpr decltype " + "dynamic_cast explicit export friend inline late_check " + "mutable namespace nullptr reinterpret_cast static_assert static_cast " + "template typeid typename using virtual wchar_t where "; var JAVA_KEYWORDS = COMMON_KEYWORDS + "abstract boolean byte extends final finally implements import " + "instanceof null native package strictfp super synchronized throws " + "transient "; var CSHARP_KEYWORDS = JAVA_KEYWORDS + "as base by checked decimal delegate descending dynamic event " + "fixed foreach from group implicit in interface internal into is lock " + "object out override orderby params partial readonly ref sbyte sealed " + "stackalloc string select uint ulong unchecked unsafe ushort var "; var COFFEE_KEYWORDS = "all and by catch class else extends false finally " + "for if in is isnt loop new no not null of off on or return super then " + "true try unless until when while yes "; var JSCRIPT_KEYWORDS = COMMON_KEYWORDS + "debugger eval export function get null set undefined var with " + "Infinity NaN "; var PERL_KEYWORDS = "caller delete die do dump elsif eval exit foreach for " + "goto if import last local my next no our print package redo require " + "sub undef unless until use wantarray while BEGIN END "; var PYTHON_KEYWORDS = FLOW_CONTROL_KEYWORDS + "and as assert class def del " + "elif except exec finally from global import in is lambda " + "nonlocal not or pass print raise try with yield " + "False True None "; var RUBY_KEYWORDS = FLOW_CONTROL_KEYWORDS + "alias and begin case class def" + " defined elsif end ensure false in module next nil not or redo rescue " + "retry self super then true undef unless until when yield BEGIN END "; var SH_KEYWORDS = FLOW_CONTROL_KEYWORDS + "case done elif esac eval fi " + "function in local set then until "; var ALL_KEYWORDS = ( CPP_KEYWORDS + CSHARP_KEYWORDS + JSCRIPT_KEYWORDS + PERL_KEYWORDS + PYTHON_KEYWORDS + RUBY_KEYWORDS + SH_KEYWORDS); // token style names. correspond to css classes /** token style for a string literal */ var PR_STRING = 'str'; /** token style for a keyword */ var PR_KEYWORD = 'kwd'; /** token style for a comment */ var PR_COMMENT = 'com'; /** token style for a type */ var PR_TYPE = 'typ'; /** token style for a literal value. e.g. 1, null, true. */ var PR_LITERAL = 'lit'; /** token style for a punctuation string. */ var PR_PUNCTUATION = 'pun'; /** token style for a punctuation string. */ var PR_PLAIN = 'pln'; /** token style for an sgml tag. */ var PR_TAG = 'tag'; /** token style for a markup declaration such as a DOCTYPE. */ var PR_DECLARATION = 'dec'; /** token style for embedded source. */ var PR_SOURCE = 'src'; /** token style for an sgml attribute name. */ var PR_ATTRIB_NAME = 'atn'; /** token style for an sgml attribute value. */ var PR_ATTRIB_VALUE = 'atv'; /** * A class that indicates a section of markup that is not code, e.g. to allow * embedding of line numbers within code listings. */ var PR_NOCODE = 'nocode'; /** A set of tokens that can precede a regular expression literal in * javascript. * http://www.mozilla.org/js/language/js20/rationale/syntax.html has the full * list, but I've removed ones that might be problematic when seen in * languages that don't support regular expression literals. * * Specifically, I've removed any keywords that can't precede a regexp * literal in a syntactically legal javascript program, and I've removed the * "in" keyword since it's not a keyword in many languages, and might be used * as a count of inches. * * The link a above does not accurately describe EcmaScript rules since * it fails to distinguish between (a=++/b/i) and (a++/b/i) but it works * very well in practice. * * @private */ var REGEXP_PRECEDER_PATTERN = function () { var preceders = [ "!", "!=", "!==", "#", "%", "%=", "&", "&&", "&&=", "&=", "(", "*", "*=", /* "+", */ "+=", ",", /* "-", */ "-=", "->", /*".", "..", "...", handled below */ "/", "/=", ":", "::", ";", "<", "<<", "<<=", "<=", "=", "==", "===", ">", ">=", ">>", ">>=", ">>>", ">>>=", "?", "@", "[", "^", "^=", "^^", "^^=", "{", "|", "|=", "||", "||=", "~" /* handles =~ and !~ */, "break", "case", "continue", "delete", "do", "else", "finally", "instanceof", "return", "throw", "try", "typeof" ]; var pattern = '(?:^^|[+-]'; for (var i = 0; i < preceders.length; ++i) { pattern += '|' + preceders[i].replace(/([^=<>:&a-z])/g, '\\$1'); } pattern += ')\\s*'; // matches at end, and matches empty string return pattern; // CAVEAT: this does not properly handle the case where a regular // expression immediately follows another since a regular expression may // have flags for case-sensitivity and the like. Having regexp tokens // adjacent is not valid in any language I'm aware of, so I'm punting. // TODO: maybe style special characters inside a regexp as punctuation. }(); // Define regexps here so that the interpreter doesn't have to create an // object each time the function containing them is called. // The language spec requires a new object created even if you don't access // the $1 members. var pr_amp = /&/g; var pr_lt = //g; var pr_quot = /\"/g; /** like textToHtml but escapes double quotes to be attribute safe. */ function attribToHtml(str) { return str.replace(pr_amp, '&') .replace(pr_lt, '<') .replace(pr_gt, '>') .replace(pr_quot, '"'); } /** escapest html special characters to html. */ function textToHtml(str) { return str.replace(pr_amp, '&') .replace(pr_lt, '<') .replace(pr_gt, '>'); } var pr_ltEnt = /</g; var pr_gtEnt = />/g; var pr_aposEnt = /'/g; var pr_quotEnt = /"/g; var pr_ampEnt = /&/g; var pr_nbspEnt = / /g; /** unescapes html to plain text. */ function htmlToText(html) { var pos = html.indexOf('&'); if (pos < 0) { return html; } // Handle numeric entities specially. We can't use functional substitution // since that doesn't work in older versions of Safari. // These should be rare since most browsers convert them to normal chars. for (--pos; (pos = html.indexOf('', pos + 1)) >= 0;) { var end = html.indexOf(';', pos); if (end >= 0) { var num = html.substring(pos + 3, end); var radix = 10; if (num && num.charAt(0) === 'x') { num = num.substring(1); radix = 16; } var codePoint = parseInt(num, radix); if (!isNaN(codePoint)) { html = (html.substring(0, pos) + String.fromCharCode(codePoint) + html.substring(end + 1)); } } } return html.replace(pr_ltEnt, '<') .replace(pr_gtEnt, '>') .replace(pr_aposEnt, "'") .replace(pr_quotEnt, '"') .replace(pr_nbspEnt, ' ') .replace(pr_ampEnt, '&'); } /** is the given node's innerHTML normally unescaped? */ function isRawContent(node) { return 'XMP' === node.tagName; } var newlineRe = /[\r\n]/g; /** * Are newlines and adjacent spaces significant in the given node's innerHTML? */ function isPreformatted(node, content) { // PRE means preformatted, and is a very common case, so don't create // unnecessary computed style objects. if ('PRE' === node.tagName) { return true; } if (!newlineRe.test(content)) { return true; } // Don't care var whitespace = ''; // For disconnected nodes, IE has no currentStyle. if (node.currentStyle) { whitespace = node.currentStyle.whiteSpace; } else if (window.getComputedStyle) { // Firefox makes a best guess if node is disconnected whereas Safari // returns the empty string. whitespace = window.getComputedStyle(node, null).whiteSpace; } return !whitespace || whitespace === 'pre'; } function normalizedHtml(node, out, opt_sortAttrs) { switch (node.nodeType) { case 1: // an element var name = node.tagName.toLowerCase(); out.push('<', name); var attrs = node.attributes; var n = attrs.length; if (n) { if (opt_sortAttrs) { var sortedAttrs = []; for (var i = n; --i >= 0;) { sortedAttrs[i] = attrs[i]; } sortedAttrs.sort(function (a, b) { return (a.name < b.name) ? -1 : a.name === b.name ? 0 : 1; }); attrs = sortedAttrs; } for (var i = 0; i < n; ++i) { var attr = attrs[i]; if (!attr.specified) { continue; } out.push(' ', attr.name.toLowerCase(), '="', attribToHtml(attr.value), '"'); } } out.push('>'); for (var child = node.firstChild; child; child = child.nextSibling) { normalizedHtml(child, out, opt_sortAttrs); } if (node.firstChild || !/^(?:br|link|img)$/.test(name)) { out.push('<\/', name, '>'); } break; case 3: case 4: // text out.push(textToHtml(node.nodeValue)); break; } } /** * Given a group of {@link RegExp}s, returns a {@code RegExp} that globally * matches the union o the sets o strings matched d by the input RegExp. * Since it matches globally, if the input strings have a start-of-input * anchor (/^.../), it is ignored for the purposes of unioning. * @param {Array.} regexs non multiline, non-global regexs. * @return {RegExp} a global regex. */ function combinePrefixPatterns(regexs) { var capturedGroupIndex = 0; var needToFoldCase = false; var ignoreCase = false; for (var i = 0, n = regexs.length; i < n; ++i) { var regex = regexs[i]; if (regex.ignoreCase) { ignoreCase = true; } else if (/[a-z]/i.test(regex.source.replace( /\\u[0-9a-f]{4}|\\x[0-9a-f]{2}|\\[^ux]/gi, ''))) { needToFoldCase = true; ignoreCase = false; break; } } function decodeEscape(charsetPart) { if (charsetPart.charAt(0) !== '\\') { return charsetPart.charCodeAt(0); } switch (charsetPart.charAt(1)) { case 'b': return 8; case 't': return 9; case 'n': return 0xa; case 'v': return 0xb; case 'f': return 0xc; case 'r': return 0xd; case 'u': case 'x': return parseInt(charsetPart.substring(2), 16) || charsetPart.charCodeAt(1); case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': return parseInt(charsetPart.substring(1), 8); default: return charsetPart.charCodeAt(1); } } function encodeEscape(charCode) { if (charCode < 0x20) { return (charCode < 0x10 ? '\\x0' : '\\x') + charCode.toString(16); } var ch = String.fromCharCode(charCode); if (ch === '\\' || ch === '-' || ch === '[' || ch === ']') { ch = '\\' + ch; } return ch; } function caseFoldCharset(charSet) { var charsetParts = charSet.substring(1, charSet.length - 1).match( new RegExp( '\\\\u[0-9A-Fa-f]{4}' + '|\\\\x[0-9A-Fa-f]{2}' + '|\\\\[0-3][0-7]{0,2}' + '|\\\\[0-7]{1,2}' + '|\\\\[\\s\\S]' + '|-' + '|[^-\\\\]', 'g')); var groups = []; var ranges = []; var inverse = charsetParts[0] === '^'; for (var i = inverse ? 1 : 0, n = charsetParts.length; i < n; ++i) { var p = charsetParts[i]; switch (p) { case '\\B': case '\\b': case '\\D': case '\\d': case '\\S': case '\\s': case '\\W': case '\\w': groups.push(p); continue; } var start = decodeEscape(p); var end; if (i + 2 < n && '-' === charsetParts[i + 1]) { end = decodeEscape(charsetParts[i + 2]); i += 2; } else { end = start; } ranges.push([start, end]); // If the range might intersect letters, then expand it. if (!(end < 65 || start > 122)) { if (!(end < 65 || start > 90)) { ranges.push([Math.max(65, start) | 32, Math.min(end, 90) | 32]); } if (!(end < 97 || start > 122)) { ranges.push([Math.max(97, start) & ~32, Math.min(end, 122) & ~32]); } } } // [[1, 10], [3, 4], [8, 12], [14, 14], [16, 16], [17, 17]] // -> [[1, 12], [14, 14], [16, 17]] ranges.sort(function (a, b) { return (a[0] - b[0]) || (b[1] - a[1]); }); var consolidatedRanges = []; var lastRange = [NaN, NaN]; for (var i = 0; i < ranges.length; ++i) { var range = ranges[i]; if (range[0] <= lastRange[1] + 1) { lastRange[1] = Math.max(lastRange[1], range[1]); } else { consolidatedRanges.push(lastRange = range); } } var out = ['[']; if (inverse) { out.push('^'); } out.push.apply(out, groups); for (var i = 0; i < consolidatedRanges.length; ++i) { var range = consolidatedRanges[i]; out.push(encodeEscape(range[0])); if (range[1] > range[0]) { if (range[1] + 1 > range[0]) { out.push('-'); } out.push(encodeEscape(range[1])); } } out.push(']'); return out.join(''); } function allowAnywhereFoldCaseAndRenumberGroups(regex) { // Split into character sets, escape sequences, punctuation strings // like ('(', '(?:', ')', '^'), and runs of characters that do not // include any of the above. var parts = regex.source.match( new RegExp( '(?:' + '\\[(?:[^\\x5C\\x5D]|\\\\[\\s\\S])*\\]' // a character set + '|\\\\u[A-Fa-f0-9]{4}' // a unicode escape + '|\\\\x[A-Fa-f0-9]{2}' // a hex escape + '|\\\\[0-9]+' // a back-reference or octal escape + '|\\\\[^ux0-9]' // other escape sequence + '|\\(\\?[:!=]' // start of a non-capturing group + '|[\\(\\)\\^]' // start/emd of a group, or line start + '|[^\\x5B\\x5C\\(\\)\\^]+' // run of other characters + ')', 'g')); var n = parts.length; // Maps captured group numbers to the number they will occupy in // the output or to -1 if that has not been determined, or to // undefined if they need not be capturing in the output. var capturedGroups = []; // Walk over and identify back references to build the capturedGroups // mapping. for (var i = 0, groupIndex = 0; i < n; ++i) { var p = parts[i]; if (p === '(') { // groups are 1-indexed, so max group index is count of '(' ++groupIndex; } else if ('\\' === p.charAt(0)) { var decimalValue = +p.substring(1); if (decimalValue && decimalValue <= groupIndex) { capturedGroups[decimalValue] = -1; } } } // Renumber groups and reduce capturing groups to non-capturing groups // where possible. for (var i = 1; i < capturedGroups.length; ++i) { if (-1 === capturedGroups[i]) { capturedGroups[i] = ++capturedGroupIndex; } } for (var i = 0, groupIndex = 0; i < n; ++i) { var p = parts[i]; if (p === '(') { ++groupIndex; if (capturedGroups[groupIndex] === undefined) { parts[i] = '(?:'; } } else if ('\\' === p.charAt(0)) { var decimalValue = +p.substring(1); if (decimalValue && decimalValue <= groupIndex) { parts[i] = '\\' + capturedGroups[groupIndex]; } } } // Remove any prefix anchors so that the output will match anywhere. // ^^ really does mean an anchored match though. for (var i = 0, groupIndex = 0; i < n; ++i) { if ('^' === parts[i] && '^' !== parts[i + 1]) { parts[i] = ''; } } // Expand letters to groupts to handle mixing of case-sensitive and // case-insensitive patterns if necessary. if (regex.ignoreCase && needToFoldCase) { for (var i = 0; i < n; ++i) { var p = parts[i]; var ch0 = p.charAt(0); if (p.length >= 2 && ch0 === '[') { parts[i] = caseFoldCharset(p); } else if (ch0 !== '\\') { // TODO: handle letters in numeric escapes. parts[i] = p.replace( /[a-zA-Z]/g, function (ch) { var cc = ch.charCodeAt(0); return '[' + String.fromCharCode(cc & ~32, cc | 32) + ']'; }); } } } return parts.join(''); } var rewritten = []; for (var i = 0, n = regexs.length; i < n; ++i) { var regex = regexs[i]; if (regex.global || regex.multiline) { throw new Error('' + regex); } rewritten.push( '(?:' + allowAnywhereFoldCaseAndRenumberGroups(regex) + ')'); } return new RegExp(rewritten.join('|'), ignoreCase ? 'gi' : 'g'); } var PR_innerHtmlWorks = null; function getInnerHtml(node) { // inner html is hopelessly broken in Safari 2.0.4 when the content is // an html description of well formed XML and the containing tag is a PRE // tag, so we detect that case and emulate innerHTML. if (null === PR_innerHtmlWorks) { var testNode = document.createElement('PRE'); testNode.appendChild( document.createTextNode('\n')); PR_innerHtmlWorks = !/)[\r\n]+/g, '$1') .replace(/(?:[\r\n]+[ \t]*)+/g, ' '); } return content; } var out = []; for (var child = node.firstChild; child; child = child.nextSibling) { normalizedHtml(child, out); } return out.join(''); } /** returns a function that expand tabs to spaces. This function can be fed * successive chunks of text, and will maintain its own internal state to * keep track of how tabs are expanded. * @return {function (string) : string} a function that takes * plain text and return the text with tabs expanded. * @private */ function makeTabExpander(tabWidth) { var SPACES = ' '; var charInLine = 0; return function (plainText) { // walk over each character looking for tabs and newlines. // On tabs, expand them. On newlines, reset charInLine. // Otherwise increment charInLine var out = null; var pos = 0; for (var i = 0, n = plainText.length; i < n; ++i) { var ch = plainText.charAt(i); switch (ch) { case '\t': if (!out) { out = []; } out.push(plainText.substring(pos, i)); // calculate how much space we need in front of this part // nSpaces is the amount of padding -- the number of spaces needed // to move us to the next column, where columns occur at factors of // tabWidth. var nSpaces = tabWidth - (charInLine % tabWidth); charInLine += nSpaces; for (; nSpaces >= 0; nSpaces -= SPACES.length) { out.push(SPACES.substring(0, nSpaces)); } pos = i + 1; break; case '\n': charInLine = 0; break; default: ++charInLine; } } if (!out) { return plainText; } out.push(plainText.substring(pos)); return out.join(''); }; } var pr_chunkPattern = new RegExp( '[^<]+' // A run of characters other than '<' + '|<\!--[\\s\\S]*?--\>' // an HTML comment + '|' // a CDATA section // a probable tag that should not be highlighted + '|<\/?[a-zA-Z](?:[^>\"\']|\'[^\']*\'|\"[^\"]*\")*>' + '|<', // A '<' that does not begin a larger chunk 'g'); var pr_commentPrefix = /^<\!--/; var pr_cdataPrefix = /^) into their textual equivalent. * * @param {string} s html where whitespace is considered significant. * @return {Object} source code and extracted tags. * @private */ function extractTags(s) { // since the pattern has the 'g' modifier and defines no capturing groups, // this will return a list of all chunks which we then classify and wrap as // PR_Tokens var matches = s.match(pr_chunkPattern); var sourceBuf = []; var sourceBufLen = 0; var extractedTags = []; if (matches) { for (var i = 0, n = matches.length; i < n; ++i) { var match = matches[i]; if (match.length > 1 && match.charAt(0) === '<') { if (pr_commentPrefix.test(match)) { continue; } if (pr_cdataPrefix.test(match)) { // strip CDATA prefix and suffix. Don't unescape since it's CDATA sourceBuf.push(match.substring(9, match.length - 3)); sourceBufLen += match.length - 12; } else if (pr_brPrefix.test(match)) { // tags are lexically significant so convert them to text. // This is undone later. sourceBuf.push('\n'); ++sourceBufLen; } else { if (match.indexOf(PR_NOCODE) >= 0 && isNoCodeTag(match)) { // A will start a section that should be // ignored. Continue walking the list until we see a matching end // tag. var name = match.match(pr_tagNameRe)[2]; var depth = 1; var j; end_tag_loop: for (j = i + 1; j < n; ++j) { var name2 = matches[j].match(pr_tagNameRe); if (name2 && name2[2] === name) { if (name2[1] === '/') { if (--depth === 0) { break end_tag_loop; } } else { ++depth; } } } if (j < n) { extractedTags.push( sourceBufLen, matches.slice(i, j + 1).join('')); i = j; } else { // Ignore unclosed sections. extractedTags.push(sourceBufLen, match); } } else { extractedTags.push(sourceBufLen, match); } } } else { var literalText = htmlToText(match); sourceBuf.push(literalText); sourceBufLen += literalText.length; } } } return { source: sourceBuf.join(''), tags: extractedTags }; } /** True if the given tag contains a class attribute with the nocode class. */ function isNoCodeTag(tag) { return !!tag // First canonicalize the representation of attributes .replace(/\s(\w+)\s*=\s*(?:\"([^\"]*)\"|'([^\']*)'|(\S+))/g, ' $1="$2$3$4"') // Then look for the attribute we want. .match(/[cC][lL][aA][sS][sS]=\"[^\"]*\bnocode\b/); } /** * Apply the given language handler to sourceCode and add the resulting * decorations to out. * @param {number} basePos the index of sourceCode within the chunk of source * whose decorations are already present on out. */ function appendDecorations(basePos, sourceCode, langHandler, out) { if (!sourceCode) { return; } var job = { source: sourceCode, basePos: basePos }; langHandler(job); out.push.apply(out, job.decorations); } /** Given triples of [style, pattern, context] returns a lexing function, * The lexing function interprets the patterns to find token boundaries and * returns a decoration list of the form * [index_0, style_0, index_1, style_1, ..., index_n, style_n] * where index_n is an index into the sourceCode, and style_n is a style * constant like PR_PLAIN. index_n-1 <= index_n, and style_n-1 applies to * all characters in sourceCode[index_n-1:index_n]. * * The stylePatterns is a list whose elements have the form * [style : string, pattern : RegExp, DEPRECATED, shortcut : string]. * * Style is a style constant like PR_PLAIN, or can be a string of the * form 'lang-FOO', where FOO is a language extension describing the * language of the portion of the token in $1 after pattern executes. * E.g., if style is 'lang-lisp', and group 1 contains the text * '(hello (world))', then that portion of the token will be passed to the * registered lisp handler for formatting. * The text before and after group 1 will be restyled using this decorator * so decorators should take care that this doesn't result in infinite * recursion. For example, the HTML lexer rule for SCRIPT elements looks * something like ['lang-js', /<[s]cript>(.+?)<\/script>/]. This may match * ' 登录后可以享受更多权益 您还没有登录,登录后您可以: 收藏Android系统代码 收藏喜欢的文章 多个平台共享账号 去登录 首次使用?从这里 注册
Specifically, I've removed any keywords that can't precede a regexp * literal in a syntactically legal javascript program, and I've removed the * "in" keyword since it's not a keyword in many languages, and might be used * as a count of inches. * *
The link a above does not accurately describe EcmaScript rules since * it fails to distinguish between (a=++/b/i) and (a++/b/i) but it works * very well in practice. * * @private */ var REGEXP_PRECEDER_PATTERN = function () { var preceders = [ "!", "!=", "!==", "#", "%", "%=", "&", "&&", "&&=", "&=", "(", "*", "*=", /* "+", */ "+=", ",", /* "-", */ "-=", "->", /*".", "..", "...", handled below */ "/", "/=", ":", "::", ";", "<", "<<", "<<=", "<=", "=", "==", "===", ">", ">=", ">>", ">>=", ">>>", ">>>=", "?", "@", "[", "^", "^=", "^^", "^^=", "{", "|", "|=", "||", "||=", "~" /* handles =~ and !~ */, "break", "case", "continue", "delete", "do", "else", "finally", "instanceof", "return", "throw", "try", "typeof" ]; var pattern = '(?:^^|[+-]'; for (var i = 0; i < preceders.length; ++i) { pattern += '|' + preceders[i].replace(/([^=<>:&a-z])/g, '\\$1'); } pattern += ')\\s*'; // matches at end, and matches empty string return pattern; // CAVEAT: this does not properly handle the case where a regular // expression immediately follows another since a regular expression may // have flags for case-sensitivity and the like. Having regexp tokens // adjacent is not valid in any language I'm aware of, so I'm punting. // TODO: maybe style special characters inside a regexp as punctuation. }(); // Define regexps here so that the interpreter doesn't have to create an // object each time the function containing them is called. // The language spec requires a new object created even if you don't access // the $1 members. var pr_amp = /&/g; var pr_lt = //g; var pr_quot = /\"/g; /** like textToHtml but escapes double quotes to be attribute safe. */ function attribToHtml(str) { return str.replace(pr_amp, '&') .replace(pr_lt, '<') .replace(pr_gt, '>') .replace(pr_quot, '"'); } /** escapest html special characters to html. */ function textToHtml(str) { return str.replace(pr_amp, '&') .replace(pr_lt, '<') .replace(pr_gt, '>'); } var pr_ltEnt = /</g; var pr_gtEnt = />/g; var pr_aposEnt = /'/g; var pr_quotEnt = /"/g; var pr_ampEnt = /&/g; var pr_nbspEnt = / /g; /** unescapes html to plain text. */ function htmlToText(html) { var pos = html.indexOf('&'); if (pos < 0) { return html; } // Handle numeric entities specially. We can't use functional substitution // since that doesn't work in older versions of Safari. // These should be rare since most browsers convert them to normal chars. for (--pos; (pos = html.indexOf('', pos + 1)) >= 0;) { var end = html.indexOf(';', pos); if (end >= 0) { var num = html.substring(pos + 3, end); var radix = 10; if (num && num.charAt(0) === 'x') { num = num.substring(1); radix = 16; } var codePoint = parseInt(num, radix); if (!isNaN(codePoint)) { html = (html.substring(0, pos) + String.fromCharCode(codePoint) + html.substring(end + 1)); } } } return html.replace(pr_ltEnt, '<') .replace(pr_gtEnt, '>') .replace(pr_aposEnt, "'") .replace(pr_quotEnt, '"') .replace(pr_nbspEnt, ' ') .replace(pr_ampEnt, '&'); } /** is the given node's innerHTML normally unescaped? */ function isRawContent(node) { return 'XMP' === node.tagName; } var newlineRe = /[\r\n]/g; /** * Are newlines and adjacent spaces significant in the given node's innerHTML? */ function isPreformatted(node, content) { // PRE means preformatted, and is a very common case, so don't create // unnecessary computed style objects. if ('PRE' === node.tagName) { return true; } if (!newlineRe.test(content)) { return true; } // Don't care var whitespace = ''; // For disconnected nodes, IE has no currentStyle. if (node.currentStyle) { whitespace = node.currentStyle.whiteSpace; } else if (window.getComputedStyle) { // Firefox makes a best guess if node is disconnected whereas Safari // returns the empty string. whitespace = window.getComputedStyle(node, null).whiteSpace; } return !whitespace || whitespace === 'pre'; } function normalizedHtml(node, out, opt_sortAttrs) { switch (node.nodeType) { case 1: // an element var name = node.tagName.toLowerCase(); out.push('<', name); var attrs = node.attributes; var n = attrs.length; if (n) { if (opt_sortAttrs) { var sortedAttrs = []; for (var i = n; --i >= 0;) { sortedAttrs[i] = attrs[i]; } sortedAttrs.sort(function (a, b) { return (a.name < b.name) ? -1 : a.name === b.name ? 0 : 1; }); attrs = sortedAttrs; } for (var i = 0; i < n; ++i) { var attr = attrs[i]; if (!attr.specified) { continue; } out.push(' ', attr.name.toLowerCase(), '="', attribToHtml(attr.value), '"'); } } out.push('>'); for (var child = node.firstChild; child; child = child.nextSibling) { normalizedHtml(child, out, opt_sortAttrs); } if (node.firstChild || !/^(?:br|link|img)$/.test(name)) { out.push('<\/', name, '>'); } break; case 3: case 4: // text out.push(textToHtml(node.nodeValue)); break; } } /** * Given a group of {@link RegExp}s, returns a {@code RegExp} that globally * matches the union o the sets o strings matched d by the input RegExp. * Since it matches globally, if the input strings have a start-of-input * anchor (/^.../), it is ignored for the purposes of unioning. * @param {Array.} regexs non multiline, non-global regexs. * @return {RegExp} a global regex. */ function combinePrefixPatterns(regexs) { var capturedGroupIndex = 0; var needToFoldCase = false; var ignoreCase = false; for (var i = 0, n = regexs.length; i < n; ++i) { var regex = regexs[i]; if (regex.ignoreCase) { ignoreCase = true; } else if (/[a-z]/i.test(regex.source.replace( /\\u[0-9a-f]{4}|\\x[0-9a-f]{2}|\\[^ux]/gi, ''))) { needToFoldCase = true; ignoreCase = false; break; } } function decodeEscape(charsetPart) { if (charsetPart.charAt(0) !== '\\') { return charsetPart.charCodeAt(0); } switch (charsetPart.charAt(1)) { case 'b': return 8; case 't': return 9; case 'n': return 0xa; case 'v': return 0xb; case 'f': return 0xc; case 'r': return 0xd; case 'u': case 'x': return parseInt(charsetPart.substring(2), 16) || charsetPart.charCodeAt(1); case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': return parseInt(charsetPart.substring(1), 8); default: return charsetPart.charCodeAt(1); } } function encodeEscape(charCode) { if (charCode < 0x20) { return (charCode < 0x10 ? '\\x0' : '\\x') + charCode.toString(16); } var ch = String.fromCharCode(charCode); if (ch === '\\' || ch === '-' || ch === '[' || ch === ']') { ch = '\\' + ch; } return ch; } function caseFoldCharset(charSet) { var charsetParts = charSet.substring(1, charSet.length - 1).match( new RegExp( '\\\\u[0-9A-Fa-f]{4}' + '|\\\\x[0-9A-Fa-f]{2}' + '|\\\\[0-3][0-7]{0,2}' + '|\\\\[0-7]{1,2}' + '|\\\\[\\s\\S]' + '|-' + '|[^-\\\\]', 'g')); var groups = []; var ranges = []; var inverse = charsetParts[0] === '^'; for (var i = inverse ? 1 : 0, n = charsetParts.length; i < n; ++i) { var p = charsetParts[i]; switch (p) { case '\\B': case '\\b': case '\\D': case '\\d': case '\\S': case '\\s': case '\\W': case '\\w': groups.push(p); continue; } var start = decodeEscape(p); var end; if (i + 2 < n && '-' === charsetParts[i + 1]) { end = decodeEscape(charsetParts[i + 2]); i += 2; } else { end = start; } ranges.push([start, end]); // If the range might intersect letters, then expand it. if (!(end < 65 || start > 122)) { if (!(end < 65 || start > 90)) { ranges.push([Math.max(65, start) | 32, Math.min(end, 90) | 32]); } if (!(end < 97 || start > 122)) { ranges.push([Math.max(97, start) & ~32, Math.min(end, 122) & ~32]); } } } // [[1, 10], [3, 4], [8, 12], [14, 14], [16, 16], [17, 17]] // -> [[1, 12], [14, 14], [16, 17]] ranges.sort(function (a, b) { return (a[0] - b[0]) || (b[1] - a[1]); }); var consolidatedRanges = []; var lastRange = [NaN, NaN]; for (var i = 0; i < ranges.length; ++i) { var range = ranges[i]; if (range[0] <= lastRange[1] + 1) { lastRange[1] = Math.max(lastRange[1], range[1]); } else { consolidatedRanges.push(lastRange = range); } } var out = ['[']; if (inverse) { out.push('^'); } out.push.apply(out, groups); for (var i = 0; i < consolidatedRanges.length; ++i) { var range = consolidatedRanges[i]; out.push(encodeEscape(range[0])); if (range[1] > range[0]) { if (range[1] + 1 > range[0]) { out.push('-'); } out.push(encodeEscape(range[1])); } } out.push(']'); return out.join(''); } function allowAnywhereFoldCaseAndRenumberGroups(regex) { // Split into character sets, escape sequences, punctuation strings // like ('(', '(?:', ')', '^'), and runs of characters that do not // include any of the above. var parts = regex.source.match( new RegExp( '(?:' + '\\[(?:[^\\x5C\\x5D]|\\\\[\\s\\S])*\\]' // a character set + '|\\\\u[A-Fa-f0-9]{4}' // a unicode escape + '|\\\\x[A-Fa-f0-9]{2}' // a hex escape + '|\\\\[0-9]+' // a back-reference or octal escape + '|\\\\[^ux0-9]' // other escape sequence + '|\\(\\?[:!=]' // start of a non-capturing group + '|[\\(\\)\\^]' // start/emd of a group, or line start + '|[^\\x5B\\x5C\\(\\)\\^]+' // run of other characters + ')', 'g')); var n = parts.length; // Maps captured group numbers to the number they will occupy in // the output or to -1 if that has not been determined, or to // undefined if they need not be capturing in the output. var capturedGroups = []; // Walk over and identify back references to build the capturedGroups // mapping. for (var i = 0, groupIndex = 0; i < n; ++i) { var p = parts[i]; if (p === '(') { // groups are 1-indexed, so max group index is count of '(' ++groupIndex; } else if ('\\' === p.charAt(0)) { var decimalValue = +p.substring(1); if (decimalValue && decimalValue <= groupIndex) { capturedGroups[decimalValue] = -1; } } } // Renumber groups and reduce capturing groups to non-capturing groups // where possible. for (var i = 1; i < capturedGroups.length; ++i) { if (-1 === capturedGroups[i]) { capturedGroups[i] = ++capturedGroupIndex; } } for (var i = 0, groupIndex = 0; i < n; ++i) { var p = parts[i]; if (p === '(') { ++groupIndex; if (capturedGroups[groupIndex] === undefined) { parts[i] = '(?:'; } } else if ('\\' === p.charAt(0)) { var decimalValue = +p.substring(1); if (decimalValue && decimalValue <= groupIndex) { parts[i] = '\\' + capturedGroups[groupIndex]; } } } // Remove any prefix anchors so that the output will match anywhere. // ^^ really does mean an anchored match though. for (var i = 0, groupIndex = 0; i < n; ++i) { if ('^' === parts[i] && '^' !== parts[i + 1]) { parts[i] = ''; } } // Expand letters to groupts to handle mixing of case-sensitive and // case-insensitive patterns if necessary. if (regex.ignoreCase && needToFoldCase) { for (var i = 0; i < n; ++i) { var p = parts[i]; var ch0 = p.charAt(0); if (p.length >= 2 && ch0 === '[') { parts[i] = caseFoldCharset(p); } else if (ch0 !== '\\') { // TODO: handle letters in numeric escapes. parts[i] = p.replace( /[a-zA-Z]/g, function (ch) { var cc = ch.charCodeAt(0); return '[' + String.fromCharCode(cc & ~32, cc | 32) + ']'; }); } } } return parts.join(''); } var rewritten = []; for (var i = 0, n = regexs.length; i < n; ++i) { var regex = regexs[i]; if (regex.global || regex.multiline) { throw new Error('' + regex); } rewritten.push( '(?:' + allowAnywhereFoldCaseAndRenumberGroups(regex) + ')'); } return new RegExp(rewritten.join('|'), ignoreCase ? 'gi' : 'g'); } var PR_innerHtmlWorks = null; function getInnerHtml(node) { // inner html is hopelessly broken in Safari 2.0.4 when the content is // an html description of well formed XML and the containing tag is a PRE // tag, so we detect that case and emulate innerHTML. if (null === PR_innerHtmlWorks) { var testNode = document.createElement('PRE'); testNode.appendChild( document.createTextNode('\n')); PR_innerHtmlWorks = !/)[\r\n]+/g, '$1') .replace(/(?:[\r\n]+[ \t]*)+/g, ' '); } return content; } var out = []; for (var child = node.firstChild; child; child = child.nextSibling) { normalizedHtml(child, out); } return out.join(''); } /** returns a function that expand tabs to spaces. This function can be fed * successive chunks of text, and will maintain its own internal state to * keep track of how tabs are expanded. * @return {function (string) : string} a function that takes * plain text and return the text with tabs expanded. * @private */ function makeTabExpander(tabWidth) { var SPACES = ' '; var charInLine = 0; return function (plainText) { // walk over each character looking for tabs and newlines. // On tabs, expand them. On newlines, reset charInLine. // Otherwise increment charInLine var out = null; var pos = 0; for (var i = 0, n = plainText.length; i < n; ++i) { var ch = plainText.charAt(i); switch (ch) { case '\t': if (!out) { out = []; } out.push(plainText.substring(pos, i)); // calculate how much space we need in front of this part // nSpaces is the amount of padding -- the number of spaces needed // to move us to the next column, where columns occur at factors of // tabWidth. var nSpaces = tabWidth - (charInLine % tabWidth); charInLine += nSpaces; for (; nSpaces >= 0; nSpaces -= SPACES.length) { out.push(SPACES.substring(0, nSpaces)); } pos = i + 1; break; case '\n': charInLine = 0; break; default: ++charInLine; } } if (!out) { return plainText; } out.push(plainText.substring(pos)); return out.join(''); }; } var pr_chunkPattern = new RegExp( '[^<]+' // A run of characters other than '<' + '|<\!--[\\s\\S]*?--\>' // an HTML comment + '|' // a CDATA section // a probable tag that should not be highlighted + '|<\/?[a-zA-Z](?:[^>\"\']|\'[^\']*\'|\"[^\"]*\")*>' + '|<', // A '<' that does not begin a larger chunk 'g'); var pr_commentPrefix = /^<\!--/; var pr_cdataPrefix = /^) into their textual equivalent. * * @param {string} s html where whitespace is considered significant. * @return {Object} source code and extracted tags. * @private */ function extractTags(s) { // since the pattern has the 'g' modifier and defines no capturing groups, // this will return a list of all chunks which we then classify and wrap as // PR_Tokens var matches = s.match(pr_chunkPattern); var sourceBuf = []; var sourceBufLen = 0; var extractedTags = []; if (matches) { for (var i = 0, n = matches.length; i < n; ++i) { var match = matches[i]; if (match.length > 1 && match.charAt(0) === '<') { if (pr_commentPrefix.test(match)) { continue; } if (pr_cdataPrefix.test(match)) { // strip CDATA prefix and suffix. Don't unescape since it's CDATA sourceBuf.push(match.substring(9, match.length - 3)); sourceBufLen += match.length - 12; } else if (pr_brPrefix.test(match)) { // tags are lexically significant so convert them to text. // This is undone later. sourceBuf.push('\n'); ++sourceBufLen; } else { if (match.indexOf(PR_NOCODE) >= 0 && isNoCodeTag(match)) { // A will start a section that should be // ignored. Continue walking the list until we see a matching end // tag. var name = match.match(pr_tagNameRe)[2]; var depth = 1; var j; end_tag_loop: for (j = i + 1; j < n; ++j) { var name2 = matches[j].match(pr_tagNameRe); if (name2 && name2[2] === name) { if (name2[1] === '/') { if (--depth === 0) { break end_tag_loop; } } else { ++depth; } } } if (j < n) { extractedTags.push( sourceBufLen, matches.slice(i, j + 1).join('')); i = j; } else { // Ignore unclosed sections. extractedTags.push(sourceBufLen, match); } } else { extractedTags.push(sourceBufLen, match); } } } else { var literalText = htmlToText(match); sourceBuf.push(literalText); sourceBufLen += literalText.length; } } } return { source: sourceBuf.join(''), tags: extractedTags }; } /** True if the given tag contains a class attribute with the nocode class. */ function isNoCodeTag(tag) { return !!tag // First canonicalize the representation of attributes .replace(/\s(\w+)\s*=\s*(?:\"([^\"]*)\"|'([^\']*)'|(\S+))/g, ' $1="$2$3$4"') // Then look for the attribute we want. .match(/[cC][lL][aA][sS][sS]=\"[^\"]*\bnocode\b/); } /** * Apply the given language handler to sourceCode and add the resulting * decorations to out. * @param {number} basePos the index of sourceCode within the chunk of source * whose decorations are already present on out. */ function appendDecorations(basePos, sourceCode, langHandler, out) { if (!sourceCode) { return; } var job = { source: sourceCode, basePos: basePos }; langHandler(job); out.push.apply(out, job.decorations); } /** Given triples of [style, pattern, context] returns a lexing function, * The lexing function interprets the patterns to find token boundaries and * returns a decoration list of the form * [index_0, style_0, index_1, style_1, ..., index_n, style_n] * where index_n is an index into the sourceCode, and style_n is a style * constant like PR_PLAIN. index_n-1 <= index_n, and style_n-1 applies to * all characters in sourceCode[index_n-1:index_n]. * * The stylePatterns is a list whose elements have the form * [style : string, pattern : RegExp, DEPRECATED, shortcut : string]. * * Style is a style constant like PR_PLAIN, or can be a string of the * form 'lang-FOO', where FOO is a language extension describing the * language of the portion of the token in $1 after pattern executes. * E.g., if style is 'lang-lisp', and group 1 contains the text * '(hello (world))', then that portion of the token will be passed to the * registered lisp handler for formatting. * The text before and after group 1 will be restyled using this decorator * so decorators should take care that this doesn't result in infinite * recursion. For example, the HTML lexer rule for SCRIPT elements looks * something like ['lang-js', /<[s]cript>(.+?)<\/script>/]. This may match * ' 登录后可以享受更多权益 您还没有登录,登录后您可以: 收藏Android系统代码 收藏喜欢的文章 多个平台共享账号 去登录 首次使用?从这里 注册
您还没有登录,登录后您可以:
首次使用?从这里 注册