// Copyright 2011 The Go Authors. All rights reserved. // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. package template import ( "bytes" "strings" ) // transitionFunc is the array of context transition functions for text nodes. // A transition function takes a context and template text input, and returns // the updated context and the number of bytes consumed from the front of the // input. var transitionFunc = [...]func(context, []byte) (context, int){ stateText: tText, stateTag: tTag, stateAttrName: tAttrName, stateAfterName: tAfterName, stateBeforeValue: tBeforeValue, stateHTMLCmt: tHTMLCmt, stateRCDATA: tSpecialTagEnd, stateAttr: tAttr, stateURL: tURL, stateSrcset: tURL, stateJS: tJS, stateJSDqStr: tJSDelimited, stateJSSqStr: tJSDelimited, stateJSRegexp: tJSDelimited, stateJSBlockCmt: tBlockCmt, stateJSLineCmt: tLineCmt, stateCSS: tCSS, stateCSSDqStr: tCSSStr, stateCSSSqStr: tCSSStr, stateCSSDqURL: tCSSStr, stateCSSSqURL: tCSSStr, stateCSSURL: tCSSStr, stateCSSBlockCmt: tBlockCmt, stateCSSLineCmt: tLineCmt, stateError: tError, } var commentStart = []byte("<!--") var commentEnd = []byte("-->") // tText is the context transition function for the text state. func tText(c context, s []byte) (context, int) { k := 0 for { i := k + bytes.IndexByte(s[k:], '<') if i < k || i+1 == len(s) { return c, len(s) } else if i+4 <= len(s) && bytes.Equal(commentStart, s[i:i+4]) { return context{state: stateHTMLCmt}, i + 4 } i++ end := false if s[i] == '/' { if i+1 == len(s) { return c, len(s) } end, i = true, i+1 } j, e := eatTagName(s, i) if j != i { if end { e = elementNone } // We've found an HTML tag. return context{state: stateTag, element: e}, j } k = j } } var elementContentType = [...]state{ elementNone: stateText, elementScript: stateJS, elementStyle: stateCSS, elementTextarea: stateRCDATA, elementTitle: stateRCDATA, } // tTag is the context transition function for the tag state. 
func tTag(c context, s []byte) (context, int) { // Find the attribute name. i := eatWhiteSpace(s, 0) if i == len(s) { return c, len(s) } if s[i] == '>' { return context{ state: elementContentType[c.element], element: c.element, }, i + 1 } j, err := eatAttrName(s, i) if err != nil { return context{state: stateError, err: err}, len(s) } state, attr := stateTag, attrNone if i == j { return context{ state: stateError, err: errorf(ErrBadHTML, nil, 0, "expected space, attr name, or end of tag, but got %q", s[i:]), }, len(s) } attrName := strings.ToLower(string(s[i:j])) if c.element == elementScript && attrName == "type" { attr = attrScriptType } else { switch attrType(attrName) { case contentTypeURL: attr = attrURL case contentTypeCSS: attr = attrStyle case contentTypeJS: attr = attrScript case contentTypeSrcset: attr = attrSrcset } } if j == len(s) { state = stateAttrName } else { state = stateAfterName } return context{state: state, element: c.element, attr: attr}, j } // tAttrName is the context transition function for stateAttrName. func tAttrName(c context, s []byte) (context, int) { i, err := eatAttrName(s, 0) if err != nil { return context{state: stateError, err: err}, len(s) } else if i != len(s) { c.state = stateAfterName } return c, i } // tAfterName is the context transition function for stateAfterName. func tAfterName(c context, s []byte) (context, int) { // Look for the start of the value. i := eatWhiteSpace(s, 0) if i == len(s) { return c, len(s) } else if s[i] != '=' { // Occurs due to tag ending '>', and valueless attribute. c.state = stateTag return c, i } c.state = stateBeforeValue // Consume the "=". return c, i + 1 } var attrStartStates = [...]state{ attrNone: stateAttr, attrScript: stateJS, attrScriptType: stateAttr, attrStyle: stateCSS, attrURL: stateURL, attrSrcset: stateSrcset, } // tBeforeValue is the context transition function for stateBeforeValue. 
func tBeforeValue(c context, s []byte) (context, int) { i := eatWhiteSpace(s, 0) if i == len(s) { return c, len(s) } // Find the attribute delimiter. delim := delimSpaceOrTagEnd switch s[i] { case '\'': delim, i = delimSingleQuote, i+1 case '"': delim, i = delimDoubleQuote, i+1 } c.state, c.delim = attrStartStates[c.attr], delim return c, i } // tHTMLCmt is the context transition function for stateHTMLCmt. func tHTMLCmt(c context, s []byte) (context, int) { if i := bytes.Index(s, commentEnd); i != -1 { return context{}, i + 3 } return c, len(s) } // specialTagEndMarkers maps element types to the character sequence that // case-insensitively signals the end of the special tag body. var specialTagEndMarkers = [...][]byte{ elementScript: []byte("script"), elementStyle: []byte("style"), elementTextarea: []byte("textarea"), elementTitle: []byte("title"), } var ( specialTagEndPrefix = []byte("</") tagEndSeparators = []byte("> \t\n\f/") ) // tSpecialTagEnd is the context transition function for raw text and RCDATA // element states. func tSpecialTagEnd(c context, s []byte) (context, int) { if c.element != elementNone { if i := indexTagEnd(s, specialTagEndMarkers[c.element]); i != -1 { return context{}, i } } return c, len(s) } // indexTagEnd finds the index of a special tag end in a case insensitive way, or returns -1 func indexTagEnd(s []byte, tag []byte) int { res := 0 plen := len(specialTagEndPrefix) for len(s) > 0 { // Try to find the tag end prefix first i := bytes.Index(s, specialTagEndPrefix) if i == -1 { return i } s = s[i+plen:] // Try to match the actual tag if there is still space for it if len(tag) <= len(s) && bytes.EqualFold(tag, s[:len(tag)]) { s = s[len(tag):] // Check the tag is followed by a proper separator if len(s) > 0 && bytes.IndexByte(tagEndSeparators, s[0]) != -1 { return res + i } res += len(tag) } res += i + plen } return -1 } // tAttr is the context transition function for the attribute state. 
func tAttr(c context, s []byte) (context, int) {
	// The context is left unchanged; all input is consumed.
	return c, len(s)
}

// tURL is the context transition function for the URL state.
// It consumes all of s and only updates c.urlPart.
func tURL(c context, s []byte) (context, int) {
	switch {
	case bytes.ContainsAny(s, "#?"):
		c.urlPart = urlPartQueryOrFrag
	case eatWhiteSpace(s, 0) != len(s) && c.urlPart == urlPartNone:
		// HTML5 uses "Valid URL potentially surrounded by spaces" for
		// attrs: http://www.w3.org/TR/html5/index.html#attributes-1
		c.urlPart = urlPartPreQuery
	}
	return c, len(s)
}

// tJS is the context transition function for the JS state.
// It stops at the first quote or slash and decides whether that byte opens a
// string, a comment, a regexp, or is a division operator.
func tJS(c context, s []byte) (context, int) {
	tok := bytes.IndexAny(s, `"'/`)
	if tok < 0 {
		// Entire input is non string, comment, regexp tokens.
		c.jsCtx = nextJSCtx(s, c.jsCtx)
		return c, len(s)
	}
	c.jsCtx = nextJSCtx(s[:tok], c.jsCtx)

	switch s[tok] {
	case '"':
		c.state, c.jsCtx = stateJSDqStr, jsCtxRegexp
		return c, tok + 1
	case '\'':
		c.state, c.jsCtx = stateJSSqStr, jsCtxRegexp
		return c, tok + 1
	case '/':
		// Classified below.
	default:
		panic("unreachable")
	}

	// "//" and "/*" open comments whatever the slash context is; both
	// bytes of the opener are consumed.
	if tok+1 < len(s) {
		switch s[tok+1] {
		case '/':
			c.state = stateJSLineCmt
			return c, tok + 2
		case '*':
			c.state = stateJSBlockCmt
			return c, tok + 2
		}
	}

	switch {
	case c.jsCtx == jsCtxRegexp:
		c.state = stateJSRegexp
	case c.jsCtx == jsCtxDivOp:
		// The slash is consumed in place and the next one is expected to
		// start a regexp.
		c.jsCtx = jsCtxRegexp
	default:
		return context{
			state: stateError,
			err:   errorf(ErrSlashAmbig, nil, 0, "'/' could start a division or regexp: %.32q", s[tok:]),
		}, len(s)
	}
	return c, tok + 1
}

// tJSDelimited is the context transition function for the JS string and regexp
// states.
func tJSDelimited(c context, s []byte) (context, int) { specials := `\"` switch c.state { case stateJSSqStr: specials = `\'` case stateJSRegexp: specials = `\/[]` } k, inCharset := 0, false for { i := k + bytes.IndexAny(s[k:], specials) if i < k { break } switch s[i] { case '\\': i++ if i == len(s) { return context{ state: stateError, err: errorf(ErrPartialEscape, nil, 0, "unfinished escape sequence in JS string: %q", s), }, len(s) } case '[': inCharset = true case ']': inCharset = false default: // end delimiter if !inCharset { c.state, c.jsCtx = stateJS, jsCtxDivOp return c, i + 1 } } k = i + 1 } if inCharset { // This can be fixed by making context richer if interpolation // into charsets is desired. return context{ state: stateError, err: errorf(ErrPartialCharset, nil, 0, "unfinished JS regexp charset: %q", s), }, len(s) } return c, len(s) } var blockCommentEnd = []byte("*/") // tBlockCmt is the context transition function for /*comment*/ states. func tBlockCmt(c context, s []byte) (context, int) { i := bytes.Index(s, blockCommentEnd) if i == -1 { return c, len(s) } switch c.state { case stateJSBlockCmt: c.state = stateJS case stateCSSBlockCmt: c.state = stateCSS default: panic(c.state.String()) } return c, i + 2 } // tLineCmt is the context transition function for //comment states. func tLineCmt(c context, s []byte) (context, int) { var lineTerminators string var endState state switch c.state { case stateJSLineCmt: lineTerminators, endState = "\n\r\u2028\u2029", stateJS case stateCSSLineCmt: lineTerminators, endState = "\n\f\r", stateCSS // Line comments are not part of any published CSS standard but // are supported by the 4 major browsers. 
// This defines line comments as // LINECOMMENT ::= "//" [^\n\f\d]* // since http://www.w3.org/TR/css3-syntax/#SUBTOK-nl defines // newlines: // nl ::= #xA | #xD #xA | #xD | #xC default: panic(c.state.String()) } i := bytes.IndexAny(s, lineTerminators) if i == -1 { return c, len(s) } c.state = endState // Per section 7.4 of EcmaScript 5 : http://es5.github.com/#x7.4 // "However, the LineTerminator at the end of the line is not // considered to be part of the single-line comment; it is // recognized separately by the lexical grammar and becomes part // of the stream of input elements for the syntactic grammar." return c, i } // tCSS is the context transition function for the CSS state. func tCSS(c context, s []byte) (context, int) { // CSS quoted strings are almost never used except for: // (1) URLs as in background: "/foo.png" // (2) Multiword font-names as in font-family: "Times New Roman" // (3) List separators in content values as in inline-lists: // <style> // ul.inlineList { list-style: none; padding:0 } // ul.inlineList > li { display: inline } // ul.inlineList > li:before { content: ", " } // ul.inlineList > li:first-child:before { content: "" } // </style> // <ul class=inlineList><li>One<li>Two<li>Three</ul> // (4) Attribute value selectors as in a[href="http://example.com/"] // // We conservatively treat all strings as URLs, but make some // allowances to avoid confusion. // // In (1), our conservative assumption is justified. // In (2), valid font names do not contain ':', '?', or '#', so our // conservative assumption is fine since we will never transition past // urlPartPreQuery. // In (3), our protocol heuristic should not be tripped, and there // should not be non-space content after a '?' or '#', so as long as // we only %-encode RFC 3986 reserved characters we are ok. // In (4), we should URL escape for URL attributes, and for others we // have the attribute name available if our conservative assumption // proves problematic for real code. 
k := 0 for { i := k + bytes.IndexAny(s[k:], `("'/`) if i < k { return c, len(s) } switch s[i] { case '(': // Look for url to the left. p := bytes.TrimRight(s[:i], "\t\n\f\r ") if endsWithCSSKeyword(p, "url") { j := len(s) - len(bytes.TrimLeft(s[i+1:], "\t\n\f\r ")) switch { case j != len(s) && s[j] == '"': c.state, j = stateCSSDqURL, j+1 case j != len(s) && s[j] == '\'': c.state, j = stateCSSSqURL, j+1 default: c.state = stateCSSURL } return c, j } case '/': if i+1 < len(s) { switch s[i+1] { case '/': c.state = stateCSSLineCmt return c, i + 2 case '*': c.state = stateCSSBlockCmt return c, i + 2 } } case '"': c.state = stateCSSDqStr return c, i + 1 case '\'': c.state = stateCSSSqStr return c, i + 1 } k = i + 1 } } // tCSSStr is the context transition function for the CSS string and URL states. func tCSSStr(c context, s []byte) (context, int) { var endAndEsc string switch c.state { case stateCSSDqStr, stateCSSDqURL: endAndEsc = `\"` case stateCSSSqStr, stateCSSSqURL: endAndEsc = `\'` case stateCSSURL: // Unquoted URLs end with a newline or close parenthesis. // The below includes the wc (whitespace character) and nl. endAndEsc = "\\\t\n\f\r )" default: panic(c.state.String()) } k := 0 for { i := k + bytes.IndexAny(s[k:], endAndEsc) if i < k { c, nread := tURL(c, decodeCSS(s[k:])) return c, k + nread } if s[i] == '\\' { i++ if i == len(s) { return context{ state: stateError, err: errorf(ErrPartialEscape, nil, 0, "unfinished escape sequence in CSS string: %q", s), }, len(s) } } else { c.state = stateCSS return c, i + 1 } c, _ = tURL(c, decodeCSS(s[:i+1])) k = i + 1 } } // tError is the context transition function for the error state. func tError(c context, s []byte) (context, int) { return c, len(s) } // eatAttrName returns the largest j such that s[i:j] is an attribute name. // It returns an error if s[i:] does not look like it begins with an // attribute name, such as encountering a quote mark without a preceding // equals sign. 
func eatAttrName(s []byte, i int) (int, *Error) { for j := i; j < len(s); j++ { switch s[j] { case ' ', '\t', '\n', '\f', '\r', '=', '>': return j, nil case '\'', '"', '<': // These result in a parse warning in HTML5 and are // indicative of serious problems if seen in an attr // name in a template. return -1, errorf(ErrBadHTML, nil, 0, "%q in attribute name: %.32q", s[j:j+1], s) default: // No-op. } } return len(s), nil } var elementNameMap = map[string]element{ "script": elementScript, "style": elementStyle, "textarea": elementTextarea, "title": elementTitle, } // asciiAlpha reports whether c is an ASCII letter. func asciiAlpha(c byte) bool { return 'A' <= c && c <= 'Z' || 'a' <= c && c <= 'z' } // asciiAlphaNum reports whether c is an ASCII letter or digit. func asciiAlphaNum(c byte) bool { return asciiAlpha(c) || '0' <= c && c <= '9' } // eatTagName returns the largest j such that s[i:j] is a tag name and the tag type. func eatTagName(s []byte, i int) (int, element) { if i == len(s) || !asciiAlpha(s[i]) { return i, elementNone } j := i + 1 for j < len(s) { x := s[j] if asciiAlphaNum(x) { j++ continue } // Allow "x-y" or "x:y" but not "x-", "-y", or "x--y". if (x == ':' || x == '-') && j+1 < len(s) && asciiAlphaNum(s[j+1]) { j += 2 continue } break } return j, elementNameMap[strings.ToLower(string(s[i:j]))] } // eatWhiteSpace returns the largest j such that s[i:j] is white space. func eatWhiteSpace(s []byte, i int) int { for j := i; j < len(s); j++ { switch s[j] { case ' ', '\t', '\n', '\f', '\r': // No-op. default: return j } } return len(s) }