// Copyright 2011 The Go Authors. All rights reserved. // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. package template import ( "bytes" "fmt" "strings" ) // urlFilter returns its input unless it contains an unsafe scheme in which // case it defangs the entire URL. // // Schemes that cause unintended side effects that are irreversible without user // interaction are considered unsafe. For example, clicking on a "javascript:" // link can immediately trigger JavaScript code execution. // // This filter conservatively assumes that all schemes other than the following // are unsafe: // * http: Navigates to a new website, and may open a new window or tab. // These side effects can be reversed by navigating back to the // previous website, or closing the window or tab. No irreversible // changes will take place without further user interaction with // the new website. // * https: Same as http. // * mailto: Opens an email program and starts a new draft. This side effect // is not irreversible until the user explicitly clicks send; it // can be undone by closing the email program. // // To allow URLs containing other schemes to bypass this filter, developers must // explicitly indicate that such a URL is expected and safe by encapsulating it // in a template.URL value. func urlFilter(args ...interface{}) string { s, t := stringify(args...) if t == contentTypeURL { return s } if !isSafeUrl(s) { return "#" + filterFailsafe } return s } // isSafeUrl is true if s is a relative URL or if URL has a protocol in // (http, https, mailto). func isSafeUrl(s string) bool { if i := strings.IndexRune(s, ':'); i >= 0 && !strings.ContainsRune(s[:i], '/') { protocol := s[:i] if !strings.EqualFold(protocol, "http") && !strings.EqualFold(protocol, "https") && !strings.EqualFold(protocol, "mailto") { return false } } return true } // urlEscaper produces an output that can be embedded in a URL query. // The output can be embedded in an HTML attribute without further escaping. func urlEscaper(args ...interface{}) string { return urlProcessor(false, args...) } // urlNormalizer normalizes URL content so it can be embedded in a quote-delimited // string or parenthesis delimited url(...). // The normalizer does not encode all HTML specials. Specifically, it does not // encode '&' so correct embedding in an HTML attribute requires escaping of // '&' to '&'. func urlNormalizer(args ...interface{}) string { return urlProcessor(true, args...) } // urlProcessor normalizes (when norm is true) or escapes its input to produce // a valid hierarchical or opaque URL part. func urlProcessor(norm bool, args ...interface{}) string { s, t := stringify(args...) if t == contentTypeURL { norm = true } var b bytes.Buffer if processUrlOnto(s, norm, &b) { return b.String() } return s } // processUrlOnto appends a normalized URL corresponding to its input to b // and returns true if the appended content differs from s. func processUrlOnto(s string, norm bool, b *bytes.Buffer) bool { b.Grow(b.Cap() + len(s) + 16) written := 0 // The byte loop below assumes that all URLs use UTF-8 as the // content-encoding. This is similar to the URI to IRI encoding scheme // defined in section 3.1 of RFC 3987, and behaves the same as the // EcmaScript builtin encodeURIComponent. // It should not cause any misencoding of URLs in pages with // Content-type: text/html;charset=UTF-8. for i, n := 0, len(s); i < n; i++ { c := s[i] switch c { // Single quote and parens are sub-delims in RFC 3986, but we // escape them so the output can be embedded in single // quoted attributes and unquoted CSS url(...) constructs. // Single quotes are reserved in URLs, but are only used in // the obsolete "mark" rule in an appendix in RFC 3986 // so can be safely encoded. case '!', '#', '$', '&', '*', '+', ',', '/', ':', ';', '=', '?', '@', '[', ']': if norm { continue } // Unreserved according to RFC 3986 sec 2.3 // "For consistency, percent-encoded octets in the ranges of // ALPHA (%41-%5A and %61-%7A), DIGIT (%30-%39), hyphen (%2D), // period (%2E), underscore (%5F), or tilde (%7E) should not be // created by URI producers case '-', '.', '_', '~': continue case '%': // When normalizing do not re-encode valid escapes. if norm && i+2 < len(s) && isHex(s[i+1]) && isHex(s[i+2]) { continue } default: // Unreserved according to RFC 3986 sec 2.3 if 'a' <= c && c <= 'z' { continue } if 'A' <= c && c <= 'Z' { continue } if '0' <= c && c <= '9' { continue } } b.WriteString(s[written:i]) fmt.Fprintf(b, "%%%02x", c) written = i + 1 } b.WriteString(s[written:]) return written != 0 } // Filters and normalizes srcset values which are comma separated // URLs followed by metadata. func srcsetFilterAndEscaper(args ...interface{}) string { s, t := stringify(args...) switch t { case contentTypeSrcset: return s case contentTypeURL: // Normalizing gets rid of all HTML whitespace // which separate the image URL from its metadata. var b bytes.Buffer if processUrlOnto(s, true, &b) { s = b.String() } // Additionally, commas separate one source from another. return strings.Replace(s, ",", "%2c", -1) } var b bytes.Buffer written := 0 for i := 0; i < len(s); i++ { if s[i] == ',' { filterSrcsetElement(s, written, i, &b) b.WriteString(",") written = i + 1 } } filterSrcsetElement(s, written, len(s), &b) return b.String() } // Derived from https://play.golang.org/p/Dhmj7FORT5 const htmlSpaceAndAsciiAlnumBytes = "\x00\x36\x00\x00\x01\x00\xff\x03\xfe\xff\xff\x07\xfe\xff\xff\x07" // isHtmlSpace is true iff c is a whitespace character per // https://infra.spec.whatwg.org/#ascii-whitespace func isHtmlSpace(c byte) bool { return (c <= 0x20) && 0 != (htmlSpaceAndAsciiAlnumBytes[c>>3]&(1<<uint(c&0x7))) } func isHtmlSpaceOrAsciiAlnum(c byte) bool { return (c < 0x80) && 0 != (htmlSpaceAndAsciiAlnumBytes[c>>3]&(1<<uint(c&0x7))) } func filterSrcsetElement(s string, left int, right int, b *bytes.Buffer) { start := left for start < right && isHtmlSpace(s[start]) { start += 1 } end := right for i := start; i < right; i++ { if isHtmlSpace(s[i]) { end = i break } } if url := s[start:end]; isSafeUrl(url) { // If image metadata is only spaces or alnums then // we don't need to URL normalize it. metadataOk := true for i := end; i < right; i++ { if !isHtmlSpaceOrAsciiAlnum(s[i]) { metadataOk = false break } } if metadataOk { b.WriteString(s[left:start]) processUrlOnto(url, true, b) b.WriteString(s[end:right]) return } } b.WriteString("#") b.WriteString(filterFailsafe) }