    // Copyright 2011 The Go Authors. All rights reserved.
    // Use of this source code is governed by a BSD-style
    // license that can be found in the LICENSE file.
    
    
    package template

    import (
    	"bytes"
    	"encoding/json"
    	"fmt"
    	"reflect"
    	"regexp"
    	"strings"
    	"unicode/utf8"
    )
    
    // jsWhitespace contains all of the JS whitespace characters, as defined
    // by the \s character class. It is written as a single string so that it
    // can be passed directly as a cutset (nextJSCtx uses it with
    // bytes.TrimRight).
    // See https://developer.mozilla.org/en-US/docs/Web/JavaScript/Guide/Regular_expressions/Character_classes.
    const jsWhitespace = "\f\n\r\t\v\u0020\u00a0\u1680\u2000\u2001\u2002\u2003\u2004\u2005\u2006\u2007\u2008\u2009\u200a\u2028\u2029\u202f\u205f\u3000\ufeff"
    
    
    // nextJSCtx returns the context that determines whether a slash after the
    
    Shenghou Ma's avatar
    Shenghou Ma committed
    // given run of tokens starts a regular expression instead of a division
    
    // operator: / or /=.
    //
    // This assumes that the token run does not include any string tokens, comment
    // tokens, regular expression literal tokens, or division operators.
    //
    // This fails on some valid but nonsensical JavaScript programs like
    // "x = ++/foo/i" which is quite different than "x++/foo/i", but is not known to
    // fail on any known useful programs. It is based on the draft
    // JavaScript 2.0 lexical grammar and requires one token of lookbehind:
    
    // https://www.mozilla.org/js/language/js20-2000-07/rationale/syntax.html
    
    func nextJSCtx(s []byte, preceding jsCtx) jsCtx {
    
    	// Trim all JS whitespace characters
    	s = bytes.TrimRight(s, jsWhitespace)
    
    	if len(s) == 0 {
    		return preceding
    	}
    
    	// All cases below are in the single-byte UTF-8 group.
    	switch c, n := s[len(s)-1], len(s); c {
    	case '+', '-':
    		// ++ and -- are not regexp preceders, but + and - are whether
    		// they are used as infix or prefix operators.
    		start := n - 1
    		// Count the number of adjacent dashes or pluses.
    		for start > 0 && s[start-1] == c {
    			start--
    		}
    		if (n-start)&1 == 1 {
    			// Reached for trailing minus signs since "---" is the
    			// same as "-- -".
    			return jsCtxRegexp
    		}
    		return jsCtxDivOp
    	case '.':
    		// Handle "42."
    		if n != 1 && '0' <= s[n-2] && s[n-2] <= '9' {
    			return jsCtxDivOp
    		}
    		return jsCtxRegexp
    	// Suffixes for all punctuators from section 7.7 of the language spec
    	// that only end binary operators not handled above.
    	case ',', '<', '>', '=', '*', '%', '&', '|', '^', '?':
    		return jsCtxRegexp
    	// Suffixes for all punctuators from section 7.7 of the language spec
    	// that are prefix operators not handled above.
    	case '!', '~':
    		return jsCtxRegexp
    	// Matches all the punctuators from section 7.7 of the language spec
    	// that are open brackets not handled above.
    	case '(', '[':
    		return jsCtxRegexp
    	// Matches all the punctuators from section 7.7 of the language spec
    	// that precede expression starts.
    	case ':', ';', '{':
    		return jsCtxRegexp
    	// CAVEAT: the close punctuators ('}', ']', ')') precede div ops and
    	// are handled in the default except for '}' which can precede a
    	// division op as in
    	//    ({ valueOf: function () { return 42 } } / 2
    	// which is valid, but, in practice, developers don't divide object
    	// literals, so our heuristic works well for code like
    	//    function () { ... }  /foo/.test(x) && sideEffect();
    	// The ')' punctuator can precede a regular expression as in
    	//     if (b) /foo/.test(x) && ...
    	// but this is much less likely than
    	//     (a + b) / c
    	case '}':
    		return jsCtxRegexp
    	default:
    		// Look for an IdentifierName and see if it is a keyword that
    		// can precede a regular expression.
    		j := n
    
    Russ Cox's avatar
    Russ Cox committed
    		for j > 0 && isJSIdentPart(rune(s[j-1])) {
    
    			j--
    		}
    		if regexpPrecederKeywords[string(s[j:])] {
    			return jsCtxRegexp
    		}
    	}
    	// Otherwise is a punctuator not listed above, or
    	// a string which precedes a div op, or an identifier
    	// which precedes a div op.
    	return jsCtxDivOp
    }
    
    
    // regexpPrecederKeywords is a set of reserved JS keywords that can precede a
    // regular expression in JS source. It is used as a set: a keyword is a
    // member when indexing the map yields true, and absent keys yield false.
    var regexpPrecederKeywords = map[string]bool{
    	"break":      true,
    	"case":       true,
    	"continue":   true,
    	"delete":     true,
    	"do":         true,
    	"else":       true,
    	"finally":    true,
    	"in":         true,
    	"instanceof": true,
    	"return":     true,
    	"throw":      true,
    	"try":        true,
    	"typeof":     true,
    	"void":       true,
    }
    
    
    var jsonMarshalType = reflect.TypeFor[json.Marshaler]()
    
    
    // indirectToJSONMarshaler returns the value, after dereferencing as many times
    // as necessary to reach the base type (or nil) or an implementation of json.Marshal.
    
    func indirectToJSONMarshaler(a any) any {
    
    	// text/template now supports passing untyped nil as a func call
    	// argument, so we must support it. Otherwise we'd panic below, as one
    	// cannot call the Type or Interface methods on an invalid
    	// reflect.Value. See golang.org/issue/18716.
    	if a == nil {
    		return nil
    	}
    
    
    	v := reflect.ValueOf(a)
    
    	for !v.Type().Implements(jsonMarshalType) && v.Kind() == reflect.Pointer && !v.IsNil() {
    
    // scriptTagRe matches the start of an opening or closing script tag
    // ("<script" or "</script"), ignoring case. The optional slash is captured
    // as group 1 so that a replacement can keep it.
    var scriptTagRe = regexp.MustCompile("(?i)<(/?)script")
    
    
    // jsValEscaper escapes its inputs to a JS Expression (section 11.14) that has
    
    // neither side-effects nor free variables outside (NaN, Infinity).
    
    func jsValEscaper(args ...any) string {
    	var a any
    
    	if len(args) == 1 {
    
    		a = indirectToJSONMarshaler(args[0])
    
    		switch t := a.(type) {
    		case JS:
    			return string(t)
    		case JSStr:
    			// TODO: normalize quotes.
    			return `"` + string(t) + `"`
    		case json.Marshaler:
    			// Do not treat as a Stringer.
    		case fmt.Stringer:
    			a = t.String()
    		}
    
    		for i, arg := range args {
    			args[i] = indirectToJSONMarshaler(arg)
    		}
    
    		a = fmt.Sprint(args...)
    	}
    	// TODO: detect cycles before calling Marshal which loops infinitely on
    
    	// cyclic data. This may be an unacceptable DoS risk.
    
    	b, err := json.Marshal(a)
    	if err != nil {
    
    		// While the standard JSON marshaler does not include user controlled
    
    		// information in the error message, if a type has a MarshalJSON method,
    		// the content of the error message is not guaranteed. Since we insert
    		// the error into the template, as part of a comment, we attempt to
    		// prevent the error from either terminating the comment, or the script
    		// block itself.
    		//
    		// In particular we:
    		//   * replace "*/" comment end tokens with "* /", which does not
    		//     terminate the comment
    
    		//   * replace "<script" and "</script" with "\x3Cscript" and "\x3C/script"
    		//     (case insensitively), and "<!--" with "\x3C!--", which prevents
    		//     confusing script block termination semantics
    
    		//
    		// We also put a space before the comment so that if it is flush against
    
    		// a division operator it is not turned into a line comment:
    		//     x/{{y}}
    		// turning into
    
    		//     x//* error marshaling y:
    
    		//          second line of error message */null
    
    		errStr = string(scriptTagRe.ReplaceAll([]byte(errStr), []byte(`\x3C${1}script`)))
    
    		errStr = strings.ReplaceAll(errStr, "*/", "* /")
    		errStr = strings.ReplaceAll(errStr, "<!--", `\x3C!--`)
    		return fmt.Sprintf(" /* %s */null ", errStr)
    
    
    	// TODO: maybe post-process output to prevent it from containing
    	// "<!--", "-->", "<![CDATA[", "]]>", or "</script"
    
    	// in case custom marshalers produce output containing those.
    
    	// Note: Do not use \x escaping to save bytes because it is not JSON compatible and this escaper
    	// supports ld+json content-type.
    
    	if len(b) == 0 {
    		// In, `x=y/{{.}}*z` a json.Marshaler that produces "" should
    		// not cause the output `x=y/*z`.
    		return " null "
    	}
    	first, _ := utf8.DecodeRune(b)
    	last, _ := utf8.DecodeLastRune(b)
    
    	var buf strings.Builder
    
    	// Prevent IdentifierNames and NumericLiterals from running into
    	// keywords: in, instanceof, typeof, void
    	pad := isJSIdentPart(first) || isJSIdentPart(last)
    	if pad {
    		buf.WriteByte(' ')
    	}
    	written := 0
    	// Make sure that json.Marshal escapes codepoints U+2028 & U+2029
    	// so it falls within the subset of JSON which is valid JS.
    	for i := 0; i < len(b); {
    		rune, n := utf8.DecodeRune(b[i:])
    		repl := ""
    		if rune == 0x2028 {
    			repl = `\u2028`
    		} else if rune == 0x2029 {
    			repl = `\u2029`
    		}
    		if repl != "" {
    			buf.Write(b[written:i])
    			buf.WriteString(repl)
    			written = i + n
    		}
    		i += n
    	}
    	if buf.Len() != 0 {
    		buf.Write(b[written:])
    		if pad {
    			buf.WriteByte(' ')
    
    		return buf.String()
    
    	}
    	return string(b)
    }
    
    // jsStrEscaper produces a string that can be included between quotes in
    // JavaScript source, in JavaScript embedded in an HTML5 <script> element,
    // or in an HTML5 event handler attribute such as onclick.
    
    func jsStrEscaper(args ...any) string {
    
    	s, t := stringify(args...)
    	if t == contentTypeJSStr {
    		return replace(s, jsStrNormReplacementTable)
    	}
    	return replace(s, jsStrReplacementTable)
    
    // jsTmplLitEscaper stringifies args and escapes the result with
    // jsBqStrReplacementTable, the table that also covers the special
    // characters of JS template literals. The content type reported by
    // stringify is ignored.
    func jsTmplLitEscaper(args ...any) string {
    	text, _ := stringify(args...)
    	escaped := replace(text, jsBqStrReplacementTable)
    	return escaped
    }
    
    
    // jsRegexpEscaper works like jsStrEscaper, but additionally escapes regular
    // expression specials so that the value is matched literally when placed
    // inside a regular expression literal. For example, /foo{{.X}}bar/ matches
    // "foo", then the literal text of {{.X}}, then "bar".
    func jsRegexpEscaper(args ...any) string {
    	raw, _ := stringify(args...)
    	if escaped := replace(raw, jsRegexpReplacementTable); escaped != "" {
    		return escaped
    	}
    	// /{{.X}}/ should not produce a line comment when .X == "".
    	return "(?:)"
    }
    
    // replace replaces each rune r of s with replacementTable[r], provided that
    // r < len(replacementTable). If replacementTable[r] is the empty string then
    // no replacement is made.
    //
    // Runes below len(lowUnicodeReplacementTable) are always replaced using
    // lowUnicodeReplacementTable, which takes precedence over replacementTable.
    // It also replaces runes U+2028 and U+2029 with the raw strings `\u2028` and
    // `\u2029`.
    //
    // If nothing needs replacing, s itself is returned without copying.
    func replace(s string, replacementTable []string) string {
    	var b strings.Builder
    	r, w, written := rune(0), 0, 0
    	for i := 0; i < len(s); i += w {
    		// See comment in htmlEscaper.
    		r, w = utf8.DecodeRuneInString(s[i:])
    		var repl string
    		switch {
    		case int(r) < len(lowUnicodeReplacementTable):
    			repl = lowUnicodeReplacementTable[r]
    		case int(r) < len(replacementTable) && replacementTable[r] != "":
    			repl = replacementTable[r]
    		case r == '\u2028':
    			repl = `\u2028`
    		case r == '\u2029':
    			repl = `\u2029`
    		default:
    			continue
    		}
    		// Reserve space once, on the first replacement only.
    		if written == 0 {
    			b.Grow(len(s))
    		}
    		b.WriteString(s[written:i])
    		b.WriteString(repl)
    		// Everything up to and including this rune is now accounted for.
    		written = i + w
    	}
    	if written == 0 {
    		return s
    	}
    	b.WriteString(s[written:])
    	return b.String()
    }

    // lowUnicodeReplacementTable maps every code point below U+0020 to its
    // escaped form. replace consults it before any caller-supplied table.
    var lowUnicodeReplacementTable = []string{
    	0: `\u0000`, 1: `\u0001`, 2: `\u0002`, 3: `\u0003`, 4: `\u0004`, 5: `\u0005`, 6: `\u0006`,
    	'\a': `\u0007`,
    	'\b': `\u0008`,
    	'\t': `\t`,
    	'\n': `\n`,
    	'\v': `\u000b`, // "\v" == "v" on IE 6.
    	'\f': `\f`,
    	'\r': `\r`,
    	0xe:  `\u000e`, 0xf: `\u000f`, 0x10: `\u0010`, 0x11: `\u0011`, 0x12: `\u0012`, 0x13: `\u0013`,
    	0x14: `\u0014`, 0x15: `\u0015`, 0x16: `\u0016`, 0x17: `\u0017`, 0x18: `\u0018`, 0x19: `\u0019`,
    	0x1a: `\u001a`, 0x1b: `\u001b`, 0x1c: `\u001c`, 0x1d: `\u001d`, 0x1e: `\u001e`, 0x1f: `\u001f`,
    }
    
    
    // jsStrReplacementTable maps runes, by code point, to the escape used when
    // the rune appears inside a quoted JS string. An empty entry means the
    // rune is left alone. It holds the same entries as jsBqStrReplacementTable
    // except for the template-literal specials $, {, and }.
    var jsStrReplacementTable = []string{
    	0:    `\u0000`,
    	'\t': `\t`,
    	'\n': `\n`,
    	'\v': `\u000b`, // "\v" == "v" on IE 6.
    	'\f': `\f`,
    	'\r': `\r`,
    	// Encode HTML specials as hex so the output can be embedded
    	// in HTML attributes without further encoding.
    	'"':  `\u0022`,
    	'`':  `\u0060`,
    	'&':  `\u0026`,
    	'\'': `\u0027`,
    	'+':  `\u002b`,
    	'/':  `\/`,
    	'<':  `\u003c`,
    	'>':  `\u003e`,
    	'\\': `\\`,
    }
    
    // jsBqStrReplacementTable is like jsStrReplacementTable except it also contains
    // the special characters for JS template literals: $, {, and }.
    // Entries are indexed by rune value; runes without an entry hold the empty
    // string, which replace treats as "no replacement".
    var jsBqStrReplacementTable = []string{
    	0:    `\u0000`,
    	'\t': `\t`,
    	'\n': `\n`,
    	'\v': `\u000b`, // "\v" == "v" on IE 6.
    	'\f': `\f`,
    	'\r': `\r`,
    	// Encode HTML specials as hex so the output can be embedded
    	// in HTML attributes without further encoding.
    	'"':  `\u0022`,
    	'`':  `\u0060`,
    	'&':  `\u0026`,
    	'\'': `\u0027`,
    	'+':  `\u002b`,
    	'/':  `\/`,
    	'<':  `\u003c`,
    	'>':  `\u003e`,
    	'\\': `\\`,
    	// Template-literal specials: escaping these keeps a value from
    	// opening or closing a ${...} substitution.
    	'$':  `\u0024`,
    	'{':  `\u007b`,
    	'}':  `\u007d`,
    }
    
    
    // jsStrNormReplacementTable is like jsStrReplacementTable but does not
    // overencode existing escapes since this table has no entry for `\`.
    // Entries are indexed by rune value; an empty entry means the rune is left
    // alone.
    var jsStrNormReplacementTable = []string{
    	0:    `\u0000`,
    	'\t': `\t`,
    	'\n': `\n`,
    	'\v': `\u000b`, // "\v" == "v" on IE 6.
    	'\f': `\f`,
    	'\r': `\r`,
    	// Encode HTML specials as hex so the output can be embedded
    	// in HTML attributes without further encoding.
    	'"':  `\u0022`,
    	'&':  `\u0026`,
    	'\'': `\u0027`,
    	'`':  `\u0060`,
    	'+':  `\u002b`,
    	'/':  `\/`,
    	'<':  `\u003c`,
    	'>':  `\u003e`,
    }
    
    // jsRegexpReplacementTable maps runes, by code point, to the escape used
    // when the rune appears inside a JS regular expression literal. Besides the
    // control characters and HTML specials handled by the string tables, it
    // escapes every regular expression special so the text matches literally.
    // An empty entry means the rune is left alone.
    var jsRegexpReplacementTable = []string{
    	0:    `\u0000`,
    	'\t': `\t`,
    	'\n': `\n`,
    	'\v': `\u000b`, // "\v" == "v" on IE 6.
    	'\f': `\f`,
    	'\r': `\r`,
    	// Encode HTML specials as hex so the output can be embedded
    	// in HTML attributes without further encoding.
    	'"':  `\u0022`,
    	'$':  `\$`,
    	'&':  `\u0026`,
    	'\'': `\u0027`,
    	'(':  `\(`,
    	')':  `\)`,
    	'*':  `\*`,
    	'+':  `\u002b`,
    	'-':  `\-`,
    	'.':  `\.`,
    	'/':  `\/`,
    	'<':  `\u003c`,
    	'>':  `\u003e`,
    	'?':  `\?`,
    	'[':  `\[`,
    	'\\': `\\`,
    	']':  `\]`,
    	'^':  `\^`,
    	'{':  `\{`,
    	'|':  `\|`,
    	'}':  `\}`,
    }
    
    
    // isJSIdentPart reports whether the given rune is a JS identifier part.
    // It does not handle all the non-Latin letters, joiners, and combining marks,
    // but it does handle every codepoint that can occur in a numeric literal or
    // a keyword: '$', '_', ASCII digits, and ASCII letters.
    func isJSIdentPart(r rune) bool {
    	switch {
    	case r == '$', r == '_',
    		'0' <= r && r <= '9',
    		'A' <= r && r <= 'Z',
    		'a' <= r && r <= 'z':
    		return true
    	}
    	return false
    }
    
    // isJSType reports whether the given MIME type should be considered JavaScript.
    //
    // It is used to determine whether a script tag with a type attribute is a javascript container.
    //
    // Any parameters after the first ';' are discarded, and the remaining type
    // is compared case-insensitively with surrounding whitespace removed. The
    // JSON types and the "module" script type are accepted alongside the
    // JavaScript MIME types.
    func isJSType(mimeType string) bool {
    	// per
    	//   https://www.w3.org/TR/html5/scripting-1.html#attr-script-type
    	//   https://tools.ietf.org/html/rfc7231#section-3.1.1
    	//   https://tools.ietf.org/html/rfc4329#section-3
    	//   https://www.ietf.org/rfc/rfc4627.txt
    	// discard parameters
    	mimeType, _, _ = strings.Cut(mimeType, ";")
    	mimeType = strings.ToLower(mimeType)
    	mimeType = strings.TrimSpace(mimeType)
    	switch mimeType {
    	case
    		"application/ecmascript",
    		"application/javascript",
    		"application/json",
    		"application/ld+json",
    		"application/x-ecmascript",
    		"application/x-javascript",
    		"module",
    		"text/ecmascript",
    		"text/javascript",
    		"text/javascript1.0",
    		"text/javascript1.1",
    		"text/javascript1.2",
    		"text/javascript1.3",
    		"text/javascript1.4",
    		"text/javascript1.5",
    		"text/jscript",
    		"text/livescript",
    		"text/x-ecmascript",
    		"text/x-javascript":
    		return true
    	default:
    		return false
    	}
    }