hugo/tpl/internal/go_templates/htmltemplate/escape.go

1011 lines
33 KiB
Go

// Copyright 2011 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package template
import (
"bytes"
"fmt"
"html"
//"internal/godebug"
"io"
"regexp"
template "github.com/gohugoio/hugo/tpl/internal/go_templates/texttemplate"
"github.com/gohugoio/hugo/tpl/internal/go_templates/texttemplate/parse"
)
// escapeTemplate rewrites the named template, which must be
// associated with t, to guarantee that the output of any of the named
// templates is properly escaped. If no error is returned, then the named templates have
// been modified. Otherwise the named templates have been rendered
// unusable.
func escapeTemplate(tmpl *Template, node parse.Node, name string) error {
c, _ := tmpl.esc.escapeTree(context{}, node, name, 0)
var err error
if c.err != nil {
err, c.err.Name = c.err, name
} else if c.state != stateText {
err = &Error{ErrEndContext, nil, name, 0, fmt.Sprintf("ends in a non-text context: %v", c)}
}
if err != nil {
// Prevent execution of unsafe templates.
if t := tmpl.set[name]; t != nil {
t.escapeErr = err
t.text.Tree = nil
t.Tree = nil
}
return err
}
tmpl.esc.commit()
if t := tmpl.set[name]; t != nil {
t.escapeErr = escapeOK
t.Tree = t.text.Tree
}
return nil
}
// evalArgs formats the list of arguments into a string. It is equivalent to
// fmt.Sprint(args...), except that it dereferences all pointers.
func evalArgs(args ...any) string {
// Optimization for simple common case of a single string argument.
if len(args) == 1 {
if s, ok := args[0].(string); ok {
return s
}
}
for i, arg := range args {
args[i] = indirectToStringerOrError(arg)
}
return fmt.Sprint(args...)
}
// funcMap maps command names to functions that render their inputs safe.
var funcMap = template.FuncMap{
"_html_template_attrescaper": attrEscaper,
"_html_template_commentescaper": commentEscaper,
"_html_template_cssescaper": cssEscaper,
"_html_template_cssvaluefilter": cssValueFilter,
"_html_template_htmlnamefilter": htmlNameFilter,
"_html_template_htmlescaper": htmlEscaper,
"_html_template_jsregexpescaper": jsRegexpEscaper,
"_html_template_jsstrescaper": jsStrEscaper,
"_html_template_jsvalescaper": jsValEscaper,
"_html_template_nospaceescaper": htmlNospaceEscaper,
"_html_template_rcdataescaper": rcdataEscaper,
"_html_template_srcsetescaper": srcsetFilterAndEscaper,
"_html_template_urlescaper": urlEscaper,
"_html_template_urlfilter": urlFilter,
"_html_template_urlnormalizer": urlNormalizer,
"_eval_args_": evalArgs,
}
// escaper collects type inferences about templates and changes needed to make
// templates injection safe.
type escaper struct {
// ns is the nameSpace that this escaper is associated with.
ns *nameSpace
// output[templateName] is the output context for a templateName that
// has been mangled to include its input context.
output map[string]context
// derived[c.mangle(name)] maps to a template derived from the template
// named name templateName for the start context c.
derived map[string]*template.Template
// called[templateName] is a set of called mangled template names.
called map[string]bool
// xxxNodeEdits are the accumulated edits to apply during commit.
// Such edits are not applied immediately in case a template set
// executes a given template in different escaping contexts.
actionNodeEdits map[*parse.ActionNode][]string
templateNodeEdits map[*parse.TemplateNode]string
textNodeEdits map[*parse.TextNode][]byte
// rangeContext holds context about the current range loop.
rangeContext *rangeContext
}
// rangeContext holds information about the current range loop.
type rangeContext struct {
outer *rangeContext // outer loop
breaks []context // context at each break action
continues []context // context at each continue action
}
// makeEscaper creates a blank escaper for the given set.
func makeEscaper(n *nameSpace) escaper {
return escaper{
n,
map[string]context{},
map[string]*template.Template{},
map[string]bool{},
map[*parse.ActionNode][]string{},
map[*parse.TemplateNode]string{},
map[*parse.TextNode][]byte{},
nil,
}
}
// filterFailsafe is an innocuous word that is emitted in place of unsafe values
// by sanitizer functions. It is not a keyword in any programming language,
// contains no special characters, is not empty, and when it appears in output
// it is distinct enough that a developer can find the source of the problem
// via a search engine.
const filterFailsafe = "ZgotmplZ"
// escape escapes a template node.
func (e *escaper) escape(c context, n parse.Node) context {
switch n := n.(type) {
case *parse.ActionNode:
return e.escapeAction(c, n)
case *parse.BreakNode:
c.n = n
e.rangeContext.breaks = append(e.rangeContext.breaks, c)
return context{state: stateDead}
case *parse.CommentNode:
return c
case *parse.ContinueNode:
c.n = n
e.rangeContext.continues = append(e.rangeContext.breaks, c)
return context{state: stateDead}
case *parse.IfNode:
return e.escapeBranch(c, &n.BranchNode, "if")
case *parse.ListNode:
return e.escapeList(c, n)
case *parse.RangeNode:
return e.escapeBranch(c, &n.BranchNode, "range")
case *parse.TemplateNode:
return e.escapeTemplate(c, n)
case *parse.TextNode:
return e.escapeText(c, n)
case *parse.WithNode:
return e.escapeBranch(c, &n.BranchNode, "with")
}
panic("escaping " + n.String() + " is unimplemented")
}
// Modified by Hugo.
// var debugAllowActionJSTmpl = godebug.New("jstmpllitinterp")
// escapeAction escapes an action template node.
func (e *escaper) escapeAction(c context, n *parse.ActionNode) context {
if len(n.Pipe.Decl) != 0 {
// A local variable assignment, not an interpolation.
return c
}
c = nudge(c)
// Check for disallowed use of predefined escapers in the pipeline.
for pos, idNode := range n.Pipe.Cmds {
node, ok := idNode.Args[0].(*parse.IdentifierNode)
if !ok {
// A predefined escaper "esc" will never be found as an identifier in a
// Chain or Field node, since:
// - "esc.x ..." is invalid, since predefined escapers return strings, and
// strings do not have methods, keys or fields.
// - "... .esc" is invalid, since predefined escapers are global functions,
// not methods or fields of any types.
// Therefore, it is safe to ignore these two node types.
continue
}
ident := node.Ident
if _, ok := predefinedEscapers[ident]; ok {
if pos < len(n.Pipe.Cmds)-1 ||
c.state == stateAttr && c.delim == delimSpaceOrTagEnd && ident == "html" {
return context{
state: stateError,
err: errorf(ErrPredefinedEscaper, n, n.Line, "predefined escaper %q disallowed in template", ident),
}
}
}
}
s := make([]string, 0, 3)
switch c.state {
case stateError:
return c
case stateURL, stateCSSDqStr, stateCSSSqStr, stateCSSDqURL, stateCSSSqURL, stateCSSURL:
switch c.urlPart {
case urlPartNone:
s = append(s, "_html_template_urlfilter")
fallthrough
case urlPartPreQuery:
switch c.state {
case stateCSSDqStr, stateCSSSqStr:
s = append(s, "_html_template_cssescaper")
default:
s = append(s, "_html_template_urlnormalizer")
}
case urlPartQueryOrFrag:
s = append(s, "_html_template_urlescaper")
case urlPartUnknown:
return context{
state: stateError,
err: errorf(ErrAmbigContext, n, n.Line, "%s appears in an ambiguous context within a URL", n),
}
default:
panic(c.urlPart.String())
}
case stateJS:
s = append(s, "_html_template_jsvalescaper")
// A slash after a value starts a div operator.
c.jsCtx = jsCtxDivOp
case stateJSDqStr, stateJSSqStr:
s = append(s, "_html_template_jsstrescaper")
case stateJSBqStr:
if SecurityAllowActionJSTmpl.Load() {
// debugAllowActionJSTmpl.IncNonDefault()
s = append(s, "_html_template_jsstrescaper")
} else {
return context{
state: stateError,
err: errorf(ErrJSTemplate, n, n.Line, "%s appears in a JS template literal", n),
}
}
case stateJSRegexp:
s = append(s, "_html_template_jsregexpescaper")
case stateCSS:
s = append(s, "_html_template_cssvaluefilter")
case stateText:
s = append(s, "_html_template_htmlescaper")
case stateRCDATA:
s = append(s, "_html_template_rcdataescaper")
case stateAttr:
// Handled below in delim check.
case stateAttrName, stateTag:
c.state = stateAttrName
s = append(s, "_html_template_htmlnamefilter")
case stateSrcset:
s = append(s, "_html_template_srcsetescaper")
default:
if isComment(c.state) {
s = append(s, "_html_template_commentescaper")
} else {
panic("unexpected state " + c.state.String())
}
}
switch c.delim {
case delimNone:
// No extra-escaping needed for raw text content.
case delimSpaceOrTagEnd:
s = append(s, "_html_template_nospaceescaper")
default:
s = append(s, "_html_template_attrescaper")
}
e.editActionNode(n, s)
return c
}
// ensurePipelineContains ensures that the pipeline ends with the commands with
// the identifiers in s in order. If the pipeline ends with a predefined escaper
// (i.e. "html" or "urlquery"), merge it with the identifiers in s.
func ensurePipelineContains(p *parse.PipeNode, s []string) {
if len(s) == 0 {
// Do not rewrite pipeline if we have no escapers to insert.
return
}
// Precondition: p.Cmds contains at most one predefined escaper and the
// escaper will be present at p.Cmds[len(p.Cmds)-1]. This precondition is
// always true because of the checks in escapeAction.
pipelineLen := len(p.Cmds)
if pipelineLen > 0 {
lastCmd := p.Cmds[pipelineLen-1]
if idNode, ok := lastCmd.Args[0].(*parse.IdentifierNode); ok {
if esc := idNode.Ident; predefinedEscapers[esc] {
// Pipeline ends with a predefined escaper.
if len(p.Cmds) == 1 && len(lastCmd.Args) > 1 {
// Special case: pipeline is of the form {{ esc arg1 arg2 ... argN }},
// where esc is the predefined escaper, and arg1...argN are its arguments.
// Convert this into the equivalent form
// {{ _eval_args_ arg1 arg2 ... argN | esc }}, so that esc can be easily
// merged with the escapers in s.
lastCmd.Args[0] = parse.NewIdentifier("_eval_args_").SetTree(nil).SetPos(lastCmd.Args[0].Position())
p.Cmds = appendCmd(p.Cmds, newIdentCmd(esc, p.Position()))
pipelineLen++
}
// If any of the commands in s that we are about to insert is equivalent
// to the predefined escaper, use the predefined escaper instead.
dup := false
for i, escaper := range s {
if escFnsEq(esc, escaper) {
s[i] = idNode.Ident
dup = true
}
}
if dup {
// The predefined escaper will already be inserted along with the
// escapers in s, so do not copy it to the rewritten pipeline.
pipelineLen--
}
}
}
}
// Rewrite the pipeline, creating the escapers in s at the end of the pipeline.
newCmds := make([]*parse.CommandNode, pipelineLen, pipelineLen+len(s))
insertedIdents := make(map[string]bool)
for i := 0; i < pipelineLen; i++ {
cmd := p.Cmds[i]
newCmds[i] = cmd
if idNode, ok := cmd.Args[0].(*parse.IdentifierNode); ok {
insertedIdents[normalizeEscFn(idNode.Ident)] = true
}
}
for _, name := range s {
if !insertedIdents[normalizeEscFn(name)] {
// When two templates share an underlying parse tree via the use of
// AddParseTree and one template is executed after the other, this check
// ensures that escapers that were already inserted into the pipeline on
// the first escaping pass do not get inserted again.
newCmds = appendCmd(newCmds, newIdentCmd(name, p.Position()))
}
}
p.Cmds = newCmds
}
// predefinedEscapers contains template predefined escapers that are equivalent
// to some contextual escapers. Keep in sync with equivEscapers.
var predefinedEscapers = map[string]bool{
"html": true,
"urlquery": true,
}
// equivEscapers matches contextual escapers to equivalent predefined
// template escapers.
var equivEscapers = map[string]string{
// The following pairs of HTML escapers provide equivalent security
// guarantees, since they all escape '\000', '\'', '"', '&', '<', and '>'.
"_html_template_attrescaper": "html",
"_html_template_htmlescaper": "html",
"_html_template_rcdataescaper": "html",
// These two URL escapers produce URLs safe for embedding in a URL query by
// percent-encoding all the reserved characters specified in RFC 3986 Section
// 2.2
"_html_template_urlescaper": "urlquery",
// These two functions are not actually equivalent; urlquery is stricter as it
// escapes reserved characters (e.g. '#'), while _html_template_urlnormalizer
// does not. It is therefore only safe to replace _html_template_urlnormalizer
// with urlquery (this happens in ensurePipelineContains), but not the otherI've
// way around. We keep this entry around to preserve the behavior of templates
// written before Go 1.9, which might depend on this substitution taking place.
"_html_template_urlnormalizer": "urlquery",
}
// escFnsEq reports whether the two escaping functions are equivalent.
func escFnsEq(a, b string) bool {
return normalizeEscFn(a) == normalizeEscFn(b)
}
// normalizeEscFn(a) is equal to normalizeEscFn(b) for any pair of names of
// escaper functions a and b that are equivalent.
func normalizeEscFn(e string) string {
if norm := equivEscapers[e]; norm != "" {
return norm
}
return e
}
// redundantFuncs[a][b] implies that funcMap[b](funcMap[a](x)) == funcMap[a](x)
// for all x.
var redundantFuncs = map[string]map[string]bool{
"_html_template_commentescaper": {
"_html_template_attrescaper": true,
"_html_template_htmlescaper": true,
},
"_html_template_cssescaper": {
"_html_template_attrescaper": true,
},
"_html_template_jsregexpescaper": {
"_html_template_attrescaper": true,
},
"_html_template_jsstrescaper": {
"_html_template_attrescaper": true,
},
"_html_template_urlescaper": {
"_html_template_urlnormalizer": true,
},
}
// appendCmd appends the given command to the end of the command pipeline
// unless it is redundant with the last command.
func appendCmd(cmds []*parse.CommandNode, cmd *parse.CommandNode) []*parse.CommandNode {
if n := len(cmds); n != 0 {
last, okLast := cmds[n-1].Args[0].(*parse.IdentifierNode)
next, okNext := cmd.Args[0].(*parse.IdentifierNode)
if okLast && okNext && redundantFuncs[last.Ident][next.Ident] {
return cmds
}
}
return append(cmds, cmd)
}
// newIdentCmd produces a command containing a single identifier node.
func newIdentCmd(identifier string, pos parse.Pos) *parse.CommandNode {
return &parse.CommandNode{
NodeType: parse.NodeCommand,
Args: []parse.Node{parse.NewIdentifier(identifier).SetTree(nil).SetPos(pos)}, // TODO: SetTree.
}
}
// nudge returns the context that would result from following empty string
// transitions from the input context.
// For example, parsing:
//
// `<a href=`
//
// will end in context{stateBeforeValue, attrURL}, but parsing one extra rune:
//
// `<a href=x`
//
// will end in context{stateURL, delimSpaceOrTagEnd, ...}.
// There are two transitions that happen when the 'x' is seen:
// (1) Transition from a before-value state to a start-of-value state without
//
// consuming any character.
//
// (2) Consume 'x' and transition past the first value character.
// In this case, nudging produces the context after (1) happens.
func nudge(c context) context {
switch c.state {
case stateTag:
// In `<foo {{.}}`, the action should emit an attribute.
c.state = stateAttrName
case stateBeforeValue:
// In `<foo bar={{.}}`, the action is an undelimited value.
c.state, c.delim, c.attr = attrStartStates[c.attr], delimSpaceOrTagEnd, attrNone
case stateAfterName:
// In `<foo bar {{.}}`, the action is an attribute name.
c.state, c.attr = stateAttrName, attrNone
}
return c
}
// join joins the two contexts of a branch template node. The result is an
// error context if either of the input contexts are error contexts, or if the
// input contexts differ.
func join(a, b context, node parse.Node, nodeName string) context {
if a.state == stateError {
return a
}
if b.state == stateError {
return b
}
if a.state == stateDead {
return b
}
if b.state == stateDead {
return a
}
if a.eq(b) {
return a
}
c := a
c.urlPart = b.urlPart
if c.eq(b) {
// The contexts differ only by urlPart.
c.urlPart = urlPartUnknown
return c
}
c = a
c.jsCtx = b.jsCtx
if c.eq(b) {
// The contexts differ only by jsCtx.
c.jsCtx = jsCtxUnknown
return c
}
// Allow a nudged context to join with an unnudged one.
// This means that
// <p title={{if .C}}{{.}}{{end}}
// ends in an unquoted value state even though the else branch
// ends in stateBeforeValue.
if c, d := nudge(a), nudge(b); !(c.eq(a) && d.eq(b)) {
if e := join(c, d, node, nodeName); e.state != stateError {
return e
}
}
return context{
state: stateError,
err: errorf(ErrBranchEnd, node, 0, "{{%s}} branches end in different contexts: %v, %v", nodeName, a, b),
}
}
// escapeBranch escapes a branch template node: "if", "range" and "with".
func (e *escaper) escapeBranch(c context, n *parse.BranchNode, nodeName string) context {
if nodeName == "range" {
e.rangeContext = &rangeContext{outer: e.rangeContext}
}
c0 := e.escapeList(c, n.List)
if nodeName == "range" {
if c0.state != stateError {
c0 = joinRange(c0, e.rangeContext)
}
e.rangeContext = e.rangeContext.outer
if c0.state == stateError {
return c0
}
// The "true" branch of a "range" node can execute multiple times.
// We check that executing n.List once results in the same context
// as executing n.List twice.
e.rangeContext = &rangeContext{outer: e.rangeContext}
c1, _ := e.escapeListConditionally(c0, n.List, nil)
c0 = join(c0, c1, n, nodeName)
if c0.state == stateError {
e.rangeContext = e.rangeContext.outer
// Make clear that this is a problem on loop re-entry
// since developers tend to overlook that branch when
// debugging templates.
c0.err.Line = n.Line
c0.err.Description = "on range loop re-entry: " + c0.err.Description
return c0
}
c0 = joinRange(c0, e.rangeContext)
e.rangeContext = e.rangeContext.outer
if c0.state == stateError {
return c0
}
}
c1 := e.escapeList(c, n.ElseList)
return join(c0, c1, n, nodeName)
}
func joinRange(c0 context, rc *rangeContext) context {
// Merge contexts at break and continue statements into overall body context.
// In theory we could treat breaks differently from continues, but for now it is
// enough to treat them both as going back to the start of the loop (which may then stop).
for _, c := range rc.breaks {
c0 = join(c0, c, c.n, "range")
if c0.state == stateError {
c0.err.Line = c.n.(*parse.BreakNode).Line
c0.err.Description = "at range loop break: " + c0.err.Description
return c0
}
}
for _, c := range rc.continues {
c0 = join(c0, c, c.n, "range")
if c0.state == stateError {
c0.err.Line = c.n.(*parse.ContinueNode).Line
c0.err.Description = "at range loop continue: " + c0.err.Description
return c0
}
}
return c0
}
// escapeList escapes a list template node.
func (e *escaper) escapeList(c context, n *parse.ListNode) context {
if n == nil {
return c
}
for _, m := range n.Nodes {
c = e.escape(c, m)
if c.state == stateDead {
break
}
}
return c
}
// escapeListConditionally escapes a list node but only preserves edits and
// inferences in e if the inferences and output context satisfy filter.
// It returns the best guess at an output context, and the result of the filter
// which is the same as whether e was updated.
func (e *escaper) escapeListConditionally(c context, n *parse.ListNode, filter func(*escaper, context) bool) (context, bool) {
e1 := makeEscaper(e.ns)
e1.rangeContext = e.rangeContext
// Make type inferences available to f.
for k, v := range e.output {
e1.output[k] = v
}
c = e1.escapeList(c, n)
ok := filter != nil && filter(&e1, c)
if ok {
// Copy inferences and edits from e1 back into e.
for k, v := range e1.output {
e.output[k] = v
}
for k, v := range e1.derived {
e.derived[k] = v
}
for k, v := range e1.called {
e.called[k] = v
}
for k, v := range e1.actionNodeEdits {
e.editActionNode(k, v)
}
for k, v := range e1.templateNodeEdits {
e.editTemplateNode(k, v)
}
for k, v := range e1.textNodeEdits {
e.editTextNode(k, v)
}
}
return c, ok
}
// escapeTemplate escapes a {{template}} call node.
func (e *escaper) escapeTemplate(c context, n *parse.TemplateNode) context {
c, name := e.escapeTree(c, n, n.Name, n.Line)
if name != n.Name {
e.editTemplateNode(n, name)
}
return c
}
// escapeTree escapes the named template starting in the given context as
// necessary and returns its output context.
func (e *escaper) escapeTree(c context, node parse.Node, name string, line int) (context, string) {
// Mangle the template name with the input context to produce a reliable
// identifier.
dname := c.mangle(name)
e.called[dname] = true
if out, ok := e.output[dname]; ok {
// Already escaped.
return out, dname
}
t := e.template(name)
if t == nil {
// Two cases: The template exists but is empty, or has never been mentioned at
// all. Distinguish the cases in the error messages.
if e.ns.set[name] != nil {
return context{
state: stateError,
err: errorf(ErrNoSuchTemplate, node, line, "%q is an incomplete or empty template", name),
}, dname
}
return context{
state: stateError,
err: errorf(ErrNoSuchTemplate, node, line, "no such template %q", name),
}, dname
}
if dname != name {
// Use any template derived during an earlier call to escapeTemplate
// with different top level templates, or clone if necessary.
dt := e.template(dname)
if dt == nil {
dt = template.New(dname)
dt.Tree = &parse.Tree{Name: dname, Root: t.Root.CopyList()}
e.derived[dname] = dt
}
t = dt
}
return e.computeOutCtx(c, t), dname
}
// computeOutCtx takes a template and its start context and computes the output
// context while storing any inferences in e.
func (e *escaper) computeOutCtx(c context, t *template.Template) context {
// Propagate context over the body.
c1, ok := e.escapeTemplateBody(c, t)
if !ok {
// Look for a fixed point by assuming c1 as the output context.
if c2, ok2 := e.escapeTemplateBody(c1, t); ok2 {
c1, ok = c2, true
}
// Use c1 as the error context if neither assumption worked.
}
if !ok && c1.state != stateError {
return context{
state: stateError,
err: errorf(ErrOutputContext, t.Tree.Root, 0, "cannot compute output context for template %s", t.Name()),
}
}
return c1
}
// escapeTemplateBody escapes the given template assuming the given output
// context, and returns the best guess at the output context and whether the
// assumption was correct.
func (e *escaper) escapeTemplateBody(c context, t *template.Template) (context, bool) {
filter := func(e1 *escaper, c1 context) bool {
if c1.state == stateError {
// Do not update the input escaper, e.
return false
}
if !e1.called[t.Name()] {
// If t is not recursively called, then c1 is an
// accurate output context.
return true
}
// c1 is accurate if it matches our assumed output context.
return c.eq(c1)
}
// We need to assume an output context so that recursive template calls
// take the fast path out of escapeTree instead of infinitely recurring.
// Naively assuming that the input context is the same as the output
// works >90% of the time.
e.output[t.Name()] = c
return e.escapeListConditionally(c, t.Tree.Root, filter)
}
// delimEnds maps each delim to a string of characters that terminate it.
var delimEnds = [...]string{
delimDoubleQuote: `"`,
delimSingleQuote: "'",
// Determined empirically by running the below in various browsers.
// var div = document.createElement("DIV");
// for (var i = 0; i < 0x10000; ++i) {
// div.innerHTML = "<span title=x" + String.fromCharCode(i) + "-bar>";
// if (div.getElementsByTagName("SPAN")[0].title.indexOf("bar") < 0)
// document.write("<p>U+" + i.toString(16));
// }
delimSpaceOrTagEnd: " \t\n\f\r>",
}
var (
// Per WHATWG HTML specification, section 4.12.1.3, there are extremely
// complicated rules for how to handle the set of opening tags <!--,
// <script, and </script when they appear in JS literals (i.e. strings,
// regexs, and comments). The specification suggests a simple solution,
// rather than implementing the arcane ABNF, which involves simply escaping
// the opening bracket with \x3C. We use the below regex for this, since it
// makes doing the case-insensitive find-replace much simpler.
specialScriptTagRE = regexp.MustCompile("(?i)<(script|/script|!--)")
specialScriptTagReplacement = []byte("\\x3C$1")
)
func containsSpecialScriptTag(s []byte) bool {
return specialScriptTagRE.Match(s)
}
func escapeSpecialScriptTags(s []byte) []byte {
return specialScriptTagRE.ReplaceAll(s, specialScriptTagReplacement)
}
var doctypeBytes = []byte("<!DOCTYPE")
// escapeText escapes a text template node.
func (e *escaper) escapeText(c context, n *parse.TextNode) context {
s, written, i, b := n.Text, 0, 0, new(bytes.Buffer)
for i != len(s) {
c1, nread := contextAfterText(c, s[i:])
i1 := i + nread
if c.state == stateText || c.state == stateRCDATA {
end := i1
if c1.state != c.state {
for j := end - 1; j >= i; j-- {
if s[j] == '<' {
end = j
break
}
}
}
for j := i; j < end; j++ {
if s[j] == '<' && !bytes.HasPrefix(bytes.ToUpper(s[j:]), doctypeBytes) {
b.Write(s[written:j])
b.WriteString("&lt;")
written = j + 1
}
}
} else if isComment(c.state) && c.delim == delimNone {
switch c.state {
case stateJSBlockCmt:
// https://es5.github.io/#x7.4:
// "Comments behave like white space and are
// discarded except that, if a MultiLineComment
// contains a line terminator character, then
// the entire comment is considered to be a
// LineTerminator for purposes of parsing by
// the syntactic grammar."
if bytes.ContainsAny(s[written:i1], "\n\r\u2028\u2029") {
b.WriteByte('\n')
} else {
b.WriteByte(' ')
}
case stateCSSBlockCmt:
b.WriteByte(' ')
}
written = i1
}
if c.state != c1.state && isComment(c1.state) && c1.delim == delimNone {
// Preserve the portion between written and the comment start.
cs := i1 - 2
if c1.state == stateHTMLCmt || c1.state == stateJSHTMLOpenCmt {
// "<!--" instead of "/*" or "//"
cs -= 2
} else if c1.state == stateJSHTMLCloseCmt {
// "-->" instead of "/*" or "//"
cs -= 1
}
b.Write(s[written:cs])
written = i1
}
if isInScriptLiteral(c.state) && containsSpecialScriptTag(s[i:i1]) {
b.Write(s[written:i])
b.Write(escapeSpecialScriptTags(s[i:i1]))
written = i1
}
if i == i1 && c.state == c1.state {
panic(fmt.Sprintf("infinite loop from %v to %v on %q..%q", c, c1, s[:i], s[i:]))
}
c, i = c1, i1
}
if written != 0 && c.state != stateError {
if !isComment(c.state) || c.delim != delimNone {
b.Write(n.Text[written:])
}
e.editTextNode(n, b.Bytes())
}
return c
}
// contextAfterText starts in context c, consumes some tokens from the front of
// s, then returns the context after those tokens and the unprocessed suffix.
func contextAfterText(c context, s []byte) (context, int) {
if c.delim == delimNone {
c1, i := tSpecialTagEnd(c, s)
if i == 0 {
// A special end tag (`</script>`) has been seen and
// all content preceding it has been consumed.
return c1, 0
}
// Consider all content up to any end tag.
return transitionFunc[c.state](c, s[:i])
}
// We are at the beginning of an attribute value.
i := bytes.IndexAny(s, delimEnds[c.delim])
if i == -1 {
i = len(s)
}
if c.delim == delimSpaceOrTagEnd {
// https://www.w3.org/TR/html5/syntax.html#attribute-value-(unquoted)-state
// lists the runes below as error characters.
// Error out because HTML parsers may differ on whether
// "<a id= onclick=f(" ends inside id's or onclick's value,
// "<a class=`foo " ends inside a value,
// "<a style=font:'Arial'" needs open-quote fixup.
// IE treats '`' as a quotation character.
if j := bytes.IndexAny(s[:i], "\"'<=`"); j >= 0 {
return context{
state: stateError,
err: errorf(ErrBadHTML, nil, 0, "%q in unquoted attr: %q", s[j:j+1], s[:i]),
}, len(s)
}
}
if i == len(s) {
// Remain inside the attribute.
// Decode the value so non-HTML rules can easily handle
// <button onclick="alert(&quot;Hi!&quot;)">
// without having to entity decode token boundaries.
for u := []byte(html.UnescapeString(string(s))); len(u) != 0; {
c1, i1 := transitionFunc[c.state](c, u)
c, u = c1, u[i1:]
}
return c, len(s)
}
element := c.element
// If this is a non-JS "type" attribute inside "script" tag, do not treat the contents as JS.
if c.state == stateAttr && c.element == elementScript && c.attr == attrScriptType && !isJSType(string(s[:i])) {
element = elementNone
}
if c.delim != delimSpaceOrTagEnd {
// Consume any quote.
i++
}
// On exiting an attribute, we discard all state information
// except the state and element.
return context{state: stateTag, element: element}, i
}
// editActionNode records a change to an action pipeline for later commit.
func (e *escaper) editActionNode(n *parse.ActionNode, cmds []string) {
if _, ok := e.actionNodeEdits[n]; ok {
panic(fmt.Sprintf("node %s shared between templates", n))
}
e.actionNodeEdits[n] = cmds
}
// editTemplateNode records a change to a {{template}} callee for later commit.
func (e *escaper) editTemplateNode(n *parse.TemplateNode, callee string) {
if _, ok := e.templateNodeEdits[n]; ok {
panic(fmt.Sprintf("node %s shared between templates", n))
}
e.templateNodeEdits[n] = callee
}
// editTextNode records a change to a text node for later commit.
func (e *escaper) editTextNode(n *parse.TextNode, text []byte) {
if _, ok := e.textNodeEdits[n]; ok {
panic(fmt.Sprintf("node %s shared between templates", n))
}
e.textNodeEdits[n] = text
}
// commit applies changes to actions and template calls needed to contextually
// autoescape content and adds any derived templates to the set.
func (e *escaper) commit() {
for name := range e.output {
e.template(name).Funcs(funcMap)
}
// Any template from the name space associated with this escaper can be used
// to add derived templates to the underlying text/template name space.
tmpl := e.arbitraryTemplate()
for _, t := range e.derived {
if _, err := tmpl.text.AddParseTree(t.Name(), t.Tree); err != nil {
panic("error adding derived template")
}
}
for n, s := range e.actionNodeEdits {
ensurePipelineContains(n.Pipe, s)
}
for n, name := range e.templateNodeEdits {
n.Name = name
}
for n, s := range e.textNodeEdits {
n.Text = s
}
// Reset state that is specific to this commit so that the same changes are
// not re-applied to the template on subsequent calls to commit.
e.called = make(map[string]bool)
e.actionNodeEdits = make(map[*parse.ActionNode][]string)
e.templateNodeEdits = make(map[*parse.TemplateNode]string)
e.textNodeEdits = make(map[*parse.TextNode][]byte)
}
// template returns the named template given a mangled template name.
func (e *escaper) template(name string) *template.Template {
// Any template from the name space associated with this escaper can be used
// to look up templates in the underlying text/template name space.
t := e.arbitraryTemplate().text.Lookup(name)
if t == nil {
t = e.derived[name]
}
return t
}
// arbitraryTemplate returns an arbitrary template from the name space
// associated with e and panics if no templates are found.
func (e *escaper) arbitraryTemplate() *Template {
for _, t := range e.ns.set {
return t
}
panic("no templates in name space")
}
// Forwarding functions so that clients need only import this package
// to reach the general escaping functions of text/template.
// HTMLEscape writes to w the escaped HTML equivalent of the plain text data b.
func HTMLEscape(w io.Writer, b []byte) {
template.HTMLEscape(w, b)
}
// HTMLEscapeString returns the escaped HTML equivalent of the plain text data s.
func HTMLEscapeString(s string) string {
return template.HTMLEscapeString(s)
}
// HTMLEscaper returns the escaped HTML equivalent of the textual
// representation of its arguments.
func HTMLEscaper(args ...any) string {
return template.HTMLEscaper(args...)
}
// JSEscape writes to w the escaped JavaScript equivalent of the plain text data b.
func JSEscape(w io.Writer, b []byte) {
template.JSEscape(w, b)
}
// JSEscapeString returns the escaped JavaScript equivalent of the plain text data s.
func JSEscapeString(s string) string {
return template.JSEscapeString(s)
}
// JSEscaper returns the escaped JavaScript equivalent of the textual
// representation of its arguments.
func JSEscaper(args ...any) string {
return template.JSEscaper(args...)
}
// URLQueryEscaper returns the escaped value of the textual representation of
// its arguments in a form suitable for embedding in a URL query.
func URLQueryEscaper(args ...any) string {
return template.URLQueryEscaper(args...)
}