// Copyright 2011 The Go Authors. All rights reserved. // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. package template import ( "bytes" "fmt" "html" //"internal/godebug" "io" "regexp" template "github.com/gohugoio/hugo/tpl/internal/go_templates/texttemplate" "github.com/gohugoio/hugo/tpl/internal/go_templates/texttemplate/parse" ) // escapeTemplate rewrites the named template, which must be // associated with t, to guarantee that the output of any of the named // templates is properly escaped. If no error is returned, then the named templates have // been modified. Otherwise the named templates have been rendered // unusable. func escapeTemplate(tmpl *Template, node parse.Node, name string) error { c, _ := tmpl.esc.escapeTree(context{}, node, name, 0) var err error if c.err != nil { err, c.err.Name = c.err, name } else if c.state != stateText { err = &Error{ErrEndContext, nil, name, 0, fmt.Sprintf("ends in a non-text context: %v", c)} } if err != nil { // Prevent execution of unsafe templates. if t := tmpl.set[name]; t != nil { t.escapeErr = err t.text.Tree = nil t.Tree = nil } return err } tmpl.esc.commit() if t := tmpl.set[name]; t != nil { t.escapeErr = escapeOK t.Tree = t.text.Tree } return nil } // evalArgs formats the list of arguments into a string. It is equivalent to // fmt.Sprint(args...), except that it dereferences all pointers. func evalArgs(args ...any) string { // Optimization for simple common case of a single string argument. if len(args) == 1 { if s, ok := args[0].(string); ok { return s } } for i, arg := range args { args[i] = indirectToStringerOrError(arg) } return fmt.Sprint(args...) } // funcMap maps command names to functions that render their inputs safe. 
// The "_html_template_*" keys are the command names that escapeAction selects
// for a given context; "_eval_args_" is the helper that
// ensurePipelineContains substitutes for a leading predefined escaper.
var funcMap = template.FuncMap{
	"_html_template_attrescaper":      attrEscaper,
	"_html_template_commentescaper":   commentEscaper,
	"_html_template_cssescaper":       cssEscaper,
	"_html_template_cssvaluefilter":   cssValueFilter,
	"_html_template_htmlnamefilter":   htmlNameFilter,
	"_html_template_htmlescaper":      htmlEscaper,
	"_html_template_jsregexpescaper":  jsRegexpEscaper,
	"_html_template_jsstrescaper":     jsStrEscaper,
	"_html_template_jsvalescaper":     jsValEscaper,
	"_html_template_nospaceescaper":   htmlNospaceEscaper,
	"_html_template_rcdataescaper":    rcdataEscaper,
	"_html_template_srcsetescaper":    srcsetFilterAndEscaper,
	"_html_template_urlescaper":       urlEscaper,
	"_html_template_urlfilter":        urlFilter,
	"_html_template_urlnormalizer":    urlNormalizer,
	"_eval_args_":                     evalArgs,
}

// escaper collects type inferences about templates and changes needed to make
// templates injection safe.
//
// NOTE: makeEscaper initializes this struct with a positional composite
// literal, so the order of the fields below must not change without updating
// makeEscaper as well.
type escaper struct {
	// ns is the nameSpace that this escaper is associated with.
	ns *nameSpace
	// output[templateName] is the output context for a templateName that
	// has been mangled to include its input context.
	output map[string]context
	// derived[c.mangle(name)] maps to a template derived from the template
	// named name for the start context c.
	derived map[string]*template.Template
	// called[templateName] is a set of called mangled template names.
	called map[string]bool
	// xxxNodeEdits are the accumulated edits to apply during commit.
	// Such edits are not applied immediately in case a template set
	// executes a given template in different escaping contexts.
	actionNodeEdits   map[*parse.ActionNode][]string
	templateNodeEdits map[*parse.TemplateNode]string
	textNodeEdits     map[*parse.TextNode][]byte
	// rangeContext holds context about the current range loop.
	// It is nil in a freshly made escaper (see makeEscaper).
	rangeContext *rangeContext
}

// rangeContext holds information about the current range loop.
type rangeContext struct { outer *rangeContext // outer loop breaks []context // context at each break action continues []context // context at each continue action } // makeEscaper creates a blank escaper for the given set. func makeEscaper(n *nameSpace) escaper { return escaper{ n, map[string]context{}, map[string]*template.Template{}, map[string]bool{}, map[*parse.ActionNode][]string{}, map[*parse.TemplateNode]string{}, map[*parse.TextNode][]byte{}, nil, } } // filterFailsafe is an innocuous word that is emitted in place of unsafe values // by sanitizer functions. It is not a keyword in any programming language, // contains no special characters, is not empty, and when it appears in output // it is distinct enough that a developer can find the source of the problem // via a search engine. const filterFailsafe = "ZgotmplZ" // escape escapes a template node. func (e *escaper) escape(c context, n parse.Node) context { switch n := n.(type) { case *parse.ActionNode: return e.escapeAction(c, n) case *parse.BreakNode: c.n = n e.rangeContext.breaks = append(e.rangeContext.breaks, c) return context{state: stateDead} case *parse.CommentNode: return c case *parse.ContinueNode: c.n = n e.rangeContext.continues = append(e.rangeContext.breaks, c) return context{state: stateDead} case *parse.IfNode: return e.escapeBranch(c, &n.BranchNode, "if") case *parse.ListNode: return e.escapeList(c, n) case *parse.RangeNode: return e.escapeBranch(c, &n.BranchNode, "range") case *parse.TemplateNode: return e.escapeTemplate(c, n) case *parse.TextNode: return e.escapeText(c, n) case *parse.WithNode: return e.escapeBranch(c, &n.BranchNode, "with") } panic("escaping " + n.String() + " is unimplemented") } // Modified by Hugo. // var debugAllowActionJSTmpl = godebug.New("jstmpllitinterp") // escapeAction escapes an action template node. func (e *escaper) escapeAction(c context, n *parse.ActionNode) context { if len(n.Pipe.Decl) != 0 { // A local variable assignment, not an interpolation. 
return c } c = nudge(c) // Check for disallowed use of predefined escapers in the pipeline. for pos, idNode := range n.Pipe.Cmds { node, ok := idNode.Args[0].(*parse.IdentifierNode) if !ok { // A predefined escaper "esc" will never be found as an identifier in a // Chain or Field node, since: // - "esc.x ..." is invalid, since predefined escapers return strings, and // strings do not have methods, keys or fields. // - "... .esc" is invalid, since predefined escapers are global functions, // not methods or fields of any types. // Therefore, it is safe to ignore these two node types. continue } ident := node.Ident if _, ok := predefinedEscapers[ident]; ok { if pos < len(n.Pipe.Cmds)-1 || c.state == stateAttr && c.delim == delimSpaceOrTagEnd && ident == "html" { return context{ state: stateError, err: errorf(ErrPredefinedEscaper, n, n.Line, "predefined escaper %q disallowed in template", ident), } } } } s := make([]string, 0, 3) switch c.state { case stateError: return c case stateURL, stateCSSDqStr, stateCSSSqStr, stateCSSDqURL, stateCSSSqURL, stateCSSURL: switch c.urlPart { case urlPartNone: s = append(s, "_html_template_urlfilter") fallthrough case urlPartPreQuery: switch c.state { case stateCSSDqStr, stateCSSSqStr: s = append(s, "_html_template_cssescaper") default: s = append(s, "_html_template_urlnormalizer") } case urlPartQueryOrFrag: s = append(s, "_html_template_urlescaper") case urlPartUnknown: return context{ state: stateError, err: errorf(ErrAmbigContext, n, n.Line, "%s appears in an ambiguous context within a URL", n), } default: panic(c.urlPart.String()) } case stateJS: s = append(s, "_html_template_jsvalescaper") // A slash after a value starts a div operator. 
c.jsCtx = jsCtxDivOp case stateJSDqStr, stateJSSqStr: s = append(s, "_html_template_jsstrescaper") case stateJSBqStr: if SecurityAllowActionJSTmpl.Load() { // debugAllowActionJSTmpl.IncNonDefault() s = append(s, "_html_template_jsstrescaper") } else { return context{ state: stateError, err: errorf(ErrJSTemplate, n, n.Line, "%s appears in a JS template literal", n), } } case stateJSRegexp: s = append(s, "_html_template_jsregexpescaper") case stateCSS: s = append(s, "_html_template_cssvaluefilter") case stateText: s = append(s, "_html_template_htmlescaper") case stateRCDATA: s = append(s, "_html_template_rcdataescaper") case stateAttr: // Handled below in delim check. case stateAttrName, stateTag: c.state = stateAttrName s = append(s, "_html_template_htmlnamefilter") case stateSrcset: s = append(s, "_html_template_srcsetescaper") default: if isComment(c.state) { s = append(s, "_html_template_commentescaper") } else { panic("unexpected state " + c.state.String()) } } switch c.delim { case delimNone: // No extra-escaping needed for raw text content. case delimSpaceOrTagEnd: s = append(s, "_html_template_nospaceescaper") default: s = append(s, "_html_template_attrescaper") } e.editActionNode(n, s) return c } // ensurePipelineContains ensures that the pipeline ends with the commands with // the identifiers in s in order. If the pipeline ends with a predefined escaper // (i.e. "html" or "urlquery"), merge it with the identifiers in s. func ensurePipelineContains(p *parse.PipeNode, s []string) { if len(s) == 0 { // Do not rewrite pipeline if we have no escapers to insert. return } // Precondition: p.Cmds contains at most one predefined escaper and the // escaper will be present at p.Cmds[len(p.Cmds)-1]. This precondition is // always true because of the checks in escapeAction. 
pipelineLen := len(p.Cmds) if pipelineLen > 0 { lastCmd := p.Cmds[pipelineLen-1] if idNode, ok := lastCmd.Args[0].(*parse.IdentifierNode); ok { if esc := idNode.Ident; predefinedEscapers[esc] { // Pipeline ends with a predefined escaper. if len(p.Cmds) == 1 && len(lastCmd.Args) > 1 { // Special case: pipeline is of the form {{ esc arg1 arg2 ... argN }}, // where esc is the predefined escaper, and arg1...argN are its arguments. // Convert this into the equivalent form // {{ _eval_args_ arg1 arg2 ... argN | esc }}, so that esc can be easily // merged with the escapers in s. lastCmd.Args[0] = parse.NewIdentifier("_eval_args_").SetTree(nil).SetPos(lastCmd.Args[0].Position()) p.Cmds = appendCmd(p.Cmds, newIdentCmd(esc, p.Position())) pipelineLen++ } // If any of the commands in s that we are about to insert is equivalent // to the predefined escaper, use the predefined escaper instead. dup := false for i, escaper := range s { if escFnsEq(esc, escaper) { s[i] = idNode.Ident dup = true } } if dup { // The predefined escaper will already be inserted along with the // escapers in s, so do not copy it to the rewritten pipeline. pipelineLen-- } } } } // Rewrite the pipeline, creating the escapers in s at the end of the pipeline. newCmds := make([]*parse.CommandNode, pipelineLen, pipelineLen+len(s)) insertedIdents := make(map[string]bool) for i := 0; i < pipelineLen; i++ { cmd := p.Cmds[i] newCmds[i] = cmd if idNode, ok := cmd.Args[0].(*parse.IdentifierNode); ok { insertedIdents[normalizeEscFn(idNode.Ident)] = true } } for _, name := range s { if !insertedIdents[normalizeEscFn(name)] { // When two templates share an underlying parse tree via the use of // AddParseTree and one template is executed after the other, this check // ensures that escapers that were already inserted into the pipeline on // the first escaping pass do not get inserted again. 
newCmds = appendCmd(newCmds, newIdentCmd(name, p.Position())) } } p.Cmds = newCmds } // predefinedEscapers contains template predefined escapers that are equivalent // to some contextual escapers. Keep in sync with equivEscapers. var predefinedEscapers = map[string]bool{ "html": true, "urlquery": true, } // equivEscapers matches contextual escapers to equivalent predefined // template escapers. var equivEscapers = map[string]string{ // The following pairs of HTML escapers provide equivalent security // guarantees, since they all escape '\000', '\'', '"', '&', '<', and '>'. "_html_template_attrescaper": "html", "_html_template_htmlescaper": "html", "_html_template_rcdataescaper": "html", // These two URL escapers produce URLs safe for embedding in a URL query by // percent-encoding all the reserved characters specified in RFC 3986 Section // 2.2 "_html_template_urlescaper": "urlquery", // These two functions are not actually equivalent; urlquery is stricter as it // escapes reserved characters (e.g. '#'), while _html_template_urlnormalizer // does not. It is therefore only safe to replace _html_template_urlnormalizer // with urlquery (this happens in ensurePipelineContains), but not the otherI've // way around. We keep this entry around to preserve the behavior of templates // written before Go 1.9, which might depend on this substitution taking place. "_html_template_urlnormalizer": "urlquery", } // escFnsEq reports whether the two escaping functions are equivalent. func escFnsEq(a, b string) bool { return normalizeEscFn(a) == normalizeEscFn(b) } // normalizeEscFn(a) is equal to normalizeEscFn(b) for any pair of names of // escaper functions a and b that are equivalent. func normalizeEscFn(e string) string { if norm := equivEscapers[e]; norm != "" { return norm } return e } // redundantFuncs[a][b] implies that funcMap[b](funcMap[a](x)) == funcMap[a](x) // for all x. 
var redundantFuncs = map[string]map[string]bool{ "_html_template_commentescaper": { "_html_template_attrescaper": true, "_html_template_htmlescaper": true, }, "_html_template_cssescaper": { "_html_template_attrescaper": true, }, "_html_template_jsregexpescaper": { "_html_template_attrescaper": true, }, "_html_template_jsstrescaper": { "_html_template_attrescaper": true, }, "_html_template_urlescaper": { "_html_template_urlnormalizer": true, }, } // appendCmd appends the given command to the end of the command pipeline // unless it is redundant with the last command. func appendCmd(cmds []*parse.CommandNode, cmd *parse.CommandNode) []*parse.CommandNode { if n := len(cmds); n != 0 { last, okLast := cmds[n-1].Args[0].(*parse.IdentifierNode) next, okNext := cmd.Args[0].(*parse.IdentifierNode) if okLast && okNext && redundantFuncs[last.Ident][next.Ident] { return cmds } } return append(cmds, cmd) } // newIdentCmd produces a command containing a single identifier node. func newIdentCmd(identifier string, pos parse.Pos) *parse.CommandNode { return &parse.CommandNode{ NodeType: parse.NodeCommand, Args: []parse.Node{parse.NewIdentifier(identifier).SetTree(nil).SetPos(pos)}, // TODO: SetTree. } } // nudge returns the context that would result from following empty string // transitions from the input context. // For example, parsing: // // `90% of the time. e.output[t.Name()] = c return e.escapeListConditionally(c, t.Tree.Root, filter) } // delimEnds maps each delim to a string of characters that terminate it. var delimEnds = [...]string{ delimDoubleQuote: `"`, delimSingleQuote: "'", // Determined empirically by running the below in various browsers. // var div = document.createElement("DIV"); // for (var i = 0; i < 0x10000; ++i) { // div.innerHTML = ""; // if (div.getElementsByTagName("SPAN")[0].title.indexOf("bar") < 0) // document.write("

U+" + i.toString(16)); // } delimSpaceOrTagEnd: " \t\n\f\r>", } var ( // Per WHATWG HTML specification, section 4.12.1.3, there are extremely // complicated rules for how to handle the set of opening tags " instead of "/*" or "//" cs -= 1 } b.Write(s[written:cs]) written = i1 } if isInScriptLiteral(c.state) && containsSpecialScriptTag(s[i:i1]) { b.Write(s[written:i]) b.Write(escapeSpecialScriptTags(s[i:i1])) written = i1 } if i == i1 && c.state == c1.state { panic(fmt.Sprintf("infinite loop from %v to %v on %q..%q", c, c1, s[:i], s[i:])) } c, i = c1, i1 } if written != 0 && c.state != stateError { if !isComment(c.state) || c.delim != delimNone { b.Write(n.Text[written:]) } e.editTextNode(n, b.Bytes()) } return c } // contextAfterText starts in context c, consumes some tokens from the front of // s, then returns the context after those tokens and the unprocessed suffix. func contextAfterText(c context, s []byte) (context, int) { if c.delim == delimNone { c1, i := tSpecialTagEnd(c, s) if i == 0 { // A special end tag (``) has been seen and // all content preceding it has been consumed. return c1, 0 } // Consider all content up to any end tag. return transitionFunc[c.state](c, s[:i]) } // We are at the beginning of an attribute value. i := bytes.IndexAny(s, delimEnds[c.delim]) if i == -1 { i = len(s) } if c.delim == delimSpaceOrTagEnd { // https://www.w3.org/TR/html5/syntax.html#attribute-value-(unquoted)-state // lists the runes below as error characters. // Error out because HTML parsers may differ on whether // "= 0 { return context{ state: stateError, err: errorf(ErrBadHTML, nil, 0, "%q in unquoted attr: %q", s[j:j+1], s[:i]), }, len(s) } } if i == len(s) { // Remain inside the attribute. // Decode the value so non-HTML rules can easily handle //