Bjørn Erik Pedersen 2023-09-21 20:15:31 +02:00
parent 275c0acbf6
commit 1e9b87f760
7 changed files with 191 additions and 49 deletions

View file

@ -4,7 +4,7 @@ parameters:
defaults: &defaults
resource_class: large
docker:
- image: bepsays/ci-hugoreleaser:1.22000.20100
- image: bepsays/ci-hugoreleaser:1.22100.20100
environment: &buildenv
GOMODCACHE: /root/project/gomodcache
version: 2
@ -60,7 +60,7 @@ jobs:
environment:
<<: [*buildenv]
docker:
- image: bepsays/ci-hugoreleaser-linux-arm64:1.22000.20100
- image: bepsays/ci-hugoreleaser-linux-arm64:1.22100.20100
steps:
- *restore-cache
- &attach-workspace

View file

@ -16,7 +16,7 @@ import (
)
func main() {
// The current is built with c19c4c566c HEAD, tag: go1.21.0.
// The current is built with 2c1e5b05fe39fc5e6c730dd60e82946b8e67c6ba, tag: go1.21.1.
fmt.Println("Forking ...")
defer fmt.Println("Done ...")
@ -162,7 +162,8 @@ func copyGoPackage(dst, src string) {
func doWithGoFiles(dir string,
rewrite func(name string),
transform func(name, in string) string) {
transform func(name, in string) string,
) {
if rewrite == nil && transform == nil {
return
}

View file

@ -129,6 +129,10 @@ const (
stateJSBlockCmt
// stateJSLineCmt occurs inside a JavaScript // line comment.
stateJSLineCmt
// stateJSHTMLOpenCmt occurs inside a JavaScript <!-- HTML-like comment.
stateJSHTMLOpenCmt
// stateJSHTMLCloseCmt occurs inside a JavaScript --> HTML-like comment.
stateJSHTMLCloseCmt
// stateCSS occurs inside a <style> element or style attribute.
stateCSS
// stateCSSDqStr occurs inside a CSS double quoted string.
@ -156,7 +160,7 @@ const (
// authors & maintainers, not for end-users or machines.
func isComment(s state) bool {
switch s {
case stateHTMLCmt, stateJSBlockCmt, stateJSLineCmt, stateCSSBlockCmt, stateCSSLineCmt:
case stateHTMLCmt, stateJSBlockCmt, stateJSLineCmt, stateJSHTMLOpenCmt, stateJSHTMLCloseCmt, stateCSSBlockCmt, stateCSSLineCmt:
return true
}
return false
@ -171,6 +175,20 @@ func isInTag(s state) bool {
return false
}
// isInScriptLiteral returns true if s is one of the literal states within a
// <script> tag, and as such occurances of "<!--", "<script", and "</script"
// need to be treated specially.
func isInScriptLiteral(s state) bool {
// Ignore the comment states (stateJSBlockCmt, stateJSLineCmt,
// stateJSHTMLOpenCmt, stateJSHTMLCloseCmt) because their content is already
// omitted from the output.
switch s {
case stateJSDqStr, stateJSSqStr, stateJSBqStr, stateJSRegexp:
return true
}
return false
}
// delim is the delimiter that will end the current HTML attribute.
type delim uint8

View file

@ -11,6 +11,7 @@ import (
//"internal/godebug"
"io"
"regexp"
template "github.com/gohugoio/hugo/tpl/internal/go_templates/texttemplate"
"github.com/gohugoio/hugo/tpl/internal/go_templates/texttemplate/parse"
@ -231,7 +232,7 @@ func (e *escaper) escapeAction(c context, n *parse.ActionNode) context {
s = append(s, "_html_template_jsstrescaper")
case stateJSBqStr:
if SecurityAllowActionJSTmpl.Load() {
//debugAllowActionJSTmpl.IncNonDefault()
// debugAllowActionJSTmpl.IncNonDefault()
s = append(s, "_html_template_jsstrescaper")
} else {
return context{
@ -732,6 +733,26 @@ var delimEnds = [...]string{
delimSpaceOrTagEnd: " \t\n\f\r>",
}
var (
// Per WHATWG HTML specification, section 4.12.1.3, there are extremely
// complicated rules for how to handle the set of opening tags <!--,
// <script, and </script when they appear in JS literals (i.e. strings,
// regexs, and comments). The specification suggests a simple solution,
// rather than implementing the arcane ABNF, which involves simply escaping
// the opening bracket with \x3C. We use the below regex for this, since it
// makes doing the case-insensitive find-replace much simpler.
specialScriptTagRE = regexp.MustCompile("(?i)<(script|/script|!--)")
specialScriptTagReplacement = []byte("\\x3C$1")
)
func containsSpecialScriptTag(s []byte) bool {
return specialScriptTagRE.Match(s)
}
func escapeSpecialScriptTags(s []byte) []byte {
return specialScriptTagRE.ReplaceAll(s, specialScriptTagReplacement)
}
var doctypeBytes = []byte("<!DOCTYPE")
// escapeText escapes a text template node.
@ -780,13 +801,21 @@ func (e *escaper) escapeText(c context, n *parse.TextNode) context {
if c.state != c1.state && isComment(c1.state) && c1.delim == delimNone {
// Preserve the portion between written and the comment start.
cs := i1 - 2
if c1.state == stateHTMLCmt {
if c1.state == stateHTMLCmt || c1.state == stateJSHTMLOpenCmt {
// "<!--" instead of "/*" or "//"
cs -= 2
} else if c1.state == stateJSHTMLCloseCmt {
// "-->" instead of "/*" or "//"
cs -= 1
}
b.Write(s[written:cs])
written = i1
}
if isInScriptLiteral(c.state) && containsSpecialScriptTag(s[i:i1]) {
b.Write(s[written:i])
b.Write(escapeSpecialScriptTags(s[i:i1]))
written = i1
}
if i == i1 && c.state == c1.state {
panic(fmt.Sprintf("infinite loop from %v to %v on %q..%q", c, c1, s[:i], s[i:]))
}

View file

@ -508,6 +508,31 @@ func TestEscape(t *testing.T) {
"<script>var a/*b*///c\nd</script>",
"<script>var a \nd</script>",
},
{
"JS HTML-like comments",
"<script>before <!-- beep\nbetween\nbefore-->boop\n</script>",
"<script>before \nbetween\nbefore\n</script>",
},
{
"JS hashbang comment",
"<script>#! beep\n</script>",
"<script>\n</script>",
},
{
"Special tags in <script> string literals",
`<script>var a = "asd < 123 <!-- 456 < fgh <script jkl < 789 </script"</script>`,
`<script>var a = "asd < 123 \x3C!-- 456 < fgh \x3Cscript jkl < 789 \x3C/script"</script>`,
},
{
"Special tags in <script> string literals (mixed case)",
`<script>var a = "<!-- <ScripT </ScripT"</script>`,
`<script>var a = "\x3C!-- \x3CScripT \x3C/ScripT"</script>`,
},
{
"Special tags in <script> regex literals (mixed case)",
`<script>var a = /<!-- <ScripT </ScripT/</script>`,
`<script>var a = /\x3C!-- \x3CScripT \x3C/ScripT/</script>`,
},
{
"CSS comments",
"<style>p// paragraph\n" +
@ -1528,8 +1553,38 @@ func TestEscapeText(t *testing.T) {
context{state: stateJS, element: elementScript},
},
{
// <script and </script tags are escaped, so </script> should not
// cause us to exit the JS state.
`<script>document.write("<script>alert(1)</script>");`,
context{state: stateText},
context{state: stateJS, element: elementScript},
},
{
`<script>document.write("<script>`,
context{state: stateJSDqStr, element: elementScript},
},
{
`<script>document.write("<script>alert(1)</script>`,
context{state: stateJSDqStr, element: elementScript},
},
{
`<script>document.write("<script>alert(1)<!--`,
context{state: stateJSDqStr, element: elementScript},
},
{
`<script>document.write("<script>alert(1)</Script>");`,
context{state: stateJS, element: elementScript},
},
{
`<script>document.write("<!--");`,
context{state: stateJS, element: elementScript},
},
{
`<script>let a = /</script`,
context{state: stateJSRegexp, element: elementScript},
},
{
`<script>let a = /</script/`,
context{state: stateJS, element: elementScript, jsCtx: jsCtxDivOp},
},
{
`<script type="text/template">`,

View file

@ -25,21 +25,23 @@ func _() {
_ = x[stateJSRegexp-14]
_ = x[stateJSBlockCmt-15]
_ = x[stateJSLineCmt-16]
_ = x[stateCSS-17]
_ = x[stateCSSDqStr-18]
_ = x[stateCSSSqStr-19]
_ = x[stateCSSDqURL-20]
_ = x[stateCSSSqURL-21]
_ = x[stateCSSURL-22]
_ = x[stateCSSBlockCmt-23]
_ = x[stateCSSLineCmt-24]
_ = x[stateError-25]
_ = x[stateDead-26]
_ = x[stateJSHTMLOpenCmt-17]
_ = x[stateJSHTMLCloseCmt-18]
_ = x[stateCSS-19]
_ = x[stateCSSDqStr-20]
_ = x[stateCSSSqStr-21]
_ = x[stateCSSDqURL-22]
_ = x[stateCSSSqURL-23]
_ = x[stateCSSURL-24]
_ = x[stateCSSBlockCmt-25]
_ = x[stateCSSLineCmt-26]
_ = x[stateError-27]
_ = x[stateDead-28]
}
const _state_name = "stateTextstateTagstateAttrNamestateAfterNamestateBeforeValuestateHTMLCmtstateRCDATAstateAttrstateURLstateSrcsetstateJSstateJSDqStrstateJSSqStrstateJSBqStrstateJSRegexpstateJSBlockCmtstateJSLineCmtstateCSSstateCSSDqStrstateCSSSqStrstateCSSDqURLstateCSSSqURLstateCSSURLstateCSSBlockCmtstateCSSLineCmtstateErrorstateDead"
const _state_name = "stateTextstateTagstateAttrNamestateAfterNamestateBeforeValuestateHTMLCmtstateRCDATAstateAttrstateURLstateSrcsetstateJSstateJSDqStrstateJSSqStrstateJSBqStrstateJSRegexpstateJSBlockCmtstateJSLineCmtstateJSHTMLOpenCmtstateJSHTMLCloseCmtstateCSSstateCSSDqStrstateCSSSqStrstateCSSDqURLstateCSSSqURLstateCSSURLstateCSSBlockCmtstateCSSLineCmtstateErrorstateDead"
var _state_index = [...]uint16{0, 9, 17, 30, 44, 60, 72, 83, 92, 100, 111, 118, 130, 142, 154, 167, 182, 196, 204, 217, 230, 243, 256, 267, 283, 298, 308, 317}
var _state_index = [...]uint16{0, 9, 17, 30, 44, 60, 72, 83, 92, 100, 111, 118, 130, 142, 154, 167, 182, 196, 214, 233, 241, 254, 267, 280, 293, 304, 320, 335, 345, 354}
func (i state) String() string {
if i >= state(len(_state_index)-1) {

View file

@ -14,32 +14,34 @@ import (
// the updated context and the number of bytes consumed from the front of the
// input.
var transitionFunc = [...]func(context, []byte) (context, int){
stateText: tText,
stateTag: tTag,
stateAttrName: tAttrName,
stateAfterName: tAfterName,
stateBeforeValue: tBeforeValue,
stateHTMLCmt: tHTMLCmt,
stateRCDATA: tSpecialTagEnd,
stateAttr: tAttr,
stateURL: tURL,
stateSrcset: tURL,
stateJS: tJS,
stateJSDqStr: tJSDelimited,
stateJSSqStr: tJSDelimited,
stateJSBqStr: tJSDelimited,
stateJSRegexp: tJSDelimited,
stateJSBlockCmt: tBlockCmt,
stateJSLineCmt: tLineCmt,
stateCSS: tCSS,
stateCSSDqStr: tCSSStr,
stateCSSSqStr: tCSSStr,
stateCSSDqURL: tCSSStr,
stateCSSSqURL: tCSSStr,
stateCSSURL: tCSSStr,
stateCSSBlockCmt: tBlockCmt,
stateCSSLineCmt: tLineCmt,
stateError: tError,
stateText: tText,
stateTag: tTag,
stateAttrName: tAttrName,
stateAfterName: tAfterName,
stateBeforeValue: tBeforeValue,
stateHTMLCmt: tHTMLCmt,
stateRCDATA: tSpecialTagEnd,
stateAttr: tAttr,
stateURL: tURL,
stateSrcset: tURL,
stateJS: tJS,
stateJSDqStr: tJSDelimited,
stateJSSqStr: tJSDelimited,
stateJSBqStr: tJSDelimited,
stateJSRegexp: tJSDelimited,
stateJSBlockCmt: tBlockCmt,
stateJSLineCmt: tLineCmt,
stateJSHTMLOpenCmt: tLineCmt,
stateJSHTMLCloseCmt: tLineCmt,
stateCSS: tCSS,
stateCSSDqStr: tCSSStr,
stateCSSSqStr: tCSSStr,
stateCSSDqURL: tCSSStr,
stateCSSSqURL: tCSSStr,
stateCSSURL: tCSSStr,
stateCSSBlockCmt: tBlockCmt,
stateCSSLineCmt: tLineCmt,
stateError: tError,
}
var commentStart = []byte("<!--")
@ -212,6 +214,11 @@ var (
// element states.
func tSpecialTagEnd(c context, s []byte) (context, int) {
if c.element != elementNone {
// script end tags ("</script") within script literals are ignored, so that
// we can properly escape them.
if c.element == elementScript && (isInScriptLiteral(c.state) || isComment(c.state)) {
return c, len(s)
}
if i := indexTagEnd(s, specialTagEndMarkers[c.element]); i != -1 {
return context{}, i
}
@ -263,7 +270,7 @@ func tURL(c context, s []byte) (context, int) {
// tJS is the context transition function for the JS state.
func tJS(c context, s []byte) (context, int) {
i := bytes.IndexAny(s, "\"`'/")
i := bytes.IndexAny(s, "\"`'/<-#")
if i == -1 {
// Entire input is non string, comment, regexp tokens.
c.jsCtx = nextJSCtx(s, c.jsCtx)
@ -293,6 +300,26 @@ func tJS(c context, s []byte) (context, int) {
err: errorf(ErrSlashAmbig, nil, 0, "'/' could start a division or regexp: %.32q", s[i:]),
}, len(s)
}
// ECMAScript supports HTML style comments for legacy reasons, see Appendix
// B.1.1 "HTML-like Comments". The handling of these comments is somewhat
// confusing. Multi-line comments are not supported, i.e. anything on lines
// between the opening and closing tokens is not considered a comment, but
// anything following the opening or closing token, on the same line, is
// ignored. As such we simply treat any line prefixed with "<!--" or "-->"
// as if it were actually prefixed with "//" and move on.
case '<':
if i+3 < len(s) && bytes.Equal(commentStart, s[i:i+4]) {
c.state, i = stateJSHTMLOpenCmt, i+3
}
case '-':
if i+2 < len(s) && bytes.Equal(commentEnd, s[i:i+3]) {
c.state, i = stateJSHTMLCloseCmt, i+2
}
// ECMAScript also supports "hashbang" comment lines, see Section 12.5.
case '#':
if i+1 < len(s) && s[i+1] == '!' {
c.state, i = stateJSLineCmt, i+1
}
default:
panic("unreachable")
}
@ -331,6 +358,16 @@ func tJSDelimited(c context, s []byte) (context, int) {
inCharset = true
case ']':
inCharset = false
case '/':
// If "</script" appears in a regex literal, the '/' should not
// close the regex literal, and it will later be escaped to
// "\x3C/script" in escapeText.
if i > 0 && i+7 <= len(s) && bytes.Compare(bytes.ToLower(s[i-1:i+7]), []byte("</script")) == 0 {
i++
} else if !inCharset {
c.state, c.jsCtx = stateJS, jsCtxDivOp
return c, i + 1
}
default:
// end delimiter
if !inCharset {
@ -372,12 +409,12 @@ func tBlockCmt(c context, s []byte) (context, int) {
return c, i + 2
}
// tLineCmt is the context transition function for //comment states.
// tLineCmt is the context transition function for //comment states, and the JS HTML-like comment state.
func tLineCmt(c context, s []byte) (context, int) {
var lineTerminators string
var endState state
switch c.state {
case stateJSLineCmt:
case stateJSLineCmt, stateJSHTMLOpenCmt, stateJSHTMLCloseCmt:
lineTerminators, endState = "\n\r\u2028\u2029", stateJS
case stateCSSLineCmt:
lineTerminators, endState = "\n\f\r", stateCSS