Bjørn Erik Pedersen 2023-09-21 20:15:31 +02:00
parent 275c0acbf6
commit 1e9b87f760
7 changed files with 191 additions and 49 deletions

View file

@ -4,7 +4,7 @@ parameters:
defaults: &defaults defaults: &defaults
resource_class: large resource_class: large
docker: docker:
- image: bepsays/ci-hugoreleaser:1.22000.20100 - image: bepsays/ci-hugoreleaser:1.22100.20100
environment: &buildenv environment: &buildenv
GOMODCACHE: /root/project/gomodcache GOMODCACHE: /root/project/gomodcache
version: 2 version: 2
@ -60,7 +60,7 @@ jobs:
environment: environment:
<<: [*buildenv] <<: [*buildenv]
docker: docker:
- image: bepsays/ci-hugoreleaser-linux-arm64:1.22000.20100 - image: bepsays/ci-hugoreleaser-linux-arm64:1.22100.20100
steps: steps:
- *restore-cache - *restore-cache
- &attach-workspace - &attach-workspace

View file

@ -16,7 +16,7 @@ import (
) )
func main() { func main() {
// The current is built with c19c4c566c HEAD, tag: go1.21.0. // The current is built with 2c1e5b05fe39fc5e6c730dd60e82946b8e67c6ba, tag: go1.21.1.
fmt.Println("Forking ...") fmt.Println("Forking ...")
defer fmt.Println("Done ...") defer fmt.Println("Done ...")
@ -162,7 +162,8 @@ func copyGoPackage(dst, src string) {
func doWithGoFiles(dir string, func doWithGoFiles(dir string,
rewrite func(name string), rewrite func(name string),
transform func(name, in string) string) { transform func(name, in string) string,
) {
if rewrite == nil && transform == nil { if rewrite == nil && transform == nil {
return return
} }

View file

@ -129,6 +129,10 @@ const (
stateJSBlockCmt stateJSBlockCmt
// stateJSLineCmt occurs inside a JavaScript // line comment. // stateJSLineCmt occurs inside a JavaScript // line comment.
stateJSLineCmt stateJSLineCmt
// stateJSHTMLOpenCmt occurs inside a JavaScript <!-- HTML-like comment.
stateJSHTMLOpenCmt
// stateJSHTMLCloseCmt occurs inside a JavaScript --> HTML-like comment.
stateJSHTMLCloseCmt
// stateCSS occurs inside a <style> element or style attribute. // stateCSS occurs inside a <style> element or style attribute.
stateCSS stateCSS
// stateCSSDqStr occurs inside a CSS double quoted string. // stateCSSDqStr occurs inside a CSS double quoted string.
@ -156,7 +160,7 @@ const (
// authors & maintainers, not for end-users or machines. // authors & maintainers, not for end-users or machines.
func isComment(s state) bool { func isComment(s state) bool {
switch s { switch s {
case stateHTMLCmt, stateJSBlockCmt, stateJSLineCmt, stateCSSBlockCmt, stateCSSLineCmt: case stateHTMLCmt, stateJSBlockCmt, stateJSLineCmt, stateJSHTMLOpenCmt, stateJSHTMLCloseCmt, stateCSSBlockCmt, stateCSSLineCmt:
return true return true
} }
return false return false
@ -171,6 +175,20 @@ func isInTag(s state) bool {
return false return false
} }
// isInScriptLiteral reports whether s is one of the JS string or regexp
// literal states found within a <script> tag. In those states, occurrences of
// "<!--", "<script", and "</script" need to be treated specially.
func isInScriptLiteral(s state) bool {
	// The JS comment states (stateJSBlockCmt, stateJSLineCmt,
	// stateJSHTMLOpenCmt, stateJSHTMLCloseCmt) are deliberately left out:
	// their content is already omitted from the output.
	return s == stateJSDqStr ||
		s == stateJSSqStr ||
		s == stateJSBqStr ||
		s == stateJSRegexp
}
// delim is the delimiter that will end the current HTML attribute. // delim is the delimiter that will end the current HTML attribute.
type delim uint8 type delim uint8

View file

@ -11,6 +11,7 @@ import (
//"internal/godebug" //"internal/godebug"
"io" "io"
"regexp"
template "github.com/gohugoio/hugo/tpl/internal/go_templates/texttemplate" template "github.com/gohugoio/hugo/tpl/internal/go_templates/texttemplate"
"github.com/gohugoio/hugo/tpl/internal/go_templates/texttemplate/parse" "github.com/gohugoio/hugo/tpl/internal/go_templates/texttemplate/parse"
@ -732,6 +733,26 @@ var delimEnds = [...]string{
delimSpaceOrTagEnd: " \t\n\f\r>", delimSpaceOrTagEnd: " \t\n\f\r>",
} }
// specialScriptTagRE matches, case-insensitively, the opening sequences
// "<script", "</script" and "<!--", capturing everything after the '<'.
//
// Per WHATWG HTML specification, section 4.12.1.3, there are extremely
// complicated rules for how to handle these opening tags when they appear in
// JS literals (i.e. strings, regexes, and comments). Rather than implementing
// the arcane ABNF, the specification suggests a simple solution: escape the
// opening bracket as \x3C. A regexp is used because it makes the
// case-insensitive find-and-replace much simpler.
var specialScriptTagRE = regexp.MustCompile("(?i)<(script|/script|!--)")

// specialScriptTagReplacement is the replacement template used with
// specialScriptTagRE: a literal \x3C followed by the captured tag text.
var specialScriptTagReplacement = []byte("\\x3C$1")

// containsSpecialScriptTag reports whether s contains at least one sequence
// matched by specialScriptTagRE.
func containsSpecialScriptTag(s []byte) bool {
	loc := specialScriptTagRE.FindIndex(s)
	return loc != nil
}

// escapeSpecialScriptTags returns a copy of s in which the leading '<' of
// every sequence matched by specialScriptTagRE is replaced with \x3C. The
// matched tag text itself is kept unchanged, including its original case.
func escapeSpecialScriptTags(s []byte) []byte {
	escaped := specialScriptTagRE.ReplaceAll(s, specialScriptTagReplacement)
	return escaped
}
var doctypeBytes = []byte("<!DOCTYPE") var doctypeBytes = []byte("<!DOCTYPE")
// escapeText escapes a text template node. // escapeText escapes a text template node.
@ -780,13 +801,21 @@ func (e *escaper) escapeText(c context, n *parse.TextNode) context {
if c.state != c1.state && isComment(c1.state) && c1.delim == delimNone { if c.state != c1.state && isComment(c1.state) && c1.delim == delimNone {
// Preserve the portion between written and the comment start. // Preserve the portion between written and the comment start.
cs := i1 - 2 cs := i1 - 2
if c1.state == stateHTMLCmt { if c1.state == stateHTMLCmt || c1.state == stateJSHTMLOpenCmt {
// "<!--" instead of "/*" or "//" // "<!--" instead of "/*" or "//"
cs -= 2 cs -= 2
} else if c1.state == stateJSHTMLCloseCmt {
// "-->" instead of "/*" or "//"
cs -= 1
} }
b.Write(s[written:cs]) b.Write(s[written:cs])
written = i1 written = i1
} }
if isInScriptLiteral(c.state) && containsSpecialScriptTag(s[i:i1]) {
b.Write(s[written:i])
b.Write(escapeSpecialScriptTags(s[i:i1]))
written = i1
}
if i == i1 && c.state == c1.state { if i == i1 && c.state == c1.state {
panic(fmt.Sprintf("infinite loop from %v to %v on %q..%q", c, c1, s[:i], s[i:])) panic(fmt.Sprintf("infinite loop from %v to %v on %q..%q", c, c1, s[:i], s[i:]))
} }

View file

@ -508,6 +508,31 @@ func TestEscape(t *testing.T) {
"<script>var a/*b*///c\nd</script>", "<script>var a/*b*///c\nd</script>",
"<script>var a \nd</script>", "<script>var a \nd</script>",
}, },
{
"JS HTML-like comments",
"<script>before <!-- beep\nbetween\nbefore-->boop\n</script>",
"<script>before \nbetween\nbefore\n</script>",
},
{
"JS hashbang comment",
"<script>#! beep\n</script>",
"<script>\n</script>",
},
{
"Special tags in <script> string literals",
`<script>var a = "asd < 123 <!-- 456 < fgh <script jkl < 789 </script"</script>`,
`<script>var a = "asd < 123 \x3C!-- 456 < fgh \x3Cscript jkl < 789 \x3C/script"</script>`,
},
{
"Special tags in <script> string literals (mixed case)",
`<script>var a = "<!-- <ScripT </ScripT"</script>`,
`<script>var a = "\x3C!-- \x3CScripT \x3C/ScripT"</script>`,
},
{
"Special tags in <script> regex literals (mixed case)",
`<script>var a = /<!-- <ScripT </ScripT/</script>`,
`<script>var a = /\x3C!-- \x3CScripT \x3C/ScripT/</script>`,
},
{ {
"CSS comments", "CSS comments",
"<style>p// paragraph\n" + "<style>p// paragraph\n" +
@ -1528,8 +1553,38 @@ func TestEscapeText(t *testing.T) {
context{state: stateJS, element: elementScript}, context{state: stateJS, element: elementScript},
}, },
{ {
// <script and </script tags are escaped, so </script> should not
// cause us to exit the JS state.
`<script>document.write("<script>alert(1)</script>");`, `<script>document.write("<script>alert(1)</script>");`,
context{state: stateText}, context{state: stateJS, element: elementScript},
},
{
`<script>document.write("<script>`,
context{state: stateJSDqStr, element: elementScript},
},
{
`<script>document.write("<script>alert(1)</script>`,
context{state: stateJSDqStr, element: elementScript},
},
{
`<script>document.write("<script>alert(1)<!--`,
context{state: stateJSDqStr, element: elementScript},
},
{
`<script>document.write("<script>alert(1)</Script>");`,
context{state: stateJS, element: elementScript},
},
{
`<script>document.write("<!--");`,
context{state: stateJS, element: elementScript},
},
{
`<script>let a = /</script`,
context{state: stateJSRegexp, element: elementScript},
},
{
`<script>let a = /</script/`,
context{state: stateJS, element: elementScript, jsCtx: jsCtxDivOp},
}, },
{ {
`<script type="text/template">`, `<script type="text/template">`,

View file

@ -25,21 +25,23 @@ func _() {
_ = x[stateJSRegexp-14] _ = x[stateJSRegexp-14]
_ = x[stateJSBlockCmt-15] _ = x[stateJSBlockCmt-15]
_ = x[stateJSLineCmt-16] _ = x[stateJSLineCmt-16]
_ = x[stateCSS-17] _ = x[stateJSHTMLOpenCmt-17]
_ = x[stateCSSDqStr-18] _ = x[stateJSHTMLCloseCmt-18]
_ = x[stateCSSSqStr-19] _ = x[stateCSS-19]
_ = x[stateCSSDqURL-20] _ = x[stateCSSDqStr-20]
_ = x[stateCSSSqURL-21] _ = x[stateCSSSqStr-21]
_ = x[stateCSSURL-22] _ = x[stateCSSDqURL-22]
_ = x[stateCSSBlockCmt-23] _ = x[stateCSSSqURL-23]
_ = x[stateCSSLineCmt-24] _ = x[stateCSSURL-24]
_ = x[stateError-25] _ = x[stateCSSBlockCmt-25]
_ = x[stateDead-26] _ = x[stateCSSLineCmt-26]
_ = x[stateError-27]
_ = x[stateDead-28]
} }
const _state_name = "stateTextstateTagstateAttrNamestateAfterNamestateBeforeValuestateHTMLCmtstateRCDATAstateAttrstateURLstateSrcsetstateJSstateJSDqStrstateJSSqStrstateJSBqStrstateJSRegexpstateJSBlockCmtstateJSLineCmtstateCSSstateCSSDqStrstateCSSSqStrstateCSSDqURLstateCSSSqURLstateCSSURLstateCSSBlockCmtstateCSSLineCmtstateErrorstateDead" const _state_name = "stateTextstateTagstateAttrNamestateAfterNamestateBeforeValuestateHTMLCmtstateRCDATAstateAttrstateURLstateSrcsetstateJSstateJSDqStrstateJSSqStrstateJSBqStrstateJSRegexpstateJSBlockCmtstateJSLineCmtstateJSHTMLOpenCmtstateJSHTMLCloseCmtstateCSSstateCSSDqStrstateCSSSqStrstateCSSDqURLstateCSSSqURLstateCSSURLstateCSSBlockCmtstateCSSLineCmtstateErrorstateDead"
var _state_index = [...]uint16{0, 9, 17, 30, 44, 60, 72, 83, 92, 100, 111, 118, 130, 142, 154, 167, 182, 196, 204, 217, 230, 243, 256, 267, 283, 298, 308, 317} var _state_index = [...]uint16{0, 9, 17, 30, 44, 60, 72, 83, 92, 100, 111, 118, 130, 142, 154, 167, 182, 196, 214, 233, 241, 254, 267, 280, 293, 304, 320, 335, 345, 354}
func (i state) String() string { func (i state) String() string {
if i >= state(len(_state_index)-1) { if i >= state(len(_state_index)-1) {

View file

@ -31,6 +31,8 @@ var transitionFunc = [...]func(context, []byte) (context, int){
stateJSRegexp: tJSDelimited, stateJSRegexp: tJSDelimited,
stateJSBlockCmt: tBlockCmt, stateJSBlockCmt: tBlockCmt,
stateJSLineCmt: tLineCmt, stateJSLineCmt: tLineCmt,
stateJSHTMLOpenCmt: tLineCmt,
stateJSHTMLCloseCmt: tLineCmt,
stateCSS: tCSS, stateCSS: tCSS,
stateCSSDqStr: tCSSStr, stateCSSDqStr: tCSSStr,
stateCSSSqStr: tCSSStr, stateCSSSqStr: tCSSStr,
@ -212,6 +214,11 @@ var (
// element states. // element states.
func tSpecialTagEnd(c context, s []byte) (context, int) { func tSpecialTagEnd(c context, s []byte) (context, int) {
if c.element != elementNone { if c.element != elementNone {
// script end tags ("</script") within script literals are ignored, so that
// we can properly escape them.
if c.element == elementScript && (isInScriptLiteral(c.state) || isComment(c.state)) {
return c, len(s)
}
if i := indexTagEnd(s, specialTagEndMarkers[c.element]); i != -1 { if i := indexTagEnd(s, specialTagEndMarkers[c.element]); i != -1 {
return context{}, i return context{}, i
} }
@ -263,7 +270,7 @@ func tURL(c context, s []byte) (context, int) {
// tJS is the context transition function for the JS state. // tJS is the context transition function for the JS state.
func tJS(c context, s []byte) (context, int) { func tJS(c context, s []byte) (context, int) {
i := bytes.IndexAny(s, "\"`'/") i := bytes.IndexAny(s, "\"`'/<-#")
if i == -1 { if i == -1 {
// Entire input is non string, comment, regexp tokens. // Entire input is non string, comment, regexp tokens.
c.jsCtx = nextJSCtx(s, c.jsCtx) c.jsCtx = nextJSCtx(s, c.jsCtx)
@ -293,6 +300,26 @@ func tJS(c context, s []byte) (context, int) {
err: errorf(ErrSlashAmbig, nil, 0, "'/' could start a division or regexp: %.32q", s[i:]), err: errorf(ErrSlashAmbig, nil, 0, "'/' could start a division or regexp: %.32q", s[i:]),
}, len(s) }, len(s)
} }
// ECMAScript supports HTML style comments for legacy reasons, see Appendix
// B.1.1 "HTML-like Comments". The handling of these comments is somewhat
// confusing. Multi-line comments are not supported, i.e. anything on lines
// between the opening and closing tokens is not considered a comment, but
// anything following the opening or closing token, on the same line, is
// ignored. As such we simply treat any line prefixed with "<!--" or "-->"
// as if it were actually prefixed with "//" and move on.
case '<':
if i+3 < len(s) && bytes.Equal(commentStart, s[i:i+4]) {
c.state, i = stateJSHTMLOpenCmt, i+3
}
case '-':
if i+2 < len(s) && bytes.Equal(commentEnd, s[i:i+3]) {
c.state, i = stateJSHTMLCloseCmt, i+2
}
// ECMAScript also supports "hashbang" comment lines, see Section 12.5.
case '#':
if i+1 < len(s) && s[i+1] == '!' {
c.state, i = stateJSLineCmt, i+1
}
default: default:
panic("unreachable") panic("unreachable")
} }
@ -331,6 +358,16 @@ func tJSDelimited(c context, s []byte) (context, int) {
inCharset = true inCharset = true
case ']': case ']':
inCharset = false inCharset = false
case '/':
// If "</script" appears in a regex literal, the '/' should not
// close the regex literal, and it will later be escaped to
// "\x3C/script" in escapeText.
if i > 0 && i+7 <= len(s) && bytes.Compare(bytes.ToLower(s[i-1:i+7]), []byte("</script")) == 0 {
i++
} else if !inCharset {
c.state, c.jsCtx = stateJS, jsCtxDivOp
return c, i + 1
}
default: default:
// end delimiter // end delimiter
if !inCharset { if !inCharset {
@ -372,12 +409,12 @@ func tBlockCmt(c context, s []byte) (context, int) {
return c, i + 2 return c, i + 2
} }
// tLineCmt is the context transition function for //comment states. // tLineCmt is the context transition function for //comment states, and the JS HTML-like comment state.
func tLineCmt(c context, s []byte) (context, int) { func tLineCmt(c context, s []byte) (context, int) {
var lineTerminators string var lineTerminators string
var endState state var endState state
switch c.state { switch c.state {
case stateJSLineCmt: case stateJSLineCmt, stateJSHTMLOpenCmt, stateJSHTMLCloseCmt:
lineTerminators, endState = "\n\r\u2028\u2029", stateJS lineTerminators, endState = "\n\r\u2028\u2029", stateJS
case stateCSSLineCmt: case stateCSSLineCmt:
lineTerminators, endState = "\n\f\r", stateCSS lineTerminators, endState = "\n\f\r", stateCSS