2018-10-17 07:48:55 -04:00
// Copyright 2018 The Hugo Authors. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package pageparser
import (
"fmt"
2018-10-18 03:04:48 -04:00
"reflect"
2018-10-17 07:48:55 -04:00
"strings"
"testing"
)
// lexerTest is a single table-driven lexer case: a labelled input document
// together with the items the lexer is expected to produce for it.
type lexerTest struct {
// name identifies the case in failure messages.
name string
// input is the raw document text; tests convert it to []byte before lexing.
input string
// items is the expected item sequence, compared against the lexer output
// with equal.
items [ ] Item
}
2018-10-18 04:21:23 -04:00
func nti ( tp ItemType , val string ) Item {
2019-09-29 08:51:51 -04:00
return Item { tp , 0 , [ ] byte ( val ) , false }
2018-10-18 03:04:48 -04:00
}
2018-10-17 07:48:55 -04:00
var (
tstJSON = ` { "a": { "b": "\"Hugo\"}" } } `
2018-10-21 06:20:21 -04:00
tstFrontMatterTOML = nti ( TypeFrontMatterTOML , "foo = \"bar\"\n" )
tstFrontMatterYAML = nti ( TypeFrontMatterYAML , "foo: \"bar\"\n" )
tstFrontMatterYAMLCRLF = nti ( TypeFrontMatterYAML , "foo: \"bar\"\r\n" )
2018-10-18 04:21:23 -04:00
tstFrontMatterJSON = nti ( TypeFrontMatterJSON , tstJSON + "\r\n" )
2018-10-18 03:04:48 -04:00
tstSomeText = nti ( tText , "\nSome text.\n" )
2018-10-30 15:24:34 -04:00
tstSummaryDivider = nti ( TypeLeadSummaryDivider , "<!--more-->\n" )
2018-11-24 11:06:26 -05:00
tstNewline = nti ( tText , "\n" )
2018-10-17 07:48:55 -04:00
tstORG = `
# + TITLE : T1
# + AUTHOR : A1
# + DESCRIPTION : D1
`
2018-10-18 04:21:23 -04:00
tstFrontMatterORG = nti ( TypeFrontMatterORG , tstORG )
2018-10-17 07:48:55 -04:00
)
// crLfReplacer makes line endings visible in test failure output by rewriting
// every "\r" as "#" and every "\n" as "$".
var crLfReplacer = strings.NewReplacer(
	"\r", "#",
	"\n", "$",
)
// TODO(bep) a way to toggle ORG mode vs the rest.
var frontMatterTests = [ ] lexerTest {
{ "empty" , "" , [ ] Item { tstEOF } } ,
2018-10-18 04:21:23 -04:00
{ "Byte order mark" , "\ufeff\nSome text.\n" , [ ] Item { nti ( TypeIgnore , "\ufeff" ) , tstSomeText , tstEOF } } ,
2020-03-09 07:04:33 -04:00
{ "HTML Document" , ` <html> ` , [ ] Item { nti ( tError , "plain HTML documents not supported" ) } } ,
{ "HTML Document with shortcode" , ` <html> {{ < sc1 > }} </html> ` , [ ] Item { nti ( tError , "plain HTML documents not supported" ) } } ,
2018-10-18 04:21:23 -04:00
{ "No front matter" , "\nSome text.\n" , [ ] Item { tstSomeText , tstEOF } } ,
2018-10-17 07:48:55 -04:00
{ "YAML front matter" , "---\nfoo: \"bar\"\n---\n\nSome text.\n" , [ ] Item { tstFrontMatterYAML , tstSomeText , tstEOF } } ,
2018-10-21 06:20:21 -04:00
{ "YAML empty front matter" , "---\n---\n\nSome text.\n" , [ ] Item { nti ( TypeFrontMatterYAML , "" ) , tstSomeText , tstEOF } } ,
2018-11-28 04:21:54 -05:00
{ "YAML commented out front matter" , "<!--\n---\nfoo: \"bar\"\n---\n-->\nSome text.\n" , [ ] Item { nti ( TypeIgnore , "<!--\n" ) , tstFrontMatterYAML , nti ( TypeIgnore , "-->" ) , tstSomeText , tstEOF } } ,
{ "YAML commented out front matter, no end" , "<!--\n---\nfoo: \"bar\"\n---\nSome text.\n" , [ ] Item { nti ( TypeIgnore , "<!--\n" ) , tstFrontMatterYAML , nti ( tError , "starting HTML comment with no end" ) } } ,
2018-10-17 07:48:55 -04:00
// Note that we keep all bytes as they are, but we need to handle CRLF
{ "YAML front matter CRLF" , "---\r\nfoo: \"bar\"\r\n---\n\nSome text.\n" , [ ] Item { tstFrontMatterYAMLCRLF , tstSomeText , tstEOF } } ,
{ "TOML front matter" , "+++\nfoo = \"bar\"\n+++\n\nSome text.\n" , [ ] Item { tstFrontMatterTOML , tstSomeText , tstEOF } } ,
{ "JSON front matter" , tstJSON + "\r\n\nSome text.\n" , [ ] Item { tstFrontMatterJSON , tstSomeText , tstEOF } } ,
{ "ORG front matter" , tstORG + "\nSome text.\n" , [ ] Item { tstFrontMatterORG , tstSomeText , tstEOF } } ,
2018-10-30 15:24:34 -04:00
{ "Summary divider ORG" , tstORG + "\nSome text.\n# more\nSome text.\n" , [ ] Item { tstFrontMatterORG , tstSomeText , nti ( TypeLeadSummaryDivider , "# more\n" ) , nti ( tText , "Some text.\n" ) , tstEOF } } ,
{ "Summary divider" , "+++\nfoo = \"bar\"\n+++\n\nSome text.\n<!--more-->\nSome text.\n" , [ ] Item { tstFrontMatterTOML , tstSomeText , tstSummaryDivider , nti ( tText , "Some text.\n" ) , tstEOF } } ,
{ "Summary divider same line" , "+++\nfoo = \"bar\"\n+++\n\nSome text.<!--more-->Some text.\n" , [ ] Item { tstFrontMatterTOML , nti ( tText , "\nSome text." ) , nti ( TypeLeadSummaryDivider , "<!--more-->" ) , nti ( tText , "Some text.\n" ) , tstEOF } } ,
2018-11-05 07:30:16 -05:00
// https://github.com/gohugoio/hugo/issues/5402
{ "Summary and shortcode, no space" , "+++\nfoo = \"bar\"\n+++\n\nSome text.\n<!--more-->{{< sc1 >}}\nSome text.\n" , [ ] Item { tstFrontMatterTOML , tstSomeText , nti ( TypeLeadSummaryDivider , "<!--more-->" ) , tstLeftNoMD , tstSC1 , tstRightNoMD , tstSomeText , tstEOF } } ,
2018-11-24 11:06:26 -05:00
// https://github.com/gohugoio/hugo/issues/5464
{ "Summary and shortcode only" , "+++\nfoo = \"bar\"\n+++\n{{< sc1 >}}\n<!--more-->\n{{< sc2 >}}" , [ ] Item { tstFrontMatterTOML , tstLeftNoMD , tstSC1 , tstRightNoMD , tstNewline , tstSummaryDivider , tstLeftNoMD , tstSC2 , tstRightNoMD , tstEOF } } ,
2018-10-17 07:48:55 -04:00
}
func TestFrontMatter ( t * testing . T ) {
t . Parallel ( )
for i , test := range frontMatterTests {
2018-10-18 03:04:48 -04:00
items := collect ( [ ] byte ( test . input ) , false , lexIntroSection )
2018-10-17 07:48:55 -04:00
if ! equal ( items , test . items ) {
got := crLfReplacer . Replace ( fmt . Sprint ( items ) )
expected := crLfReplacer . Replace ( fmt . Sprint ( test . items ) )
t . Errorf ( "[%d] %s: got\n\t%v\nexpected\n\t%v" , i , test . name , got , expected )
}
}
}
Move the emoji parsing to pageparser
This avoids double parsing the page content when `enableEmoji=true`.
This commit also adds some general improvements to the parser, making it in general much faster:
```bash
benchmark old ns/op new ns/op delta
BenchmarkShortcodeLexer-4 90258 101730 +12.71%
BenchmarkParse-4 148940 15037 -89.90%
benchmark old allocs new allocs delta
BenchmarkShortcodeLexer-4 456 700 +53.51%
BenchmarkParse-4 28 33 +17.86%
benchmark old bytes new bytes delta
BenchmarkShortcodeLexer-4 69875 81014 +15.94%
BenchmarkParse-4 8128 8304 +2.17%
```
Running some site benchmarks with Emoji support turned on:
```bash
benchmark old ns/op new ns/op delta
BenchmarkSiteBuilding/TOML,num_langs=3,num_pages=5000,tags_per_page=5,shortcodes,render-4 924556797 818115620 -11.51%
benchmark old allocs new allocs delta
BenchmarkSiteBuilding/TOML,num_langs=3,num_pages=5000,tags_per_page=5,shortcodes,render-4 4112613 4133787 +0.51%
benchmark old bytes new bytes delta
BenchmarkSiteBuilding/TOML,num_langs=3,num_pages=5000,tags_per_page=5,shortcodes,render-4 426982864 424363832 -0.61%
```
Fixes #5534
2018-12-17 15:03:23 -05:00
func collectWithConfig ( input [ ] byte , skipFrontMatter bool , stateStart stateFunc , cfg Config ) ( items [ ] Item ) {
l := newPageLexer ( input , stateStart , cfg )
2018-10-17 07:48:55 -04:00
l . run ( )
2018-10-18 04:21:23 -04:00
t := l . newIterator ( )
2018-10-17 07:48:55 -04:00
for {
2018-10-18 04:21:23 -04:00
item := t . Next ( )
2018-10-17 07:48:55 -04:00
items = append ( items , item )
2018-10-19 05:30:57 -04:00
if item . Type == tEOF || item . Type == tError {
2018-10-17 07:48:55 -04:00
break
}
}
return
}
Move the emoji parsing to pageparser
This avoids double parsing the page content when `enableEmoji=true`.
This commit also adds some general improvements to the parser, making it in general much faster:
```bash
benchmark old ns/op new ns/op delta
BenchmarkShortcodeLexer-4 90258 101730 +12.71%
BenchmarkParse-4 148940 15037 -89.90%
benchmark old allocs new allocs delta
BenchmarkShortcodeLexer-4 456 700 +53.51%
BenchmarkParse-4 28 33 +17.86%
benchmark old bytes new bytes delta
BenchmarkShortcodeLexer-4 69875 81014 +15.94%
BenchmarkParse-4 8128 8304 +2.17%
```
Running some site benchmarks with Emoji support turned on:
```bash
benchmark old ns/op new ns/op delta
BenchmarkSiteBuilding/TOML,num_langs=3,num_pages=5000,tags_per_page=5,shortcodes,render-4 924556797 818115620 -11.51%
benchmark old allocs new allocs delta
BenchmarkSiteBuilding/TOML,num_langs=3,num_pages=5000,tags_per_page=5,shortcodes,render-4 4112613 4133787 +0.51%
benchmark old bytes new bytes delta
BenchmarkSiteBuilding/TOML,num_langs=3,num_pages=5000,tags_per_page=5,shortcodes,render-4 426982864 424363832 -0.61%
```
Fixes #5534
2018-12-17 15:03:23 -05:00
// collect lexes input from stateStart using a zero-value Config and returns
// the resulting items. It simply forwards to collectWithConfig.
func collect(input []byte, skipFrontMatter bool, stateStart stateFunc) (items []Item) {
	var defaults Config // zero value: no options set
	items = collectWithConfig(input, skipFrontMatter, stateStart, defaults)
	return items
}
2018-10-17 07:48:55 -04:00
// no positional checking, for now ...
func equal ( i1 , i2 [ ] Item ) bool {
if len ( i1 ) != len ( i2 ) {
return false
}
for k := range i1 {
2018-10-19 05:30:57 -04:00
if i1 [ k ] . Type != i2 [ k ] . Type {
2018-10-17 07:48:55 -04:00
return false
}
2019-09-29 08:51:51 -04:00
2018-10-18 03:04:48 -04:00
if ! reflect . DeepEqual ( i1 [ k ] . Val , i2 [ k ] . Val ) {
2018-10-17 07:48:55 -04:00
return false
}
}
return true
}