hugo/helpers/content.go

789 lines
22 KiB
Go
Raw Normal View History

// Copyright 2019 The Hugo Authors. All rights reserved.
//
2015-11-23 22:16:36 -05:00
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
2015-11-23 22:16:36 -05:00
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// Package helpers implements general utility functions that work with
// and on content. The helper functions defined here lay down the
// foundation of how Hugo works with files and filepaths, and perform
// string operations on content.
package helpers
import (
"bytes"
"fmt"
"html/template"
"os/exec"
"runtime"
"unicode"
"unicode/utf8"
Add support for theme composition and inheritance This commit adds support for theme composition and inheritance in Hugo. With this, it helps thinking about a theme as a set of ordered components: ```toml theme = ["my-shortcodes", "base-theme", "hyde"] ``` The theme definition example above in `config.toml` creates a theme with the 3 components with presedence from left to right. So, Hugo will, for any given file, data entry etc., look first in the project, and then in `my-shortcode`, `base-theme` and lastly `hyde`. Hugo uses two different algorithms to merge the filesystems, depending on the file type: * For `i18n` and `data` files, Hugo merges deeply using the translation id and data key inside the files. * For `static`, `layouts` (templates) and `archetypes` files, these are merged on file level. So the left-most file will be chosen. The name used in the `theme` definition above must match a folder in `/your-site/themes`, e.g. `/your-site/themes/my-shortcodes`. There are plans to improve on this and get a URL scheme so this can be resolved automatically. Also note that a component that is part of a theme can have its own configuration file, e.g. `config.toml`. There are currently some restrictions to what a theme component can configure: * `params` (global and per language) * `menu` (global and per language) * `outputformats` and `mediatypes` The same rules apply here: The left-most param/menu etc. with the same ID will win. There are some hidden and experimental namespace support in the above, which we will work to improve in the future, but theme authors are encouraged to create their own namespaces to avoid naming conflicts. A final note: Themes/components can also have a `theme` definition in their `config.toml` and similar, which is the "inheritance" part of this commit's title. This is currently not supported by the Hugo theme site. We will have to wait for some "auto dependency" feature to be implemented for that to happen, but this can be a powerful feature if you want to create your own theme-variant based on others. Fixes #4460 Fixes #4450
2018-03-01 09:01:25 -05:00
"github.com/gohugoio/hugo/common/maps"
"github.com/chaseadamsio/goorgeous"
bp "github.com/gohugoio/hugo/bufferpool"
"github.com/gohugoio/hugo/config"
"github.com/miekg/mmark"
"github.com/mitchellh/mapstructure"
"github.com/russross/blackfriday"
jww "github.com/spf13/jwalterweatherman"
"strings"
)
// SummaryDivider denotes where content summarization should end. The default is "<!--more-->".
var SummaryDivider = []byte("<!--more-->")
var (
openingPTag = []byte("<p>")
closingPTag = []byte("</p>")
paragraphIndicator = []byte("<p")
)
// ContentSpec provides functionality to render markdown content.
type ContentSpec struct {
BlackFriday *BlackFriday
footnoteAnchorPrefix string
footnoteReturnLinkContents string
// SummaryLength is the length of the summary that Hugo extracts from a content.
summaryLength int
:sparkles: Implement Page bundling and image handling This commit is not the smallest in Hugo's history. Some hightlights include: * Page bundles (for complete articles, keeping images and content together etc.). * Bundled images can be processed in as many versions/sizes as you need with the three methods `Resize`, `Fill` and `Fit`. * Processed images are cached inside `resources/_gen/images` (default) in your project. * Symbolic links (both files and dirs) are now allowed anywhere inside /content * A new table based build summary * The "Total in nn ms" now reports the total including the handling of the files inside /static. So if it now reports more than you're used to, it is just **more real** and probably faster than before (see below). A site building benchmark run compared to `v0.31.1` shows that this should be slightly faster and use less memory: ```bash ▶ ./benchSite.sh "TOML,num_langs=.*,num_root_sections=5,num_pages=(500|1000),tags_per_page=5,shortcodes,render" benchmark old ns/op new ns/op delta BenchmarkSiteBuilding/TOML,num_langs=1,num_root_sections=5,num_pages=500,tags_per_page=5,shortcodes,render-4 101785785 78067944 -23.30% BenchmarkSiteBuilding/TOML,num_langs=1,num_root_sections=5,num_pages=1000,tags_per_page=5,shortcodes,render-4 185481057 149159919 -19.58% BenchmarkSiteBuilding/TOML,num_langs=3,num_root_sections=5,num_pages=500,tags_per_page=5,shortcodes,render-4 103149918 85679409 -16.94% BenchmarkSiteBuilding/TOML,num_langs=3,num_root_sections=5,num_pages=1000,tags_per_page=5,shortcodes,render-4 203515478 169208775 -16.86% benchmark old allocs new allocs delta BenchmarkSiteBuilding/TOML,num_langs=1,num_root_sections=5,num_pages=500,tags_per_page=5,shortcodes,render-4 532464 391539 -26.47% BenchmarkSiteBuilding/TOML,num_langs=1,num_root_sections=5,num_pages=1000,tags_per_page=5,shortcodes,render-4 1056549 772702 -26.87% BenchmarkSiteBuilding/TOML,num_langs=3,num_root_sections=5,num_pages=500,tags_per_page=5,shortcodes,render-4 555974 406630 -26.86% BenchmarkSiteBuilding/TOML,num_langs=3,num_root_sections=5,num_pages=1000,tags_per_page=5,shortcodes,render-4 1086545 789922 -27.30% benchmark old bytes new bytes delta BenchmarkSiteBuilding/TOML,num_langs=1,num_root_sections=5,num_pages=500,tags_per_page=5,shortcodes,render-4 53243246 43598155 -18.12% BenchmarkSiteBuilding/TOML,num_langs=1,num_root_sections=5,num_pages=1000,tags_per_page=5,shortcodes,render-4 105811617 86087116 -18.64% BenchmarkSiteBuilding/TOML,num_langs=3,num_root_sections=5,num_pages=500,tags_per_page=5,shortcodes,render-4 54558852 44545097 -18.35% BenchmarkSiteBuilding/TOML,num_langs=3,num_root_sections=5,num_pages=1000,tags_per_page=5,shortcodes,render-4 106903858 86978413 -18.64% ``` Fixes #3651 Closes #3158 Fixes #1014 Closes #2021 Fixes #1240 Updates #3757
2017-07-24 03:00:23 -04:00
BuildFuture bool
BuildExpired bool
BuildDrafts bool
Highlight func(code, lang, optsStr string) (string, error)
defatultPygmentsOpts map[string]string
Cfg config.Provider
}
// NewContentSpec returns a ContentSpec initialized
// with the appropriate fields from the given config.Provider.
func NewContentSpec(cfg config.Provider) (*ContentSpec, error) {
bf := newBlackfriday(cfg.GetStringMap("blackfriday"))
spec := &ContentSpec{
BlackFriday: bf,
footnoteAnchorPrefix: cfg.GetString("footnoteAnchorPrefix"),
footnoteReturnLinkContents: cfg.GetString("footnoteReturnLinkContents"),
summaryLength: cfg.GetInt("summaryLength"),
:sparkles: Implement Page bundling and image handling This commit is not the smallest in Hugo's history. Some hightlights include: * Page bundles (for complete articles, keeping images and content together etc.). * Bundled images can be processed in as many versions/sizes as you need with the three methods `Resize`, `Fill` and `Fit`. * Processed images are cached inside `resources/_gen/images` (default) in your project. * Symbolic links (both files and dirs) are now allowed anywhere inside /content * A new table based build summary * The "Total in nn ms" now reports the total including the handling of the files inside /static. So if it now reports more than you're used to, it is just **more real** and probably faster than before (see below). A site building benchmark run compared to `v0.31.1` shows that this should be slightly faster and use less memory: ```bash ▶ ./benchSite.sh "TOML,num_langs=.*,num_root_sections=5,num_pages=(500|1000),tags_per_page=5,shortcodes,render" benchmark old ns/op new ns/op delta BenchmarkSiteBuilding/TOML,num_langs=1,num_root_sections=5,num_pages=500,tags_per_page=5,shortcodes,render-4 101785785 78067944 -23.30% BenchmarkSiteBuilding/TOML,num_langs=1,num_root_sections=5,num_pages=1000,tags_per_page=5,shortcodes,render-4 185481057 149159919 -19.58% BenchmarkSiteBuilding/TOML,num_langs=3,num_root_sections=5,num_pages=500,tags_per_page=5,shortcodes,render-4 103149918 85679409 -16.94% BenchmarkSiteBuilding/TOML,num_langs=3,num_root_sections=5,num_pages=1000,tags_per_page=5,shortcodes,render-4 203515478 169208775 -16.86% benchmark old allocs new allocs delta BenchmarkSiteBuilding/TOML,num_langs=1,num_root_sections=5,num_pages=500,tags_per_page=5,shortcodes,render-4 532464 391539 -26.47% BenchmarkSiteBuilding/TOML,num_langs=1,num_root_sections=5,num_pages=1000,tags_per_page=5,shortcodes,render-4 1056549 772702 -26.87% BenchmarkSiteBuilding/TOML,num_langs=3,num_root_sections=5,num_pages=500,tags_per_page=5,shortcodes,render-4 555974 406630 -26.86% BenchmarkSiteBuilding/TOML,num_langs=3,num_root_sections=5,num_pages=1000,tags_per_page=5,shortcodes,render-4 1086545 789922 -27.30% benchmark old bytes new bytes delta BenchmarkSiteBuilding/TOML,num_langs=1,num_root_sections=5,num_pages=500,tags_per_page=5,shortcodes,render-4 53243246 43598155 -18.12% BenchmarkSiteBuilding/TOML,num_langs=1,num_root_sections=5,num_pages=1000,tags_per_page=5,shortcodes,render-4 105811617 86087116 -18.64% BenchmarkSiteBuilding/TOML,num_langs=3,num_root_sections=5,num_pages=500,tags_per_page=5,shortcodes,render-4 54558852 44545097 -18.35% BenchmarkSiteBuilding/TOML,num_langs=3,num_root_sections=5,num_pages=1000,tags_per_page=5,shortcodes,render-4 106903858 86978413 -18.64% ``` Fixes #3651 Closes #3158 Fixes #1014 Closes #2021 Fixes #1240 Updates #3757
2017-07-24 03:00:23 -04:00
BuildFuture: cfg.GetBool("buildFuture"),
BuildExpired: cfg.GetBool("buildExpired"),
BuildDrafts: cfg.GetBool("buildDrafts"),
Cfg: cfg,
}
// Highlighting setup
options, err := parseDefaultPygmentsOpts(cfg)
if err != nil {
return nil, err
}
spec.defatultPygmentsOpts = options
// Use the Pygmentize on path if present
useClassic := false
h := newHiglighters(spec)
if cfg.GetBool("pygmentsUseClassic") {
if !hasPygments() {
jww.WARN.Println("Highlighting with pygmentsUseClassic set requires Pygments to be installed and in the path")
} else {
useClassic = true
}
}
if useClassic {
spec.Highlight = h.pygmentsHighlight
} else {
spec.Highlight = h.chromaHighlight
}
return spec, nil
}
// BlackFriday holds configuration values for BlackFriday rendering.
type BlackFriday struct {
2017-08-02 15:37:03 -04:00
Smartypants bool
SmartypantsQuotesNBSP bool
AngledQuotes bool
Fractions bool
HrefTargetBlank bool
NofollowLinks bool
NoreferrerLinks bool
2017-08-02 15:37:03 -04:00
SmartDashes bool
LatexDashes bool
TaskLists bool
PlainIDAnchors bool
Extensions []string
ExtensionsMask []string
SkipHTML bool
}
// NewBlackfriday creates a new Blackfriday filled with site config or some sane defaults.
func newBlackfriday(config map[string]interface{}) *BlackFriday {
defaultParam := map[string]interface{}{
2017-08-02 15:37:03 -04:00
"smartypants": true,
"angledQuotes": false,
"smartypantsQuotesNBSP": false,
"fractions": true,
"hrefTargetBlank": false,
"nofollowLinks": false,
"noreferrerLinks": false,
2017-08-02 15:37:03 -04:00
"smartDashes": true,
"latexDashes": true,
"plainIDAnchors": true,
"taskLists": true,
"skipHTML": false,
2015-01-31 12:24:00 -05:00
}
Add support for theme composition and inheritance This commit adds support for theme composition and inheritance in Hugo. With this, it helps thinking about a theme as a set of ordered components: ```toml theme = ["my-shortcodes", "base-theme", "hyde"] ``` The theme definition example above in `config.toml` creates a theme with the 3 components with presedence from left to right. So, Hugo will, for any given file, data entry etc., look first in the project, and then in `my-shortcode`, `base-theme` and lastly `hyde`. Hugo uses two different algorithms to merge the filesystems, depending on the file type: * For `i18n` and `data` files, Hugo merges deeply using the translation id and data key inside the files. * For `static`, `layouts` (templates) and `archetypes` files, these are merged on file level. So the left-most file will be chosen. The name used in the `theme` definition above must match a folder in `/your-site/themes`, e.g. `/your-site/themes/my-shortcodes`. There are plans to improve on this and get a URL scheme so this can be resolved automatically. Also note that a component that is part of a theme can have its own configuration file, e.g. `config.toml`. There are currently some restrictions to what a theme component can configure: * `params` (global and per language) * `menu` (global and per language) * `outputformats` and `mediatypes` The same rules apply here: The left-most param/menu etc. with the same ID will win. There are some hidden and experimental namespace support in the above, which we will work to improve in the future, but theme authors are encouraged to create their own namespaces to avoid naming conflicts. A final note: Themes/components can also have a `theme` definition in their `config.toml` and similar, which is the "inheritance" part of this commit's title. This is currently not supported by the Hugo theme site. We will have to wait for some "auto dependency" feature to be implemented for that to happen, but this can be a powerful feature if you want to create your own theme-variant based on others. Fixes #4460 Fixes #4450
2018-03-01 09:01:25 -05:00
maps.ToLower(defaultParam)
siteConfig := make(map[string]interface{})
for k, v := range defaultParam {
siteConfig[k] = v
}
2019-03-24 05:11:16 -04:00
for k, v := range config {
siteConfig[k] = v
}
combinedConfig := &BlackFriday{}
if err := mapstructure.Decode(siteConfig, combinedConfig); err != nil {
jww.FATAL.Printf("Failed to get site rendering config\n%s", err.Error())
}
return combinedConfig
2015-01-31 12:24:00 -05:00
}
var blackfridayExtensionMap = map[string]int{
"noIntraEmphasis": blackfriday.EXTENSION_NO_INTRA_EMPHASIS,
"tables": blackfriday.EXTENSION_TABLES,
"fencedCode": blackfriday.EXTENSION_FENCED_CODE,
"autolink": blackfriday.EXTENSION_AUTOLINK,
"strikethrough": blackfriday.EXTENSION_STRIKETHROUGH,
"laxHtmlBlocks": blackfriday.EXTENSION_LAX_HTML_BLOCKS,
"spaceHeaders": blackfriday.EXTENSION_SPACE_HEADERS,
"hardLineBreak": blackfriday.EXTENSION_HARD_LINE_BREAK,
"tabSizeEight": blackfriday.EXTENSION_TAB_SIZE_EIGHT,
"footnotes": blackfriday.EXTENSION_FOOTNOTES,
"noEmptyLineBeforeBlock": blackfriday.EXTENSION_NO_EMPTY_LINE_BEFORE_BLOCK,
"headerIds": blackfriday.EXTENSION_HEADER_IDS,
"titleblock": blackfriday.EXTENSION_TITLEBLOCK,
"autoHeaderIds": blackfriday.EXTENSION_AUTO_HEADER_IDS,
"backslashLineBreak": blackfriday.EXTENSION_BACKSLASH_LINE_BREAK,
"definitionLists": blackfriday.EXTENSION_DEFINITION_LISTS,
"joinLines": blackfriday.EXTENSION_JOIN_LINES,
}
var stripHTMLReplacer = strings.NewReplacer("\n", " ", "</p>", "\n", "<br>", "\n", "<br />", "\n")
var mmarkExtensionMap = map[string]int{
"tables": mmark.EXTENSION_TABLES,
"fencedCode": mmark.EXTENSION_FENCED_CODE,
"autolink": mmark.EXTENSION_AUTOLINK,
"laxHtmlBlocks": mmark.EXTENSION_LAX_HTML_BLOCKS,
"spaceHeaders": mmark.EXTENSION_SPACE_HEADERS,
"hardLineBreak": mmark.EXTENSION_HARD_LINE_BREAK,
"footnotes": mmark.EXTENSION_FOOTNOTES,
"noEmptyLineBeforeBlock": mmark.EXTENSION_NO_EMPTY_LINE_BEFORE_BLOCK,
"headerIds": mmark.EXTENSION_HEADER_IDS,
"autoHeaderIds": mmark.EXTENSION_AUTO_HEADER_IDS,
}
// StripHTML accepts a string, strips out all HTML tags and returns it.
func StripHTML(s string) string {
// Shortcut strings with no tags in them
if !strings.ContainsAny(s, "<>") {
return s
2015-03-06 12:07:50 -05:00
}
s = stripHTMLReplacer.Replace(s)
// Walk through the string removing all tags
b := bp.GetBuffer()
defer bp.PutBuffer(b)
var inTag, isSpace, wasSpace bool
2015-03-06 12:07:50 -05:00
for _, r := range s {
if !inTag {
isSpace = false
}
switch {
case r == '<':
2015-03-06 12:07:50 -05:00
inTag = true
case r == '>':
2015-03-06 12:07:50 -05:00
inTag = false
case unicode.IsSpace(r):
isSpace = true
fallthrough
2015-03-06 12:07:50 -05:00
default:
if !inTag && (!isSpace || (isSpace && !wasSpace)) {
2015-03-06 12:07:50 -05:00
b.WriteRune(r)
}
}
wasSpace = isSpace
}
2015-03-06 12:07:50 -05:00
return b.String()
}
2016-03-14 12:27:15 -04:00
// stripEmptyNav strips out empty <nav> tags from content.
func stripEmptyNav(in []byte) []byte {
return bytes.Replace(in, []byte("<nav>\n</nav>\n\n"), []byte(``), -1)
}
// BytesToHTML converts bytes to type template.HTML.
func BytesToHTML(b []byte) template.HTML {
return template.HTML(string(b))
}
// getHTMLRenderer creates a new Blackfriday HTML Renderer with the given configuration.
func (c *ContentSpec) getHTMLRenderer(defaultFlags int, ctx *RenderingContext) blackfriday.Renderer {
renderParameters := blackfriday.HtmlRendererParameters{
FootnoteAnchorPrefix: c.footnoteAnchorPrefix,
FootnoteReturnLinkContents: c.footnoteReturnLinkContents,
}
b := len(ctx.DocumentID) != 0
2015-01-05 14:00:56 -05:00
if ctx.Config == nil {
panic(fmt.Sprintf("RenderingContext of %q doesn't have a config", ctx.DocumentID))
}
if b && !ctx.Config.PlainIDAnchors {
renderParameters.FootnoteAnchorPrefix = ctx.DocumentID + ":" + renderParameters.FootnoteAnchorPrefix
renderParameters.HeaderIDSuffix = ":" + ctx.DocumentID
}
htmlFlags := defaultFlags
htmlFlags |= blackfriday.HTML_USE_XHTML
htmlFlags |= blackfriday.HTML_FOOTNOTE_RETURN_LINKS
if ctx.Config.Smartypants {
htmlFlags |= blackfriday.HTML_USE_SMARTYPANTS
}
if ctx.Config.SmartypantsQuotesNBSP {
htmlFlags |= blackfriday.HTML_SMARTYPANTS_QUOTES_NBSP
}
if ctx.Config.AngledQuotes {
htmlFlags |= blackfriday.HTML_SMARTYPANTS_ANGLED_QUOTES
}
if ctx.Config.Fractions {
2015-01-31 12:24:00 -05:00
htmlFlags |= blackfriday.HTML_SMARTYPANTS_FRACTIONS
}
if ctx.Config.HrefTargetBlank {
htmlFlags |= blackfriday.HTML_HREF_TARGET_BLANK
}
if ctx.Config.NofollowLinks {
htmlFlags |= blackfriday.HTML_NOFOLLOW_LINKS
}
if ctx.Config.NoreferrerLinks {
htmlFlags |= blackfriday.HTML_NOREFERRER_LINKS
}
if ctx.Config.SmartDashes {
htmlFlags |= blackfriday.HTML_SMARTYPANTS_DASHES
}
if ctx.Config.LatexDashes {
htmlFlags |= blackfriday.HTML_SMARTYPANTS_LATEX_DASHES
}
if ctx.Config.SkipHTML {
htmlFlags |= blackfriday.HTML_SKIP_HTML
}
2016-03-24 18:16:18 -04:00
return &HugoHTMLRenderer{
cs: c,
RenderingContext: ctx,
Renderer: blackfriday.HtmlRendererWithParameters(htmlFlags, "", "", renderParameters),
}
}
2015-03-06 12:07:50 -05:00
func getMarkdownExtensions(ctx *RenderingContext) int {
// Default Blackfriday common extensions
commonExtensions := 0 |
blackfriday.EXTENSION_NO_INTRA_EMPHASIS |
blackfriday.EXTENSION_TABLES |
blackfriday.EXTENSION_FENCED_CODE |
blackfriday.EXTENSION_AUTOLINK |
blackfriday.EXTENSION_STRIKETHROUGH |
blackfriday.EXTENSION_SPACE_HEADERS |
blackfriday.EXTENSION_HEADER_IDS |
blackfriday.EXTENSION_BACKSLASH_LINE_BREAK |
blackfriday.EXTENSION_DEFINITION_LISTS
// Extra Blackfriday extensions that Hugo enables by default
flags := commonExtensions |
blackfriday.EXTENSION_AUTO_HEADER_IDS |
blackfriday.EXTENSION_FOOTNOTES
if ctx.Config == nil {
panic(fmt.Sprintf("RenderingContext of %q doesn't have a config", ctx.DocumentID))
}
for _, extension := range ctx.Config.Extensions {
if flag, ok := blackfridayExtensionMap[extension]; ok {
flags |= flag
}
}
for _, extension := range ctx.Config.ExtensionsMask {
2015-04-15 21:54:10 -04:00
if flag, ok := blackfridayExtensionMap[extension]; ok {
flags &= ^flag
}
}
return flags
}
func (c ContentSpec) markdownRender(ctx *RenderingContext) []byte {
2016-07-10 05:36:25 -04:00
if ctx.RenderTOC {
return blackfriday.Markdown(ctx.Content,
c.getHTMLRenderer(blackfriday.HTML_TOC, ctx),
2016-07-10 05:36:25 -04:00
getMarkdownExtensions(ctx))
}
return blackfriday.Markdown(ctx.Content, c.getHTMLRenderer(0, ctx),
2015-03-06 12:07:50 -05:00
getMarkdownExtensions(ctx))
}
// getMmarkHTMLRenderer creates a new mmark HTML Renderer with the given configuration.
func (c *ContentSpec) getMmarkHTMLRenderer(defaultFlags int, ctx *RenderingContext) mmark.Renderer {
renderParameters := mmark.HtmlRendererParameters{
FootnoteAnchorPrefix: c.footnoteAnchorPrefix,
FootnoteReturnLinkContents: c.footnoteReturnLinkContents,
}
b := len(ctx.DocumentID) != 0
if ctx.Config == nil {
panic(fmt.Sprintf("RenderingContext of %q doesn't have a config", ctx.DocumentID))
}
if b && !ctx.Config.PlainIDAnchors {
renderParameters.FootnoteAnchorPrefix = ctx.DocumentID + ":" + renderParameters.FootnoteAnchorPrefix
// renderParameters.HeaderIDSuffix = ":" + ctx.DocumentId
}
htmlFlags := defaultFlags
htmlFlags |= mmark.HTML_FOOTNOTE_RETURN_LINKS
2016-03-24 18:16:18 -04:00
return &HugoMmarkHTMLRenderer{
cs: c,
Renderer: mmark.HtmlRendererWithParameters(htmlFlags, "", "", renderParameters),
Cfg: c.Cfg,
}
}
2016-03-14 12:27:15 -04:00
func getMmarkExtensions(ctx *RenderingContext) int {
flags := 0
flags |= mmark.EXTENSION_TABLES
flags |= mmark.EXTENSION_FENCED_CODE
flags |= mmark.EXTENSION_AUTOLINK
flags |= mmark.EXTENSION_SPACE_HEADERS
flags |= mmark.EXTENSION_CITATION
flags |= mmark.EXTENSION_TITLEBLOCK_TOML
flags |= mmark.EXTENSION_HEADER_IDS
flags |= mmark.EXTENSION_AUTO_HEADER_IDS
flags |= mmark.EXTENSION_UNIQUE_HEADER_IDS
flags |= mmark.EXTENSION_FOOTNOTES
flags |= mmark.EXTENSION_SHORT_REF
flags |= mmark.EXTENSION_NO_EMPTY_LINE_BEFORE_BLOCK
flags |= mmark.EXTENSION_INCLUDE
if ctx.Config == nil {
panic(fmt.Sprintf("RenderingContext of %q doesn't have a config", ctx.DocumentID))
}
for _, extension := range ctx.Config.Extensions {
if flag, ok := mmarkExtensionMap[extension]; ok {
flags |= flag
}
}
return flags
}
func (c ContentSpec) mmarkRender(ctx *RenderingContext) []byte {
return mmark.Parse(ctx.Content, c.getMmarkHTMLRenderer(0, ctx),
2016-03-14 12:27:15 -04:00
getMmarkExtensions(ctx)).Bytes()
}
// ExtractTOC extracts Table of Contents from content.
func ExtractTOC(content []byte) (newcontent []byte, toc []byte) {
if !bytes.Contains(content, []byte("<nav>")) {
return content, nil
}
origContent := make([]byte, len(content))
copy(origContent, content)
first := []byte(`<nav>
<ul>`)
last := []byte(`</ul>
</nav>`)
replacement := []byte(`<nav id="TableOfContents">
<ul>`)
startOfTOC := bytes.Index(content, first)
peekEnd := len(content)
if peekEnd > 70+startOfTOC {
peekEnd = 70 + startOfTOC
}
if startOfTOC < 0 {
2016-03-14 12:27:15 -04:00
return stripEmptyNav(content), toc
}
// Need to peek ahead to see if this nav element is actually the right one.
correctNav := bytes.Index(content[startOfTOC:peekEnd], []byte(`<li><a href="#`))
if correctNav < 0 { // no match found
return content, toc
}
lengthOfTOC := bytes.Index(content[startOfTOC:], last) + len(last)
endOfTOC := startOfTOC + lengthOfTOC
newcontent = append(content[:startOfTOC], content[endOfTOC:]...)
toc = append(replacement, origContent[startOfTOC+len(first):endOfTOC]...)
return
}
2015-03-06 12:07:50 -05:00
// RenderingContext holds contextual information, like content and configuration,
// for a given content rendering.
// By creating you must set the Config, otherwise it will panic.
type RenderingContext struct {
Content []byte
PageFmt string
DocumentID string
DocumentName string
Config *BlackFriday
RenderTOC bool
Cfg config.Provider
}
2015-03-06 12:07:50 -05:00
// RenderBytes renders a []byte.
func (c ContentSpec) RenderBytes(ctx *RenderingContext) []byte {
switch ctx.PageFmt {
default:
return c.markdownRender(ctx)
case "markdown":
return c.markdownRender(ctx)
Experimental AsciiDoc support with external helpers See #470 * Based on existing support for reStructuredText files * Handles content files with extensions `.asciidoc` and `.ad` * Pipes content through `asciidoctor --safe -`. If `asciidoctor` is not installed, then `asciidoc --safe -`. * To make sure `asciidoctor` or `asciidoc` is found, after adding a piece of AsciiDoc content, run `hugo` with the `-v` flag and look for this message: INFO: 2015/01/23 Rendering with /usr/bin/asciidoctor ... Caveats: * The final "Last updated" timestamp is currently not stripped. * When `hugo` is run with `-v`, you may see a lot of these messages INFO: 2015/01/23 Rendering with /usr/bin/asciidoctor ... if you have lots of `*.ad`, `*.adoc` or `*.asciidoc` files. * Some versions of `asciidoc` may have trouble with its safe mode. To test if you are affected, try this: $ echo "Hello" | asciidoc --safe - asciidoc: ERROR: unsafe: ifeval invalid asciidoc: FAILED: ifeval invalid safe document If so, I recommend that you install `asciidoctor` instead. Feedback and patches welcome! Ideally, we should be using https://github.com/VonC/asciidocgo, @VonC's wonderful Go implementation of Asciidoctor. However, there is still a bit of work needed for asciidocgo to expose its API so that Hugo can actually use it. Until then, hope this "experimental AsciiDoc support through external helpers" can serve as a stopgap solution for our community. :-) 2015-01-30: Updated for the replaceShortcodeTokens() syntax change 2015-02-21: Add `.adoc` extension as suggested by @Fale Conflicts: helpers/content.go
2015-01-23 13:59:14 -05:00
case "asciidoc":
return getAsciidocContent(ctx)
case "mmark":
return c.mmarkRender(ctx)
case "rst":
return getRstContent(ctx)
case "org":
return orgRender(ctx, c)
case "pandoc":
return getPandocContent(ctx)
}
}
// TotalWords counts instance of one or more consecutive white space
// characters, as defined by unicode.IsSpace, in s.
// This is a cheaper way of word counting than the obvious len(strings.Fields(s)).
func TotalWords(s string) int {
n := 0
inWord := false
for _, r := range s {
wasInWord := inWord
inWord = !unicode.IsSpace(r)
if inWord && !wasInWord {
n++
}
}
return n
}
// Old implementation only kept for benchmark comparison.
// TODO(bep) remove
func totalWordsOld(s string) int {
return len(strings.Fields(s))
}
// TruncateWordsByRune truncates words by runes.
func (c *ContentSpec) TruncateWordsByRune(in []string) (string, bool) {
words := make([]string, len(in))
copy(words, in)
2015-09-03 06:22:20 -04:00
count := 0
for index, word := range words {
if count >= c.summaryLength {
return strings.Join(words[:index], " "), true
}
2015-09-03 06:22:20 -04:00
runeCount := utf8.RuneCountInString(word)
if len(word) == runeCount {
count++
} else if count+runeCount < c.summaryLength {
count += runeCount
2015-09-03 06:22:20 -04:00
} else {
2016-02-06 06:29:13 -05:00
for ri := range word {
if count >= c.summaryLength {
truncatedWords := append(words[:index], word[:ri])
return strings.Join(truncatedWords, " "), true
2015-09-03 06:22:20 -04:00
}
count++
2015-09-03 06:22:20 -04:00
}
}
}
return strings.Join(words, " "), false
}
// TruncateWordsToWholeSentence takes content and truncates to whole sentence
// limited by max number of words. It also returns whether it is truncated.
func (c *ContentSpec) TruncateWordsToWholeSentence(s string) (string, bool) {
var (
wordCount = 0
lastWordIndex = -1
)
for i, r := range s {
if unicode.IsSpace(r) {
wordCount++
lastWordIndex = i
if wordCount >= c.summaryLength {
break
}
}
}
if lastWordIndex == -1 {
return s, false
}
endIndex := -1
for j, r := range s[lastWordIndex:] {
if isEndOfSentence(r) {
endIndex = j + lastWordIndex + utf8.RuneLen(r)
break
}
}
if endIndex == -1 {
return s, false
}
return strings.TrimSpace(s[:endIndex]), endIndex < len(s)
}
// TrimShortHTML removes the <p>/</p> tags from HTML input in the situation
// where said tags are the only <p> tags in the input and enclose the content
// of the input (whitespace excluded).
func (c *ContentSpec) TrimShortHTML(input []byte) []byte {
first := bytes.Index(input, paragraphIndicator)
last := bytes.LastIndex(input, paragraphIndicator)
if first == last {
input = bytes.TrimSpace(input)
input = bytes.TrimPrefix(input, openingPTag)
input = bytes.TrimSuffix(input, closingPTag)
input = bytes.TrimSpace(input)
}
return input
}
func isEndOfSentence(r rune) bool {
return r == '.' || r == '?' || r == '!' || r == '"' || r == '\n'
}
// Kept only for benchmark.
func (c *ContentSpec) truncateWordsToWholeSentenceOld(content string) (string, bool) {
words := strings.Fields(content)
if c.summaryLength >= len(words) {
return strings.Join(words, " "), false
}
for counter, word := range words[c.summaryLength:] {
if strings.HasSuffix(word, ".") ||
strings.HasSuffix(word, "?") ||
strings.HasSuffix(word, ".\"") ||
strings.HasSuffix(word, "!") {
upper := c.summaryLength + counter + 1
return strings.Join(words[:upper], " "), (upper < len(words))
}
}
return strings.Join(words[:c.summaryLength], " "), true
}
func getAsciidocExecPath() string {
path, err := exec.LookPath("asciidoc")
Experimental AsciiDoc support with external helpers See #470 * Based on existing support for reStructuredText files * Handles content files with extensions `.asciidoc` and `.ad` * Pipes content through `asciidoctor --safe -`. If `asciidoctor` is not installed, then `asciidoc --safe -`. * To make sure `asciidoctor` or `asciidoc` is found, after adding a piece of AsciiDoc content, run `hugo` with the `-v` flag and look for this message: INFO: 2015/01/23 Rendering with /usr/bin/asciidoctor ... Caveats: * The final "Last updated" timestamp is currently not stripped. * When `hugo` is run with `-v`, you may see a lot of these messages INFO: 2015/01/23 Rendering with /usr/bin/asciidoctor ... if you have lots of `*.ad`, `*.adoc` or `*.asciidoc` files. * Some versions of `asciidoc` may have trouble with its safe mode. To test if you are affected, try this: $ echo "Hello" | asciidoc --safe - asciidoc: ERROR: unsafe: ifeval invalid asciidoc: FAILED: ifeval invalid safe document If so, I recommend that you install `asciidoctor` instead. Feedback and patches welcome! Ideally, we should be using https://github.com/VonC/asciidocgo, @VonC's wonderful Go implementation of Asciidoctor. However, there is still a bit of work needed for asciidocgo to expose its API so that Hugo can actually use it. Until then, hope this "experimental AsciiDoc support through external helpers" can serve as a stopgap solution for our community. :-) 2015-01-30: Updated for the replaceShortcodeTokens() syntax change 2015-02-21: Add `.adoc` extension as suggested by @Fale Conflicts: helpers/content.go
2015-01-23 13:59:14 -05:00
if err != nil {
return ""
Experimental AsciiDoc support with external helpers See #470 * Based on existing support for reStructuredText files * Handles content files with extensions `.asciidoc` and `.ad` * Pipes content through `asciidoctor --safe -`. If `asciidoctor` is not installed, then `asciidoc --safe -`. * To make sure `asciidoctor` or `asciidoc` is found, after adding a piece of AsciiDoc content, run `hugo` with the `-v` flag and look for this message: INFO: 2015/01/23 Rendering with /usr/bin/asciidoctor ... Caveats: * The final "Last updated" timestamp is currently not stripped. * When `hugo` is run with `-v`, you may see a lot of these messages INFO: 2015/01/23 Rendering with /usr/bin/asciidoctor ... if you have lots of `*.ad`, `*.adoc` or `*.asciidoc` files. * Some versions of `asciidoc` may have trouble with its safe mode. To test if you are affected, try this: $ echo "Hello" | asciidoc --safe - asciidoc: ERROR: unsafe: ifeval invalid asciidoc: FAILED: ifeval invalid safe document If so, I recommend that you install `asciidoctor` instead. Feedback and patches welcome! Ideally, we should be using https://github.com/VonC/asciidocgo, @VonC's wonderful Go implementation of Asciidoctor. However, there is still a bit of work needed for asciidocgo to expose its API so that Hugo can actually use it. Until then, hope this "experimental AsciiDoc support through external helpers" can serve as a stopgap solution for our community. :-) 2015-01-30: Updated for the replaceShortcodeTokens() syntax change 2015-02-21: Add `.adoc` extension as suggested by @Fale Conflicts: helpers/content.go
2015-01-23 13:59:14 -05:00
}
return path
}
func getAsciidoctorExecPath() string {
path, err := exec.LookPath("asciidoctor")
if err != nil {
return ""
}
return path
}
// HasAsciidoc returns whether Asciidoc or Asciidoctor is installed on this computer.
func HasAsciidoc() bool {
return (getAsciidoctorExecPath() != "" ||
getAsciidocExecPath() != "")
}
// getAsciidocContent calls asciidoctor or asciidoc as an external helper
// to convert AsciiDoc content to HTML.
func getAsciidocContent(ctx *RenderingContext) []byte {
var isAsciidoctor bool
path := getAsciidoctorExecPath()
if path == "" {
path = getAsciidocExecPath()
if path == "" {
jww.ERROR.Println("asciidoctor / asciidoc not found in $PATH: Please install.\n",
" Leaving AsciiDoc content unrendered.")
return ctx.Content
}
} else {
isAsciidoctor = true
}
Experimental AsciiDoc support with external helpers See #470 * Based on existing support for reStructuredText files * Handles content files with extensions `.asciidoc` and `.ad` * Pipes content through `asciidoctor --safe -`. If `asciidoctor` is not installed, then `asciidoc --safe -`. * To make sure `asciidoctor` or `asciidoc` is found, after adding a piece of AsciiDoc content, run `hugo` with the `-v` flag and look for this message: INFO: 2015/01/23 Rendering with /usr/bin/asciidoctor ... Caveats: * The final "Last updated" timestamp is currently not stripped. * When `hugo` is run with `-v`, you may see a lot of these messages INFO: 2015/01/23 Rendering with /usr/bin/asciidoctor ... if you have lots of `*.ad`, `*.adoc` or `*.asciidoc` files. * Some versions of `asciidoc` may have trouble with its safe mode. To test if you are affected, try this: $ echo "Hello" | asciidoc --safe - asciidoc: ERROR: unsafe: ifeval invalid asciidoc: FAILED: ifeval invalid safe document If so, I recommend that you install `asciidoctor` instead. Feedback and patches welcome! Ideally, we should be using https://github.com/VonC/asciidocgo, @VonC's wonderful Go implementation of Asciidoctor. However, there is still a bit of work needed for asciidocgo to expose its API so that Hugo can actually use it. Until then, hope this "experimental AsciiDoc support through external helpers" can serve as a stopgap solution for our community. :-) 2015-01-30: Updated for the replaceShortcodeTokens() syntax change 2015-02-21: Add `.adoc` extension as suggested by @Fale Conflicts: helpers/content.go
2015-01-23 13:59:14 -05:00
jww.INFO.Println("Rendering", ctx.DocumentName, "with", path, "...")
args := []string{"--no-header-footer", "--safe"}
if isAsciidoctor {
// asciidoctor-specific arg to show stack traces on errors
args = append(args, "--trace")
}
args = append(args, "-")
return externallyRenderContent(ctx, path, args)
Experimental AsciiDoc support with external helpers See #470 * Based on existing support for reStructuredText files * Handles content files with extensions `.asciidoc` and `.ad` * Pipes content through `asciidoctor --safe -`. If `asciidoctor` is not installed, then `asciidoc --safe -`. * To make sure `asciidoctor` or `asciidoc` is found, after adding a piece of AsciiDoc content, run `hugo` with the `-v` flag and look for this message: INFO: 2015/01/23 Rendering with /usr/bin/asciidoctor ... Caveats: * The final "Last updated" timestamp is currently not stripped. * When `hugo` is run with `-v`, you may see a lot of these messages INFO: 2015/01/23 Rendering with /usr/bin/asciidoctor ... if you have lots of `*.ad`, `*.adoc` or `*.asciidoc` files. * Some versions of `asciidoc` may have trouble with its safe mode. To test if you are affected, try this: $ echo "Hello" | asciidoc --safe - asciidoc: ERROR: unsafe: ifeval invalid asciidoc: FAILED: ifeval invalid safe document If so, I recommend that you install `asciidoctor` instead. Feedback and patches welcome! Ideally, we should be using https://github.com/VonC/asciidocgo, @VonC's wonderful Go implementation of Asciidoctor. However, there is still a bit of work needed for asciidocgo to expose its API so that Hugo can actually use it. Until then, hope this "experimental AsciiDoc support through external helpers" can serve as a stopgap solution for our community. :-) 2015-01-30: Updated for the replaceShortcodeTokens() syntax change 2015-02-21: Add `.adoc` extension as suggested by @Fale Conflicts: helpers/content.go
2015-01-23 13:59:14 -05:00
}
2016-07-04 04:49:20 -04:00
// HasRst returns whether rst2html is installed on this computer.
func HasRst() bool {
return getRstExecPath() != ""
}
2016-07-04 04:49:20 -04:00
func getRstExecPath() string {
path, err := exec.LookPath("rst2html")
if err != nil {
path, err = exec.LookPath("rst2html.py")
if err != nil {
2016-07-04 04:49:20 -04:00
return ""
}
}
2016-07-04 04:49:20 -04:00
return path
}
func getPythonExecPath() string {
path, err := exec.LookPath("python")
if err != nil {
path, err = exec.LookPath("python.exe")
if err != nil {
return ""
}
}
return path
}
2016-07-04 04:49:20 -04:00
// getRstContent calls the Python script rst2html as an external helper
// to convert reStructuredText content to HTML.
func getRstContent(ctx *RenderingContext) []byte {
2016-07-04 04:49:20 -04:00
path := getRstExecPath()
if path == "" {
jww.ERROR.Println("rst2html / rst2html.py not found in $PATH: Please install.\n",
" Leaving reStructuredText content unrendered.")
return ctx.Content
2016-07-04 04:49:20 -04:00
}
jww.INFO.Println("Rendering", ctx.DocumentName, "with", path, "...")
var result []byte
// certain *nix based OSs wrap executables in scripted launchers
// invoking binaries on these OSs via python interpreter causes SyntaxError
// invoke directly so that shebangs work as expected
// handle Windows manually because it doesn't do shebangs
if runtime.GOOS == "windows" {
python := getPythonExecPath()
args := []string{path, "--leave-comments", "--initial-header-level=2"}
result = externallyRenderContent(ctx, python, args)
} else {
args := []string{"--leave-comments", "--initial-header-level=2"}
result = externallyRenderContent(ctx, path, args)
}
// TODO(bep) check if rst2html has a body only option.
bodyStart := bytes.Index(result, []byte("<body>\n"))
if bodyStart < 0 {
bodyStart = -7 //compensate for length
}
bodyEnd := bytes.Index(result, []byte("\n</body>"))
if bodyEnd < 0 || bodyEnd >= len(result) {
bodyEnd = len(result) - 1
if bodyEnd < 0 {
bodyEnd = 0
}
}
return result[bodyStart+7 : bodyEnd]
}
// getPandocContent calls pandoc as an external helper to convert pandoc markdown to HTML.
func getPandocContent(ctx *RenderingContext) []byte {
path, err := exec.LookPath("pandoc")
if err != nil {
jww.ERROR.Println("pandoc not found in $PATH: Please install.\n",
" Leaving pandoc content unrendered.")
return ctx.Content
}
args := []string{"--mathjax"}
return externallyRenderContent(ctx, path, args)
}
func orgRender(ctx *RenderingContext, c ContentSpec) []byte {
content := ctx.Content
cleanContent := bytes.Replace(content, []byte("# more"), []byte(""), 1)
return goorgeous.Org(cleanContent,
c.getHTMLRenderer(blackfriday.HTML_TOC, ctx))
}
func externallyRenderContent(ctx *RenderingContext, path string, args []string) []byte {
content := ctx.Content
cleanContent := bytes.Replace(content, SummaryDivider, []byte(""), 1)
cmd := exec.Command(path, args...)
cmd.Stdin = bytes.NewReader(cleanContent)
var out, cmderr bytes.Buffer
cmd.Stdout = &out
cmd.Stderr = &cmderr
err := cmd.Run()
// Most external helpers exit w/ non-zero exit code only if severe, i.e.
// halting errors occurred. -> log stderr output regardless of state of err
2019-03-24 05:11:16 -04:00
for _, item := range strings.Split(cmderr.String(), "\n") {
item := strings.TrimSpace(item)
if item != "" {
jww.ERROR.Printf("%s: %s", ctx.DocumentName, item)
}
}
if err != nil {
jww.ERROR.Printf("%s rendering %s: %v", path, ctx.DocumentName, err)
}
return normalizeExternalHelperLineFeeds(out.Bytes())
}