hugo/helpers/pygments.go

// Copyright © 2013-14 Steve Francia <spf@spf13.com>.
//
// Licensed under the Simple Public License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://opensource.org/licenses/Simple-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package helpers

import (
	"bytes"
	"crypto/sha1"
	"fmt"
	"github.com/spf13/hugo/hugofs"
	jww "github.com/spf13/jwalterweatherman"
	"github.com/spf13/viper"
	"io"
	"io/ioutil"
	"os/exec"
	"path/filepath"
	"sort"
	"strings"
)

// pygmentsBin is the name of the Pygments command-line executable.
const pygmentsBin = "pygmentize"

// HasPygments reports whether the Pygments executable can be located with
// exec.LookPath.
func HasPygments() bool {
	_, lookupErr := exec.LookPath(pygmentsBin)
	return lookupErr == nil
}

// Highlight takes some code and returns highlighted code.
//
// The code is rendered to HTML by piping it through the external pygmentize
// binary. lang selects the lexer; when it is empty Pygments is asked to guess
// the language. optsStr is passed through parsePygmentsOpts and the result is
// handed to pygmentize via -O.
//
// When the "CacheDir" setting is non-empty, the rendered output is read from
// and written to a file in that directory whose name is derived from the SHA-1
// of code, lang and the parsed options.
//
// Highlighting is best effort: if Pygments is missing, the options are
// invalid, the cache cannot be read, or pygmentize fails, the problem is
// logged and code is returned unchanged. A failure to write the cache file is
// logged but does not prevent the highlighted output from being returned.
func Highlight(code, lang, optsStr string) string {
	if !HasPygments() {
		jww.WARN.Println("Highlighting requires Pygments to be installed and in the path")
		return code
	}

	options, err := parsePygmentsOpts(optsStr)
	if err != nil {
		jww.ERROR.Print(err.Error())
		return code
	}

	// Try to read from cache first.
	// A hash.Hash never returns an error from Write, so the results of
	// io.WriteString are deliberately ignored here.
	hash := sha1.New()
	io.WriteString(hash, code)
	io.WriteString(hash, lang)
	io.WriteString(hash, options)

	fs := hugofs.OsFs

	cacheDir := viper.GetString("CacheDir")
	var cachefile string

	if cacheDir != "" {
		cachefile = filepath.Join(cacheDir, fmt.Sprintf("pygments-%x", hash.Sum(nil)))

		exists, err := Exists(cachefile, fs)
		if err != nil {
			jww.ERROR.Print(err.Error())
			return code
		}
		if exists {
			f, err := fs.Open(cachefile)
			if err != nil {
				jww.ERROR.Print(err.Error())
				return code
			}
			// Release the file handle once the cached output has been read.
			defer f.Close()

			s, err := ioutil.ReadAll(f)
			if err != nil {
				jww.ERROR.Print(err.Error())
				return code
			}

			return string(s)
		}
	}

	// No cache file, render and cache it.
	var out bytes.Buffer
	var stderr bytes.Buffer

	var langOpt string
	if lang == "" {
		langOpt = "-g" // Try guessing the language
	} else {
		langOpt = "-l" + lang
	}

	cmd := exec.Command(pygmentsBin, langOpt, "-fhtml", "-O", options)
	cmd.Stdin = strings.NewReader(code)
	cmd.Stdout = &out
	cmd.Stderr = &stderr

	if err := cmd.Run(); err != nil {
		// Prefer what pygmentize printed; fall back to the Go error when the
		// process produced no stderr output (e.g. it could not be started).
		msg := stderr.String()
		if msg == "" {
			msg = err.Error()
		}
		jww.ERROR.Print(msg)
		return code
	}

	if cachefile != "" {
		// Write cache file. A failure here only costs a re-render next time,
		// so log the write error and still return the highlighted output.
		if err := WriteToDisk(cachefile, bytes.NewReader(out.Bytes()), fs); err != nil {
			jww.ERROR.Print(err.Error())
		}
	}

	return out.String()
}

// pygmentsKeywords is the set of Pygments option names that may be supplied
// by the user; any other key is rejected when options are parsed.
var pygmentsKeywords = map[string]bool{
	"style":       true,
	"encoding":    true,
	"noclasses":   true,
	"hl_lines":    true,
	"linenos":     true,
	"classprefix": true,
}

// parsePygmentsOpts converts a comma-separated list of key=value pairs into
// the option string given to pygmentize.
//
// Surrounding spaces are stripped from the input as a whole. Each key is
// trimmed of spaces, lower-cased and must be present in pygmentsKeywords;
// values are used verbatim. An entry that does not split into exactly one key
// and one value on "=", or whose key is unknown, yields an error.
//
// style, noclasses and encoding are filled in when absent: style from the
// "PygmentsStyle" setting, noclasses as the inverse of "PygmentsUseClasses",
// and encoding as "utf8". For empty input the result is the fixed string
// "style=...,noclasses=...,encoding=utf8"; otherwise the options are emitted
// sorted by key and joined with commas.
func parsePygmentsOpts(in string) (string, error) {
	defaultStyle := viper.GetString("PygmentsStyle")

	defaultNoClasses := "true"
	if viper.GetBool("PygmentsUseClasses") {
		defaultNoClasses = "false"
	}

	trimmed := strings.Trim(in, " ")
	if trimmed == "" {
		return fmt.Sprintf("style=%s,noclasses=%s,encoding=utf8", defaultStyle, defaultNoClasses), nil
	}

	parsed := make(map[string]string)
	for _, pair := range strings.Split(trimmed, ",") {
		parts := strings.Split(pair, "=")
		name := strings.ToLower(strings.Trim(parts[0], " "))
		if len(parts) == 2 && pygmentsKeywords[name] {
			parsed[name] = parts[1]
			continue
		}
		return "", fmt.Errorf("invalid Pygments option: %s", name)
	}

	// Apply defaults only for options the caller did not set explicitly.
	fallbacks := [...]struct{ name, value string }{
		{"style", defaultStyle},
		{"noclasses", defaultNoClasses},
		{"encoding", "utf8"},
	}
	for _, fb := range fallbacks {
		if _, given := parsed[fb.name]; !given {
			parsed[fb.name] = fb.value
		}
	}

	// Map iteration order is random; sort the keys so the output is stable.
	names := make([]string, 0, len(parsed))
	for name := range parsed {
		names = append(names, name)
	}
	sort.Strings(names)

	pairs := make([]string, len(names))
	for i, name := range names {
		pairs[i] = name + "=" + parsed[name]
	}
	return strings.Join(pairs, ","), nil
}
Shortcode rewrite, take 2 This commit contains a restructuring and partial rewrite of the shortcode handling. Prior to this commit rendering of the page content was mingled with handling of the shortcodes. This led to several oddities. The new flow is: 1. Shortcodes are extracted from page and replaced with placeholders. 2. Shortcodes are processed and rendered 3. Page is processed 4. The placeholders are replaced with the rendered shortcodes The handling of summaries is also made simpler by this. This commit also introduces some other chenges: 1. distinction between shortcodes that need further processing and those who do not: * `{{< >}}`: Typically raw HTML. Will not be processed. * `{{% %}}`: Will be processed by the page's markup engine (Markdown or (infuture) Asciidoctor) The above also involves a new shortcode-parser, with lexical scanning inspired by Rob Pike's talk called "Lexical Scanning in Go", which should be easier to understand, give better error messages and perform better. 2. If you want to exclude a shortcode from being processed (for documentation etc.), the inner part of the shorcode must be commented out, i.e. `{{%/* movie 47238zzb /%}}`. See the updated shortcode section in the documentation for further examples. The new parser supports nested shortcodes. This isn't new, but has two related design choices worth mentioning: The shortcodes will be rendered individually, so If both `{{< >}}` and `{{% %}}` are used in the nested hierarchy, one will be passed through the page's markdown processor, the other not. * To avoid potential costly overhead of always looking far ahead for a possible closing tag, this implementation looks at the template itself, and is branded as a container with inner content if it contains a reference to `.Inner` Fixes #565 Fixes #480 Fixes #461 And probably some others. 2014-10-27 20:48:30 +00:00			`// Copyright © 2013-14 Steve Francia <spf@spf13.com>.`
Adding Pygments helper 2013-12-05 14:42:29 +00:00			`//`
			`// Licensed under the Simple Public License, Version 2.0 (the "License");`
			`// you may not use this file except in compliance with the License.`
			`// You may obtain a copy of the License at`
			`// http://opensource.org/licenses/Simple-2.0`
			`//`
			`// Unless required by applicable law or agreed to in writing, software`
			`// distributed under the License is distributed on an "AS IS" BASIS,`
			`// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.`
			`// See the License for the specific language governing permissions and`
			`// limitations under the License.`

			`package helpers`

			`import (`
gofmt all go code 2014-01-29 22:50:31 +00:00			`"bytes"`
Cache pygments rendering between runs Fixes #1000 2015-03-27 16:05:17 +00:00			`"crypto/sha1"`
Add pygmentsstyle and pygmentsuseclasses options Fixes #204 Conflicts: commands/hugo.go 2014-05-07 16:38:14 +00:00			`"fmt"`
Add more options to highlight Fixes #1021 2015-04-15 18:31:05 +00:00			`"github.com/spf13/hugo/hugofs"`
			`jww "github.com/spf13/jwalterweatherman"`
			`"github.com/spf13/viper"`
Hash all pygments parameters. Ensures that Hugo rehighlights source code whenever one of the highlighting options changes. 2015-03-29 10:55:46 +00:00			`"io"`
Cache pygments rendering between runs Fixes #1000 2015-03-27 16:05:17 +00:00			`"io/ioutil"`
gofmt all go code 2014-01-29 22:50:31 +00:00			`"os/exec"`
Fix potential Windows path issue with Pygments cache 2015-04-14 17:58:37 +00:00			`"path/filepath"`
Add more options to highlight Fixes #1021 2015-04-15 18:31:05 +00:00			`"sort"`
gofmt all go code 2014-01-29 22:50:31 +00:00			`"strings"`
Adding Pygments helper 2013-12-05 14:42:29 +00:00			`)`

Shortcode rewrite, take 2 This commit contains a restructuring and partial rewrite of the shortcode handling. Prior to this commit rendering of the page content was mingled with handling of the shortcodes. This led to several oddities. The new flow is: 1. Shortcodes are extracted from page and replaced with placeholders. 2. Shortcodes are processed and rendered 3. Page is processed 4. The placeholders are replaced with the rendered shortcodes The handling of summaries is also made simpler by this. This commit also introduces some other chenges: 1. distinction between shortcodes that need further processing and those who do not: * `{{< >}}`: Typically raw HTML. Will not be processed. * `{{% %}}`: Will be processed by the page's markup engine (Markdown or (infuture) Asciidoctor) The above also involves a new shortcode-parser, with lexical scanning inspired by Rob Pike's talk called "Lexical Scanning in Go", which should be easier to understand, give better error messages and perform better. 2. If you want to exclude a shortcode from being processed (for documentation etc.), the inner part of the shorcode must be commented out, i.e. `{{%/* movie 47238zzb /%}}`. See the updated shortcode section in the documentation for further examples. The new parser supports nested shortcodes. This isn't new, but has two related design choices worth mentioning: The shortcodes will be rendered individually, so If both `{{< >}}` and `{{% %}}` are used in the nested hierarchy, one will be passed through the page's markdown processor, the other not. * To avoid potential costly overhead of always looking far ahead for a possible closing tag, this implementation looks at the template itself, and is branded as a container with inner content if it contains a reference to `.Inner` Fixes #565 Fixes #480 Fixes #461 And probably some others. 2014-10-27 20:48:30 +00:00			`const pygmentsBin = "pygmentize"`
Adding Pygments helper 2013-12-05 14:42:29 +00:00
Wrap comments helpers package to fit 80-column width Add an initial space after `//` where appropriate. Minor copyediting. 2014-12-26 15:07:03 +00:00			`// HasPygments checks to see if Pygments is installed and available`
			`// on the system.`
Shortcode rewrite, take 2 This commit contains a restructuring and partial rewrite of the shortcode handling. Prior to this commit rendering of the page content was mingled with handling of the shortcodes. This led to several oddities. The new flow is: 1. Shortcodes are extracted from page and replaced with placeholders. 2. Shortcodes are processed and rendered 3. Page is processed 4. The placeholders are replaced with the rendered shortcodes The handling of summaries is also made simpler by this. This commit also introduces some other chenges: 1. distinction between shortcodes that need further processing and those who do not: * `{{< >}}`: Typically raw HTML. Will not be processed. * `{{% %}}`: Will be processed by the page's markup engine (Markdown or (infuture) Asciidoctor) The above also involves a new shortcode-parser, with lexical scanning inspired by Rob Pike's talk called "Lexical Scanning in Go", which should be easier to understand, give better error messages and perform better. 2. If you want to exclude a shortcode from being processed (for documentation etc.), the inner part of the shorcode must be commented out, i.e. `{{%/* movie 47238zzb /%}}`. See the updated shortcode section in the documentation for further examples. The new parser supports nested shortcodes. This isn't new, but has two related design choices worth mentioning: The shortcodes will be rendered individually, so If both `{{< >}}` and `{{% %}}` are used in the nested hierarchy, one will be passed through the page's markdown processor, the other not. * To avoid potential costly overhead of always looking far ahead for a possible closing tag, this implementation looks at the template itself, and is branded as a container with inner content if it contains a reference to `.Inner` Fixes #565 Fixes #480 Fixes #461 And probably some others. 2014-10-27 20:48:30 +00:00			`func HasPygments() bool {`
gofmt all go code 2014-01-29 22:50:31 +00:00			`if _, err := exec.LookPath(pygmentsBin); err != nil {`
Shortcode rewrite, take 2 This commit contains a restructuring and partial rewrite of the shortcode handling. Prior to this commit rendering of the page content was mingled with handling of the shortcodes. This led to several oddities. The new flow is: 1. Shortcodes are extracted from page and replaced with placeholders. 2. Shortcodes are processed and rendered 3. Page is processed 4. The placeholders are replaced with the rendered shortcodes The handling of summaries is also made simpler by this. This commit also introduces some other chenges: 1. distinction between shortcodes that need further processing and those who do not: * `{{< >}}`: Typically raw HTML. Will not be processed. * `{{% %}}`: Will be processed by the page's markup engine (Markdown or (infuture) Asciidoctor) The above also involves a new shortcode-parser, with lexical scanning inspired by Rob Pike's talk called "Lexical Scanning in Go", which should be easier to understand, give better error messages and perform better. 2. If you want to exclude a shortcode from being processed (for documentation etc.), the inner part of the shorcode must be commented out, i.e. `{{%/* movie 47238zzb /%}}`. See the updated shortcode section in the documentation for further examples. The new parser supports nested shortcodes. This isn't new, but has two related design choices worth mentioning: The shortcodes will be rendered individually, so If both `{{< >}}` and `{{% %}}` are used in the nested hierarchy, one will be passed through the page's markdown processor, the other not. * To avoid potential costly overhead of always looking far ahead for a possible closing tag, this implementation looks at the template itself, and is branded as a container with inner content if it contains a reference to `.Inner` Fixes #565 Fixes #480 Fixes #461 And probably some others. 2014-10-27 20:48:30 +00:00			`return false`
			`}`
			`return true`
			`}`

Wrap comments helpers package to fit 80-column width Add an initial space after `//` where appropriate. Minor copyediting. 2014-12-26 15:07:03 +00:00			`// Highlight takes some code and returns highlighted code.`
Add more options to highlight Fixes #1021 2015-04-15 18:31:05 +00:00			`func Highlight(code, lang, optsStr string) string {`
Adding support for logging & verbose logging. Consolidation of error handling. Integration of jWalterWeatherman library. Fixed #137 2014-03-31 17:23:34 +00:00
Shortcode rewrite, take 2 This commit contains a restructuring and partial rewrite of the shortcode handling. Prior to this commit rendering of the page content was mingled with handling of the shortcodes. This led to several oddities. The new flow is: 1. Shortcodes are extracted from page and replaced with placeholders. 2. Shortcodes are processed and rendered 3. Page is processed 4. The placeholders are replaced with the rendered shortcodes The handling of summaries is also made simpler by this. This commit also introduces some other chenges: 1. distinction between shortcodes that need further processing and those who do not: * `{{< >}}`: Typically raw HTML. Will not be processed. * `{{% %}}`: Will be processed by the page's markup engine (Markdown or (infuture) Asciidoctor) The above also involves a new shortcode-parser, with lexical scanning inspired by Rob Pike's talk called "Lexical Scanning in Go", which should be easier to understand, give better error messages and perform better. 2. If you want to exclude a shortcode from being processed (for documentation etc.), the inner part of the shorcode must be commented out, i.e. `{{%/* movie 47238zzb /%}}`. See the updated shortcode section in the documentation for further examples. The new parser supports nested shortcodes. This isn't new, but has two related design choices worth mentioning: The shortcodes will be rendered individually, so If both `{{< >}}` and `{{% %}}` are used in the nested hierarchy, one will be passed through the page's markdown processor, the other not. * To avoid potential costly overhead of always looking far ahead for a possible closing tag, this implementation looks at the template itself, and is branded as a container with inner content if it contains a reference to `.Inner` Fixes #565 Fixes #480 Fixes #461 And probably some others. 2014-10-27 20:48:30 +00:00			`if !HasPygments() {`
Little syntax mistake 2014-04-09 19:08:47 +00:00			`jww.WARN.Println("Highlighting requires Pygments to be installed and in the path")`
gofmt all go code 2014-01-29 22:50:31 +00:00			`return code`
			`}`
Adding Pygments helper 2013-12-05 14:42:29 +00:00
Add more options to highlight Fixes #1021 2015-04-15 18:31:05 +00:00			`options, err := parsePygmentsOpts(optsStr)`
Hash all pygments parameters. Ensures that Hugo rehighlights source code whenever one of the highlighting options changes. 2015-03-29 10:55:46 +00:00
Add more options to highlight Fixes #1021 2015-04-15 18:31:05 +00:00			`if err != nil {`
			`jww.ERROR.Print(err.Error())`
			`return code`
Hash all pygments parameters. Ensures that Hugo rehighlights source code whenever one of the highlighting options changes. 2015-03-29 10:55:46 +00:00			`}`

Cache pygments rendering between runs Fixes #1000 2015-03-27 16:05:17 +00:00			`// Try to read from cache first`
Hash all pygments parameters. Ensures that Hugo rehighlights source code whenever one of the highlighting options changes. 2015-03-29 10:55:46 +00:00			`hash := sha1.New()`
			`io.WriteString(hash, code)`
Add more options to highlight Fixes #1021 2015-04-15 18:31:05 +00:00			`io.WriteString(hash, lang)`
			`io.WriteString(hash, options)`
Hash all pygments parameters. Ensures that Hugo rehighlights source code whenever one of the highlighting options changes. 2015-03-29 10:55:46 +00:00
Add more options to highlight Fixes #1021 2015-04-15 18:31:05 +00:00			`fs := hugofs.OsFs`

Only write highlight to cache when CacheDir is set To avoid writing cache files when testing. 2015-04-16 00:27:37 +00:00			`cacheDir := viper.GetString("CacheDir")`
			`var cachefile string`
Cache pygments rendering between runs Fixes #1000 2015-03-27 16:05:17 +00:00
Only write highlight to cache when CacheDir is set To avoid writing cache files when testing. 2015-04-16 00:27:37 +00:00			`if cacheDir != "" {`
			`cachefile = filepath.Join(cacheDir, fmt.Sprintf("pygments-%x", hash.Sum(nil)))`

			`exists, err := Exists(cachefile, fs)`
Cache pygments rendering between runs Fixes #1000 2015-03-27 16:05:17 +00:00			`if err != nil {`
			`jww.ERROR.Print(err.Error())`
			`return code`
			`}`
Only write highlight to cache when CacheDir is set To avoid writing cache files when testing. 2015-04-16 00:27:37 +00:00			`if exists {`
			`f, err := fs.Open(cachefile)`
			`if err != nil {`
			`jww.ERROR.Print(err.Error())`
			`return code`
			`}`

			`s, err := ioutil.ReadAll(f)`
			`if err != nil {`
			`jww.ERROR.Print(err.Error())`
			`return code`
			`}`

			`return string(s)`
			`}`
Cache pygments rendering between runs Fixes #1000 2015-03-27 16:05:17 +00:00			`}`

			`// No cache file, render and cache it`
gofmt all go code 2014-01-29 22:50:31 +00:00			`var out bytes.Buffer`
			`var stderr bytes.Buffer`
Add pygmentsstyle and pygmentsuseclasses options Fixes #204 Conflicts: commands/hugo.go 2014-05-07 16:38:14 +00:00
If no language is provided to Pygments, then try and guess it Previously if no language was specified, then illegal args would be passed to pygments, for example `pygments -l -fhtml`, which would result in pygments printing an error. 2015-07-03 21:51:43 +00:00			`var langOpt string`
			`if lang == "" {`
			`langOpt = "-g" // Try guessing the language`
			`} else {`
			`langOpt = "-l"+lang`
			`}`

			`cmd := exec.Command(pygmentsBin, langOpt, "-fhtml", "-O", options)`
gofmt all go code 2014-01-29 22:50:31 +00:00			`cmd.Stdin = strings.NewReader(code)`
			`cmd.Stdout = &out`
			`cmd.Stderr = &stderr`
Adding Pygments helper 2013-12-05 14:42:29 +00:00
gofmt all go code 2014-01-29 22:50:31 +00:00			`if err := cmd.Run(); err != nil {`
Adding support for logging & verbose logging. Consolidation of error handling. Integration of jWalterWeatherman library. Fixed #137 2014-03-31 17:23:34 +00:00			`jww.ERROR.Print(stderr.String())`
gofmt all go code 2014-01-29 22:50:31 +00:00			`return code`
			`}`
Adding Pygments helper 2013-12-05 14:42:29 +00:00
Only write highlight to cache when CacheDir is set To avoid writing cache files when testing. 2015-04-16 00:27:37 +00:00			`if cachefile != "" {`
			`// Write cache file`
			`if err := WriteToDisk(cachefile, bytes.NewReader(out.Bytes()), fs); err != nil {`
			`jww.ERROR.Print(stderr.String())`
			`}`
Cache pygments rendering between runs Fixes #1000 2015-03-27 16:05:17 +00:00			`}`

gofmt all go code 2014-01-29 22:50:31 +00:00			`return out.String()`
Adding Pygments helper 2013-12-05 14:42:29 +00:00			`}`
Add more options to highlight Fixes #1021 2015-04-15 18:31:05 +00:00
			`var pygmentsKeywords = make(map[string]bool)`

			`func init() {`
			`pygmentsKeywords["style"] = true`
			`pygmentsKeywords["encoding"] = true`
			`pygmentsKeywords["noclasses"] = true`
			`pygmentsKeywords["hl_lines"] = true`
			`pygmentsKeywords["linenos"] = true`
Allow 'classprefix' pygments options. 2015-05-07 00:55:28 +00:00			`pygmentsKeywords["classprefix"] = true`
Add more options to highlight Fixes #1021 2015-04-15 18:31:05 +00:00			`}`

			`func parsePygmentsOpts(in string) (string, error) {`

			`in = strings.Trim(in, " ")`

			`style := viper.GetString("PygmentsStyle")`

			`noclasses := "true"`
			`if viper.GetBool("PygmentsUseClasses") {`
			`noclasses = "false"`
			`}`

			`if len(in) == 0 {`
			`return fmt.Sprintf("style=%s,noclasses=%s,encoding=utf8", style, noclasses), nil`
			`}`

			`options := make(map[string]string)`

			`o := strings.Split(in, ",")`
			`for _, v := range o {`
			`keyVal := strings.Split(v, "=")`
			`key := strings.ToLower(strings.Trim(keyVal[0], " "))`
			`if len(keyVal) != 2 \|\| !pygmentsKeywords[key] {`
			`return "", fmt.Errorf("invalid Pygments option: %s", key)`
			`}`
			`options[key] = keyVal[1]`
			`}`

			`if _, ok := options["style"]; !ok {`
			`options["style"] = style`
			`}`

			`if _, ok := options["noclasses"]; !ok {`
			`options["noclasses"] = noclasses`
			`}`

			`if _, ok := options["encoding"]; !ok {`
			`options["encoding"] = "utf8"`
			`}`

			`var keys []string`
			`for k := range options {`
			`keys = append(keys, k)`
			`}`
			`sort.Strings(keys)`

			`var optionsStr string`
			`for i, k := range keys {`
			`optionsStr += fmt.Sprintf("%s=%s", k, options[k])`
			`if i < len(options)-1 {`
			`optionsStr += ","`
			`}`
			`}`
			`return optionsStr, nil`
			`}`