From 5d50c46482d231efa26c95e4705e720fb9bf753c Mon Sep 17 00:00:00 2001
From: Mattias Wadman
Date: Wed, 13 Apr 2016 00:14:00 +0200
Subject: [PATCH] Chomp Unicode BOM if present

Useful if using or sharing files with users that use editors that
prepend a Unicode byte order mark (BOM) header (like Windows Notepad).

This will still assume files are UTF-8 encoded.

Closes #2075
---
 hugolib/page_test.go | 13 +++++++++++++
 parser/page.go       | 19 +++++++++++++++++++
 2 files changed, 32 insertions(+)

diff --git a/hugolib/page_test.go b/hugolib/page_test.go
index c4e442a41..876a74caf 100644
--- a/hugolib/page_test.go
+++ b/hugolib/page_test.go
@@ -1052,6 +1052,19 @@ func TestPageSimpleMethods(t *testing.T) {
 	}
 }
 
+func TestChompBOM(t *testing.T) {
+	p, _ := NewPage("simple.md")
+	const utf8BOM = "\xef\xbb\xbf"
+	_, err := p.ReadFrom(strings.NewReader(utf8BOM + simplePage))
+	p.Convert()
+
+	if err != nil {
+		t.Fatalf("Unable to create a page with BOM prefixed frontmatter and body content: %s", err)
+	}
+
+	checkPageTitle(t, p, "Simple")
+}
+
 func listEqual(left, right []string) bool {
 	if len(left) != len(right) {
 		return false
diff --git a/parser/page.go b/parser/page.go
index 949bfdac6..5092caddc 100644
--- a/parser/page.go
+++ b/parser/page.go
@@ -50,6 +50,8 @@ const (
 	HTMLCommentStart = "<!--"
 	// HTMLCommentEnd identifies the end of HTML comment.
 	HTMLCommentEnd = "-->"
+	// BOM Unicode byte order marker
+	BOM = '\ufeff'
 )
 
 var (
@@ -101,6 +103,10 @@ func (p *page) Metadata() (meta interface{}, err error) {
 func ReadFrom(r io.Reader) (p Page, err error) {
 	reader := bufio.NewReader(r)
 
+	// chomp BOM and assume UTF-8
+	if err = chompBOM(reader); err != nil && err != io.EOF {
+		return
+	}
 	if err = chompWhitespace(reader); err != nil && err != io.EOF {
 		return
 	}
@@ -135,6 +141,19 @@ func ReadFrom(r io.Reader) (p Page, err error) {
 	return newp, nil
 }
 
+func chompBOM(r io.RuneScanner) (err error) {
+	for {
+		c, _, err := r.ReadRune()
+		if err != nil {
+			return err
+		}
+		if c != BOM {
+			r.UnreadRune()
+			return nil
+		}
+	}
+}
+
 func chompWhitespace(r io.RuneScanner) (err error) {
 	for {
 		c, _, err := r.ReadRune()