// Copyright 2024 The Hugo Authors. All rights reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. package page import ( "strings" "testing" qt "github.com/frankban/quicktest" "github.com/gohugoio/hugo/common/types" "github.com/gohugoio/hugo/media" ) /* TestExtractSummaryFromHTML is a table-driven test: each case supplies a media type, an input string, an isCJK flag and a numWords limit, passes them to ExtractSummaryFromHTML, and asserts the resulting Summary() and ContentWithoutSummary() against the expected strings. NOTE(review): the string literals throughout this file contain raw line breaks and look as if their HTML markup was stripped during extraction; confirm against the upstream file before relying on any expected value. */ func TestExtractSummaryFromHTML(t *testing.T) { c := qt.New(t) tests := []struct { mt media.Type input string isCJK bool numWords int expectSummary string expectContentWithoutSummary string }{ {media.Builtin.ReStructuredTextType, "
Simple Page
\nSimple Page
\nFirst paragraph
Second paragraph
First paragraph
Second paragraph
First paragraph
", false, 10, "First paragraph
", ""}, {media.Builtin.MarkdownType, "First paragraph
Second paragraph
", false, 2, "First paragraph
", "Second paragraph
"}, {media.Builtin.MarkdownType, "First paragraph
Second paragraph
Third paragraph
", false, 3, "First paragraph
Second paragraph
", "Third paragraph
"}, {media.Builtin.AsciiDocType, "First paragraph
Second paragraph
First paragraph
Second paragraph
这是中文,全中文
a这是中文,全中文
", true, 5, "这是中文,全中文
", "a这是中文,全中文
"}, } for i, test := range tests { summary := ExtractSummaryFromHTML(test.mt, test.input, test.numWords, test.isCJK) c.Assert(summary.Summary(), qt.Equals, test.expectSummary, qt.Commentf("Summary %d", i)) c.Assert(summary.ContentWithoutSummary(), qt.Equals, test.expectContentWithoutSummary, qt.Commentf("ContentWithoutSummary %d", i)) } } // See https://discourse.gohugo.io/t/automatic-summarys-summarylength-seems-broken-in-the-case-of-plainify/51466/4 // Also issue 12837 func TestExtractSummaryFromHTMLLotsOfHTMLInSummary(t *testing.T) { c := qt.New(t) input := `
This is a story about a cat.
The cat was white and fluffy.
And it liked milk.
` summary := ExtractSummaryFromHTML(media.Builtin.MarkdownType, input, 10, false) c.Assert(strings.HasSuffix(summary.Summary(), "\nThis is a story about a cat.\n
\n\nThe cat was white and fluffy.\n
"), qt.IsTrue) } func TestExtractSummaryFromHTMLWithDivider(t *testing.T) { c := qt.New(t) const divider = "FOOO" tests := []struct { mt media.Type input string expectSummary string expectContentWithoutSummary string expectContent string }{ {media.Builtin.MarkdownType, "First paragraph
FOOO
Second paragraph
", "First paragraph
", "Second paragraph
", "First paragraph
Second paragraph
"}, {media.Builtin.MarkdownType, "First paragraph
\nFOOO
\nSecond paragraph
", "First paragraph
", "Second paragraph
", "First paragraph
\nSecond paragraph
"}, {media.Builtin.MarkdownType, "FOOO
\nFirst paragraph
", "", "First paragraph
", "First paragraph
"}, {media.Builtin.MarkdownType, "First paragraph
Second paragraphFOOO
Third paragraph
", "First paragraph
Second paragraph
", "Third paragraph
", "First paragraph
Second paragraph
Third paragraph
"}, {media.Builtin.MarkdownType, "这是中文,全中文FOOO
a这是中文,全中文
", "这是中文,全中文
", "a这是中文,全中文
", "这是中文,全中文
a这是中文,全中文
"}, {media.Builtin.MarkdownType, `a b` + "\v" + ` c
` + "\nFOOO
", "a b\v c
", "", "a b\v c
"}, {media.Builtin.HTMLType, "First paragraph
FOOOSecond paragraph
", "First paragraph
", "Second paragraph
", "First paragraph
Second paragraph
"}, {media.Builtin.ReStructuredTextType, "This is summary.
\nFOOO
\nThis is content.
\nThis is summary.
\nThis is content.
\nThis is summary.
\nThis is content.
\nFirst paragraphFOOO
Second paragraph
First paragraph
Second paragraph
First paragraph
Second paragraph
Summary Next Line
FOOO
Some more text
Summary Next Line
Some more text
Summary Next Line
Some more text
Summary Next Line
\nFOOO
\nSome more text
\nSummary Next Line
\nSome more text
\nSummary Next Line
\nSome more text
\nFOOO
First paragraph
First paragraph
First paragraph
First paragraphFOOO
Second paragraph
First paragraph
Second paragraph
First paragraph
Second paragraph
First paragraph
\nFOOO
\nSecond paragraph
", "FOOO", startEndP, "FOOO
\n", ""}, {"FOOO
\nFOOO
\nFOOO
Second paragraph
FOOO
First paragraphFOOO
Second paragraph
abc FOOO
", "FOOO", startEndP, "FOOO", " "}, {"FOOO
", "FOOO", startEndP, "FOOO
", ""}, {"\n \nFOOO
", "FOOO", startEndP, "\n \nFOOO
", ""}, {"", true}, {"
Æøå", false}, } { c.Assert(isProbablyHTMLToken(test.input), qt.Equals, test.expect, qt.Commentf("[%d] Test.expect %q", i, test.input)) } } func BenchmarkSummaryFromHTML(b *testing.B) { b.StopTimer() input := "
First paragraph
Second paragraph
" b.StartTimer() for i := 0; i < b.N; i++ { summary := ExtractSummaryFromHTML(media.Builtin.MarkdownType, input, 2, false) if s := summary.Content(); s != input { b.Fatalf("unexpected content: %q", s) } if s := summary.ContentWithoutSummary(); s != "Second paragraph
" { b.Fatalf("unexpected content without summary: %q", s) } if s := summary.Summary(); s != "First paragraph
" { b.Fatalf("unexpected summary: %q", s) } } } func BenchmarkSummaryFromHTMLWithDivider(b *testing.B) { b.StopTimer() input := "First paragraph
FOOO
Second paragraph
" b.StartTimer() for i := 0; i < b.N; i++ { summary := ExtractSummaryFromHTMLWithDivider(media.Builtin.MarkdownType, input, "FOOO") if s := summary.Content(); s != "First paragraph
Second paragraph
" { b.Fatalf("unexpected content: %q", s) } if s := summary.ContentWithoutSummary(); s != "Second paragraph
" { b.Fatalf("unexpected content without summary: %q", s) } if s := summary.Summary(); s != "First paragraph
" { b.Fatalf("unexpected summary: %q", s) } } }