// Copyright 2024 The Hugo Authors. All rights reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. package page import ( "strings" "testing" qt "github.com/frankban/quicktest" "github.com/gohugoio/hugo/common/types" "github.com/gohugoio/hugo/media" ) func TestExtractSummaryFromHTML(t *testing.T) { c := qt.New(t) tests := []struct { mt media.Type input string isCJK bool numWords int expectSummary string expectContentWithoutSummary string }{ {media.Builtin.ReStructuredTextType, "
\n\n\n

Simple Page

\n
", false, 70, "
\n\n\n

Simple Page

\n
", ""}, {media.Builtin.ReStructuredTextType, "

First paragraph

Second paragraph

", false, 2, `

First paragraph

`, "

Second paragraph

"}, {media.Builtin.MarkdownType, "

First paragraph

", false, 10, "

First paragraph

", ""}, {media.Builtin.MarkdownType, "

First paragraph

Second paragraph

", false, 2, "

First paragraph

", "

Second paragraph

"}, {media.Builtin.MarkdownType, "

First paragraph

Second paragraph

Third paragraph

", false, 3, "

First paragraph

Second paragraph

", "

Third paragraph

"}, {media.Builtin.AsciiDocType, "

First paragraph

Second paragraph

", false, 2, "

First paragraph

", "

Second paragraph

"}, {media.Builtin.MarkdownType, "

这是中文,全中文

a这是中文,全中文

", true, 5, "

这是中文,全中文

", "

a这是中文,全中文

"}, } for i, test := range tests { summary := ExtractSummaryFromHTML(test.mt, test.input, test.numWords, test.isCJK) c.Assert(summary.Summary(), qt.Equals, test.expectSummary, qt.Commentf("Summary %d", i)) c.Assert(summary.ContentWithoutSummary(), qt.Equals, test.expectContentWithoutSummary, qt.Commentf("ContentWithoutSummary %d", i)) } } // See https://discourse.gohugo.io/t/automatic-summarys-summarylength-seems-broken-in-the-case-of-plainify/51466/4 // Also issue 12837 func TestExtractSummaryFromHTMLLotsOfHTMLInSummary(t *testing.T) { c := qt.New(t) input := `

1 2 3 4 5

This is a story about a cat.

The cat was white and fluffy.

And it liked milk.

` summary := ExtractSummaryFromHTML(media.Builtin.MarkdownType, input, 10, false) c.Assert(strings.HasSuffix(summary.Summary(), "

\nThis is a story about a cat.\n

\n

\nThe cat was white and fluffy.\n

"), qt.IsTrue) } func TestExtractSummaryFromHTMLWithDivider(t *testing.T) { c := qt.New(t) const divider = "FOOO" tests := []struct { mt media.Type input string expectSummary string expectContentWithoutSummary string expectContent string }{ {media.Builtin.MarkdownType, "

First paragraph

FOOO

Second paragraph

", "

First paragraph

", "

Second paragraph

", "

First paragraph

Second paragraph

"}, {media.Builtin.MarkdownType, "

First paragraph

\n

FOOO

\n

Second paragraph

", "

First paragraph

", "

Second paragraph

", "

First paragraph

\n

Second paragraph

"}, {media.Builtin.MarkdownType, "

FOOO

\n

First paragraph

", "", "

First paragraph

", "

First paragraph

"}, {media.Builtin.MarkdownType, "

First paragraph

Second paragraphFOOO

Third paragraph

", "

First paragraph

Second paragraph

", "

Third paragraph

", "

First paragraph

Second paragraph

Third paragraph

"}, {media.Builtin.MarkdownType, "

这是中文,全中文FOOO

a这是中文,全中文

", "

这是中文,全中文

", "

a这是中文,全中文

", "

这是中文,全中文

a这是中文,全中文

"}, {media.Builtin.MarkdownType, `

a b` + "\v" + ` c

` + "\n

FOOO

", "

a b\v c

", "", "

a b\v c

"}, {media.Builtin.HTMLType, "

First paragraph

FOOO

Second paragraph

", "

First paragraph

", "

Second paragraph

", "

First paragraph

Second paragraph

"}, {media.Builtin.ReStructuredTextType, "
\n\n\n

This is summary.

\n

FOOO

\n

This is content.

\n
", "
\n\n\n

This is summary.

\n
", "

This is content.

\n
", "
\n\n\n

This is summary.

\n

This is content.

\n
"}, {media.Builtin.ReStructuredTextType, "

First paragraphFOOO

Second paragraph

", "

First paragraph

", "

Second paragraph

", `

First paragraph

Second paragraph

`}, {media.Builtin.AsciiDocType, "

Summary Next Line

FOOO

Some more text

", "

Summary Next Line

", "

Some more text

", "

Summary Next Line

Some more text

"}, {media.Builtin.AsciiDocType, "
\n

Summary Next Line

\n
\n
\n

FOOO

\n
\n
\n

Some more text

\n
\n", "
\n

Summary Next Line

\n
", "
\n

Some more text

\n
", "
\n

Summary Next Line

\n
\n
\n

Some more text

\n
"}, {media.Builtin.AsciiDocType, "

FOOO

First paragraph

", "", "

First paragraph

", "

First paragraph

"}, {media.Builtin.AsciiDocType, "

First paragraphFOOO

Second paragraph

", "

First paragraph

", "

Second paragraph

", "

First paragraph

Second paragraph

"}, } for i, test := range tests { summary := ExtractSummaryFromHTMLWithDivider(test.mt, test.input, divider) c.Assert(summary.Summary(), qt.Equals, test.expectSummary, qt.Commentf("Summary %d", i)) c.Assert(summary.ContentWithoutSummary(), qt.Equals, test.expectContentWithoutSummary, qt.Commentf("ContentWithoutSummary %d", i)) c.Assert(summary.Content(), qt.Equals, test.expectContent, qt.Commentf("Content %d", i)) } } func TestExpandDivider(t *testing.T) { c := qt.New(t) for i, test := range []struct { input string divider string ptag tagReStartEnd expect string expectEndMarkup string }{ {"

First paragraph

\n

FOOO

\n

Second paragraph

", "FOOO", startEndP, "

FOOO

\n", ""}, {"
\n

FOOO

\n
", "FOOO", startEndDiv, "
\n

FOOO

\n
", ""}, {"

FOOO

Second paragraph

", "FOOO", startEndDiv, "

FOOO

", ""}, {"

First paragraphFOOO

Second paragraph

", "FOOO", startEndDiv, "FOOO", "

"}, {"

abc FOOO

", "FOOO", startEndP, "FOOO", "

"}, {"

FOOO

", "FOOO", startEndP, "

FOOO

", ""}, {"

\n \nFOOO

", "FOOO", startEndP, "

\n \nFOOO

", ""}, {"
FOOO
", "FOOO", startEndDiv, "
FOOO
", ""}, } { l := types.LowHigh[string]{Low: strings.Index(test.input, test.divider), High: strings.Index(test.input, test.divider) + len(test.divider)} e, t := expandSummaryDivider(test.input, test.ptag, l) c.Assert(test.input[e.Low:e.High], qt.Equals, test.expect, qt.Commentf("[%d] Test.expect %q", i, test.input)) c.Assert(test.input[t.Low:t.High], qt.Equals, test.expectEndMarkup, qt.Commentf("[%d] Test.expectEndMarkup %q", i, test.input)) } } func TestIsProbablyHTMLToken(t *testing.T) { c := qt.New(t) for i, test := range []struct { input string expect bool }{ {"

", true}, {"Æøå", false}, } { c.Assert(isProbablyHTMLToken(test.input), qt.Equals, test.expect, qt.Commentf("[%d] Test.expect %q", i, test.input)) } } func BenchmarkSummaryFromHTML(b *testing.B) { b.StopTimer() input := "

First paragraph

Second paragraph

" b.StartTimer() for i := 0; i < b.N; i++ { summary := ExtractSummaryFromHTML(media.Builtin.MarkdownType, input, 2, false) if s := summary.Content(); s != input { b.Fatalf("unexpected content: %q", s) } if s := summary.ContentWithoutSummary(); s != "

Second paragraph

" { b.Fatalf("unexpected content without summary: %q", s) } if s := summary.Summary(); s != "

First paragraph

" { b.Fatalf("unexpected summary: %q", s) } } } func BenchmarkSummaryFromHTMLWithDivider(b *testing.B) { b.StopTimer() input := "

First paragraph

FOOO

Second paragraph

" b.StartTimer() for i := 0; i < b.N; i++ { summary := ExtractSummaryFromHTMLWithDivider(media.Builtin.MarkdownType, input, "FOOO") if s := summary.Content(); s != "

First paragraph

Second paragraph

" { b.Fatalf("unexpected content: %q", s) } if s := summary.ContentWithoutSummary(); s != "

Second paragraph

" { b.Fatalf("unexpected content without summary: %q", s) } if s := summary.Summary(); s != "

First paragraph

" { b.Fatalf("unexpected summary: %q", s) } } }