1// Copyright 2019 The Hugo Authors. All rights reserved.
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6// http://www.apache.org/licenses/LICENSE-2.0
7//
8// Unless required by applicable law or agreed to in writing, software
9// distributed under the License is distributed on an "AS IS" BASIS,
10// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11// See the License for the specific language governing permissions and
12// limitations under the License.
13
14package helpers
15
16import (
17	"bytes"
18	"html/template"
19	"strings"
20	"testing"
21
22	"github.com/gohugoio/hugo/config"
23	"github.com/spf13/afero"
24
25	"github.com/gohugoio/hugo/common/loggers"
26
27	qt "github.com/frankban/quicktest"
28)
29
// tstHTMLContent is a small, complete HTML document (head with a script,
// a nav list, an article with a link and a paragraph) used as benchmark input.
const tstHTMLContent = `<!DOCTYPE html><html><head><script src="http://two/foobar.js"></script></head><body><nav><ul><li hugo-nav="section_0"></li><li hugo-nav="section_1"></li></ul></nav><article>content <a href="http://two/foobar">foobar</a>. Follow up</article><p>This is some text.<br>And some more.</p></body></html>`
31
32func TestTrimShortHTML(t *testing.T) {
33	tests := []struct {
34		input, output []byte
35	}{
36		{[]byte(""), []byte("")},
37		{[]byte("Plain text"), []byte("Plain text")},
38		{[]byte("  \t\n Whitespace text\n\n"), []byte("Whitespace text")},
39		{[]byte("<p>Simple paragraph</p>"), []byte("Simple paragraph")},
40		{[]byte("\n  \n \t  <p> \t Whitespace\nHTML  \n\t </p>\n\t"), []byte("Whitespace\nHTML")},
41		{[]byte("<p>Multiple</p><p>paragraphs</p>"), []byte("<p>Multiple</p><p>paragraphs</p>")},
42		{[]byte("<p>Nested<p>paragraphs</p></p>"), []byte("<p>Nested<p>paragraphs</p></p>")},
43		{[]byte("<p>Hello</p>\n<ul>\n<li>list1</li>\n<li>list2</li>\n</ul>"), []byte("<p>Hello</p>\n<ul>\n<li>list1</li>\n<li>list2</li>\n</ul>")},
44	}
45
46	c := newTestContentSpec()
47	for i, test := range tests {
48		output := c.TrimShortHTML(test.input)
49		if !bytes.Equal(test.output, output) {
50			t.Errorf("Test %d failed. Expected %q got %q", i, test.output, output)
51		}
52	}
53}
54
55func TestStripHTML(t *testing.T) {
56	type test struct {
57		input, expected string
58	}
59	data := []test{
60		{"<h1>strip h1 tag <h1>", "strip h1 tag "},
61		{"<p> strip p tag </p>", " strip p tag "},
62		{"</br> strip br<br>", " strip br\n"},
63		{"</br> strip br2<br />", " strip br2\n"},
64		{"This <strong>is</strong> a\nnewline", "This is a newline"},
65		{"No Tags", "No Tags"},
66		{`<p>Summary Next Line.
67<figure >
68
69        <img src="/not/real" />
70
71
72</figure>
73.
74More text here.</p>
75
76<p>Some more text</p>`, "Summary Next Line.  . More text here.\nSome more text\n"},
77	}
78	for i, d := range data {
79		output := StripHTML(d.input)
80		if d.expected != output {
81			t.Errorf("Test %d failed. Expected %q got %q", i, d.expected, output)
82		}
83	}
84}
85
86func BenchmarkStripHTML(b *testing.B) {
87	b.ResetTimer()
88	for i := 0; i < b.N; i++ {
89		StripHTML(tstHTMLContent)
90	}
91}
92
93func TestStripEmptyNav(t *testing.T) {
94	c := qt.New(t)
95	cleaned := stripEmptyNav([]byte("do<nav>\n</nav>\n\nbedobedo"))
96	c.Assert(cleaned, qt.DeepEquals, []byte("dobedobedo"))
97}
98
99func TestBytesToHTML(t *testing.T) {
100	c := qt.New(t)
101	c.Assert(BytesToHTML([]byte("dobedobedo")), qt.Equals, template.HTML("dobedobedo"))
102}
103
104func TestNewContentSpec(t *testing.T) {
105	cfg := config.New()
106	c := qt.New(t)
107
108	cfg.Set("summaryLength", 32)
109	cfg.Set("buildFuture", true)
110	cfg.Set("buildExpired", true)
111	cfg.Set("buildDrafts", true)
112
113	spec, err := NewContentSpec(cfg, loggers.NewErrorLogger(), afero.NewMemMapFs(), nil)
114
115	c.Assert(err, qt.IsNil)
116	c.Assert(spec.summaryLength, qt.Equals, 32)
117	c.Assert(spec.BuildFuture, qt.Equals, true)
118	c.Assert(spec.BuildExpired, qt.Equals, true)
119	c.Assert(spec.BuildDrafts, qt.Equals, true)
120}
121
// benchmarkTruncateString is the input shared by the truncation benchmarks:
// one sentence repeated 20 times with no separator between repetitions.
var benchmarkTruncateString = strings.Repeat(
	"This is a sentence about nothing.",
	20,
)
123
124func BenchmarkTestTruncateWordsToWholeSentence(b *testing.B) {
125	c := newTestContentSpec()
126	b.ResetTimer()
127	for i := 0; i < b.N; i++ {
128		c.TruncateWordsToWholeSentence(benchmarkTruncateString)
129	}
130}
131
132func BenchmarkTestTruncateWordsToWholeSentenceOld(b *testing.B) {
133	c := newTestContentSpec()
134	b.ResetTimer()
135	for i := 0; i < b.N; i++ {
136		c.truncateWordsToWholeSentenceOld(benchmarkTruncateString)
137	}
138}
139
140func TestTruncateWordsToWholeSentence(t *testing.T) {
141	c := newTestContentSpec()
142	type test struct {
143		input, expected string
144		max             int
145		truncated       bool
146	}
147	data := []test{
148		{"a b c", "a b c", 12, false},
149		{"a b c", "a b c", 3, false},
150		{"a", "a", 1, false},
151		{"This is a sentence.", "This is a sentence.", 5, false},
152		{"This is also a sentence!", "This is also a sentence!", 1, false},
153		{"To be. Or not to be. That's the question.", "To be.", 1, true},
154		{" \nThis is not a sentence\nAnd this is another", "This is not a sentence", 4, true},
155		{"", "", 10, false},
156		{"This... is a more difficult test?", "This... is a more difficult test?", 1, false},
157	}
158	for i, d := range data {
159		c.summaryLength = d.max
160		output, truncated := c.TruncateWordsToWholeSentence(d.input)
161		if d.expected != output {
162			t.Errorf("Test %d failed. Expected %q got %q", i, d.expected, output)
163		}
164
165		if d.truncated != truncated {
166			t.Errorf("Test %d failed. Expected truncated=%t got %t", i, d.truncated, truncated)
167		}
168	}
169}
170
171func TestTruncateWordsByRune(t *testing.T) {
172	c := newTestContentSpec()
173	type test struct {
174		input, expected string
175		max             int
176		truncated       bool
177	}
178	data := []test{
179		{"", "", 1, false},
180		{"a b c", "a b c", 12, false},
181		{"a b c", "a b c", 3, false},
182		{"a", "a", 1, false},
183		{"Hello 中国", "", 0, true},
184		{"这是中文,全中文。", "这是中文,", 5, true},
185		{"Hello 中国", "Hello 中", 2, true},
186		{"Hello 中国", "Hello 中国", 3, false},
187		{"Hello中国 Good 好的", "Hello中国 Good 好", 9, true},
188		{"This is a sentence.", "This is", 2, true},
189		{"This is also a sentence!", "This", 1, true},
190		{"To be. Or not to be. That's the question.", "To be. Or not", 4, true},
191		{" \nThis is    not a sentence\n ", "This is not", 3, true},
192	}
193	for i, d := range data {
194		c.summaryLength = d.max
195		output, truncated := c.TruncateWordsByRune(strings.Fields(d.input))
196		if d.expected != output {
197			t.Errorf("Test %d failed. Expected %q got %q", i, d.expected, output)
198		}
199
200		if d.truncated != truncated {
201			t.Errorf("Test %d failed. Expected truncated=%t got %t", i, d.truncated, truncated)
202		}
203	}
204}
205
206func TestExtractTOCNormalContent(t *testing.T) {
207	content := []byte("<nav>\n<ul>\nTOC<li><a href=\"#")
208
209	actualTocLessContent, actualToc := ExtractTOC(content)
210	expectedTocLess := []byte("TOC<li><a href=\"#")
211	expectedToc := []byte("<nav id=\"TableOfContents\">\n<ul>\n")
212
213	if !bytes.Equal(actualTocLessContent, expectedTocLess) {
214		t.Errorf("Actual tocless (%s) did not equal expected (%s) tocless content", actualTocLessContent, expectedTocLess)
215	}
216
217	if !bytes.Equal(actualToc, expectedToc) {
218		t.Errorf("Actual toc (%s) did not equal expected (%s) toc content", actualToc, expectedToc)
219	}
220}
221
222func TestExtractTOCGreaterThanSeventy(t *testing.T) {
223	content := []byte("<nav>\n<ul>\nTOC This is a very long content which will definitely be greater than seventy, I promise you that.<li><a href=\"#")
224
225	actualTocLessContent, actualToc := ExtractTOC(content)
226	// Because the start of Toc is greater than 70+startpoint of <li> content and empty TOC will be returned
227	expectedToc := []byte("")
228
229	if !bytes.Equal(actualTocLessContent, content) {
230		t.Errorf("Actual tocless (%s) did not equal expected (%s) tocless content", actualTocLessContent, content)
231	}
232
233	if !bytes.Equal(actualToc, expectedToc) {
234		t.Errorf("Actual toc (%s) did not equal expected (%s) toc content", actualToc, expectedToc)
235	}
236}
237
238func TestExtractNoTOC(t *testing.T) {
239	content := []byte("TOC")
240
241	actualTocLessContent, actualToc := ExtractTOC(content)
242	expectedToc := []byte("")
243
244	if !bytes.Equal(actualTocLessContent, content) {
245		t.Errorf("Actual tocless (%s) did not equal expected (%s) tocless content", actualTocLessContent, content)
246	}
247
248	if !bytes.Equal(actualToc, expectedToc) {
249		t.Errorf("Actual toc (%s) did not equal expected (%s) toc content", actualToc, expectedToc)
250	}
251}
252
// totalWordsBenchmarkString is "Hugo Rocks " repeated 200 times, i.e. 400
// space-separated words; it is used by both TestTotalWords and BenchmarkTotalWords.
var totalWordsBenchmarkString = strings.Repeat(
	"Hugo Rocks ",
	200,
)
254
255func TestTotalWords(t *testing.T) {
256	for i, this := range []struct {
257		s     string
258		words int
259	}{
260		{"Two, Words!", 2},
261		{"Word", 1},
262		{"", 0},
263		{"One, Two,      Three", 3},
264		{totalWordsBenchmarkString, 400},
265	} {
266		actualWordCount := TotalWords(this.s)
267
268		if actualWordCount != this.words {
269			t.Errorf("[%d] Actual word count (%d) for test string (%s) did not match %d", i, actualWordCount, this.s, this.words)
270		}
271	}
272}
273
274func BenchmarkTotalWords(b *testing.B) {
275	b.ResetTimer()
276	for i := 0; i < b.N; i++ {
277		wordCount := TotalWords(totalWordsBenchmarkString)
278		if wordCount != 400 {
279			b.Fatal("Wordcount error")
280		}
281	}
282}
283