1// Copyright 2020 The Hugo Authors. All rights reserved.
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6// http://www.apache.org/licenses/LICENSE-2.0
7//
8// Unless required by applicable law or agreed to in writing, software
9// distributed under the License is distributed on an "AS IS" BASIS,
10// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11// See the License for the specific language governing permissions and
12// limitations under the License.
13
14// Package asciidocext converts AsciiDoc to HTML using Asciidoctor
15// external binary. The `asciidoc` module is reserved for a future golang
16// implementation.
17package asciidocext
18
19import (
20	"bytes"
21	"path/filepath"
22	"strings"
23
24	"github.com/gohugoio/hugo/common/hexec"
25	"github.com/gohugoio/hugo/htesting"
26
27	"github.com/gohugoio/hugo/identity"
28	"github.com/gohugoio/hugo/markup/asciidocext/asciidocext_config"
29	"github.com/gohugoio/hugo/markup/converter"
30	"github.com/gohugoio/hugo/markup/internal"
31	"github.com/gohugoio/hugo/markup/tableofcontents"
32	"golang.org/x/net/html"
33)
34
// pageSubset is the one method parseArgs needs from the document being
// converted: the relative permalink, whose last path element is used as the
// page's output folder name.
//
// TODO: the RelPermalink patch for svg posts is not working.
type pageSubset interface {
	RelPermalink() string
}
39
// Provider is the package entry point. Its New method returns the
// converter.Provider registered under the name "asciidocext".
var Provider converter.ProviderProvider = provider{}
42
// provider is the stateless converter.ProviderProvider implementation that is
// exposed through the Provider variable.
type provider struct{}
44
45func (p provider) New(cfg converter.ProviderConfig) (converter.Provider, error) {
46	return converter.NewProvider("asciidocext", func(ctx converter.DocumentContext) (converter.Converter, error) {
47		return &asciidocConverter{
48			ctx: ctx,
49			cfg: cfg,
50		}, nil
51	}), nil
52}
53
// asciidocResult is the result type returned by Convert. It embeds the
// converted content as a converter.Result and additionally carries the table
// of contents extracted from that content.
type asciidocResult struct {
	converter.Result
	// toc is the table of contents returned by TableOfContents.
	toc tableofcontents.Root
}
58
// TableOfContents returns the table of contents stored in the result.
func (r asciidocResult) TableOfContents() tableofcontents.Root {
	return r.toc
}
62
// asciidocConverter converts the content of one document by running the
// external asciidoctor binary and post-processing the HTML it returns.
type asciidocConverter struct {
	// ctx is the document context the converter was created for; Convert
	// passes it on to getAsciidocContent.
	ctx converter.DocumentContext
	// cfg is the provider configuration; the logger, the AsciidocExt markup
	// settings and the site configuration values are read from it.
	cfg converter.ProviderConfig
}
67
68func (a *asciidocConverter) Convert(ctx converter.RenderContext) (converter.Result, error) {
69	b, err := a.getAsciidocContent(ctx.Src, a.ctx)
70	if err != nil {
71		return nil, err
72	}
73	content, toc, err := a.extractTOC(b)
74	if err != nil {
75		return nil, err
76	}
77	return asciidocResult{
78		Result: converter.Bytes(content),
79		toc:    toc,
80	}, nil
81}
82
// Supports always reports false, whatever identity is passed in.
func (a *asciidocConverter) Supports(_ identity.Identity) bool {
	return false
}
86
87// getAsciidocContent calls asciidoctor as an external helper
88// to convert AsciiDoc content to HTML.
89func (a *asciidocConverter) getAsciidocContent(src []byte, ctx converter.DocumentContext) ([]byte, error) {
90	if !hasAsciiDoc() {
91		a.cfg.Logger.Errorln("asciidoctor not found in $PATH: Please install.\n",
92			"                 Leaving AsciiDoc content unrendered.")
93		return src, nil
94	}
95
96	args := a.parseArgs(ctx)
97	args = append(args, "-")
98
99	a.cfg.Logger.Infoln("Rendering", ctx.DocumentName, " using asciidoctor args", args, "...")
100
101	return internal.ExternallyRenderContent(a.cfg, ctx, src, asciiDocBinaryName, args)
102}
103
104func (a *asciidocConverter) parseArgs(ctx converter.DocumentContext) []string {
105	cfg := a.cfg.MarkupConfig.AsciidocExt
106	args := []string{}
107
108	args = a.appendArg(args, "-b", cfg.Backend, asciidocext_config.CliDefault.Backend, asciidocext_config.AllowedBackend)
109
110	for _, extension := range cfg.Extensions {
111		if strings.LastIndexAny(extension, `\/.`) > -1 {
112			a.cfg.Logger.Errorln("Unsupported asciidoctor extension was passed in. Extension `" + extension + "` ignored. Only installed asciidoctor extensions are allowed.")
113			continue
114		}
115		args = append(args, "-r", extension)
116	}
117
118	for attributeKey, attributeValue := range cfg.Attributes {
119		if asciidocext_config.DisallowedAttributes[attributeKey] {
120			a.cfg.Logger.Errorln("Unsupported asciidoctor attribute was passed in. Attribute `" + attributeKey + "` ignored.")
121			continue
122		}
123
124		args = append(args, "-a", attributeKey+"="+attributeValue)
125	}
126
127	if cfg.WorkingFolderCurrent {
128		contentDir := filepath.Dir(ctx.Filename)
129		sourceDir := a.cfg.Cfg.GetString("source")
130		destinationDir := a.cfg.Cfg.GetString("destination")
131
132		if destinationDir == "" {
133			a.cfg.Logger.Errorln("markup.asciidocext.workingFolderCurrent requires hugo command option --destination to be set")
134		}
135		if !filepath.IsAbs(destinationDir) && sourceDir != "" {
136			destinationDir = filepath.Join(sourceDir, destinationDir)
137		}
138
139		var outDir string
140		var err error
141
142		file := filepath.Base(ctx.Filename)
143		if a.cfg.Cfg.GetBool("uglyUrls") || file == "_index.adoc" || file == "index.adoc" {
144			outDir, err = filepath.Abs(filepath.Dir(filepath.Join(destinationDir, ctx.DocumentName)))
145		} else {
146			postDir := ""
147			page, ok := ctx.Document.(pageSubset)
148			if ok {
149				postDir = filepath.Base(page.RelPermalink())
150			} else {
151				a.cfg.Logger.Errorln("unable to cast interface to pageSubset")
152			}
153
154			outDir, err = filepath.Abs(filepath.Join(destinationDir, filepath.Dir(ctx.DocumentName), postDir))
155		}
156
157		if err != nil {
158			a.cfg.Logger.Errorln("asciidoctor outDir: ", err)
159		}
160
161		args = append(args, "--base-dir", contentDir, "-a", "outdir="+outDir)
162	}
163
164	if cfg.NoHeaderOrFooter {
165		args = append(args, "--no-header-footer")
166	} else {
167		a.cfg.Logger.Warnln("asciidoctor parameter NoHeaderOrFooter is expected for correct html rendering")
168	}
169
170	if cfg.SectionNumbers {
171		args = append(args, "--section-numbers")
172	}
173
174	if cfg.Verbose {
175		args = append(args, "--verbose")
176	}
177
178	if cfg.Trace {
179		args = append(args, "--trace")
180	}
181
182	args = a.appendArg(args, "--failure-level", cfg.FailureLevel, asciidocext_config.CliDefault.FailureLevel, asciidocext_config.AllowedFailureLevel)
183
184	args = a.appendArg(args, "--safe-mode", cfg.SafeMode, asciidocext_config.CliDefault.SafeMode, asciidocext_config.AllowedSafeMode)
185
186	return args
187}
188
189func (a *asciidocConverter) appendArg(args []string, option, value, defaultValue string, allowedValues map[string]bool) []string {
190	if value != defaultValue {
191		if allowedValues[value] {
192			args = append(args, option, value)
193		} else {
194			a.cfg.Logger.Errorln("Unsupported asciidoctor value `" + value + "` for option " + option + " was passed in and will be ignored.")
195		}
196	}
197	return args
198}
199
// asciiDocBinaryName is the name of the external executable; it is passed to
// hexec.InPath for the availability check and to the external renderer.
const asciiDocBinaryName = "asciidoctor"
201
// hasAsciiDoc reports the result of hexec.InPath for the asciidoctor binary
// name.
func hasAsciiDoc() bool {
	return hexec.InPath(asciiDocBinaryName)
}
205
206// extractTOC extracts the toc from the given src html.
207// It returns the html without the TOC, and the TOC data
208func (a *asciidocConverter) extractTOC(src []byte) ([]byte, tableofcontents.Root, error) {
209	var buf bytes.Buffer
210	buf.Write(src)
211	node, err := html.Parse(&buf)
212	if err != nil {
213		return nil, tableofcontents.Root{}, err
214	}
215	var (
216		f       func(*html.Node) bool
217		toc     tableofcontents.Root
218		toVisit []*html.Node
219	)
220	f = func(n *html.Node) bool {
221		if n.Type == html.ElementNode && n.Data == "div" && attr(n, "id") == "toc" {
222			toc = parseTOC(n)
223			if !a.cfg.MarkupConfig.AsciidocExt.PreserveTOC {
224				n.Parent.RemoveChild(n)
225			}
226			return true
227		}
228		if n.FirstChild != nil {
229			toVisit = append(toVisit, n.FirstChild)
230		}
231		if n.NextSibling != nil && f(n.NextSibling) {
232			return true
233		}
234		for len(toVisit) > 0 {
235			nv := toVisit[0]
236			toVisit = toVisit[1:]
237			if f(nv) {
238				return true
239			}
240		}
241		return false
242	}
243	f(node)
244	if err != nil {
245		return nil, tableofcontents.Root{}, err
246	}
247	buf.Reset()
248	err = html.Render(&buf, node)
249	if err != nil {
250		return nil, tableofcontents.Root{}, err
251	}
252	// ltrim <html><head></head><body> and rtrim </body></html> which are added by html.Render
253	res := buf.Bytes()[25:]
254	res = res[:len(res)-14]
255	return res, toc, nil
256}
257
258// parseTOC returns a TOC root from the given toc Node
259func parseTOC(doc *html.Node) tableofcontents.Root {
260	var (
261		toc tableofcontents.Root
262		f   func(*html.Node, int, int)
263	)
264	f = func(n *html.Node, row, level int) {
265		if n.Type == html.ElementNode {
266			switch n.Data {
267			case "ul":
268				if level == 0 {
269					row++
270				}
271				level++
272				f(n.FirstChild, row, level)
273			case "li":
274				for c := n.FirstChild; c != nil; c = c.NextSibling {
275					if c.Type != html.ElementNode || c.Data != "a" {
276						continue
277					}
278					href := attr(c, "href")[1:]
279					toc.AddAt(tableofcontents.Heading{
280						Text: nodeContent(c),
281						ID:   href,
282					}, row, level)
283				}
284				f(n.FirstChild, row, level)
285			}
286		}
287		if n.NextSibling != nil {
288			f(n.NextSibling, row, level)
289		}
290	}
291	f(doc.FirstChild, -1, 0)
292	return toc
293}
294
295func attr(node *html.Node, key string) string {
296	for _, a := range node.Attr {
297		if a.Key == key {
298			return a.Val
299		}
300	}
301	return ""
302}
303
304func nodeContent(node *html.Node) string {
305	var buf bytes.Buffer
306	for c := node.FirstChild; c != nil; c = c.NextSibling {
307		html.Render(&buf, c)
308	}
309	return buf.String()
310}
311
312// Supports returns whether Asciidoctor is installed on this computer.
313func Supports() bool {
314	hasBin := hasAsciiDoc()
315	if htesting.SupportsAll() {
316		if !hasBin {
317			panic("asciidoctor not installed")
318		}
319		return true
320	}
321	return hasBin
322}
323