1// Copyright 2019 The Hugo Authors. All rights reserved.
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6// http://www.apache.org/licenses/LICENSE-2.0
7//
8// Unless required by applicable law or agreed to in writing, software
9// distributed under the License is distributed on an "AS IS" BASIS,
10// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11// See the License for the specific language governing permissions and
12// limitations under the License.
13
14package files
15
16import (
17	"bufio"
18	"fmt"
19	"io"
20	"os"
21	"path/filepath"
22	"sort"
23	"strings"
24	"unicode"
25
26	"github.com/spf13/afero"
27)
28
const (
	// FilenamePackageHugoJSON is the name of the NPM package.json "template" file.
	FilenamePackageHugoJSON = "package.hugo.json"
	// FilenamePackageJSON is the name of the NPM package file.
	FilenamePackageJSON = "package.json"
)
35
var (
	// contentFileExtensions holds the file extensions, written without the
	// leading dot, that mark a file as a content file.
	// This should be the only list of valid extensions for content files.
	contentFileExtensions = []string{
		"html", "htm",
		"mdown", "markdown", "md",
		"asciidoc", "adoc", "ad",
		"rest", "rst",
		"mmark",
		"org",
		"pandoc", "pdc",
	}

	// contentFileExtensionsSet is the lookup set built from
	// contentFileExtensions in init.
	contentFileExtensionsSet map[string]bool

	// htmlFileExtensions holds the content file extensions, written without
	// the leading dot, that mark a file as an HTML file.
	htmlFileExtensions = []string{
		"html", "htm",
	}

	// htmlFileExtensionsSet is the lookup set built from htmlFileExtensions
	// in init.
	htmlFileExtensionsSet map[string]bool
)
56
57func init() {
58	contentFileExtensionsSet = make(map[string]bool)
59	for _, ext := range contentFileExtensions {
60		contentFileExtensionsSet[ext] = true
61	}
62	htmlFileExtensionsSet = make(map[string]bool)
63	for _, ext := range htmlFileExtensions {
64		htmlFileExtensionsSet[ext] = true
65	}
66}
67
68func IsContentFile(filename string) bool {
69	return contentFileExtensionsSet[strings.TrimPrefix(filepath.Ext(filename), ".")]
70}
71
72func IsIndexContentFile(filename string) bool {
73	if !IsContentFile(filename) {
74		return false
75	}
76
77	base := filepath.Base(filename)
78
79	return strings.HasPrefix(base, "index.") || strings.HasPrefix(base, "_index.")
80}
81
82func IsHTMLFile(filename string) bool {
83	return htmlFileExtensionsSet[strings.TrimPrefix(filepath.Ext(filename), ".")]
84}
85
86func IsContentExt(ext string) bool {
87	return contentFileExtensionsSet[ext]
88}
89
// ContentClass is the classification assigned to a file by
// ClassifyContentFile.
type ContentClass string

// The non-bundle classes carry a "z" prefix so that they sort below the
// bundle classes.
const (
	ContentClassLeaf    ContentClass = "leaf"
	ContentClassBranch  ContentClass = "branch"
	ContentClassFile    ContentClass = "zfile" // Sort below
	ContentClassContent ContentClass = "zcontent"
)

// IsBundle reports whether c is one of the two bundle classes, leaf or
// branch.
func (c ContentClass) IsBundle() bool {
	switch c {
	case ContentClassLeaf, ContentClassBranch:
		return true
	default:
		return false
	}
}
102
103func ClassifyContentFile(filename string, open func() (afero.File, error)) ContentClass {
104	if !IsContentFile(filename) {
105		return ContentClassFile
106	}
107
108	if IsHTMLFile(filename) {
109		// We need to look inside the file. If the first non-whitespace
110		// character is a "<", then we treat it as a regular file.
111		// Eearlier we created pages for these files, but that had all sorts
112		// of troubles, and isn't what it says in the documentation.
113		// See https://github.com/gohugoio/hugo/issues/7030
114		if open == nil {
115			panic(fmt.Sprintf("no file opener provided for %q", filename))
116		}
117
118		f, err := open()
119		if err != nil {
120			return ContentClassFile
121		}
122		ishtml := isHTMLContent(f)
123		f.Close()
124		if ishtml {
125			return ContentClassFile
126		}
127
128	}
129
130	if strings.HasPrefix(filename, "_index.") {
131		return ContentClassBranch
132	}
133
134	if strings.HasPrefix(filename, "index.") {
135		return ContentClassLeaf
136	}
137
138	return ContentClassContent
139}
140
// htmlComment is the rune sequence that opens an HTML comment.
var htmlComment = []rune{'<', '!', '-', '-'}

// isHTMLContent reports whether the content read from r should be treated as
// HTML.
//
// Leading whitespace is skipped. The result is false when the first
// non-whitespace rune is not '<', or when the content starts with the HTML
// comment opener "<!--". It is true when the content starts with '<' followed
// by anything other than the rest of the comment opener. It is also true when
// reading stops (end of input or a read error) before the opener is complete,
// which includes empty and whitespace-only input.
func isHTMLContent(r io.Reader) bool {
	br := bufio.NewReader(r)

	// matched counts how many runes of htmlComment have been seen so far.
	// Checking it in the loop condition means a complete "<!--" is recognised
	// even when it is the very last thing in the input.
	matched := 0
	for matched < len(htmlComment) {
		c, _, err := br.ReadRune()
		if err != nil {
			return true
		}

		if matched == 0 && unicode.IsSpace(c) {
			continue
		}

		if c != htmlComment[matched] {
			// A mismatch on the first rune means the content does not start
			// with '<' at all; a later mismatch means it starts with '<' but
			// is not a comment.
			return matched > 0
		}

		matched++
	}

	return false
}
174
const (
	// Names of the component folders.
	ComponentFolderArchetypes = "archetypes"
	ComponentFolderStatic     = "static"
	ComponentFolderLayouts    = "layouts"
	ComponentFolderContent    = "content"
	ComponentFolderData       = "data"
	ComponentFolderAssets     = "assets"
	ComponentFolderI18n       = "i18n"

	// FolderResources is the name of the resources folder.
	FolderResources = "resources"
	// FolderJSConfig is the name of the JS config folder.
	FolderJSConfig  = "_jsconfig" // Mounted below /assets with postcss.config.js etc.
)
187
var (
	// JsConfigFolderMountPrefix is FolderJSConfig joined below the assets
	// component folder.
	JsConfigFolderMountPrefix = filepath.Join(ComponentFolderAssets, FolderJSConfig)

	// ComponentFolders lists the names of all component folders. The slice is
	// sorted in init.
	ComponentFolders = []string{
		ComponentFolderArchetypes,
		ComponentFolderStatic,
		ComponentFolderLayouts,
		ComponentFolderContent,
		ComponentFolderData,
		ComponentFolderAssets,
		ComponentFolderI18n,
	}

	// componentFoldersSet is the lookup set of the names in ComponentFolders,
	// populated in init.
	componentFoldersSet = make(map[string]bool)
)
203
204func init() {
205	sort.Strings(ComponentFolders)
206	for _, f := range ComponentFolders {
207		componentFoldersSet[f] = true
208	}
209}
210
211// ResolveComponentFolder returns "content" from "content/blog/foo.md" etc.
212func ResolveComponentFolder(filename string) string {
213	filename = strings.TrimPrefix(filename, string(os.PathSeparator))
214	for _, cf := range ComponentFolders {
215		if strings.HasPrefix(filename, cf) {
216			return cf
217		}
218	}
219
220	return ""
221}
222
223func IsComponentFolder(name string) bool {
224	return componentFoldersSet[name]
225}
226