1// Copyright 2015 The Hugo Authors. All rights reserved.
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6// http://www.apache.org/licenses/LICENSE-2.0
7//
8// Unless required by applicable law or agreed to in writing, software
9// distributed under the License is distributed on an "AS IS" BASIS,
10// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11// See the License for the specific language governing permissions and
12// limitations under the License.
13
14package helpers
15
16import (
17	"net/url"
18	"path"
19	"path/filepath"
20	"strings"
21
22	"github.com/gohugoio/hugo/common/paths"
23
24	"github.com/PuerkitoBio/purell"
25)
26
27func sanitizeURLWithFlags(in string, f purell.NormalizationFlags) string {
28	s, err := purell.NormalizeURLString(in, f)
29	if err != nil {
30		return in
31	}
32
33	// Temporary workaround for the bug fix and resulting
34	// behavioral change in purell.NormalizeURLString():
35	// a leading '/' was inadvertently added to relative links,
36	// but no longer, see #878.
37	//
38	// I think the real solution is to allow Hugo to
39	// make relative URL with relative path,
40	// e.g. "../../post/hello-again/", as wished by users
41	// in issues #157, #622, etc., without forcing
42	// relative URLs to begin with '/'.
43	// Once the fixes are in, let's remove this kludge
44	// and restore SanitizeURL() to the way it was.
45	//                         -- @anthonyfok, 2015-02-16
46	//
47	// Begin temporary kludge
48	u, err := url.Parse(s)
49	if err != nil {
50		panic(err)
51	}
52	if len(u.Path) > 0 && !strings.HasPrefix(u.Path, "/") {
53		u.Path = "/" + u.Path
54	}
55	return u.String()
56	// End temporary kludge
57
58	// return s
59
60}
61
62// SanitizeURL sanitizes the input URL string.
63func SanitizeURL(in string) string {
64	return sanitizeURLWithFlags(in, purell.FlagsSafe|purell.FlagRemoveTrailingSlash|purell.FlagRemoveDotSegments|purell.FlagRemoveDuplicateSlashes|purell.FlagRemoveUnnecessaryHostDots|purell.FlagRemoveEmptyPortSeparator)
65}
66
67// SanitizeURLKeepTrailingSlash is the same as SanitizeURL, but will keep any trailing slash.
68func SanitizeURLKeepTrailingSlash(in string) string {
69	return sanitizeURLWithFlags(in, purell.FlagsSafe|purell.FlagRemoveDotSegments|purell.FlagRemoveDuplicateSlashes|purell.FlagRemoveUnnecessaryHostDots|purell.FlagRemoveEmptyPortSeparator)
70}
71
72// URLize is similar to MakePath, but with Unicode handling
73// Example:
74//     uri: Vim (text editor)
75//     urlize: vim-text-editor
76func (p *PathSpec) URLize(uri string) string {
77	return p.URLEscape(p.MakePathSanitized(uri))
78}
79
80// URLizeFilename creates an URL from a filename by escaping unicode letters
81// and turn any filepath separator into forward slashes.
82func (p *PathSpec) URLizeFilename(filename string) string {
83	return p.URLEscape(filepath.ToSlash(filename))
84}
85
86// URLEscape escapes unicode letters.
87func (p *PathSpec) URLEscape(uri string) string {
88	// escape unicode letters
89	parsedURI, err := url.Parse(uri)
90	if err != nil {
91		// if net/url can not parse URL it means Sanitize works incorrectly
92		panic(err)
93	}
94	x := parsedURI.String()
95	return x
96}
97
98// AbsURL creates an absolute URL from the relative path given and the BaseURL set in config.
99func (p *PathSpec) AbsURL(in string, addLanguage bool) string {
100	url, err := url.Parse(in)
101	if err != nil {
102		return in
103	}
104
105	if url.IsAbs() || strings.HasPrefix(in, "//") {
106		return in
107	}
108
109	var baseURL string
110	if strings.HasPrefix(in, "/") {
111		u := p.BaseURL.URL()
112		u.Path = ""
113		baseURL = u.String()
114	} else {
115		baseURL = p.BaseURL.String()
116	}
117
118	if addLanguage {
119		prefix := p.GetLanguagePrefix()
120		if prefix != "" {
121			hasPrefix := false
122			// avoid adding language prefix if already present
123			in2 := in
124			if strings.HasPrefix(in, "/") {
125				in2 = in[1:]
126			}
127			if in2 == prefix {
128				hasPrefix = true
129			} else {
130				hasPrefix = strings.HasPrefix(in2, prefix+"/")
131			}
132
133			if !hasPrefix {
134				addSlash := in == "" || strings.HasSuffix(in, "/")
135				in = path.Join(prefix, in)
136
137				if addSlash {
138					in += "/"
139				}
140			}
141		}
142	}
143	return paths.MakePermalink(baseURL, in).String()
144}
145
146// RelURL creates a URL relative to the BaseURL root.
147// Note: The result URL will not include the context root if canonifyURLs is enabled.
148func (p *PathSpec) RelURL(in string, addLanguage bool) string {
149	baseURL := p.BaseURL.String()
150	canonifyURLs := p.CanonifyURLs
151	if (!strings.HasPrefix(in, baseURL) && strings.HasPrefix(in, "http")) || strings.HasPrefix(in, "//") {
152		return in
153	}
154
155	u := in
156
157	if strings.HasPrefix(in, baseURL) {
158		u = strings.TrimPrefix(u, baseURL)
159	}
160
161	if addLanguage {
162		prefix := p.GetLanguagePrefix()
163		if prefix != "" {
164			hasPrefix := false
165			// avoid adding language prefix if already present
166			in2 := in
167			if strings.HasPrefix(in, "/") {
168				in2 = in[1:]
169			}
170			if in2 == prefix {
171				hasPrefix = true
172			} else {
173				hasPrefix = strings.HasPrefix(in2, prefix+"/")
174			}
175
176			if !hasPrefix {
177				hadSlash := strings.HasSuffix(u, "/")
178
179				u = path.Join(prefix, u)
180
181				if hadSlash {
182					u += "/"
183				}
184			}
185		}
186	}
187
188	if !canonifyURLs {
189		u = paths.AddContextRoot(baseURL, u)
190	}
191
192	if in == "" && !strings.HasSuffix(u, "/") && strings.HasSuffix(baseURL, "/") {
193		u += "/"
194	}
195
196	if !strings.HasPrefix(u, "/") {
197		u = "/" + u
198	}
199
200	return u
201}
202
203// PrependBasePath prepends any baseURL sub-folder to the given resource
204func (p *PathSpec) PrependBasePath(rel string, isAbs bool) string {
205	basePath := p.GetBasePath(!isAbs)
206	if basePath != "" {
207		rel = filepath.ToSlash(rel)
208		// Need to prepend any path from the baseURL
209		hadSlash := strings.HasSuffix(rel, "/")
210		rel = path.Join(basePath, rel)
211		if hadSlash {
212			rel += "/"
213		}
214	}
215	return rel
216}
217
218// URLizeAndPrep applies misc sanitation to the given URL to get it in line
219// with the Hugo standard.
220func (p *PathSpec) URLizeAndPrep(in string) string {
221	return p.URLPrep(p.URLize(in))
222}
223
224// URLPrep applies misc sanitation to the given URL.
225func (p *PathSpec) URLPrep(in string) string {
226	if p.UglyURLs {
227		return paths.Uglify(SanitizeURL(in))
228	}
229	pretty := paths.PrettifyURL(SanitizeURL(in))
230	if path.Ext(pretty) == ".xml" {
231		return pretty
232	}
233	url, err := purell.NormalizeURLString(pretty, purell.FlagAddTrailingSlash)
234	if err != nil {
235		return pretty
236	}
237	return url
238}
239