1// Copyright 2017 The Hugo Authors. All rights reserved.
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6// http://www.apache.org/licenses/LICENSE-2.0
7//
8// Unless required by applicable law or agreed to in writing, software
9// distributed under the License is distributed on an "AS IS" BASIS,
10// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11// See the License for the specific language governing permissions and
12// limitations under the License.
13
14// Package data provides template functions for working with external data
15// sources.
16package data
17
18import (
19	"bytes"
20	"encoding/csv"
21	"encoding/json"
22	"errors"
23	"net/http"
24	"strings"
25
26	"github.com/gohugoio/hugo/common/maps"
27	"github.com/gohugoio/hugo/config/security"
28
29	"github.com/gohugoio/hugo/common/types"
30
31	"github.com/gohugoio/hugo/common/constants"
32	"github.com/gohugoio/hugo/common/loggers"
33
34	"github.com/spf13/cast"
35
36	"github.com/gohugoio/hugo/cache/filecache"
37	"github.com/gohugoio/hugo/deps"
38	_errors "github.com/pkg/errors"
39)
40
41// New returns a new instance of the data-namespaced template functions.
42func New(deps *deps.Deps) *Namespace {
43	return &Namespace{
44		deps:         deps,
45		cacheGetCSV:  deps.FileCaches.GetCSVCache(),
46		cacheGetJSON: deps.FileCaches.GetJSONCache(),
47		client:       http.DefaultClient,
48	}
49}
50
51// Namespace provides template functions for the "data" namespace.
52type Namespace struct {
53	deps *deps.Deps
54
55	cacheGetJSON *filecache.Cache
56	cacheGetCSV  *filecache.Cache
57
58	client *http.Client
59}
60
61// GetCSV expects a data separator and one or n-parts of a URL to a resource which
62// can either be a local or a remote one.
63// The data separator can be a comma, semi-colon, pipe, etc, but only one character.
64// If you provide multiple parts for the URL they will be joined together to the final URL.
65// GetCSV returns nil or a slice slice to use in a short code.
66func (ns *Namespace) GetCSV(sep string, args ...interface{}) (d [][]string, err error) {
67	url, headers := toURLAndHeaders(args)
68	cache := ns.cacheGetCSV
69
70	unmarshal := func(b []byte) (bool, error) {
71		if d, err = parseCSV(b, sep); err != nil {
72			err = _errors.Wrapf(err, "failed to parse CSV file %s", url)
73
74			return true, err
75		}
76
77		return false, nil
78	}
79
80	var req *http.Request
81	req, err = http.NewRequest("GET", url, nil)
82	if err != nil {
83		return nil, _errors.Wrapf(err, "failed to create request for getCSV for resource %s", url)
84	}
85
86	// Add custom user headers.
87	addUserProvidedHeaders(headers, req)
88	addDefaultHeaders(req, "text/csv", "text/plain")
89
90	err = ns.getResource(cache, unmarshal, req)
91	if err != nil {
92		if security.IsAccessDenied(err) {
93			return nil, err
94		}
95		ns.deps.Log.(loggers.IgnorableLogger).Errorsf(constants.ErrRemoteGetCSV, "Failed to get CSV resource %q: %s", url, err)
96		return nil, nil
97	}
98
99	return
100}
101
102// GetJSON expects one or n-parts of a URL to a resource which can either be a local or a remote one.
103// If you provide multiple parts they will be joined together to the final URL.
104// GetJSON returns nil or parsed JSON to use in a short code.
105func (ns *Namespace) GetJSON(args ...interface{}) (interface{}, error) {
106	var v interface{}
107	url, headers := toURLAndHeaders(args)
108	cache := ns.cacheGetJSON
109
110	req, err := http.NewRequest("GET", url, nil)
111	if err != nil {
112		return nil, _errors.Wrapf(err, "Failed to create request for getJSON resource %s", url)
113	}
114
115	unmarshal := func(b []byte) (bool, error) {
116		err := json.Unmarshal(b, &v)
117		if err != nil {
118			return true, err
119		}
120		return false, nil
121	}
122
123	addUserProvidedHeaders(headers, req)
124	addDefaultHeaders(req, "application/json")
125
126	err = ns.getResource(cache, unmarshal, req)
127	if err != nil {
128		if security.IsAccessDenied(err) {
129			return nil, err
130		}
131		ns.deps.Log.(loggers.IgnorableLogger).Errorsf(constants.ErrRemoteGetJSON, "Failed to get JSON resource %q: %s", url, err)
132		return nil, nil
133	}
134
135	return v, nil
136}
137
138func addDefaultHeaders(req *http.Request, accepts ...string) {
139	for _, accept := range accepts {
140		if !hasHeaderValue(req.Header, "Accept", accept) {
141			req.Header.Add("Accept", accept)
142		}
143	}
144	if !hasHeaderKey(req.Header, "User-Agent") {
145		req.Header.Add("User-Agent", "Hugo Static Site Generator")
146	}
147}
148
149func addUserProvidedHeaders(headers map[string]interface{}, req *http.Request) {
150	if headers == nil {
151		return
152	}
153	for key, val := range headers {
154		vals := types.ToStringSlicePreserveString(val)
155		for _, s := range vals {
156			req.Header.Add(key, s)
157		}
158	}
159}
160
// hasHeaderValue reports whether the header entry stored under key contains
// value as one of its elements.
//
// key is looked up in the map exactly as given (no canonicalization), and
// values are compared with ==.
func hasHeaderValue(m http.Header, key, value string) bool {
	// A missing key yields a nil slice, so the loop simply does not run.
	for _, candidate := range m[key] {
		if candidate == value {
			return true
		}
	}
	return false
}
176
// hasHeaderKey reports whether m has an entry for key. The key is looked up
// exactly as given (no canonicalization); an entry with an empty or nil value
// list still counts as present.
func hasHeaderKey(m http.Header, key string) bool {
	if _, present := m[key]; present {
		return true
	}
	return false
}
181
182func toURLAndHeaders(urlParts []interface{}) (string, map[string]interface{}) {
183	if len(urlParts) == 0 {
184		return "", nil
185	}
186
187	// The last argument may be a map.
188	headers, err := maps.ToStringMapE(urlParts[len(urlParts)-1])
189	if err == nil {
190		urlParts = urlParts[:len(urlParts)-1]
191	} else {
192		headers = nil
193	}
194
195	return strings.Join(cast.ToStringSlice(urlParts), ""), headers
196}
197
// parseCSV decodes c as CSV using sep as the field delimiter and returns all
// records as a slice of string slices.
//
// sep must hold exactly one character. The length is counted in runes, not
// bytes, so a single multi-byte delimiter such as "§" is accepted; an empty
// or longer separator yields an error. Errors from the CSV reader (malformed
// input, a delimiter the reader rejects, inconsistent field counts) are
// returned unchanged.
func parseCSV(c []byte, sep string) ([][]string, error) {
	delim := []rune(sep)
	if len(delim) != 1 {
		return nil, errors.New("Incorrect length of CSV separator: " + sep)
	}

	r := csv.NewReader(bytes.NewReader(c))
	r.Comma = delim[0]
	// 0 makes the reader take the field count from the first record and
	// require the same count in every following record.
	r.FieldsPerRecord = 0
	return r.ReadAll()
}
210