1package data
2
3import (
4	"bytes"
5	"context"
6	"encoding/json"
7	"fmt"
8	"io/ioutil"
9	"net/url"
10	"os"
11	"path"
12	"path/filepath"
13	"strings"
14
15	"github.com/hairyhenderson/gomplate/v3/base64"
16	"github.com/hairyhenderson/gomplate/v3/env"
17
18	"github.com/go-git/go-billy/v5"
19	"github.com/go-git/go-billy/v5/memfs"
20	"github.com/go-git/go-git/v5"
21	"github.com/go-git/go-git/v5/plumbing"
22	"github.com/go-git/go-git/v5/plumbing/transport"
23	"github.com/go-git/go-git/v5/plumbing/transport/http"
24	"github.com/go-git/go-git/v5/plumbing/transport/ssh"
25	"github.com/go-git/go-git/v5/storage/memory"
26)
27
28func readGit(source *Source, args ...string) ([]byte, error) {
29	ctx := context.TODO()
30	g := gitsource{}
31
32	u := source.URL
33	repoURL, path, err := g.parseGitPath(u, args...)
34	if err != nil {
35		return nil, err
36	}
37
38	depth := 1
39	if u.Scheme == "git+file" {
40		// we can't do shallow clones for filesystem repos apparently
41		depth = 0
42	}
43
44	fs, _, err := g.clone(ctx, repoURL, depth)
45	if err != nil {
46		return nil, err
47	}
48
49	mimeType, out, err := g.read(fs, path)
50	if mimeType != "" {
51		source.mediaType = mimeType
52	}
53	return out, err
54}
55
// gitsource groups the helpers used to read data out of git repositories.
// It has no fields, so its zero value is ready to use.
type gitsource struct{}
58
59func (g gitsource) parseArgURL(arg string) (u *url.URL, err error) {
60	if strings.HasPrefix(arg, "//") {
61		u, err = url.Parse(arg[1:])
62		u.Path = "/" + u.Path
63	} else {
64		u, err = url.Parse(arg)
65	}
66
67	if err != nil {
68		return nil, fmt.Errorf("failed to parse arg %s: %w", arg, err)
69	}
70	return u, err
71}
72
73func (g gitsource) parseQuery(orig, arg *url.URL) string {
74	q := orig.Query()
75	pq := arg.Query()
76	for k, vs := range pq {
77		for _, v := range vs {
78			q.Add(k, v)
79		}
80	}
81	return q.Encode()
82}
83
84func (g gitsource) parseArgPath(u *url.URL, arg string) (repo, p string) {
85	// if the source URL already specified a repo and subpath, the whole
86	// arg is interpreted as subpath
87	if strings.Contains(u.Path, "//") || strings.HasPrefix(arg, "//") {
88		return "", arg
89	}
90
91	parts := strings.SplitN(arg, "//", 2)
92	repo = parts[0]
93	if len(parts) == 2 {
94		p = "/" + parts[1]
95	}
96	return repo, p
97}
98
99// Massage the URL and args together to produce the repo to clone,
100// and the path to read.
101// The path is delimited from the repo by '//'
102func (g gitsource) parseGitPath(u *url.URL, args ...string) (out *url.URL, p string, err error) {
103	if u == nil {
104		return nil, "", fmt.Errorf("parseGitPath: no url provided (%v)", u)
105	}
106	// copy the input url so we can modify it
107	out = cloneURL(u)
108
109	parts := strings.SplitN(out.Path, "//", 2)
110	switch len(parts) {
111	case 1:
112		p = "/"
113	case 2:
114		p = "/" + parts[1]
115
116		i := strings.LastIndex(out.Path, p)
117		out.Path = out.Path[:i-1]
118	}
119
120	if len(args) > 0 {
121		argURL, uerr := g.parseArgURL(args[0])
122		if uerr != nil {
123			return nil, "", uerr
124		}
125		repo, argpath := g.parseArgPath(u, argURL.Path)
126		out.Path = path.Join(out.Path, repo)
127		p = path.Join(p, argpath)
128
129		out.RawQuery = g.parseQuery(u, argURL)
130
131		if argURL.Fragment != "" {
132			out.Fragment = argURL.Fragment
133		}
134	}
135	return out, p, err
136}
137
// cloneURL returns a copy of u that can be modified without affecting u.
//
// The copy is normally produced by re-parsing u's string form. A URL that
// was assembled or modified by hand can have a string form that doesn't
// parse, and in that case a field-by-field copy (with its own copy of the
// user info) is returned instead. The result is therefore never nil, so
// callers can safely dereference it.
//
//nolint: interfacer
func cloneURL(u *url.URL) *url.URL {
	if out, err := url.Parse(u.String()); err == nil {
		return out
	}

	dup := *u
	if u.User != nil {
		user := *u.User
		dup.User = &user
	}
	return &dup
}
143
144func (g gitsource) refFromURL(u *url.URL) plumbing.ReferenceName {
145	switch {
146	case strings.HasPrefix(u.Fragment, "refs/"):
147		return plumbing.ReferenceName(u.Fragment)
148	case u.Fragment != "":
149		return plumbing.NewBranchReferenceName(u.Fragment)
150	default:
151		return plumbing.ReferenceName("")
152	}
153}
154
155// clone a repo for later reading through http(s), git, or ssh. u must be the URL to the repo
156// itself, and must have any file path stripped
157func (g gitsource) clone(ctx context.Context, repoURL *url.URL, depth int) (billy.Filesystem, *git.Repository, error) {
158	fs := memfs.New()
159	storer := memory.NewStorage()
160
161	// preserve repoURL by cloning it
162	u := cloneURL(repoURL)
163
164	auth, err := g.auth(u)
165	if err != nil {
166		return nil, nil, err
167	}
168
169	if strings.HasPrefix(u.Scheme, "git+") {
170		scheme := u.Scheme[len("git+"):]
171		u.Scheme = scheme
172	}
173
174	ref := g.refFromURL(u)
175	u.Fragment = ""
176	u.RawQuery = ""
177
178	opts := &git.CloneOptions{
179		URL:           u.String(),
180		Auth:          auth,
181		Depth:         depth,
182		ReferenceName: ref,
183		SingleBranch:  true,
184		Tags:          git.NoTags,
185	}
186	repo, err := git.CloneContext(ctx, storer, fs, opts)
187	if u.Scheme == "file" && err == transport.ErrRepositoryNotFound && !strings.HasSuffix(u.Path, ".git") {
188		// maybe this has a `.git` subdirectory...
189		u = cloneURL(repoURL)
190		u.Path = path.Join(u.Path, ".git")
191		return g.clone(ctx, u, depth)
192	}
193	if err != nil {
194		return nil, nil, fmt.Errorf("git clone for %v failed: %w", repoURL, err)
195	}
196	return fs, repo, nil
197}
198
199// read - reads the provided path out of a git repo
200func (g gitsource) read(fs billy.Filesystem, path string) (string, []byte, error) {
201	fi, err := fs.Stat(path)
202	if err != nil {
203		return "", nil, fmt.Errorf("can't stat %s: %w", path, err)
204	}
205	if fi.IsDir() || strings.HasSuffix(path, string(filepath.Separator)) {
206		out, rerr := g.readDir(fs, path)
207		return jsonArrayMimetype, out, rerr
208	}
209
210	f, err := fs.OpenFile(path, os.O_RDONLY, 0)
211	if err != nil {
212		return "", nil, fmt.Errorf("can't open %s: %w", path, err)
213	}
214
215	b, err := ioutil.ReadAll(f)
216	if err != nil {
217		return "", nil, fmt.Errorf("can't read %s: %w", path, err)
218	}
219
220	return "", b, nil
221}
222
223func (g gitsource) readDir(fs billy.Filesystem, path string) ([]byte, error) {
224	names, err := fs.ReadDir(path)
225	if err != nil {
226		return nil, fmt.Errorf("couldn't read dir %s: %w", path, err)
227	}
228	files := make([]string, len(names))
229	for i, v := range names {
230		files[i] = v.Name()
231	}
232
233	var buf bytes.Buffer
234	enc := json.NewEncoder(&buf)
235	if err := enc.Encode(files); err != nil {
236		return nil, err
237	}
238	b := buf.Bytes()
239	// chop off the newline added by the json encoder
240	return b[:len(b)-1], nil
241}
242
243/*
244auth methods:
245- ssh named key (no password support)
246	- GIT_SSH_KEY (base64-encoded) or GIT_SSH_KEY_FILE (base64-encoded, or not)
247- ssh agent auth (preferred)
248- http basic auth (for github, gitlab, bitbucket tokens)
249- http token auth (bearer token, somewhat unusual)
250*/
251func (g gitsource) auth(u *url.URL) (auth transport.AuthMethod, err error) {
252	user := u.User.Username()
253	switch u.Scheme {
254	case "git+http", "git+https":
255		if pass, ok := u.User.Password(); ok {
256			auth = &http.BasicAuth{Username: user, Password: pass}
257		} else if pass := env.Getenv("GIT_HTTP_PASSWORD"); pass != "" {
258			auth = &http.BasicAuth{Username: user, Password: pass}
259		} else if tok := env.Getenv("GIT_HTTP_TOKEN"); tok != "" {
260			// note docs on TokenAuth - this is rarely to be used
261			auth = &http.TokenAuth{Token: tok}
262		}
263	case "git+ssh":
264		k := env.Getenv("GIT_SSH_KEY")
265		if k != "" {
266			var key []byte
267			key, err = base64.Decode(k)
268			if err != nil {
269				key = []byte(k)
270			}
271			auth, err = ssh.NewPublicKeys(user, key, "")
272		} else {
273			auth, err = ssh.NewSSHAgentAuth(user)
274		}
275	}
276	return auth, err
277}
278