1// Copyright 2018 The Go Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style
3// license that can be found in the LICENSE file.
4
5// Package codehost defines the interface implemented by a code hosting source,
6// along with support code for use by implementations.
7package codehost
8
9import (
10	"bytes"
11	"crypto/sha256"
12	"fmt"
13	exec "internal/execabs"
14	"io"
15	"io/fs"
16	"os"
17	"path/filepath"
18	"strings"
19	"sync"
20	"time"
21
22	"cmd/go/internal/cfg"
23	"cmd/go/internal/lockedfile"
24	"cmd/go/internal/str"
25)
26
// Downloaded size limits.
// NOTE(review): the values are presumably byte counts (16<<20 is 16 MiB,
// 500<<20 is 500 MiB); confirm against the callers that pass them as maxSize.
const (
	MaxGoMod   = 16 << 20  // maximum size of go.mod file
	MaxLICENSE = 16 << 20  // maximum size of LICENSE file
	MaxZipFile = 500 << 20 // maximum size of downloaded zip file
)
33
// A Repo represents a code hosting source.
// Typical implementations include local version control repositories,
// remote version control servers, and code hosting sites.
// A Repo must be safe for simultaneous use by multiple goroutines.
type Repo interface {
	// Tags lists all tags with the given prefix.
	Tags(prefix string) (tags []string, err error)

	// Stat returns information about the revision rev.
	// A revision can be any identifier known to the underlying service:
	// commit hash, branch, tag, and so on.
	Stat(rev string) (*RevInfo, error)

	// Latest returns the latest revision on the default branch,
	// whatever that means in the underlying implementation.
	Latest() (*RevInfo, error)

	// ReadFile reads the given file in the file tree corresponding to revision rev.
	// It should refuse to read more than maxSize bytes.
	//
	// If the requested file does not exist it should return an error for which
	// os.IsNotExist(err) returns true.
	ReadFile(rev, file string, maxSize int64) (data []byte, err error)

	// ReadFileRevs reads a single file at multiple versions.
	// It should refuse to read more than maxSize bytes.
	// The result is a map from each requested rev string
	// to the associated FileRev. The map must have a non-nil
	// entry for every requested rev (unless ReadFileRevs returned an error).
	// A file simply being missing or even corrupted in revs[i]
	// should be reported only in files[revs[i]].Err, not in the error result
	// from ReadFileRevs.
	// The overall call should return an error (and no map) only
	// in the case of a problem with obtaining the data, such as
	// a network failure.
	// Implementations may assume that revs only contain tags,
	// not direct commit hashes.
	ReadFileRevs(revs []string, file string, maxSize int64) (files map[string]*FileRev, err error)

	// ReadZip downloads a zip file for the subdir subdirectory
	// of the given revision.
	// It should refuse to read more than maxSize bytes.
	// It returns a ReadCloser for a streamed copy of the zip file.
	// All files in the zip file are expected to be
	// nested in a single top-level directory, whose name is not specified.
	ReadZip(rev, subdir string, maxSize int64) (zip io.ReadCloser, err error)

	// RecentTag returns the most recent tag on rev or one of its predecessors
	// with the given prefix. allowed may be used to filter out unwanted versions.
	RecentTag(rev, prefix string, allowed func(string) bool) (tag string, err error)

	// DescendsFrom reports whether rev or any of its ancestors has the given tag.
	//
	// DescendsFrom must return true for any tag returned by RecentTag for the
	// same revision.
	DescendsFrom(rev, tag string) (bool, error)
}
91
// A RevInfo describes a single revision in a source code repository.
type RevInfo struct {
	Name    string    // complete ID in underlying repository
	Short   string    // shortened ID, for use in pseudo-version
	Version string    // version used in lookup
	Time    time.Time // commit time
	Tags    []string  // known tags for commit
}
100
// A FileRev describes the result of reading a file at a given revision.
// It is the per-revision value in the map returned by Repo.ReadFileRevs.
type FileRev struct {
	Rev  string // requested revision
	Data []byte // file data
	Err  error  // error if any; os.IsNotExist(Err)==true if rev exists but file does not exist in that rev
}
107
// An UnknownRevisionError reports that a revision is not known.
// It is equivalent to fs.ErrNotExist, but for a revision rather than a file.
type UnknownRevisionError struct {
	Rev string // the revision named in the error message
}

// Error returns "unknown revision " followed by the revision.
func (e *UnknownRevisionError) Error() string {
	const prefix = "unknown revision "
	return prefix + e.Rev
}

// Is reports whether target is fs.ErrNotExist, which lets
// errors.Is(err, fs.ErrNotExist) succeed for an UnknownRevisionError.
func (UnknownRevisionError) Is(target error) bool {
	return target == fs.ErrNotExist
}
120
// ErrNoCommits is an error equivalent to fs.ErrNotExist indicating that a given
// repository or module contains no commits.
var ErrNoCommits = error(noCommitsError{})

// noCommitsError is the concrete type behind ErrNoCommits.
type noCommitsError struct{}

// Error returns the fixed message "no commits".
func (noCommitsError) Error() string {
	const msg = "no commits"
	return msg
}

// Is reports whether target is fs.ErrNotExist, which lets
// errors.Is(ErrNoCommits, fs.ErrNotExist) succeed.
func (noCommitsError) Is(target error) bool {
	return target == fs.ErrNotExist
}
133
// AllHex reports whether the revision rev is entirely lower-case hexadecimal digits.
// The empty string contains no other bytes, so AllHex("") is true.
func AllHex(rev string) bool {
	for _, b := range []byte(rev) {
		switch {
		case b >= '0' && b <= '9':
			// decimal digit
		case b >= 'a' && b <= 'f':
			// lower-case hex letter
		default:
			return false
		}
	}
	return true
}
145
146// ShortenSHA1 shortens a SHA1 hash (40 hex digits) to the canonical length
147// used in pseudo-versions (12 hex digits).
148func ShortenSHA1(rev string) string {
149	if AllHex(rev) && len(rev) == 40 {
150		return rev[:12]
151	}
152	return rev
153}
154
155// WorkDir returns the name of the cached work directory to use for the
156// given repository type and name.
157func WorkDir(typ, name string) (dir, lockfile string, err error) {
158	if cfg.GOMODCACHE == "" {
159		return "", "", fmt.Errorf("neither GOPATH nor GOMODCACHE are set")
160	}
161
162	// We name the work directory for the SHA256 hash of the type and name.
163	// We intentionally avoid the actual name both because of possible
164	// conflicts with valid file system paths and because we want to ensure
165	// that one checkout is never nested inside another. That nesting has
166	// led to security problems in the past.
167	if strings.Contains(typ, ":") {
168		return "", "", fmt.Errorf("codehost.WorkDir: type cannot contain colon")
169	}
170	key := typ + ":" + name
171	dir = filepath.Join(cfg.GOMODCACHE, "cache/vcs", fmt.Sprintf("%x", sha256.Sum256([]byte(key))))
172
173	if cfg.BuildX {
174		fmt.Fprintf(os.Stderr, "mkdir -p %s # %s %s\n", filepath.Dir(dir), typ, name)
175	}
176	if err := os.MkdirAll(filepath.Dir(dir), 0777); err != nil {
177		return "", "", err
178	}
179
180	lockfile = dir + ".lock"
181	if cfg.BuildX {
182		fmt.Fprintf(os.Stderr, "# lock %s", lockfile)
183	}
184
185	unlock, err := lockedfile.MutexAt(lockfile).Lock()
186	if err != nil {
187		return "", "", fmt.Errorf("codehost.WorkDir: can't find or create lock file: %v", err)
188	}
189	defer unlock()
190
191	data, err := os.ReadFile(dir + ".info")
192	info, err2 := os.Stat(dir)
193	if err == nil && err2 == nil && info.IsDir() {
194		// Info file and directory both already exist: reuse.
195		have := strings.TrimSuffix(string(data), "\n")
196		if have != key {
197			return "", "", fmt.Errorf("%s exists with wrong content (have %q want %q)", dir+".info", have, key)
198		}
199		if cfg.BuildX {
200			fmt.Fprintf(os.Stderr, "# %s for %s %s\n", dir, typ, name)
201		}
202		return dir, lockfile, nil
203	}
204
205	// Info file or directory missing. Start from scratch.
206	if cfg.BuildX {
207		fmt.Fprintf(os.Stderr, "mkdir -p %s # %s %s\n", dir, typ, name)
208	}
209	os.RemoveAll(dir)
210	if err := os.MkdirAll(dir, 0777); err != nil {
211		return "", "", err
212	}
213	if err := os.WriteFile(dir+".info", []byte(key), 0666); err != nil {
214		os.RemoveAll(dir)
215		return "", "", err
216	}
217	return dir, lockfile, nil
218}
219
// A RunError describes a command that failed to run successfully.
// RunWithStdin returns one when the command it executes reports an error.
type RunError struct {
	Cmd      string // description of the command that was run
	Err      error  // underlying error; must be non-nil when Error is called
	Stderr   []byte // standard error output captured from the command
	HelpText string // optional extra text appended on its own line
}

// Error formats the error as "Cmd: Err", followed by the command's standard
// error (trailing newlines removed, each line indented by a tab) when there
// is any, followed by HelpText on its own line when it is non-empty.
func (e *RunError) Error() string {
	var msg strings.Builder
	msg.WriteString(e.Cmd)
	msg.WriteString(": ")
	msg.WriteString(e.Err.Error())

	if out := bytes.TrimRight(e.Stderr, "\n"); len(out) > 0 {
		msg.WriteString(":\n\t")
		msg.WriteString(strings.ReplaceAll(string(out), "\n", "\n\t"))
	}
	if e.HelpText != "" {
		msg.WriteString("\n")
		msg.WriteString(e.HelpText)
	}
	return msg.String()
}
238
// dirLock maps a directory name to the *sync.Mutex that RunWithStdin holds
// while a command runs in that directory.
var dirLock sync.Map
240
241// Run runs the command line in the given directory
242// (an empty dir means the current directory).
243// It returns the standard output and, for a non-zero exit,
244// a *RunError indicating the command, exit status, and standard error.
245// Standard error is unavailable for commands that exit successfully.
246func Run(dir string, cmdline ...interface{}) ([]byte, error) {
247	return RunWithStdin(dir, nil, cmdline...)
248}
249
// bashQuoter escapes characters that have special meaning in double-quoted strings in the bash shell.
// See https://www.gnu.org/software/bash/manual/html_node/Double-Quotes.html.
//
// It puts a backslash before each double quote, dollar sign, backquote, and
// backslash. RunWithStdin uses it when echoing an argument that contains a
// single quote, which it prints inside double quotes.
var bashQuoter = strings.NewReplacer(`"`, `\"`, `$`, `\$`, "`", "\\`", `\`, `\\`)
253
254func RunWithStdin(dir string, stdin io.Reader, cmdline ...interface{}) ([]byte, error) {
255	if dir != "" {
256		muIface, ok := dirLock.Load(dir)
257		if !ok {
258			muIface, _ = dirLock.LoadOrStore(dir, new(sync.Mutex))
259		}
260		mu := muIface.(*sync.Mutex)
261		mu.Lock()
262		defer mu.Unlock()
263	}
264
265	cmd := str.StringList(cmdline...)
266	if os.Getenv("TESTGOVCS") == "panic" {
267		panic(fmt.Sprintf("use of vcs: %v", cmd))
268	}
269	if cfg.BuildX {
270		text := new(strings.Builder)
271		if dir != "" {
272			text.WriteString("cd ")
273			text.WriteString(dir)
274			text.WriteString("; ")
275		}
276		for i, arg := range cmd {
277			if i > 0 {
278				text.WriteByte(' ')
279			}
280			switch {
281			case strings.ContainsAny(arg, "'"):
282				// Quote args that could be mistaken for quoted args.
283				text.WriteByte('"')
284				text.WriteString(bashQuoter.Replace(arg))
285				text.WriteByte('"')
286			case strings.ContainsAny(arg, "$`\\*?[\"\t\n\v\f\r \u0085\u00a0"):
287				// Quote args that contain special characters, glob patterns, or spaces.
288				text.WriteByte('\'')
289				text.WriteString(arg)
290				text.WriteByte('\'')
291			default:
292				text.WriteString(arg)
293			}
294		}
295		fmt.Fprintf(os.Stderr, "%s\n", text)
296		start := time.Now()
297		defer func() {
298			fmt.Fprintf(os.Stderr, "%.3fs # %s\n", time.Since(start).Seconds(), text)
299		}()
300	}
301	// TODO: Impose limits on command output size.
302	// TODO: Set environment to get English error messages.
303	var stderr bytes.Buffer
304	var stdout bytes.Buffer
305	c := exec.Command(cmd[0], cmd[1:]...)
306	c.Dir = dir
307	c.Stdin = stdin
308	c.Stderr = &stderr
309	c.Stdout = &stdout
310	err := c.Run()
311	if err != nil {
312		err = &RunError{Cmd: strings.Join(cmd, " ") + " in " + dir, Stderr: stderr.Bytes(), Err: err}
313	}
314	return stdout.Bytes(), err
315}
316