1// Copyright 2018 The Go Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style
3// license that can be found in the LICENSE file.
4
5package modfetch
6
7import (
8	"fmt"
9	"io"
10	"io/fs"
11	"os"
12	"sort"
13	"strconv"
14	"time"
15
16	"cmd/go/internal/cfg"
17	"cmd/go/internal/modfetch/codehost"
18	"cmd/go/internal/par"
19	"cmd/go/internal/vcs"
20	web "cmd/go/internal/web"
21
22	"golang.org/x/mod/module"
23	"golang.org/x/mod/semver"
24)
25
// traceRepo, when changed to true, makes Lookup log its own calls and wrap
// each successfully looked-up Repo in a loggingRepo, so that all repo
// actions are traced on standard error. It is meant for debugging only.
const traceRepo = false
27
// A Repo represents a repository storing all versions of a single module.
// It must be safe for simultaneous use by multiple goroutines.
type Repo interface {
	// ModulePath returns the module path.
	ModulePath() string

	// Versions lists all known versions with the given prefix.
	// Pseudo-versions are not included.
	//
	// Versions should be returned sorted in semver order
	// (implementations can use SortVersions).
	//
	// Versions returns a non-nil error only if there was a problem
	// fetching the list of versions: it may return an empty list
	// along with a nil error if the list of matching versions
	// is known to be empty.
	//
	// If the underlying repository does not exist,
	// Versions returns an error matching errors.Is(_, fs.ErrNotExist).
	Versions(prefix string) ([]string, error)

	// Stat returns information about the revision rev.
	// A revision can be any identifier known to the underlying service:
	// commit hash, branch, tag, and so on.
	Stat(rev string) (*RevInfo, error)

	// Latest returns the latest revision on the default branch,
	// whatever that means in the underlying source code repository.
	// It is only used when there are no tagged versions.
	Latest() (*RevInfo, error)

	// GoMod returns the contents of the go.mod file for the given version.
	GoMod(version string) (data []byte, err error)

	// Zip writes a zip file for the given version to dst.
	Zip(dst io.Writer, version string) error
}
65
// A RevInfo describes a single revision in a module repository.
type RevInfo struct {
	Version string    // suggested version string for this revision
	Time    time.Time // commit time

	// These fields are used for Stat of arbitrary rev,
	// but they are not recorded when talking about module versions.
	// The `json:"-"` tags keep them out of the JSON encoding of a RevInfo.
	Name  string `json:"-"` // complete ID in underlying repository
	Short string `json:"-"` // shortened ID, for use in pseudo-version
}
76
77// Re: module paths, import paths, repository roots, and lookups
78//
79// A module is a collection of Go packages stored in a file tree
80// with a go.mod file at the root of the tree.
81// The go.mod defines the module path, which is the import path
82// corresponding to the root of the file tree.
83// The import path of a directory within that file tree is the module path
84// joined with the name of the subdirectory relative to the root.
85//
86// For example, the module with path rsc.io/qr corresponds to the
87// file tree in the repository https://github.com/rsc/qr.
88// That file tree has a go.mod that says "module rsc.io/qr".
89// The package in the root directory has import path "rsc.io/qr".
90// The package in the gf256 subdirectory has import path "rsc.io/qr/gf256".
91// In this example, "rsc.io/qr" is both a module path and an import path.
92// But "rsc.io/qr/gf256" is only an import path, not a module path:
93// it names an importable package, but not a module.
94//
95// As a special case to incorporate code written before modules were
96// introduced, if a path p resolves using the pre-module "go get" lookup
97// to the root of a source code repository without a go.mod file,
98// that repository is treated as if it had a go.mod in its root directory
99// declaring module path p. (The go.mod is further considered to
100// contain requirements corresponding to any legacy version
101// tracking format such as Gopkg.lock, vendor/vendor.conf, and so on.)
102//
103// The presentation so far ignores the fact that a source code repository
104// has many different versions of a file tree, and those versions may
105// differ in whether a particular go.mod exists and what it contains.
106// In fact there is a well-defined mapping only from a module path, version
107// pair - often written path@version - to a particular file tree.
108// For example rsc.io/qr@v0.1.0 depends on the "implicit go.mod at root of
109// repository" rule, while rsc.io/qr@v0.2.0 has an explicit go.mod.
110// Because the "go get" import paths rsc.io/qr and github.com/rsc/qr
111// both redirect to the Git repository https://github.com/rsc/qr,
112// github.com/rsc/qr@v0.1.0 is the same file tree as rsc.io/qr@v0.1.0
113// but a different module (a different name). In contrast, since v0.2.0
114// of that repository has an explicit go.mod that declares path rsc.io/qr,
115// github.com/rsc/qr@v0.2.0 is an invalid module path, version pair.
116// Before modules, import comments would have had the same effect.
117//
118// The set of import paths associated with a given module path is
119// clearly not fixed: at the least, new directories with new import paths
120// can always be added. But another potential operation is to split a
121// subtree out of a module into its own module. If done carefully,
122// this operation can be done while preserving compatibility for clients.
123// For example, suppose that we want to split rsc.io/qr/gf256 into its
124// own module, so that there would be two modules rsc.io/qr and rsc.io/qr/gf256.
125// Then we can simultaneously issue rsc.io/qr v0.3.0 (dropping the gf256 subdirectory)
126// and rsc.io/qr/gf256 v0.1.0, including in their respective go.mod
127// cyclic requirements pointing at each other: rsc.io/qr v0.3.0 requires
128// rsc.io/qr/gf256 v0.1.0 and vice versa. Then a build can be
129// using an older rsc.io/qr module that includes the gf256 package, but if
130// it adds a requirement on either the newer rsc.io/qr or the newer
131// rsc.io/qr/gf256 module, it will automatically add the requirement
132// on the complementary half, ensuring both that rsc.io/qr/gf256 is
133// available for importing by the build and also that it is only defined
134// by a single module. The gf256 package could move back into the
135// original by another simultaneous release of rsc.io/qr v0.4.0 including
136// the gf256 subdirectory and an rsc.io/qr/gf256 v0.2.0 with no code
137// in its root directory, along with a new requirement cycle.
138// The ability to shift module boundaries in this way is expected to be
139// important in large-scale program refactorings, similar to the ones
140// described in https://talks.golang.org/2016/refactor.article.
141//
142// The possibility of shifting module boundaries reemphasizes
143// that you must know both the module path and its version
144// to determine the set of packages provided directly by that module.
145//
146// On top of all this, it is possible for a single code repository
147// to contain multiple modules, either in branches or subdirectories,
148// as a limited kind of monorepo. For example rsc.io/qr/v2,
149// the v2.x.x continuation of rsc.io/qr, is expected to be found
150// in v2-tagged commits in https://github.com/rsc/qr, either
151// in the root or in a v2 subdirectory, disambiguated by go.mod.
152// Again the precise file tree corresponding to a module
153// depends on which version we are considering.
154//
155// It is also possible for the underlying repository to change over time,
156// without changing the module path. If I copy the github repo over
157// to https://bitbucket.org/rsc/qr and update https://rsc.io/qr?go-get=1,
158// then clients of all versions should start fetching from bitbucket
159// instead of github. That is, in contrast to the exact file tree,
160// the location of the source code repository associated with a module path
161// does not depend on the module version. (This is by design, as the whole
162// point of these redirects is to allow package authors to establish a stable
163// name that can be updated as code moves from one service to another.)
164//
165// All of this is important background for the lookup APIs defined in this
166// file.
167//
168// The Lookup function takes a module path and returns a Repo representing
169// that module path. Lookup can do only a little with the path alone.
// It can check that the path is well-formed (see module.CheckPath)
171// and it can check that the path can be resolved to a target repository.
172// To avoid version control access except when absolutely necessary,
173// Lookup does not attempt to connect to the repository itself.
174//
175// The ImportRepoRev function is a variant of Import which is limited
176// to code in a source code repository at a particular revision identifier
177// (usually a commit hash or source code repository tag, not necessarily
178// a module version).
179// ImportRepoRev is used when converting legacy dependency requirements
180// from older systems into go.mod files. Those older systems worked
181// at either package or repository granularity, and most of the time they
182// recorded commit hashes, not tagged versions.
183
// lookupCache holds the results of Lookup, keyed by lookupCacheKey
// (the proxy and module path passed to Lookup).
var lookupCache par.Cache
185
// lookupCacheKey is the key type used with lookupCache:
// the proxy and module path arguments of a Lookup call.
type lookupCacheKey struct {
	proxy, path string
}
189
190// Lookup returns the module with the given module path,
191// fetched through the given proxy.
192//
193// The distinguished proxy "direct" indicates that the path should be fetched
194// from its origin, and "noproxy" indicates that the patch should be fetched
195// directly only if GONOPROXY matches the given path.
196//
197// For the distinguished proxy "off", Lookup always returns a non-nil error.
198//
199// A successful return does not guarantee that the module
200// has any defined versions.
201func Lookup(proxy, path string) Repo {
202	if traceRepo {
203		defer logCall("Lookup(%q, %q)", proxy, path)()
204	}
205
206	type cached struct {
207		r Repo
208	}
209	c := lookupCache.Do(lookupCacheKey{proxy, path}, func() interface{} {
210		r := newCachingRepo(path, func() (Repo, error) {
211			r, err := lookup(proxy, path)
212			if err == nil && traceRepo {
213				r = newLoggingRepo(r)
214			}
215			return r, err
216		})
217		return cached{r}
218	}).(cached)
219
220	return c.r
221}
222
223// lookup returns the module with the given module path.
224func lookup(proxy, path string) (r Repo, err error) {
225	if cfg.BuildMod == "vendor" {
226		return nil, errLookupDisabled
227	}
228
229	if module.MatchPrefixPatterns(cfg.GONOPROXY, path) {
230		switch proxy {
231		case "noproxy", "direct":
232			return lookupDirect(path)
233		default:
234			return nil, errNoproxy
235		}
236	}
237
238	switch proxy {
239	case "off":
240		return errRepo{path, errProxyOff}, nil
241	case "direct":
242		return lookupDirect(path)
243	case "noproxy":
244		return nil, errUseProxy
245	default:
246		return newProxyRepo(proxy, path)
247	}
248}
249
250type lookupDisabledError struct{}
251
252func (lookupDisabledError) Error() string {
253	if cfg.BuildModReason == "" {
254		return fmt.Sprintf("module lookup disabled by -mod=%s", cfg.BuildMod)
255	}
256	return fmt.Sprintf("module lookup disabled by -mod=%s\n\t(%s)", cfg.BuildMod, cfg.BuildModReason)
257}
258
// errLookupDisabled is the error lookup returns when cfg.BuildMod is "vendor".
// Its message is computed when Error is called, from the current values of
// cfg.BuildMod and cfg.BuildModReason.
var errLookupDisabled error = lookupDisabledError{}
260
// Sentinel errors produced by lookup. All are created with notExistErrorf,
// so each one matches fs.ErrNotExist under errors.Is.
var (
	// errProxyOff is carried by the errRepo returned for the proxy "off".
	errProxyOff       = notExistErrorf("module lookup disabled by GOPROXY=off")
	// errNoproxy is returned when the path matches GONOPROXY but the
	// proxy is neither "noproxy" nor "direct".
	errNoproxy  error = notExistErrorf("disabled by GOPRIVATE/GONOPROXY")
	// errUseProxy is returned for the proxy "noproxy" when the path
	// does not match GONOPROXY.
	errUseProxy error = notExistErrorf("path does not match GOPRIVATE/GONOPROXY")
)
266
267func lookupDirect(path string) (Repo, error) {
268	security := web.SecureOnly
269
270	if allowInsecure(path) {
271		security = web.Insecure
272	}
273	rr, err := vcs.RepoRootForImportPath(path, vcs.PreferMod, security)
274	if err != nil {
275		// We don't know where to find code for a module with this path.
276		return nil, notExistError{err: err}
277	}
278
279	if rr.VCS.Name == "mod" {
280		// Fetch module from proxy with base URL rr.Repo.
281		return newProxyRepo(rr.Repo, path)
282	}
283
284	code, err := lookupCodeRepo(rr)
285	if err != nil {
286		return nil, err
287	}
288	return newCodeRepo(code, rr.Root, path)
289}
290
291func lookupCodeRepo(rr *vcs.RepoRoot) (codehost.Repo, error) {
292	code, err := codehost.NewRepo(rr.VCS.Cmd, rr.Repo)
293	if err != nil {
294		if _, ok := err.(*codehost.VCSError); ok {
295			return nil, err
296		}
297		return nil, fmt.Errorf("lookup %s: %v", rr.Root, err)
298	}
299	return code, nil
300}
301
302// ImportRepoRev returns the module and version to use to access
303// the given import path loaded from the source code repository that
304// the original "go get" would have used, at the specific repository revision
305// (typically a commit hash, but possibly also a source control tag).
306func ImportRepoRev(path, rev string) (Repo, *RevInfo, error) {
307	if cfg.BuildMod == "vendor" || cfg.BuildMod == "readonly" {
308		return nil, nil, fmt.Errorf("repo version lookup disabled by -mod=%s", cfg.BuildMod)
309	}
310
311	// Note: Because we are converting a code reference from a legacy
312	// version control system, we ignore meta tags about modules
313	// and use only direct source control entries (get.IgnoreMod).
314	security := web.SecureOnly
315	if allowInsecure(path) {
316		security = web.Insecure
317	}
318	rr, err := vcs.RepoRootForImportPath(path, vcs.IgnoreMod, security)
319	if err != nil {
320		return nil, nil, err
321	}
322
323	code, err := lookupCodeRepo(rr)
324	if err != nil {
325		return nil, nil, err
326	}
327
328	revInfo, err := code.Stat(rev)
329	if err != nil {
330		return nil, nil, err
331	}
332
333	// TODO: Look in repo to find path, check for go.mod files.
334	// For now we're just assuming rr.Root is the module path,
335	// which is true in the absence of go.mod files.
336
337	repo, err := newCodeRepo(code, rr.Root, rr.Root)
338	if err != nil {
339		return nil, nil, err
340	}
341
342	info, err := repo.(*codeRepo).convert(revInfo, rev)
343	if err != nil {
344		return nil, nil, err
345	}
346	return repo, info, nil
347}
348
349func SortVersions(list []string) {
350	sort.Slice(list, func(i, j int) bool {
351		cmp := semver.Compare(list[i], list[j])
352		if cmp != 0 {
353			return cmp < 0
354		}
355		return list[i] < list[j]
356	})
357}
358
// A loggingRepo is a wrapper around an underlying Repo
// that prints a log message at the start and end of each call
// (via logCall, which writes to standard error).
// It can be inserted when debugging.
type loggingRepo struct {
	r Repo // the wrapped Repo that every call is forwarded to
}
365
366func newLoggingRepo(r Repo) *loggingRepo {
367	return &loggingRepo{r}
368}
369
// logCall prints a log message using format and args to standard error and
// then also returns a function that will print the same message again,
// prefixed with the time elapsed since logCall was called.
// Typical usage is:
//
//	defer logCall("hello %s", arg)()
//
// Note the final ().
//
// The message is formatted exactly once, when logCall is called, so the
// closing line always repeats the opening line's text even if the printed
// form of an argument changes in the meantime.
func logCall(format string, args ...interface{}) func() {
	start := time.Now()
	msg := fmt.Sprintf(format, args...)
	fmt.Fprintf(os.Stderr, "+++ %s\n", msg)
	return func() {
		fmt.Fprintf(os.Stderr, "%.3fs %s\n", time.Since(start).Seconds(), msg)
	}
}
385
// ModulePath returns the module path of the wrapped Repo.
// Unlike the other loggingRepo methods, it does not log the call.
func (l *loggingRepo) ModulePath() string {
	return l.r.ModulePath()
}
389
390func (l *loggingRepo) Versions(prefix string) (tags []string, err error) {
391	defer logCall("Repo[%s]: Versions(%q)", l.r.ModulePath(), prefix)()
392	return l.r.Versions(prefix)
393}
394
395func (l *loggingRepo) Stat(rev string) (*RevInfo, error) {
396	defer logCall("Repo[%s]: Stat(%q)", l.r.ModulePath(), rev)()
397	return l.r.Stat(rev)
398}
399
400func (l *loggingRepo) Latest() (*RevInfo, error) {
401	defer logCall("Repo[%s]: Latest()", l.r.ModulePath())()
402	return l.r.Latest()
403}
404
405func (l *loggingRepo) GoMod(version string) ([]byte, error) {
406	defer logCall("Repo[%s]: GoMod(%q)", l.r.ModulePath(), version)()
407	return l.r.GoMod(version)
408}
409
410func (l *loggingRepo) Zip(dst io.Writer, version string) error {
411	dstName := "_"
412	if dst, ok := dst.(interface{ Name() string }); ok {
413		dstName = strconv.Quote(dst.Name())
414	}
415	defer logCall("Repo[%s]: Zip(%s, %q)", l.r.ModulePath(), dstName, version)()
416	return l.r.Zip(dst, version)
417}
418
// errRepo is a Repo that returns the same error for all operations
// other than ModulePath.
//
// It is useful in conjunction with caching, since cache hits will not attempt
// the prohibited operations.
type errRepo struct {
	modulePath string // value returned by ModulePath
	err        error  // error returned by every other method
}
427
428func (r errRepo) ModulePath() string { return r.modulePath }
429
430func (r errRepo) Versions(prefix string) (tags []string, err error) { return nil, r.err }
431func (r errRepo) Stat(rev string) (*RevInfo, error)                 { return nil, r.err }
432func (r errRepo) Latest() (*RevInfo, error)                         { return nil, r.err }
433func (r errRepo) GoMod(version string) ([]byte, error)              { return nil, r.err }
434func (r errRepo) Zip(dst io.Writer, version string) error           { return r.err }
435
// A notExistError wraps an error so that it matches fs.ErrNotExist
// under errors.Is while keeping the wrapped error's own message.
type notExistError struct {
	err error
}

// notExistErrorf formats a message as fmt.Errorf does and returns
// the result wrapped in a notExistError.
func notExistErrorf(format string, args ...interface{}) error {
	cause := fmt.Errorf(format, args...)
	return notExistError{err: cause}
}

// Error returns the message of the wrapped error.
func (e notExistError) Error() string { return e.err.Error() }

// Is reports whether target is fs.ErrNotExist.
func (e notExistError) Is(target error) bool { return target == fs.ErrNotExist }

// Unwrap returns the wrapped error.
func (e notExistError) Unwrap() error { return e.err }
456