1// Copyright 2018 The Go Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style
3// license that can be found in the LICENSE file.
4
5package modfetch
6
7import (
8	"fmt"
9	"io"
10	"os"
11	"sort"
12	"strconv"
13	"time"
14
15	"cmd/go/internal/cfg"
16	"cmd/go/internal/get"
17	"cmd/go/internal/modfetch/codehost"
18	"cmd/go/internal/par"
19	"cmd/go/internal/str"
20	web "cmd/go/internal/web"
21
22	"golang.org/x/mod/semver"
23)
24
// traceRepo is a compile-time switch that traces all repo actions.
// It is meant for debugging only.
const traceRepo = false
26
27// A Repo represents a repository storing all versions of a single module.
28// It must be safe for simultaneous use by multiple goroutines.
29type Repo interface {
30	// ModulePath returns the module path.
31	ModulePath() string
32
33	// Versions lists all known versions with the given prefix.
34	// Pseudo-versions are not included.
35	// Versions should be returned sorted in semver order
36	// (implementations can use SortVersions).
37	Versions(prefix string) ([]string, error)
38
39	// Stat returns information about the revision rev.
40	// A revision can be any identifier known to the underlying service:
41	// commit hash, branch, tag, and so on.
42	Stat(rev string) (*RevInfo, error)
43
44	// Latest returns the latest revision on the default branch,
45	// whatever that means in the underlying source code repository.
46	// It is only used when there are no tagged versions.
47	Latest() (*RevInfo, error)
48
49	// GoMod returns the go.mod file for the given version.
50	GoMod(version string) (data []byte, err error)
51
52	// Zip writes a zip file for the given version to dst.
53	Zip(dst io.Writer, version string) error
54}
55
// A RevInfo describes a single revision in a module repository.
type RevInfo struct {
	Version string    // suggested version string for this revision
	Time    time.Time // commit time

	// These fields are used for Stat of an arbitrary rev,
	// but they are not recorded when talking about module versions.
	// The json:"-" tags keep them out of the JSON encoding of a RevInfo.
	Name  string `json:"-"` // complete ID in underlying repository
	Short string `json:"-"` // shortened ID, for use in pseudo-version
}
66
67// Re: module paths, import paths, repository roots, and lookups
68//
69// A module is a collection of Go packages stored in a file tree
70// with a go.mod file at the root of the tree.
71// The go.mod defines the module path, which is the import path
72// corresponding to the root of the file tree.
73// The import path of a directory within that file tree is the module path
74// joined with the name of the subdirectory relative to the root.
75//
76// For example, the module with path rsc.io/qr corresponds to the
77// file tree in the repository https://github.com/rsc/qr.
78// That file tree has a go.mod that says "module rsc.io/qr".
79// The package in the root directory has import path "rsc.io/qr".
80// The package in the gf256 subdirectory has import path "rsc.io/qr/gf256".
81// In this example, "rsc.io/qr" is both a module path and an import path.
82// But "rsc.io/qr/gf256" is only an import path, not a module path:
83// it names an importable package, but not a module.
84//
85// As a special case to incorporate code written before modules were
86// introduced, if a path p resolves using the pre-module "go get" lookup
87// to the root of a source code repository without a go.mod file,
88// that repository is treated as if it had a go.mod in its root directory
89// declaring module path p. (The go.mod is further considered to
90// contain requirements corresponding to any legacy version
91// tracking format such as Gopkg.lock, vendor/vendor.conf, and so on.)
92//
93// The presentation so far ignores the fact that a source code repository
94// has many different versions of a file tree, and those versions may
95// differ in whether a particular go.mod exists and what it contains.
96// In fact there is a well-defined mapping only from a module path, version
97// pair - often written path@version - to a particular file tree.
98// For example rsc.io/qr@v0.1.0 depends on the "implicit go.mod at root of
99// repository" rule, while rsc.io/qr@v0.2.0 has an explicit go.mod.
100// Because the "go get" import paths rsc.io/qr and github.com/rsc/qr
101// both redirect to the Git repository https://github.com/rsc/qr,
102// github.com/rsc/qr@v0.1.0 is the same file tree as rsc.io/qr@v0.1.0
103// but a different module (a different name). In contrast, since v0.2.0
104// of that repository has an explicit go.mod that declares path rsc.io/qr,
105// github.com/rsc/qr@v0.2.0 is an invalid module path, version pair.
106// Before modules, import comments would have had the same effect.
107//
108// The set of import paths associated with a given module path is
109// clearly not fixed: at the least, new directories with new import paths
110// can always be added. But another potential operation is to split a
111// subtree out of a module into its own module. If done carefully,
112// this operation can be done while preserving compatibility for clients.
113// For example, suppose that we want to split rsc.io/qr/gf256 into its
114// own module, so that there would be two modules rsc.io/qr and rsc.io/qr/gf256.
115// Then we can simultaneously issue rsc.io/qr v0.3.0 (dropping the gf256 subdirectory)
116// and rsc.io/qr/gf256 v0.1.0, including in their respective go.mod
117// cyclic requirements pointing at each other: rsc.io/qr v0.3.0 requires
118// rsc.io/qr/gf256 v0.1.0 and vice versa. Then a build can be
119// using an older rsc.io/qr module that includes the gf256 package, but if
120// it adds a requirement on either the newer rsc.io/qr or the newer
121// rsc.io/qr/gf256 module, it will automatically add the requirement
122// on the complementary half, ensuring both that rsc.io/qr/gf256 is
123// available for importing by the build and also that it is only defined
124// by a single module. The gf256 package could move back into the
125// original by another simultaneous release of rsc.io/qr v0.4.0 including
126// the gf256 subdirectory and an rsc.io/qr/gf256 v0.2.0 with no code
127// in its root directory, along with a new requirement cycle.
128// The ability to shift module boundaries in this way is expected to be
129// important in large-scale program refactorings, similar to the ones
130// described in https://talks.golang.org/2016/refactor.article.
131//
132// The possibility of shifting module boundaries reemphasizes
133// that you must know both the module path and its version
134// to determine the set of packages provided directly by that module.
135//
136// On top of all this, it is possible for a single code repository
137// to contain multiple modules, either in branches or subdirectories,
138// as a limited kind of monorepo. For example rsc.io/qr/v2,
139// the v2.x.x continuation of rsc.io/qr, is expected to be found
140// in v2-tagged commits in https://github.com/rsc/qr, either
141// in the root or in a v2 subdirectory, disambiguated by go.mod.
142// Again the precise file tree corresponding to a module
143// depends on which version we are considering.
144//
145// It is also possible for the underlying repository to change over time,
146// without changing the module path. If I copy the github repo over
147// to https://bitbucket.org/rsc/qr and update https://rsc.io/qr?go-get=1,
148// then clients of all versions should start fetching from bitbucket
149// instead of github. That is, in contrast to the exact file tree,
150// the location of the source code repository associated with a module path
151// does not depend on the module version. (This is by design, as the whole
152// point of these redirects is to allow package authors to establish a stable
153// name that can be updated as code moves from one service to another.)
154//
155// All of this is important background for the lookup APIs defined in this
156// file.
157//
158// The Lookup function takes a module path and returns a Repo representing
159// that module path. Lookup can do only a little with the path alone.
// It can check that the path is well-formed (see module.CheckPath)
161// and it can check that the path can be resolved to a target repository.
162// To avoid version control access except when absolutely necessary,
163// Lookup does not attempt to connect to the repository itself.
164//
165// The ImportRepoRev function is a variant of Import which is limited
166// to code in a source code repository at a particular revision identifier
167// (usually a commit hash or source code repository tag, not necessarily
168// a module version).
169// ImportRepoRev is used when converting legacy dependency requirements
170// from older systems into go.mod files. Those older systems worked
171// at either package or repository granularity, and most of the time they
172// recorded commit hashes, not tagged versions.
173
174var lookupCache par.Cache
175
176type lookupCacheKey struct {
177	proxy, path string
178}
179
180// Lookup returns the module with the given module path,
181// fetched through the given proxy.
182//
183// The distinguished proxy "direct" indicates that the path should be fetched
184// from its origin, and "noproxy" indicates that the patch should be fetched
185// directly only if GONOPROXY matches the given path.
186//
187// For the distinguished proxy "off", Lookup always returns a non-nil error.
188//
189// A successful return does not guarantee that the module
190// has any defined versions.
191func Lookup(proxy, path string) (Repo, error) {
192	if traceRepo {
193		defer logCall("Lookup(%q, %q)", proxy, path)()
194	}
195
196	type cached struct {
197		r   Repo
198		err error
199	}
200	c := lookupCache.Do(lookupCacheKey{proxy, path}, func() interface{} {
201		r, err := lookup(proxy, path)
202		if err == nil {
203			if traceRepo {
204				r = newLoggingRepo(r)
205			}
206			r = newCachingRepo(r)
207		}
208		return cached{r, err}
209	}).(cached)
210
211	return c.r, c.err
212}
213
214// lookup returns the module with the given module path.
215func lookup(proxy, path string) (r Repo, err error) {
216	if cfg.BuildMod == "vendor" {
217		return nil, errLookupDisabled
218	}
219
220	if str.GlobsMatchPath(cfg.GONOPROXY, path) {
221		switch proxy {
222		case "noproxy", "direct":
223			return lookupDirect(path)
224		default:
225			return nil, errNoproxy
226		}
227	}
228
229	switch proxy {
230	case "off":
231		return nil, errProxyOff
232	case "direct":
233		return lookupDirect(path)
234	case "noproxy":
235		return nil, errUseProxy
236	default:
237		return newProxyRepo(proxy, path)
238	}
239}
240
241type lookupDisabledError struct{}
242
243func (lookupDisabledError) Error() string {
244	if cfg.BuildModReason == "" {
245		return fmt.Sprintf("module lookup disabled by -mod=%s", cfg.BuildMod)
246	}
247	return fmt.Sprintf("module lookup disabled by -mod=%s\n\t(%s)", cfg.BuildMod, cfg.BuildModReason)
248}
249
250var errLookupDisabled error = lookupDisabledError{}
251
252var (
253	errProxyOff       = notExistErrorf("module lookup disabled by GOPROXY=off")
254	errNoproxy  error = notExistErrorf("disabled by GOPRIVATE/GONOPROXY")
255	errUseProxy error = notExistErrorf("path does not match GOPRIVATE/GONOPROXY")
256)
257
258func lookupDirect(path string) (Repo, error) {
259	security := web.SecureOnly
260
261	if allowInsecure(path) {
262		security = web.Insecure
263	}
264	rr, err := get.RepoRootForImportPath(path, get.PreferMod, security)
265	if err != nil {
266		// We don't know where to find code for a module with this path.
267		return nil, notExistError{err: err}
268	}
269
270	if rr.VCS == "mod" {
271		// Fetch module from proxy with base URL rr.Repo.
272		return newProxyRepo(rr.Repo, path)
273	}
274
275	code, err := lookupCodeRepo(rr)
276	if err != nil {
277		return nil, err
278	}
279	return newCodeRepo(code, rr.Root, path)
280}
281
282func lookupCodeRepo(rr *get.RepoRoot) (codehost.Repo, error) {
283	code, err := codehost.NewRepo(rr.VCS, rr.Repo)
284	if err != nil {
285		if _, ok := err.(*codehost.VCSError); ok {
286			return nil, err
287		}
288		return nil, fmt.Errorf("lookup %s: %v", rr.Root, err)
289	}
290	return code, nil
291}
292
293// ImportRepoRev returns the module and version to use to access
294// the given import path loaded from the source code repository that
295// the original "go get" would have used, at the specific repository revision
296// (typically a commit hash, but possibly also a source control tag).
297func ImportRepoRev(path, rev string) (Repo, *RevInfo, error) {
298	if cfg.BuildMod == "vendor" || cfg.BuildMod == "readonly" {
299		return nil, nil, fmt.Errorf("repo version lookup disabled by -mod=%s", cfg.BuildMod)
300	}
301
302	// Note: Because we are converting a code reference from a legacy
303	// version control system, we ignore meta tags about modules
304	// and use only direct source control entries (get.IgnoreMod).
305	security := web.SecureOnly
306	if allowInsecure(path) {
307		security = web.Insecure
308	}
309	rr, err := get.RepoRootForImportPath(path, get.IgnoreMod, security)
310	if err != nil {
311		return nil, nil, err
312	}
313
314	code, err := lookupCodeRepo(rr)
315	if err != nil {
316		return nil, nil, err
317	}
318
319	revInfo, err := code.Stat(rev)
320	if err != nil {
321		return nil, nil, err
322	}
323
324	// TODO: Look in repo to find path, check for go.mod files.
325	// For now we're just assuming rr.Root is the module path,
326	// which is true in the absence of go.mod files.
327
328	repo, err := newCodeRepo(code, rr.Root, rr.Root)
329	if err != nil {
330		return nil, nil, err
331	}
332
333	info, err := repo.(*codeRepo).convert(revInfo, rev)
334	if err != nil {
335		return nil, nil, err
336	}
337	return repo, info, nil
338}
339
340func SortVersions(list []string) {
341	sort.Slice(list, func(i, j int) bool {
342		cmp := semver.Compare(list[i], list[j])
343		if cmp != 0 {
344			return cmp < 0
345		}
346		return list[i] < list[j]
347	})
348}
349
350// A loggingRepo is a wrapper around an underlying Repo
351// that prints a log message at the start and end of each call.
352// It can be inserted when debugging.
353type loggingRepo struct {
354	r Repo
355}
356
357func newLoggingRepo(r Repo) *loggingRepo {
358	return &loggingRepo{r}
359}
360
// logCall writes a "+++ "-prefixed message, built from format and args,
// to standard error. It returns a function that writes the message a
// second time, prefixed by the number of seconds elapsed since logCall
// was called.
// Typical usage is:
//
//	defer logCall("hello %s", arg)()
//
// Note the final ().
func logCall(format string, args ...interface{}) func() {
	begin := time.Now()
	fmt.Fprintln(os.Stderr, "+++", fmt.Sprintf(format, args...))

	finish := func() {
		elapsed := time.Since(begin).Seconds()
		fmt.Fprintf(os.Stderr, "%.3fs %s\n", elapsed, fmt.Sprintf(format, args...))
	}
	return finish
}
376
377func (l *loggingRepo) ModulePath() string {
378	return l.r.ModulePath()
379}
380
381func (l *loggingRepo) Versions(prefix string) (tags []string, err error) {
382	defer logCall("Repo[%s]: Versions(%q)", l.r.ModulePath(), prefix)()
383	return l.r.Versions(prefix)
384}
385
386func (l *loggingRepo) Stat(rev string) (*RevInfo, error) {
387	defer logCall("Repo[%s]: Stat(%q)", l.r.ModulePath(), rev)()
388	return l.r.Stat(rev)
389}
390
391func (l *loggingRepo) Latest() (*RevInfo, error) {
392	defer logCall("Repo[%s]: Latest()", l.r.ModulePath())()
393	return l.r.Latest()
394}
395
396func (l *loggingRepo) GoMod(version string) ([]byte, error) {
397	defer logCall("Repo[%s]: GoMod(%q)", l.r.ModulePath(), version)()
398	return l.r.GoMod(version)
399}
400
401func (l *loggingRepo) Zip(dst io.Writer, version string) error {
402	dstName := "_"
403	if dst, ok := dst.(interface{ Name() string }); ok {
404		dstName = strconv.Quote(dst.Name())
405	}
406	defer logCall("Repo[%s]: Zip(%s, %q)", l.r.ModulePath(), dstName, version)()
407	return l.r.Zip(dst, version)
408}
409
// notExistError wraps an error so that it matches os.ErrNotExist
// under errors.Is while keeping the wrapped error's own message.
type notExistError struct {
	err error
}

// notExistErrorf formats a message as fmt.Errorf does
// and returns it as a notExistError.
func notExistErrorf(format string, args ...interface{}) error {
	inner := fmt.Errorf(format, args...)
	return notExistError{err: inner}
}

// Error returns the message of the wrapped error.
func (e notExistError) Error() string {
	msg := e.err.Error()
	return msg
}

// Is reports whether target is os.ErrNotExist.
func (notExistError) Is(target error) bool {
	return os.ErrNotExist == target
}

// Unwrap returns the wrapped error.
func (e notExistError) Unwrap() error {
	return e.err
}
430