1// Copyright 2018 The Go Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style
3// license that can be found in the LICENSE file.
4
5package modfetch
6
7import (
8	"archive/zip"
9	"bytes"
10	"errors"
11	"fmt"
12	"io"
13	"io/fs"
14	"os"
15	"path"
16	"sort"
17	"strings"
18	"time"
19
20	"cmd/go/internal/modfetch/codehost"
21
22	"golang.org/x/mod/modfile"
23	"golang.org/x/mod/module"
24	"golang.org/x/mod/semver"
25	modzip "golang.org/x/mod/zip"
26)
27
// A codeRepo implements modfetch.Repo using an underlying codehost.Repo.
//
// Values are built by newCodeRepo, which derives every field other than code
// from the module path and the import path of the repository root.
type codeRepo struct {
	// modPath is the full module path, including any major-version suffix.
	modPath string

	// code is the repository containing this module.
	code codehost.Repo
	// codeRoot is the import path at the root of code.
	codeRoot string
	// codeDir is the directory (relative to root) at which we expect to find the module.
	// If pathMajor is non-empty and codeRoot is not the full modPath,
	// then we look in both codeDir and codeDir/pathMajor[1:].
	codeDir string

	// pathMajor is the suffix of modPath that indicates its major version,
	// or the empty string if modPath is at major version 0 or 1.
	//
	// pathMajor is typically of the form "/vN", but possibly ".vN", or
	// ".vN-unstable" for modules resolved using gopkg.in.
	pathMajor string
	// pathPrefix is the prefix of modPath that excludes pathMajor, except that
	// newCodeRepo sets it to the full modPath when codeRoot equals modPath.
	// It is used when formatting diagnostics (for example, the error messages
	// produced by findDir).
	pathPrefix string

	// pseudoMajor is the major version prefix to require when generating
	// pseudo-versions for this module, derived from the module path. pseudoMajor
	// is empty if the module path does not include a version suffix (that is,
	// accepts either v0 or v1).
	pseudoMajor string
}
57
58// newCodeRepo returns a Repo that reads the source code for the module with the
59// given path, from the repo stored in code, with the root of the repo
60// containing the path given by codeRoot.
61func newCodeRepo(code codehost.Repo, codeRoot, path string) (Repo, error) {
62	if !hasPathPrefix(path, codeRoot) {
63		return nil, fmt.Errorf("mismatched repo: found %s for %s", codeRoot, path)
64	}
65	pathPrefix, pathMajor, ok := module.SplitPathVersion(path)
66	if !ok {
67		return nil, fmt.Errorf("invalid module path %q", path)
68	}
69	if codeRoot == path {
70		pathPrefix = path
71	}
72	pseudoMajor := module.PathMajorPrefix(pathMajor)
73
74	// Compute codeDir = bar, the subdirectory within the repo
75	// corresponding to the module root.
76	//
77	// At this point we might have:
78	//	path = github.com/rsc/foo/bar/v2
79	//	codeRoot = github.com/rsc/foo
80	//	pathPrefix = github.com/rsc/foo/bar
81	//	pathMajor = /v2
82	//	pseudoMajor = v2
83	//
84	// which gives
85	//	codeDir = bar
86	//
87	// We know that pathPrefix is a prefix of path, and codeRoot is a prefix of
88	// path, but codeRoot may or may not be a prefix of pathPrefix, because
89	// codeRoot may be the entire path (in which case codeDir should be empty).
90	// That occurs in two situations.
91	//
92	// One is when a go-import meta tag resolves the complete module path,
93	// including the pathMajor suffix:
94	//	path = nanomsg.org/go/mangos/v2
95	//	codeRoot = nanomsg.org/go/mangos/v2
96	//	pathPrefix = nanomsg.org/go/mangos
97	//	pathMajor = /v2
98	//	pseudoMajor = v2
99	//
100	// The other is similar: for gopkg.in only, the major version is encoded
101	// with a dot rather than a slash, and thus can't be in a subdirectory.
102	//	path = gopkg.in/yaml.v2
103	//	codeRoot = gopkg.in/yaml.v2
104	//	pathPrefix = gopkg.in/yaml
105	//	pathMajor = .v2
106	//	pseudoMajor = v2
107	//
108	codeDir := ""
109	if codeRoot != path {
110		if !hasPathPrefix(pathPrefix, codeRoot) {
111			return nil, fmt.Errorf("repository rooted at %s cannot contain module %s", codeRoot, path)
112		}
113		codeDir = strings.Trim(pathPrefix[len(codeRoot):], "/")
114	}
115
116	r := &codeRepo{
117		modPath:     path,
118		code:        code,
119		codeRoot:    codeRoot,
120		codeDir:     codeDir,
121		pathPrefix:  pathPrefix,
122		pathMajor:   pathMajor,
123		pseudoMajor: pseudoMajor,
124	}
125
126	return r, nil
127}
128
// ModulePath returns the module path that r was created for.
func (r *codeRepo) ModulePath() string {
	return r.modPath
}
132
133func (r *codeRepo) Versions(prefix string) ([]string, error) {
134	// Special case: gopkg.in/macaroon-bakery.v2-unstable
135	// does not use the v2 tags (those are for macaroon-bakery.v2).
136	// It has no possible tags at all.
137	if strings.HasPrefix(r.modPath, "gopkg.in/") && strings.HasSuffix(r.modPath, "-unstable") {
138		return nil, nil
139	}
140
141	p := prefix
142	if r.codeDir != "" {
143		p = r.codeDir + "/" + p
144	}
145	tags, err := r.code.Tags(p)
146	if err != nil {
147		return nil, &module.ModuleError{
148			Path: r.modPath,
149			Err:  err,
150		}
151	}
152
153	var list, incompatible []string
154	for _, tag := range tags {
155		if !strings.HasPrefix(tag, p) {
156			continue
157		}
158		v := tag
159		if r.codeDir != "" {
160			v = v[len(r.codeDir)+1:]
161		}
162		if v == "" || v != module.CanonicalVersion(v) || IsPseudoVersion(v) {
163			continue
164		}
165
166		if err := module.CheckPathMajor(v, r.pathMajor); err != nil {
167			if r.codeDir == "" && r.pathMajor == "" && semver.Major(v) > "v1" {
168				incompatible = append(incompatible, v)
169			}
170			continue
171		}
172
173		list = append(list, v)
174	}
175	SortVersions(list)
176	SortVersions(incompatible)
177
178	return r.appendIncompatibleVersions(list, incompatible)
179}
180
181// appendIncompatibleVersions appends "+incompatible" versions to list if
182// appropriate, returning the final list.
183//
184// The incompatible list contains candidate versions without the '+incompatible'
185// prefix.
186//
187// Both list and incompatible must be sorted in semantic order.
188func (r *codeRepo) appendIncompatibleVersions(list, incompatible []string) ([]string, error) {
189	if len(incompatible) == 0 || r.pathMajor != "" {
190		// No +incompatible versions are possible, so no need to check them.
191		return list, nil
192	}
193
194	versionHasGoMod := func(v string) (bool, error) {
195		_, err := r.code.ReadFile(v, "go.mod", codehost.MaxGoMod)
196		if err == nil {
197			return true, nil
198		}
199		if !os.IsNotExist(err) {
200			return false, &module.ModuleError{
201				Path: r.modPath,
202				Err:  err,
203			}
204		}
205		return false, nil
206	}
207
208	if len(list) > 0 {
209		ok, err := versionHasGoMod(list[len(list)-1])
210		if err != nil {
211			return nil, err
212		}
213		if ok {
214			// The latest compatible version has a go.mod file, so assume that all
215			// subsequent versions do as well, and do not include any +incompatible
216			// versions. Even if we are wrong, the author clearly intends module
217			// consumers to be on the v0/v1 line instead of a higher +incompatible
218			// version. (See https://golang.org/issue/34189.)
219			//
220			// We know of at least two examples where this behavior is desired
221			// (github.com/russross/blackfriday@v2.0.0 and
222			// github.com/libp2p/go-libp2p@v6.0.23), and (as of 2019-10-29) have no
223			// concrete examples for which it is undesired.
224			return list, nil
225		}
226	}
227
228	var (
229		lastMajor         string
230		lastMajorHasGoMod bool
231	)
232	for i, v := range incompatible {
233		major := semver.Major(v)
234
235		if major != lastMajor {
236			rem := incompatible[i:]
237			j := sort.Search(len(rem), func(j int) bool {
238				return semver.Major(rem[j]) != major
239			})
240			latestAtMajor := rem[j-1]
241
242			var err error
243			lastMajor = major
244			lastMajorHasGoMod, err = versionHasGoMod(latestAtMajor)
245			if err != nil {
246				return nil, err
247			}
248		}
249
250		if lastMajorHasGoMod {
251			// The latest release of this major version has a go.mod file, so it is
252			// not allowed as +incompatible. It would be confusing to include some
253			// minor versions of this major version as +incompatible but require
254			// semantic import versioning for others, so drop all +incompatible
255			// versions for this major version.
256			//
257			// If we're wrong about a minor version in the middle, users will still be
258			// able to 'go get' specific tags for that version explicitly — they just
259			// won't appear in 'go list' or as the results for queries with inequality
260			// bounds.
261			continue
262		}
263		list = append(list, v+"+incompatible")
264	}
265
266	return list, nil
267}
268
269func (r *codeRepo) Stat(rev string) (*RevInfo, error) {
270	if rev == "latest" {
271		return r.Latest()
272	}
273	codeRev := r.revToRev(rev)
274	info, err := r.code.Stat(codeRev)
275	if err != nil {
276		return nil, &module.ModuleError{
277			Path: r.modPath,
278			Err: &module.InvalidVersionError{
279				Version: rev,
280				Err:     err,
281			},
282		}
283	}
284	return r.convert(info, rev)
285}
286
287func (r *codeRepo) Latest() (*RevInfo, error) {
288	info, err := r.code.Latest()
289	if err != nil {
290		return nil, err
291	}
292	return r.convert(info, "")
293}
294
295// convert converts a version as reported by the code host to a version as
296// interpreted by the module system.
297//
298// If statVers is a valid module version, it is used for the Version field.
299// Otherwise, the Version is derived from the passed-in info and recent tags.
300func (r *codeRepo) convert(info *codehost.RevInfo, statVers string) (*RevInfo, error) {
301	info2 := &RevInfo{
302		Name:  info.Name,
303		Short: info.Short,
304		Time:  info.Time,
305	}
306
307	// If this is a plain tag (no dir/ prefix)
308	// and the module path is unversioned,
309	// and if the underlying file tree has no go.mod,
310	// then allow using the tag with a +incompatible suffix.
311	var canUseIncompatible func() bool
312	canUseIncompatible = func() bool {
313		var ok bool
314		if r.codeDir == "" && r.pathMajor == "" {
315			_, errGoMod := r.code.ReadFile(info.Name, "go.mod", codehost.MaxGoMod)
316			if errGoMod != nil {
317				ok = true
318			}
319		}
320		canUseIncompatible = func() bool { return ok }
321		return ok
322	}
323
324	invalidf := func(format string, args ...interface{}) error {
325		return &module.ModuleError{
326			Path: r.modPath,
327			Err: &module.InvalidVersionError{
328				Version: info2.Version,
329				Err:     fmt.Errorf(format, args...),
330			},
331		}
332	}
333
334	// checkGoMod verifies that the go.mod file for the module exists or does not
335	// exist as required by info2.Version and the module path represented by r.
336	checkGoMod := func() (*RevInfo, error) {
337		// If r.codeDir is non-empty, then the go.mod file must exist: the module
338		// author — not the module consumer, — gets to decide how to carve up the repo
339		// into modules.
340		//
341		// Conversely, if the go.mod file exists, the module author — not the module
342		// consumer — gets to determine the module's path
343		//
344		// r.findDir verifies both of these conditions. Execute it now so that
345		// r.Stat will correctly return a notExistError if the go.mod location or
346		// declared module path doesn't match.
347		_, _, _, err := r.findDir(info2.Version)
348		if err != nil {
349			// TODO: It would be nice to return an error like "not a module".
350			// Right now we return "missing go.mod", which is a little confusing.
351			return nil, &module.ModuleError{
352				Path: r.modPath,
353				Err: &module.InvalidVersionError{
354					Version: info2.Version,
355					Err:     notExistError{err: err},
356				},
357			}
358		}
359
360		// If the version is +incompatible, then the go.mod file must not exist:
361		// +incompatible is not an ongoing opt-out from semantic import versioning.
362		if strings.HasSuffix(info2.Version, "+incompatible") {
363			if !canUseIncompatible() {
364				if r.pathMajor != "" {
365					return nil, invalidf("+incompatible suffix not allowed: module path includes a major version suffix, so major version must match")
366				} else {
367					return nil, invalidf("+incompatible suffix not allowed: module contains a go.mod file, so semantic import versioning is required")
368				}
369			}
370
371			if err := module.CheckPathMajor(strings.TrimSuffix(info2.Version, "+incompatible"), r.pathMajor); err == nil {
372				return nil, invalidf("+incompatible suffix not allowed: major version %s is compatible", semver.Major(info2.Version))
373			}
374		}
375
376		return info2, nil
377	}
378
379	// Determine version.
380	//
381	// If statVers is canonical, then the original call was repo.Stat(statVers).
382	// Since the version is canonical, we must not resolve it to anything but
383	// itself, possibly with a '+incompatible' annotation: we do not need to do
384	// the work required to look for an arbitrary pseudo-version.
385	if statVers != "" && statVers == module.CanonicalVersion(statVers) {
386		info2.Version = statVers
387
388		if IsPseudoVersion(info2.Version) {
389			if err := r.validatePseudoVersion(info, info2.Version); err != nil {
390				return nil, err
391			}
392			return checkGoMod()
393		}
394
395		if err := module.CheckPathMajor(info2.Version, r.pathMajor); err != nil {
396			if canUseIncompatible() {
397				info2.Version += "+incompatible"
398				return checkGoMod()
399			} else {
400				if vErr, ok := err.(*module.InvalidVersionError); ok {
401					// We're going to describe why the version is invalid in more detail,
402					// so strip out the existing “invalid version” wrapper.
403					err = vErr.Err
404				}
405				return nil, invalidf("module contains a go.mod file, so major version must be compatible: %v", err)
406			}
407		}
408
409		return checkGoMod()
410	}
411
412	// statVers is empty or non-canonical, so we need to resolve it to a canonical
413	// version or pseudo-version.
414
415	// Derive or verify a version from a code repo tag.
416	// Tag must have a prefix matching codeDir.
417	tagPrefix := ""
418	if r.codeDir != "" {
419		tagPrefix = r.codeDir + "/"
420	}
421
422	isRetracted, err := r.retractedVersions()
423	if err != nil {
424		isRetracted = func(string) bool { return false }
425	}
426
427	// tagToVersion returns the version obtained by trimming tagPrefix from tag.
428	// If the tag is invalid, retracted, or a pseudo-version, tagToVersion returns
429	// an empty version.
430	tagToVersion := func(tag string) (v string, tagIsCanonical bool) {
431		if !strings.HasPrefix(tag, tagPrefix) {
432			return "", false
433		}
434		trimmed := tag[len(tagPrefix):]
435		// Tags that look like pseudo-versions would be confusing. Ignore them.
436		if IsPseudoVersion(tag) {
437			return "", false
438		}
439
440		v = semver.Canonical(trimmed) // Not module.Canonical: we don't want to pick up an explicit "+incompatible" suffix from the tag.
441		if v == "" || !strings.HasPrefix(trimmed, v) {
442			return "", false // Invalid or incomplete version (just vX or vX.Y).
443		}
444		if isRetracted(v) {
445			return "", false
446		}
447		if v == trimmed {
448			tagIsCanonical = true
449		}
450
451		if err := module.CheckPathMajor(v, r.pathMajor); err != nil {
452			if canUseIncompatible() {
453				return v + "+incompatible", tagIsCanonical
454			}
455			return "", false
456		}
457
458		return v, tagIsCanonical
459	}
460
461	// If the VCS gave us a valid version, use that.
462	if v, tagIsCanonical := tagToVersion(info.Version); tagIsCanonical {
463		info2.Version = v
464		return checkGoMod()
465	}
466
467	// Look through the tags on the revision for either a usable canonical version
468	// or an appropriate base for a pseudo-version.
469	var pseudoBase string
470	for _, pathTag := range info.Tags {
471		v, tagIsCanonical := tagToVersion(pathTag)
472		if tagIsCanonical {
473			if statVers != "" && semver.Compare(v, statVers) == 0 {
474				// The user requested a non-canonical version, but the tag for the
475				// canonical equivalent refers to the same revision. Use it.
476				info2.Version = v
477				return checkGoMod()
478			} else {
479				// Save the highest canonical tag for the revision. If we don't find a
480				// better match, we'll use it as the canonical version.
481				//
482				// NOTE: Do not replace this with semver.Max. Despite the name,
483				// semver.Max *also* canonicalizes its arguments, which uses
484				// semver.Canonical instead of module.CanonicalVersion and thereby
485				// strips our "+incompatible" suffix.
486				if semver.Compare(info2.Version, v) < 0 {
487					info2.Version = v
488				}
489			}
490		} else if v != "" && semver.Compare(v, statVers) == 0 {
491			// The user explicitly requested something equivalent to this tag. We
492			// can't use the version from the tag directly: since the tag is not
493			// canonical, it could be ambiguous. For example, tags v0.0.1+a and
494			// v0.0.1+b might both exist and refer to different revisions.
495			//
496			// The tag is otherwise valid for the module, so we can at least use it as
497			// the base of an unambiguous pseudo-version.
498			//
499			// If multiple tags match, tagToVersion will canonicalize them to the same
500			// base version.
501			pseudoBase = v
502		}
503	}
504
505	// If we found any canonical tag for the revision, return it.
506	// Even if we found a good pseudo-version base, a canonical version is better.
507	if info2.Version != "" {
508		return checkGoMod()
509	}
510
511	// Find the highest tagged version in the revision's history, subject to
512	// major version and +incompatible constraints. Use that version as the
513	// pseudo-version base so that the pseudo-version sorts higher. Ignore
514	// retracted versions.
515	allowedMajor := func(major string) func(v string) bool {
516		return func(v string) bool {
517			return (major == "" || semver.Major(v) == major) && !isRetracted(v)
518		}
519	}
520	if pseudoBase == "" {
521		var tag string
522		if r.pseudoMajor != "" || canUseIncompatible() {
523			tag, _ = r.code.RecentTag(info.Name, tagPrefix, allowedMajor(r.pseudoMajor))
524		} else {
525			// Allow either v1 or v0, but not incompatible higher versions.
526			tag, _ = r.code.RecentTag(info.Name, tagPrefix, allowedMajor("v1"))
527			if tag == "" {
528				tag, _ = r.code.RecentTag(info.Name, tagPrefix, allowedMajor("v0"))
529			}
530		}
531		pseudoBase, _ = tagToVersion(tag) // empty if the tag is invalid
532	}
533
534	info2.Version = PseudoVersion(r.pseudoMajor, pseudoBase, info.Time, info.Short)
535	return checkGoMod()
536}
537
538// validatePseudoVersion checks that version has a major version compatible with
539// r.modPath and encodes a base version and commit metadata that agrees with
540// info.
541//
542// Note that verifying a nontrivial base version in particular may be somewhat
543// expensive: in order to do so, r.code.DescendsFrom will need to fetch at least
544// enough of the commit history to find a path between version and its base.
545// Fortunately, many pseudo-versions — such as those for untagged repositories —
546// have trivial bases!
547func (r *codeRepo) validatePseudoVersion(info *codehost.RevInfo, version string) (err error) {
548	defer func() {
549		if err != nil {
550			if _, ok := err.(*module.ModuleError); !ok {
551				if _, ok := err.(*module.InvalidVersionError); !ok {
552					err = &module.InvalidVersionError{Version: version, Pseudo: true, Err: err}
553				}
554				err = &module.ModuleError{Path: r.modPath, Err: err}
555			}
556		}
557	}()
558
559	if err := module.CheckPathMajor(version, r.pathMajor); err != nil {
560		return err
561	}
562
563	rev, err := PseudoVersionRev(version)
564	if err != nil {
565		return err
566	}
567	if rev != info.Short {
568		switch {
569		case strings.HasPrefix(rev, info.Short):
570			return fmt.Errorf("revision is longer than canonical (%s)", info.Short)
571		case strings.HasPrefix(info.Short, rev):
572			return fmt.Errorf("revision is shorter than canonical (%s)", info.Short)
573		default:
574			return fmt.Errorf("does not match short name of revision (%s)", info.Short)
575		}
576	}
577
578	t, err := PseudoVersionTime(version)
579	if err != nil {
580		return err
581	}
582	if !t.Equal(info.Time.Truncate(time.Second)) {
583		return fmt.Errorf("does not match version-control timestamp (expected %s)", info.Time.UTC().Format(pseudoVersionTimestampFormat))
584	}
585
586	tagPrefix := ""
587	if r.codeDir != "" {
588		tagPrefix = r.codeDir + "/"
589	}
590
591	// A pseudo-version should have a precedence just above its parent revisions,
592	// and no higher. Otherwise, it would be possible for library authors to "pin"
593	// dependency versions (and bypass the usual minimum version selection) by
594	// naming an extremely high pseudo-version rather than an accurate one.
595	//
596	// Moreover, if we allow a pseudo-version to use any arbitrary pre-release
597	// tag, we end up with infinitely many possible names for each commit. Each
598	// name consumes resources in the module cache and proxies, so we want to
599	// restrict them to a finite set under control of the module author.
600	//
601	// We address both of these issues by requiring the tag upon which the
602	// pseudo-version is based to refer to some ancestor of the revision. We
603	// prefer the highest such tag when constructing a new pseudo-version, but do
604	// not enforce that property when resolving existing pseudo-versions: we don't
605	// know when the parent tags were added, and the highest-tagged parent may not
606	// have existed when the pseudo-version was first resolved.
607	base, err := PseudoVersionBase(strings.TrimSuffix(version, "+incompatible"))
608	if err != nil {
609		return err
610	}
611	if base == "" {
612		if r.pseudoMajor == "" && semver.Major(version) == "v1" {
613			return fmt.Errorf("major version without preceding tag must be v0, not v1")
614		}
615		return nil
616	} else {
617		for _, tag := range info.Tags {
618			versionOnly := strings.TrimPrefix(tag, tagPrefix)
619			if versionOnly == base {
620				// The base version is canonical, so if the version from the tag is
621				// literally equal (not just equivalent), then the tag is canonical too.
622				//
623				// We allow pseudo-versions to be derived from non-canonical tags on the
624				// same commit, so that tags like "v1.1.0+some-metadata" resolve as
625				// close as possible to the canonical version ("v1.1.0") while still
626				// enforcing a total ordering ("v1.1.1-0.[…]" with a unique suffix).
627				//
628				// However, canonical tags already have a total ordering, so there is no
629				// reason not to use the canonical tag directly, and we know that the
630				// canonical tag must already exist because the pseudo-version is
631				// derived from it. In that case, referring to the revision by a
632				// pseudo-version derived from its own canonical tag is just confusing.
633				return fmt.Errorf("tag (%s) found on revision %s is already canonical, so should not be replaced with a pseudo-version derived from that tag", tag, rev)
634			}
635		}
636	}
637
638	tags, err := r.code.Tags(tagPrefix + base)
639	if err != nil {
640		return err
641	}
642
643	var lastTag string // Prefer to log some real tag rather than a canonically-equivalent base.
644	ancestorFound := false
645	for _, tag := range tags {
646		versionOnly := strings.TrimPrefix(tag, tagPrefix)
647		if semver.Compare(versionOnly, base) == 0 {
648			lastTag = tag
649			ancestorFound, err = r.code.DescendsFrom(info.Name, tag)
650			if ancestorFound {
651				break
652			}
653		}
654	}
655
656	if lastTag == "" {
657		return fmt.Errorf("preceding tag (%s) not found", base)
658	}
659
660	if !ancestorFound {
661		if err != nil {
662			return err
663		}
664		rev, err := PseudoVersionRev(version)
665		if err != nil {
666			return fmt.Errorf("not a descendent of preceding tag (%s)", lastTag)
667		}
668		return fmt.Errorf("revision %s is not a descendent of preceding tag (%s)", rev, lastTag)
669	}
670	return nil
671}
672
673func (r *codeRepo) revToRev(rev string) string {
674	if semver.IsValid(rev) {
675		if IsPseudoVersion(rev) {
676			r, _ := PseudoVersionRev(rev)
677			return r
678		}
679		if semver.Build(rev) == "+incompatible" {
680			rev = rev[:len(rev)-len("+incompatible")]
681		}
682		if r.codeDir == "" {
683			return rev
684		}
685		return r.codeDir + "/" + rev
686	}
687	return rev
688}
689
690func (r *codeRepo) versionToRev(version string) (rev string, err error) {
691	if !semver.IsValid(version) {
692		return "", &module.ModuleError{
693			Path: r.modPath,
694			Err: &module.InvalidVersionError{
695				Version: version,
696				Err:     errors.New("syntax error"),
697			},
698		}
699	}
700	return r.revToRev(version), nil
701}
702
703// findDir locates the directory within the repo containing the module.
704//
705// If r.pathMajor is non-empty, this can be either r.codeDir or — if a go.mod
706// file exists — r.codeDir/r.pathMajor[1:].
707func (r *codeRepo) findDir(version string) (rev, dir string, gomod []byte, err error) {
708	rev, err = r.versionToRev(version)
709	if err != nil {
710		return "", "", nil, err
711	}
712
713	// Load info about go.mod but delay consideration
714	// (except I/O error) until we rule out v2/go.mod.
715	file1 := path.Join(r.codeDir, "go.mod")
716	gomod1, err1 := r.code.ReadFile(rev, file1, codehost.MaxGoMod)
717	if err1 != nil && !os.IsNotExist(err1) {
718		return "", "", nil, fmt.Errorf("reading %s/%s at revision %s: %v", r.pathPrefix, file1, rev, err1)
719	}
720	mpath1 := modfile.ModulePath(gomod1)
721	found1 := err1 == nil && (isMajor(mpath1, r.pathMajor) || r.canReplaceMismatchedVersionDueToBug(mpath1))
722
723	var file2 string
724	if r.pathMajor != "" && r.codeRoot != r.modPath && !strings.HasPrefix(r.pathMajor, ".") {
725		// Suppose pathMajor is "/v2".
726		// Either go.mod should claim v2 and v2/go.mod should not exist,
727		// or v2/go.mod should exist and claim v2. Not both.
728		// Note that we don't check the full path, just the major suffix,
729		// because of replacement modules. This might be a fork of
730		// the real module, found at a different path, usable only in
731		// a replace directive.
732		dir2 := path.Join(r.codeDir, r.pathMajor[1:])
733		file2 = path.Join(dir2, "go.mod")
734		gomod2, err2 := r.code.ReadFile(rev, file2, codehost.MaxGoMod)
735		if err2 != nil && !os.IsNotExist(err2) {
736			return "", "", nil, fmt.Errorf("reading %s/%s at revision %s: %v", r.pathPrefix, file2, rev, err2)
737		}
738		mpath2 := modfile.ModulePath(gomod2)
739		found2 := err2 == nil && isMajor(mpath2, r.pathMajor)
740
741		if found1 && found2 {
742			return "", "", nil, fmt.Errorf("%s/%s and ...%s/go.mod both have ...%s module paths at revision %s", r.pathPrefix, file1, r.pathMajor, r.pathMajor, rev)
743		}
744		if found2 {
745			return rev, dir2, gomod2, nil
746		}
747		if err2 == nil {
748			if mpath2 == "" {
749				return "", "", nil, fmt.Errorf("%s/%s is missing module path at revision %s", r.pathPrefix, file2, rev)
750			}
751			return "", "", nil, fmt.Errorf("%s/%s has non-...%s module path %q at revision %s", r.pathPrefix, file2, r.pathMajor, mpath2, rev)
752		}
753	}
754
755	// Not v2/go.mod, so it's either go.mod or nothing. Which is it?
756	if found1 {
757		// Explicit go.mod with matching major version ok.
758		return rev, r.codeDir, gomod1, nil
759	}
760	if err1 == nil {
761		// Explicit go.mod with non-matching major version disallowed.
762		suffix := ""
763		if file2 != "" {
764			suffix = fmt.Sprintf(" (and ...%s/go.mod does not exist)", r.pathMajor)
765		}
766		if mpath1 == "" {
767			return "", "", nil, fmt.Errorf("%s is missing module path%s at revision %s", file1, suffix, rev)
768		}
769		if r.pathMajor != "" { // ".v1", ".v2" for gopkg.in
770			return "", "", nil, fmt.Errorf("%s has non-...%s module path %q%s at revision %s", file1, r.pathMajor, mpath1, suffix, rev)
771		}
772		if _, _, ok := module.SplitPathVersion(mpath1); !ok {
773			return "", "", nil, fmt.Errorf("%s has malformed module path %q%s at revision %s", file1, mpath1, suffix, rev)
774		}
775		return "", "", nil, fmt.Errorf("%s has post-%s module path %q%s at revision %s", file1, semver.Major(version), mpath1, suffix, rev)
776	}
777
778	if r.codeDir == "" && (r.pathMajor == "" || strings.HasPrefix(r.pathMajor, ".")) {
779		// Implicit go.mod at root of repo OK for v0/v1 and for gopkg.in.
780		return rev, "", nil, nil
781	}
782
783	// Implicit go.mod below root of repo or at v2+ disallowed.
784	// Be clear about possibility of using either location for v2+.
785	if file2 != "" {
786		return "", "", nil, fmt.Errorf("missing %s/go.mod and ...%s/go.mod at revision %s", r.pathPrefix, r.pathMajor, rev)
787	}
788	return "", "", nil, fmt.Errorf("missing %s/go.mod at revision %s", r.pathPrefix, rev)
789}
790
791// isMajor reports whether the versions allowed for mpath are compatible with
792// the major version(s) implied by pathMajor, or false if mpath has an invalid
793// version suffix.
794func isMajor(mpath, pathMajor string) bool {
795	if mpath == "" {
796		// If we don't have a path, we don't know what version(s) it is compatible with.
797		return false
798	}
799	_, mpathMajor, ok := module.SplitPathVersion(mpath)
800	if !ok {
801		// An invalid module path is not compatible with any version.
802		return false
803	}
804	if pathMajor == "" {
805		// All of the valid versions for a gopkg.in module that requires major
806		// version v0 or v1 are compatible with the "v0 or v1" implied by an empty
807		// pathMajor.
808		switch module.PathMajorPrefix(mpathMajor) {
809		case "", "v0", "v1":
810			return true
811		default:
812			return false
813		}
814	}
815	if mpathMajor == "" {
816		// Even if pathMajor is ".v0" or ".v1", we can't be sure that a module
817		// without a suffix is tagged appropriately. Besides, we don't expect clones
818		// of non-gopkg.in modules to have gopkg.in paths, so a non-empty,
819		// non-gopkg.in mpath is probably the wrong module for any such pathMajor
820		// anyway.
821		return false
822	}
823	// If both pathMajor and mpathMajor are non-empty, then we only care that they
824	// have the same major-version validation rules. A clone fetched via a /v2
825	// path might replace a module with path gopkg.in/foo.v2-unstable, and that's
826	// ok.
827	return pathMajor[1:] == mpathMajor[1:]
828}
829
830// canReplaceMismatchedVersionDueToBug reports whether versions of r
831// could replace versions of mpath with otherwise-mismatched major versions
832// due to a historical bug in the Go command (golang.org/issue/34254).
833func (r *codeRepo) canReplaceMismatchedVersionDueToBug(mpath string) bool {
834	// The bug caused us to erroneously accept unversioned paths as replacements
835	// for versioned gopkg.in paths.
836	unversioned := r.pathMajor == ""
837	replacingGopkgIn := strings.HasPrefix(mpath, "gopkg.in/")
838	return unversioned && replacingGopkgIn
839}
840
841func (r *codeRepo) GoMod(version string) (data []byte, err error) {
842	if version != module.CanonicalVersion(version) {
843		return nil, fmt.Errorf("version %s is not canonical", version)
844	}
845
846	if IsPseudoVersion(version) {
847		// findDir ignores the metadata encoded in a pseudo-version,
848		// only using the revision at the end.
849		// Invoke Stat to verify the metadata explicitly so we don't return
850		// a bogus file for an invalid version.
851		_, err := r.Stat(version)
852		if err != nil {
853			return nil, err
854		}
855	}
856
857	rev, dir, gomod, err := r.findDir(version)
858	if err != nil {
859		return nil, err
860	}
861	if gomod != nil {
862		return gomod, nil
863	}
864	data, err = r.code.ReadFile(rev, path.Join(dir, "go.mod"), codehost.MaxGoMod)
865	if err != nil {
866		if os.IsNotExist(err) {
867			return r.legacyGoMod(rev, dir), nil
868		}
869		return nil, err
870	}
871	return data, nil
872}
873
874func (r *codeRepo) legacyGoMod(rev, dir string) []byte {
875	// We used to try to build a go.mod reflecting pre-existing
876	// package management metadata files, but the conversion
877	// was inherently imperfect (because those files don't have
878	// exactly the same semantics as go.mod) and, when done
879	// for dependencies in the middle of a build, impossible to
880	// correct. So we stopped.
881	// Return a fake go.mod that simply declares the module path.
882	return []byte(fmt.Sprintf("module %s\n", modfile.AutoQuote(r.modPath)))
883}
884
885func (r *codeRepo) modPrefix(rev string) string {
886	return r.modPath + "@" + rev
887}
888
889func (r *codeRepo) retractedVersions() (func(string) bool, error) {
890	versions, err := r.Versions("")
891	if err != nil {
892		return nil, err
893	}
894
895	for i, v := range versions {
896		if strings.HasSuffix(v, "+incompatible") {
897			versions = versions[:i]
898			break
899		}
900	}
901	if len(versions) == 0 {
902		return func(string) bool { return false }, nil
903	}
904
905	var highest string
906	for i := len(versions) - 1; i >= 0; i-- {
907		v := versions[i]
908		if semver.Prerelease(v) == "" {
909			highest = v
910			break
911		}
912	}
913	if highest == "" {
914		highest = versions[len(versions)-1]
915	}
916
917	data, err := r.GoMod(highest)
918	if err != nil {
919		return nil, err
920	}
921	f, err := modfile.ParseLax("go.mod", data, nil)
922	if err != nil {
923		return nil, err
924	}
925	retractions := make([]modfile.VersionInterval, len(f.Retract))
926	for _, r := range f.Retract {
927		retractions = append(retractions, r.VersionInterval)
928	}
929
930	return func(v string) bool {
931		for _, r := range retractions {
932			if semver.Compare(r.Low, v) <= 0 && semver.Compare(v, r.High) <= 0 {
933				return true
934			}
935		}
936		return false
937	}, nil
938}
939
940func (r *codeRepo) Zip(dst io.Writer, version string) error {
941	if version != module.CanonicalVersion(version) {
942		return fmt.Errorf("version %s is not canonical", version)
943	}
944
945	if IsPseudoVersion(version) {
946		// findDir ignores the metadata encoded in a pseudo-version,
947		// only using the revision at the end.
948		// Invoke Stat to verify the metadata explicitly so we don't return
949		// a bogus file for an invalid version.
950		_, err := r.Stat(version)
951		if err != nil {
952			return err
953		}
954	}
955
956	rev, subdir, _, err := r.findDir(version)
957	if err != nil {
958		return err
959	}
960	dl, err := r.code.ReadZip(rev, subdir, codehost.MaxZipFile)
961	if err != nil {
962		return err
963	}
964	defer dl.Close()
965	subdir = strings.Trim(subdir, "/")
966
967	// Spool to local file.
968	f, err := os.CreateTemp("", "go-codehost-")
969	if err != nil {
970		dl.Close()
971		return err
972	}
973	defer os.Remove(f.Name())
974	defer f.Close()
975	maxSize := int64(codehost.MaxZipFile)
976	lr := &io.LimitedReader{R: dl, N: maxSize + 1}
977	if _, err := io.Copy(f, lr); err != nil {
978		dl.Close()
979		return err
980	}
981	dl.Close()
982	if lr.N <= 0 {
983		return fmt.Errorf("downloaded zip file too large")
984	}
985	size := (maxSize + 1) - lr.N
986	if _, err := f.Seek(0, 0); err != nil {
987		return err
988	}
989
990	// Translate from zip file we have to zip file we want.
991	zr, err := zip.NewReader(f, size)
992	if err != nil {
993		return err
994	}
995
996	var files []modzip.File
997	if subdir != "" {
998		subdir += "/"
999	}
1000	haveLICENSE := false
1001	topPrefix := ""
1002	for _, zf := range zr.File {
1003		if topPrefix == "" {
1004			i := strings.Index(zf.Name, "/")
1005			if i < 0 {
1006				return fmt.Errorf("missing top-level directory prefix")
1007			}
1008			topPrefix = zf.Name[:i+1]
1009		}
1010		if !strings.HasPrefix(zf.Name, topPrefix) {
1011			return fmt.Errorf("zip file contains more than one top-level directory")
1012		}
1013		name := strings.TrimPrefix(zf.Name, topPrefix)
1014		if !strings.HasPrefix(name, subdir) {
1015			continue
1016		}
1017		name = strings.TrimPrefix(name, subdir)
1018		if name == "" || strings.HasSuffix(name, "/") {
1019			continue
1020		}
1021		files = append(files, zipFile{name: name, f: zf})
1022		if name == "LICENSE" {
1023			haveLICENSE = true
1024		}
1025	}
1026
1027	if !haveLICENSE && subdir != "" {
1028		data, err := r.code.ReadFile(rev, "LICENSE", codehost.MaxLICENSE)
1029		if err == nil {
1030			files = append(files, dataFile{name: "LICENSE", data: data})
1031		}
1032	}
1033
1034	return modzip.Create(dst, module.Version{Path: r.modPath, Version: version}, files)
1035}
1036
// zipFile presents an entry of an existing zip archive under a different
// path. It provides the Path, Lstat, and Open methods needed to be stored in
// a []modzip.File.
type zipFile struct {
	name string    // path reported by Path
	f    *zip.File // underlying archive entry
}

// Path returns the path under which the entry is exposed.
func (z zipFile) Path() string {
	return z.name
}

// Lstat returns the file info recorded in the underlying zip entry.
// The error is always nil.
func (z zipFile) Lstat() (fs.FileInfo, error) {
	info := z.f.FileInfo()
	return info, nil
}

// Open opens the underlying zip entry for reading.
func (z zipFile) Open() (io.ReadCloser, error) {
	return z.f.Open()
}
1045
// dataFile is an in-memory file: a path paired with its complete contents.
// It provides the Path, Lstat, and Open methods needed to be stored in a
// []modzip.File.
type dataFile struct {
	name string // path reported by Path
	data []byte // full file contents
}

// Path returns the file's path.
func (d dataFile) Path() string {
	return d.name
}

// Lstat returns file info synthesized from the in-memory data.
// The error is always nil.
func (d dataFile) Lstat() (fs.FileInfo, error) {
	info := dataFileInfo{f: d}
	return info, nil
}

// Open returns a reader over the file's contents whose Close does nothing.
func (d dataFile) Open() (io.ReadCloser, error) {
	contents := bytes.NewReader(d.data)
	return io.NopCloser(contents), nil
}

// dataFileInfo implements the fs.FileInfo methods for a dataFile.
type dataFileInfo struct {
	f dataFile
}

// Name returns the last element of the file's path.
func (info dataFileInfo) Name() string {
	return path.Base(info.f.name)
}

// Size returns the length of the file's contents in bytes.
func (info dataFileInfo) Size() int64 {
	return int64(len(info.f.data))
}

// Mode always reports a regular file with permission bits 0644.
func (info dataFileInfo) Mode() fs.FileMode {
	return fs.FileMode(0644)
}

// ModTime always returns the zero time.
func (info dataFileInfo) ModTime() time.Time {
	var zero time.Time
	return zero
}

// IsDir always reports false.
func (info dataFileInfo) IsDir() bool {
	return false
}

// Sys always returns nil.
func (info dataFileInfo) Sys() interface{} {
	return nil
}
1067
// hasPathPrefix reports whether the slash-separated path s begins with the
// path elements in prefix. A match must end on an element boundary: s either
// equals prefix, or continues with a '/' immediately after it (a prefix that
// itself ends in '/' already supplies that boundary).
func hasPathPrefix(s, prefix string) bool {
	if len(s) < len(prefix) {
		return false
	}
	if len(s) == len(prefix) {
		return s == prefix
	}
	// From here on s is strictly longer than prefix.
	if strings.HasSuffix(prefix, "/") {
		return strings.HasPrefix(s, prefix)
	}
	return strings.HasPrefix(s, prefix) && s[len(prefix)] == '/'
}
1083