1// Copyright 2018 The Go Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style
3// license that can be found in the LICENSE file.
4
5package modfetch
6
7import (
8	"archive/zip"
9	"fmt"
10	"io"
11	"io/ioutil"
12	"os"
13	"path"
14	"strings"
15
16	"cmd/go/internal/modfetch/codehost"
17	"cmd/go/internal/modfile"
18	"cmd/go/internal/module"
19	"cmd/go/internal/semver"
20)
21
// A codeRepo implements modfetch.Repo using an underlying codehost.Repo.
type codeRepo struct {
	// modPath is the full module path, including any major version suffix
	// (see pathMajor).
	modPath string

	// code is the repository containing this module.
	code codehost.Repo
	// codeRoot is the import path at the root of code.
	codeRoot string
	// codeDir is the directory (relative to root) at which we expect to find the module.
	// If pathMajor is non-empty and does not start with ".", and codeRoot is
	// not the full modPath, then we look in both codeDir and
	// codeDir/pathMajor[1:] (for example both "bar" and "bar/v2").
	codeDir string

	// pathMajor is the suffix of modPath that indicates its major version,
	// or the empty string if modPath is at major version 0 or 1.
	//
	// pathMajor is typically of the form "/vN", but possibly ".vN", or
	// ".vN-unstable" for modules resolved using gopkg.in.
	pathMajor string
	// pathPrefix is the prefix of modPath that excludes pathMajor
	// (or all of modPath when codeRoot is the complete module path).
	// It is used only for reporting, such as in error messages.
	pathPrefix string

	// pseudoMajor is the major version prefix to use when generating
	// pseudo-versions for this module, derived from the module path.
	//
	// TODO(golang.org/issue/29262): We can't distinguish v0 from v1 using the
	// path alone: we have to compute it by examining the tags at a particular
	// revision.
	pseudoMajor string
}
53
54// newCodeRepo returns a Repo that reads the source code for the module with the
55// given path, from the repo stored in code, with the root of the repo
56// containing the path given by codeRoot.
57func newCodeRepo(code codehost.Repo, codeRoot, path string) (Repo, error) {
58	if !hasPathPrefix(path, codeRoot) {
59		return nil, fmt.Errorf("mismatched repo: found %s for %s", codeRoot, path)
60	}
61	pathPrefix, pathMajor, ok := module.SplitPathVersion(path)
62	if !ok {
63		return nil, fmt.Errorf("invalid module path %q", path)
64	}
65	if codeRoot == path {
66		pathPrefix = path
67	}
68	pseudoMajor := "v0"
69	if pathMajor != "" {
70		pseudoMajor = pathMajor[1:]
71	}
72
73	// Compute codeDir = bar, the subdirectory within the repo
74	// corresponding to the module root.
75	//
76	// At this point we might have:
77	//	path = github.com/rsc/foo/bar/v2
78	//	codeRoot = github.com/rsc/foo
79	//	pathPrefix = github.com/rsc/foo/bar
80	//	pathMajor = /v2
81	//	pseudoMajor = v2
82	//
83	// which gives
84	//	codeDir = bar
85	//
86	// We know that pathPrefix is a prefix of path, and codeRoot is a prefix of
87	// path, but codeRoot may or may not be a prefix of pathPrefix, because
88	// codeRoot may be the entire path (in which case codeDir should be empty).
89	// That occurs in two situations.
90	//
91	// One is when a go-import meta tag resolves the complete module path,
92	// including the pathMajor suffix:
93	//	path = nanomsg.org/go/mangos/v2
94	//	codeRoot = nanomsg.org/go/mangos/v2
95	//	pathPrefix = nanomsg.org/go/mangos
96	//	pathMajor = /v2
97	//	pseudoMajor = v2
98	//
99	// The other is similar: for gopkg.in only, the major version is encoded
100	// with a dot rather than a slash, and thus can't be in a subdirectory.
101	//	path = gopkg.in/yaml.v2
102	//	codeRoot = gopkg.in/yaml.v2
103	//	pathPrefix = gopkg.in/yaml
104	//	pathMajor = .v2
105	//	pseudoMajor = v2
106	//
107	codeDir := ""
108	if codeRoot != path {
109		if !hasPathPrefix(pathPrefix, codeRoot) {
110			return nil, fmt.Errorf("repository rooted at %s cannot contain module %s", codeRoot, path)
111		}
112		codeDir = strings.Trim(pathPrefix[len(codeRoot):], "/")
113	}
114
115	r := &codeRepo{
116		modPath:     path,
117		code:        code,
118		codeRoot:    codeRoot,
119		codeDir:     codeDir,
120		pathPrefix:  pathPrefix,
121		pathMajor:   pathMajor,
122		pseudoMajor: pseudoMajor,
123	}
124
125	return r, nil
126}
127
// ModulePath returns the module path this repo was created for,
// including any major version suffix.
func (r *codeRepo) ModulePath() string {
	return r.modPath
}
131
132func (r *codeRepo) Versions(prefix string) ([]string, error) {
133	// Special case: gopkg.in/macaroon-bakery.v2-unstable
134	// does not use the v2 tags (those are for macaroon-bakery.v2).
135	// It has no possible tags at all.
136	if strings.HasPrefix(r.modPath, "gopkg.in/") && strings.HasSuffix(r.modPath, "-unstable") {
137		return nil, nil
138	}
139
140	p := prefix
141	if r.codeDir != "" {
142		p = r.codeDir + "/" + p
143	}
144	tags, err := r.code.Tags(p)
145	if err != nil {
146		return nil, err
147	}
148
149	list := []string{}
150	var incompatible []string
151	for _, tag := range tags {
152		if !strings.HasPrefix(tag, p) {
153			continue
154		}
155		v := tag
156		if r.codeDir != "" {
157			v = v[len(r.codeDir)+1:]
158		}
159		if v == "" || v != module.CanonicalVersion(v) || IsPseudoVersion(v) {
160			continue
161		}
162		if !module.MatchPathMajor(v, r.pathMajor) {
163			if r.codeDir == "" && r.pathMajor == "" && semver.Major(v) > "v1" {
164				incompatible = append(incompatible, v)
165			}
166			continue
167		}
168		list = append(list, v)
169	}
170
171	if len(incompatible) > 0 {
172		// Check for later versions that were created not following semantic import versioning,
173		// as indicated by the absence of a go.mod file. Those versions can be addressed
174		// by referring to them with a +incompatible suffix, as in v17.0.0+incompatible.
175		files, err := r.code.ReadFileRevs(incompatible, "go.mod", codehost.MaxGoMod)
176		if err != nil {
177			return nil, err
178		}
179		for _, rev := range incompatible {
180			f := files[rev]
181			if os.IsNotExist(f.Err) {
182				list = append(list, rev+"+incompatible")
183			}
184		}
185	}
186
187	SortVersions(list)
188	return list, nil
189}
190
191func (r *codeRepo) Stat(rev string) (*RevInfo, error) {
192	if rev == "latest" {
193		return r.Latest()
194	}
195	codeRev := r.revToRev(rev)
196	info, err := r.code.Stat(codeRev)
197	if err != nil {
198		return nil, err
199	}
200	return r.convert(info, rev)
201}
202
203func (r *codeRepo) Latest() (*RevInfo, error) {
204	info, err := r.code.Latest()
205	if err != nil {
206		return nil, err
207	}
208	return r.convert(info, "")
209}
210
211func (r *codeRepo) convert(info *codehost.RevInfo, statVers string) (*RevInfo, error) {
212	info2 := &RevInfo{
213		Name:  info.Name,
214		Short: info.Short,
215		Time:  info.Time,
216	}
217
218	// Determine version.
219	if module.CanonicalVersion(statVers) == statVers && module.MatchPathMajor(statVers, r.pathMajor) {
220		// The original call was repo.Stat(statVers), and requestedVersion is OK, so use it.
221		info2.Version = statVers
222	} else {
223		// Otherwise derive a version from a code repo tag.
224		// Tag must have a prefix matching codeDir.
225		p := ""
226		if r.codeDir != "" {
227			p = r.codeDir + "/"
228		}
229
230		// If this is a plain tag (no dir/ prefix)
231		// and the module path is unversioned,
232		// and if the underlying file tree has no go.mod,
233		// then allow using the tag with a +incompatible suffix.
234		canUseIncompatible := false
235		if r.codeDir == "" && r.pathMajor == "" {
236			_, errGoMod := r.code.ReadFile(info.Name, "go.mod", codehost.MaxGoMod)
237			if errGoMod != nil {
238				canUseIncompatible = true
239			}
240		}
241
242		tagToVersion := func(v string) string {
243			if !strings.HasPrefix(v, p) {
244				return ""
245			}
246			v = v[len(p):]
247			if module.CanonicalVersion(v) != v || IsPseudoVersion(v) {
248				return ""
249			}
250			if module.MatchPathMajor(v, r.pathMajor) {
251				return v
252			}
253			if canUseIncompatible {
254				return v + "+incompatible"
255			}
256			return ""
257		}
258
259		// If info.Version is OK, use it.
260		if v := tagToVersion(info.Version); v != "" {
261			info2.Version = v
262		} else {
263			// Otherwise look through all known tags for latest in semver ordering.
264			for _, tag := range info.Tags {
265				if v := tagToVersion(tag); v != "" && semver.Compare(info2.Version, v) < 0 {
266					info2.Version = v
267				}
268			}
269			// Otherwise make a pseudo-version.
270			if info2.Version == "" {
271				tag, _ := r.code.RecentTag(statVers, p)
272				v = tagToVersion(tag)
273				// TODO: Check that v is OK for r.pseudoMajor or else is OK for incompatible.
274				info2.Version = PseudoVersion(r.pseudoMajor, v, info.Time, info.Short)
275			}
276		}
277	}
278
279	// Do not allow a successful stat of a pseudo-version for a subdirectory
280	// unless the subdirectory actually does have a go.mod.
281	if IsPseudoVersion(info2.Version) && r.codeDir != "" {
282		_, _, _, err := r.findDir(info2.Version)
283		if err != nil {
284			// TODO: It would be nice to return an error like "not a module".
285			// Right now we return "missing go.mod", which is a little confusing.
286			return nil, err
287		}
288	}
289
290	return info2, nil
291}
292
293func (r *codeRepo) revToRev(rev string) string {
294	if semver.IsValid(rev) {
295		if IsPseudoVersion(rev) {
296			r, _ := PseudoVersionRev(rev)
297			return r
298		}
299		if semver.Build(rev) == "+incompatible" {
300			rev = rev[:len(rev)-len("+incompatible")]
301		}
302		if r.codeDir == "" {
303			return rev
304		}
305		return r.codeDir + "/" + rev
306	}
307	return rev
308}
309
310func (r *codeRepo) versionToRev(version string) (rev string, err error) {
311	if !semver.IsValid(version) {
312		return "", fmt.Errorf("malformed semantic version %q", version)
313	}
314	return r.revToRev(version), nil
315}
316
// findDir locates the directory within the repository that holds the module
// at the given version.
//
// It returns the repository revision for version, the directory (relative to
// the repository root) containing the module, and the contents of that
// directory's go.mod file. gomod is nil when the module is accepted without an
// explicit go.mod, which is allowed only at the repository root for v0/v1 and
// gopkg.in module paths.
//
// For a "/vN" module path that is not itself the repository root, two
// locations are candidates: codeDir/go.mod and codeDir/vN/go.mod. Exactly one
// of them must declare a module path with the matching major version suffix.
// Any other arrangement is reported as an error.
func (r *codeRepo) findDir(version string) (rev, dir string, gomod []byte, err error) {
	rev, err = r.versionToRev(version)
	if err != nil {
		return "", "", nil, err
	}

	// Load info about go.mod but delay consideration
	// (except I/O error) until we rule out v2/go.mod.
	file1 := path.Join(r.codeDir, "go.mod")
	gomod1, err1 := r.code.ReadFile(rev, file1, codehost.MaxGoMod)
	if err1 != nil && !os.IsNotExist(err1) {
		return "", "", nil, fmt.Errorf("reading %s/%s at revision %s: %v", r.pathPrefix, file1, rev, err1)
	}
	mpath1 := modfile.ModulePath(gomod1)
	found1 := err1 == nil && isMajor(mpath1, r.pathMajor)

	// file2 stays empty unless the vN subdirectory is a candidate location;
	// the error messages below use that to decide whether to mention it.
	var file2 string
	if r.pathMajor != "" && r.codeRoot != r.modPath && !strings.HasPrefix(r.pathMajor, ".") {
		// Suppose pathMajor is "/v2".
		// Either go.mod should claim v2 and v2/go.mod should not exist,
		// or v2/go.mod should exist and claim v2. Not both.
		// Note that we don't check the full path, just the major suffix,
		// because of replacement modules. This might be a fork of
		// the real module, found at a different path, usable only in
		// a replace directive.
		//
		// TODO(bcmills): This doesn't seem right. Investigate further.
		// (Notably: why can't we replace foo/v2 with fork-of-foo/v3?)
		dir2 := path.Join(r.codeDir, r.pathMajor[1:])
		file2 = path.Join(dir2, "go.mod")
		gomod2, err2 := r.code.ReadFile(rev, file2, codehost.MaxGoMod)
		if err2 != nil && !os.IsNotExist(err2) {
			return "", "", nil, fmt.Errorf("reading %s/%s at revision %s: %v", r.pathPrefix, file2, rev, err2)
		}
		mpath2 := modfile.ModulePath(gomod2)
		found2 := err2 == nil && isMajor(mpath2, r.pathMajor)

		if found1 && found2 {
			return "", "", nil, fmt.Errorf("%s/%s and ...%s/go.mod both have ...%s module paths at revision %s", r.pathPrefix, file1, r.pathMajor, r.pathMajor, rev)
		}
		if found2 {
			return rev, dir2, gomod2, nil
		}
		if err2 == nil {
			// v2/go.mod exists but does not declare a matching module path.
			if mpath2 == "" {
				return "", "", nil, fmt.Errorf("%s/%s is missing module path at revision %s", r.pathPrefix, file2, rev)
			}
			return "", "", nil, fmt.Errorf("%s/%s has non-...%s module path %q at revision %s", r.pathPrefix, file2, r.pathMajor, mpath2, rev)
		}
	}

	// Not v2/go.mod, so it's either go.mod or nothing. Which is it?
	if found1 {
		// Explicit go.mod with matching module path OK.
		return rev, r.codeDir, gomod1, nil
	}
	if err1 == nil {
		// Explicit go.mod with non-matching module path disallowed.
		suffix := ""
		if file2 != "" {
			suffix = fmt.Sprintf(" (and ...%s/go.mod does not exist)", r.pathMajor)
		}
		if mpath1 == "" {
			return "", "", nil, fmt.Errorf("%s is missing module path%s at revision %s", file1, suffix, rev)
		}
		if r.pathMajor != "" { // ".v1", ".v2" for gopkg.in
			return "", "", nil, fmt.Errorf("%s has non-...%s module path %q%s at revision %s", file1, r.pathMajor, mpath1, suffix, rev)
		}
		return "", "", nil, fmt.Errorf("%s has post-%s module path %q%s at revision %s", file1, semver.Major(version), mpath1, suffix, rev)
	}

	if r.codeDir == "" && (r.pathMajor == "" || strings.HasPrefix(r.pathMajor, ".")) {
		// Implicit go.mod at root of repo OK for v0/v1 and for gopkg.in.
		return rev, "", nil, nil
	}

	// Implicit go.mod below root of repo or at v2+ disallowed.
	// Be clear about possibility of using either location for v2+.
	if file2 != "" {
		return "", "", nil, fmt.Errorf("missing %s/go.mod and ...%s/go.mod at revision %s", r.pathPrefix, r.pathMajor, rev)
	}
	return "", "", nil, fmt.Errorf("missing %s/go.mod at revision %s", r.pathPrefix, rev)
}
400
// isMajor reports whether the module path mpath is consistent with the major
// version suffix pathMajor.
//
// An empty mpath never matches. When pathMajor is empty, mpath must not end in
// a "/vN" element (N being one or more decimal digits). Otherwise pathMajor is
// ".v1", ".v2" (gopkg.in), or "/v2", "/v3" etc., and mpath must end with it.
func isMajor(mpath, pathMajor string) bool {
	switch {
	case mpath == "":
		return false
	case pathMajor != "":
		return strings.HasSuffix(mpath, pathMajor)
	}

	// Unversioned module: reject a path that ends in "/v" followed by digits.
	stem := strings.TrimRight(mpath, "0123456789")
	endsInDigits := len(stem) < len(mpath)
	return !(endsInDigits && strings.HasSuffix(stem, "/v"))
}
420
421func (r *codeRepo) GoMod(version string) (data []byte, err error) {
422	rev, dir, gomod, err := r.findDir(version)
423	if err != nil {
424		return nil, err
425	}
426	if gomod != nil {
427		return gomod, nil
428	}
429	data, err = r.code.ReadFile(rev, path.Join(dir, "go.mod"), codehost.MaxGoMod)
430	if err != nil {
431		if os.IsNotExist(err) {
432			return r.legacyGoMod(rev, dir), nil
433		}
434		return nil, err
435	}
436	return data, nil
437}
438
439func (r *codeRepo) legacyGoMod(rev, dir string) []byte {
440	// We used to try to build a go.mod reflecting pre-existing
441	// package management metadata files, but the conversion
442	// was inherently imperfect (because those files don't have
443	// exactly the same semantics as go.mod) and, when done
444	// for dependencies in the middle of a build, impossible to
445	// correct. So we stopped.
446	// Return a fake go.mod that simply declares the module path.
447	return []byte(fmt.Sprintf("module %s\n", modfile.AutoQuote(r.modPath)))
448}
449
450func (r *codeRepo) modPrefix(rev string) string {
451	return r.modPath + "@" + rev
452}
453
454func (r *codeRepo) Zip(dst io.Writer, version string) error {
455	rev, dir, _, err := r.findDir(version)
456	if err != nil {
457		return err
458	}
459	dl, actualDir, err := r.code.ReadZip(rev, dir, codehost.MaxZipFile)
460	if err != nil {
461		return err
462	}
463	defer dl.Close()
464	if actualDir != "" && !hasPathPrefix(dir, actualDir) {
465		return fmt.Errorf("internal error: downloading %v %v: dir=%q but actualDir=%q", r.modPath, rev, dir, actualDir)
466	}
467	subdir := strings.Trim(strings.TrimPrefix(dir, actualDir), "/")
468
469	// Spool to local file.
470	f, err := ioutil.TempFile("", "go-codehost-")
471	if err != nil {
472		dl.Close()
473		return err
474	}
475	defer os.Remove(f.Name())
476	defer f.Close()
477	maxSize := int64(codehost.MaxZipFile)
478	lr := &io.LimitedReader{R: dl, N: maxSize + 1}
479	if _, err := io.Copy(f, lr); err != nil {
480		dl.Close()
481		return err
482	}
483	dl.Close()
484	if lr.N <= 0 {
485		return fmt.Errorf("downloaded zip file too large")
486	}
487	size := (maxSize + 1) - lr.N
488	if _, err := f.Seek(0, 0); err != nil {
489		return err
490	}
491
492	// Translate from zip file we have to zip file we want.
493	zr, err := zip.NewReader(f, size)
494	if err != nil {
495		return err
496	}
497
498	zw := zip.NewWriter(dst)
499	if subdir != "" {
500		subdir += "/"
501	}
502	haveLICENSE := false
503	topPrefix := ""
504	haveGoMod := make(map[string]bool)
505	for _, zf := range zr.File {
506		if topPrefix == "" {
507			i := strings.Index(zf.Name, "/")
508			if i < 0 {
509				return fmt.Errorf("missing top-level directory prefix")
510			}
511			topPrefix = zf.Name[:i+1]
512		}
513		if !strings.HasPrefix(zf.Name, topPrefix) {
514			return fmt.Errorf("zip file contains more than one top-level directory")
515		}
516		dir, file := path.Split(zf.Name)
517		if file == "go.mod" {
518			haveGoMod[dir] = true
519		}
520	}
521	root := topPrefix + subdir
522	inSubmodule := func(name string) bool {
523		for {
524			dir, _ := path.Split(name)
525			if len(dir) <= len(root) {
526				return false
527			}
528			if haveGoMod[dir] {
529				return true
530			}
531			name = dir[:len(dir)-1]
532		}
533	}
534
535	for _, zf := range zr.File {
536		if !zf.FileInfo().Mode().IsRegular() {
537			// Skip symlinks (golang.org/issue/27093).
538			continue
539		}
540
541		if topPrefix == "" {
542			i := strings.Index(zf.Name, "/")
543			if i < 0 {
544				return fmt.Errorf("missing top-level directory prefix")
545			}
546			topPrefix = zf.Name[:i+1]
547		}
548		if strings.HasSuffix(zf.Name, "/") { // drop directory dummy entries
549			continue
550		}
551		if !strings.HasPrefix(zf.Name, topPrefix) {
552			return fmt.Errorf("zip file contains more than one top-level directory")
553		}
554		name := strings.TrimPrefix(zf.Name, topPrefix)
555		if !strings.HasPrefix(name, subdir) {
556			continue
557		}
558		if name == ".hg_archival.txt" {
559			// Inserted by hg archive.
560			// Not correct to drop from other version control systems, but too bad.
561			continue
562		}
563		name = strings.TrimPrefix(name, subdir)
564		if isVendoredPackage(name) {
565			continue
566		}
567		if inSubmodule(zf.Name) {
568			continue
569		}
570		base := path.Base(name)
571		if strings.ToLower(base) == "go.mod" && base != "go.mod" {
572			return fmt.Errorf("zip file contains %s, want all lower-case go.mod", zf.Name)
573		}
574		if name == "LICENSE" {
575			haveLICENSE = true
576		}
577		size := int64(zf.UncompressedSize64)
578		if size < 0 || maxSize < size {
579			return fmt.Errorf("module source tree too big")
580		}
581		maxSize -= size
582
583		rc, err := zf.Open()
584		if err != nil {
585			return err
586		}
587		w, err := zw.Create(r.modPrefix(version) + "/" + name)
588		lr := &io.LimitedReader{R: rc, N: size + 1}
589		if _, err := io.Copy(w, lr); err != nil {
590			return err
591		}
592		if lr.N <= 0 {
593			return fmt.Errorf("individual file too large")
594		}
595	}
596
597	if !haveLICENSE && subdir != "" {
598		data, err := r.code.ReadFile(rev, "LICENSE", codehost.MaxLICENSE)
599		if err == nil {
600			w, err := zw.Create(r.modPrefix(version) + "/LICENSE")
601			if err != nil {
602				return err
603			}
604			if _, err := w.Write(data); err != nil {
605				return err
606			}
607		}
608	}
609
610	return zw.Close()
611}
612
// hasPathPrefix reports whether the slash-separated path s begins with the
// path elements in prefix: s must either equal prefix or continue past it at
// an element boundary. If prefix already ends in a slash, any s that starts
// with it matches.
func hasPathPrefix(s, prefix string) bool {
	if len(s) < len(prefix) {
		return false
	}
	if len(s) == len(prefix) {
		return s == prefix
	}
	if strings.HasSuffix(prefix, "/") {
		return strings.HasPrefix(s, prefix)
	}
	// s is strictly longer: the byte right after the prefix must be a slash.
	return strings.HasPrefix(s, prefix) && s[len(prefix)] == '/'
}
628
// isVendoredPackage reports whether name, a slash-separated file path
// relative to the module root, should be treated as part of a vendored
// package. Zip uses it to leave such files out of the module archive.
//
// A path starting with "vendor/" counts only if a further directory follows,
// so files placed directly in the top-level vendor directory do not count.
//
// NOTE(review): for a nested "/vendor/" element the remainder is taken from a
// fixed offset, len("/vendor/"), rather than from just after the match. That
// remainder always contains a slash, so every such path is reported as
// vendored, including files directly inside a nested vendor directory. This
// looks unintended (see golang.org/issue/31562). It is preserved here because
// changing it would alter which files go into module zips, and presumably
// their hashes. Confirm before fixing.
func isVendoredPackage(name string) bool {
	var rest string
	switch {
	case strings.HasPrefix(name, "vendor/"):
		rest = name[len("vendor/"):]
	case strings.Contains(name, "/vendor/"):
		rest = name[len("/vendor/"):]
	default:
		return false
	}
	return strings.Contains(rest, "/")
}
640