1// Copyright 2019 The Go Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style
3// license that can be found in the LICENSE file.
4
5package imports
6
7import (
8	"bytes"
9	"context"
10	"encoding/json"
11	"fmt"
12	"io/ioutil"
13	"os"
14	"path"
15	"path/filepath"
16	"regexp"
17	"sort"
18	"strconv"
19	"strings"
20
21	"golang.org/x/mod/module"
22	"golang.org/x/tools/internal/gocommand"
23	"golang.org/x/tools/internal/gopathwalk"
24)
25
// ModuleResolver implements resolver for modules using the go command as little
// as feasible.
//
// The resolver is populated lazily by init, which records the modules in
// scope and the directory roots to walk. Information about walked directories
// is kept in two caches, moduleCacheCache and otherCache.
type ModuleResolver struct {
	// env provides the go environment, go command invocation, file matching
	// and logging used by the resolver.
	env            *ProcessEnv
	// moduleCacheDir is the module cache location computed by init: the
	// GOMODCACHE entry of env.Env if set, otherwise pkg/mod under the first
	// GOPATH entry.
	moduleCacheDir string
	dummyVendorMod *gocommand.ModuleJSON // If vendoring is enabled, the pseudo-module that represents the /vendor directory.
	// roots lists the directories to walk, in the order init added them.
	roots          []gopathwalk.Root
	scanSema       chan struct{} // scanSema prevents concurrent scans and guards scannedRoots.
	// scannedRoots records the roots that scan has already walked.
	scannedRoots   map[gopathwalk.Root]bool

	// initialized is set once init has completed successfully.
	initialized   bool
	// main is the main module, or nil if none was found.
	main          *gocommand.ModuleJSON
	modsByModPath []*gocommand.ModuleJSON // All modules, ordered by # of path components in module Path...
	modsByDir     []*gocommand.ModuleJSON // ...or Dir.

	// moduleCacheCache stores information about the module cache.
	// It survives both ClearForNewScan and ClearForNewMod.
	moduleCacheCache *dirInfoCache
	// otherCache stores information about directories whose root type is not
	// the module cache. ClearForNewScan replaces it with an empty cache.
	otherCache       *dirInfoCache
}
45
46func newModuleResolver(e *ProcessEnv) *ModuleResolver {
47	r := &ModuleResolver{
48		env:      e,
49		scanSema: make(chan struct{}, 1),
50	}
51	r.scanSema <- struct{}{}
52	return r
53}
54
55func (r *ModuleResolver) init() error {
56	if r.initialized {
57		return nil
58	}
59
60	goenv, err := r.env.goEnv()
61	if err != nil {
62		return err
63	}
64	inv := gocommand.Invocation{
65		BuildFlags: r.env.BuildFlags,
66		ModFlag:    r.env.ModFlag,
67		ModFile:    r.env.ModFile,
68		Env:        r.env.env(),
69		Logf:       r.env.Logf,
70		WorkingDir: r.env.WorkingDir,
71	}
72	mainMod, vendorEnabled, err := gocommand.VendorEnabled(context.TODO(), inv, r.env.GocmdRunner)
73	if err != nil {
74		return err
75	}
76
77	if mainMod != nil && vendorEnabled {
78		// Vendor mode is on, so all the non-Main modules are irrelevant,
79		// and we need to search /vendor for everything.
80		r.main = mainMod
81		r.dummyVendorMod = &gocommand.ModuleJSON{
82			Path: "",
83			Dir:  filepath.Join(mainMod.Dir, "vendor"),
84		}
85		r.modsByModPath = []*gocommand.ModuleJSON{mainMod, r.dummyVendorMod}
86		r.modsByDir = []*gocommand.ModuleJSON{mainMod, r.dummyVendorMod}
87	} else {
88		// Vendor mode is off, so run go list -m ... to find everything.
89		err := r.initAllMods()
90		// We expect an error when running outside of a module with
91		// GO111MODULE=on. Other errors are fatal.
92		if err != nil {
93			if errMsg := err.Error(); !strings.Contains(errMsg, "working directory is not part of a module") && !strings.Contains(errMsg, "go.mod file not found") {
94				return err
95			}
96		}
97	}
98
99	if gmc := r.env.Env["GOMODCACHE"]; gmc != "" {
100		r.moduleCacheDir = gmc
101	} else {
102		gopaths := filepath.SplitList(goenv["GOPATH"])
103		if len(gopaths) == 0 {
104			return fmt.Errorf("empty GOPATH")
105		}
106		r.moduleCacheDir = filepath.Join(gopaths[0], "/pkg/mod")
107	}
108
109	sort.Slice(r.modsByModPath, func(i, j int) bool {
110		count := func(x int) int {
111			return strings.Count(r.modsByModPath[x].Path, "/")
112		}
113		return count(j) < count(i) // descending order
114	})
115	sort.Slice(r.modsByDir, func(i, j int) bool {
116		count := func(x int) int {
117			return strings.Count(r.modsByDir[x].Dir, "/")
118		}
119		return count(j) < count(i) // descending order
120	})
121
122	r.roots = []gopathwalk.Root{
123		{filepath.Join(goenv["GOROOT"], "/src"), gopathwalk.RootGOROOT},
124	}
125	if r.main != nil {
126		r.roots = append(r.roots, gopathwalk.Root{r.main.Dir, gopathwalk.RootCurrentModule})
127	}
128	if vendorEnabled {
129		r.roots = append(r.roots, gopathwalk.Root{r.dummyVendorMod.Dir, gopathwalk.RootOther})
130	} else {
131		addDep := func(mod *gocommand.ModuleJSON) {
132			if mod.Replace == nil {
133				// This is redundant with the cache, but we'll skip it cheaply enough.
134				r.roots = append(r.roots, gopathwalk.Root{mod.Dir, gopathwalk.RootModuleCache})
135			} else {
136				r.roots = append(r.roots, gopathwalk.Root{mod.Dir, gopathwalk.RootOther})
137			}
138		}
139		// Walk dependent modules before scanning the full mod cache, direct deps first.
140		for _, mod := range r.modsByModPath {
141			if !mod.Indirect && !mod.Main {
142				addDep(mod)
143			}
144		}
145		for _, mod := range r.modsByModPath {
146			if mod.Indirect && !mod.Main {
147				addDep(mod)
148			}
149		}
150		r.roots = append(r.roots, gopathwalk.Root{r.moduleCacheDir, gopathwalk.RootModuleCache})
151	}
152
153	r.scannedRoots = map[gopathwalk.Root]bool{}
154	if r.moduleCacheCache == nil {
155		r.moduleCacheCache = &dirInfoCache{
156			dirs:      map[string]*directoryPackageInfo{},
157			listeners: map[*int]cacheListener{},
158		}
159	}
160	if r.otherCache == nil {
161		r.otherCache = &dirInfoCache{
162			dirs:      map[string]*directoryPackageInfo{},
163			listeners: map[*int]cacheListener{},
164		}
165	}
166	r.initialized = true
167	return nil
168}
169
170func (r *ModuleResolver) initAllMods() error {
171	stdout, err := r.env.invokeGo(context.TODO(), "list", "-m", "-e", "-json", "...")
172	if err != nil {
173		return err
174	}
175	for dec := json.NewDecoder(stdout); dec.More(); {
176		mod := &gocommand.ModuleJSON{}
177		if err := dec.Decode(mod); err != nil {
178			return err
179		}
180		if mod.Dir == "" {
181			if r.env.Logf != nil {
182				r.env.Logf("module %v has not been downloaded and will be ignored", mod.Path)
183			}
184			// Can't do anything with a module that's not downloaded.
185			continue
186		}
187		// golang/go#36193: the go command doesn't always clean paths.
188		mod.Dir = filepath.Clean(mod.Dir)
189		r.modsByModPath = append(r.modsByModPath, mod)
190		r.modsByDir = append(r.modsByDir, mod)
191		if mod.Main {
192			r.main = mod
193		}
194	}
195	return nil
196}
197
198func (r *ModuleResolver) ClearForNewScan() {
199	<-r.scanSema
200	r.scannedRoots = map[gopathwalk.Root]bool{}
201	r.otherCache = &dirInfoCache{
202		dirs:      map[string]*directoryPackageInfo{},
203		listeners: map[*int]cacheListener{},
204	}
205	r.scanSema <- struct{}{}
206}
207
208func (r *ModuleResolver) ClearForNewMod() {
209	<-r.scanSema
210	*r = ModuleResolver{
211		env:              r.env,
212		moduleCacheCache: r.moduleCacheCache,
213		otherCache:       r.otherCache,
214		scanSema:         r.scanSema,
215	}
216	r.init()
217	r.scanSema <- struct{}{}
218}
219
220// findPackage returns the module and directory that contains the package at
221// the given import path, or returns nil, "" if no module is in scope.
222func (r *ModuleResolver) findPackage(importPath string) (*gocommand.ModuleJSON, string) {
223	// This can't find packages in the stdlib, but that's harmless for all
224	// the existing code paths.
225	for _, m := range r.modsByModPath {
226		if !strings.HasPrefix(importPath, m.Path) {
227			continue
228		}
229		pathInModule := importPath[len(m.Path):]
230		pkgDir := filepath.Join(m.Dir, pathInModule)
231		if r.dirIsNestedModule(pkgDir, m) {
232			continue
233		}
234
235		if info, ok := r.cacheLoad(pkgDir); ok {
236			if loaded, err := info.reachedStatus(nameLoaded); loaded {
237				if err != nil {
238					continue // No package in this dir.
239				}
240				return m, pkgDir
241			}
242			if scanned, err := info.reachedStatus(directoryScanned); scanned && err != nil {
243				continue // Dir is unreadable, etc.
244			}
245			// This is slightly wrong: a directory doesn't have to have an
246			// importable package to count as a package for package-to-module
247			// resolution. package main or _test files should count but
248			// don't.
249			// TODO(heschi): fix this.
250			if _, err := r.cachePackageName(info); err == nil {
251				return m, pkgDir
252			}
253		}
254
255		// Not cached. Read the filesystem.
256		pkgFiles, err := ioutil.ReadDir(pkgDir)
257		if err != nil {
258			continue
259		}
260		// A module only contains a package if it has buildable go
261		// files in that directory. If not, it could be provided by an
262		// outer module. See #29736.
263		for _, fi := range pkgFiles {
264			if ok, _ := r.env.matchFile(pkgDir, fi.Name()); ok {
265				return m, pkgDir
266			}
267		}
268	}
269	return nil, ""
270}
271
272func (r *ModuleResolver) cacheLoad(dir string) (directoryPackageInfo, bool) {
273	if info, ok := r.moduleCacheCache.Load(dir); ok {
274		return info, ok
275	}
276	return r.otherCache.Load(dir)
277}
278
279func (r *ModuleResolver) cacheStore(info directoryPackageInfo) {
280	if info.rootType == gopathwalk.RootModuleCache {
281		r.moduleCacheCache.Store(info.dir, info)
282	} else {
283		r.otherCache.Store(info.dir, info)
284	}
285}
286
287func (r *ModuleResolver) cacheKeys() []string {
288	return append(r.moduleCacheCache.Keys(), r.otherCache.Keys()...)
289}
290
291// cachePackageName caches the package name for a dir already in the cache.
292func (r *ModuleResolver) cachePackageName(info directoryPackageInfo) (string, error) {
293	if info.rootType == gopathwalk.RootModuleCache {
294		return r.moduleCacheCache.CachePackageName(info)
295	}
296	return r.otherCache.CachePackageName(info)
297}
298
299func (r *ModuleResolver) cacheExports(ctx context.Context, env *ProcessEnv, info directoryPackageInfo) (string, []string, error) {
300	if info.rootType == gopathwalk.RootModuleCache {
301		return r.moduleCacheCache.CacheExports(ctx, env, info)
302	}
303	return r.otherCache.CacheExports(ctx, env, info)
304}
305
306// findModuleByDir returns the module that contains dir, or nil if no such
307// module is in scope.
308func (r *ModuleResolver) findModuleByDir(dir string) *gocommand.ModuleJSON {
309	// This is quite tricky and may not be correct. dir could be:
310	// - a package in the main module.
311	// - a replace target underneath the main module's directory.
312	//    - a nested module in the above.
313	// - a replace target somewhere totally random.
314	//    - a nested module in the above.
315	// - in the mod cache.
316	// - in /vendor/ in -mod=vendor mode.
317	//    - nested module? Dunno.
318	// Rumor has it that replace targets cannot contain other replace targets.
319	for _, m := range r.modsByDir {
320		if !strings.HasPrefix(dir, m.Dir) {
321			continue
322		}
323
324		if r.dirIsNestedModule(dir, m) {
325			continue
326		}
327
328		return m
329	}
330	return nil
331}
332
333// dirIsNestedModule reports if dir is contained in a nested module underneath
334// mod, not actually in mod.
335func (r *ModuleResolver) dirIsNestedModule(dir string, mod *gocommand.ModuleJSON) bool {
336	if !strings.HasPrefix(dir, mod.Dir) {
337		return false
338	}
339	if r.dirInModuleCache(dir) {
340		// Nested modules in the module cache are pruned,
341		// so it cannot be a nested module.
342		return false
343	}
344	if mod != nil && mod == r.dummyVendorMod {
345		// The /vendor pseudomodule is flattened and doesn't actually count.
346		return false
347	}
348	modDir, _ := r.modInfo(dir)
349	if modDir == "" {
350		return false
351	}
352	return modDir != mod.Dir
353}
354
355func (r *ModuleResolver) modInfo(dir string) (modDir string, modName string) {
356	readModName := func(modFile string) string {
357		modBytes, err := ioutil.ReadFile(modFile)
358		if err != nil {
359			return ""
360		}
361		return modulePath(modBytes)
362	}
363
364	if r.dirInModuleCache(dir) {
365		if matches := modCacheRegexp.FindStringSubmatch(dir); len(matches) == 3 {
366			index := strings.Index(dir, matches[1]+"@"+matches[2])
367			modDir := filepath.Join(dir[:index], matches[1]+"@"+matches[2])
368			return modDir, readModName(filepath.Join(modDir, "go.mod"))
369		}
370	}
371	for {
372		if info, ok := r.cacheLoad(dir); ok {
373			return info.moduleDir, info.moduleName
374		}
375		f := filepath.Join(dir, "go.mod")
376		info, err := os.Stat(f)
377		if err == nil && !info.IsDir() {
378			return dir, readModName(f)
379		}
380
381		d := filepath.Dir(dir)
382		if len(d) >= len(dir) {
383			return "", "" // reached top of file system, no go.mod
384		}
385		dir = d
386	}
387}
388
389func (r *ModuleResolver) dirInModuleCache(dir string) bool {
390	if r.moduleCacheDir == "" {
391		return false
392	}
393	return strings.HasPrefix(dir, r.moduleCacheDir)
394}
395
396func (r *ModuleResolver) loadPackageNames(importPaths []string, srcDir string) (map[string]string, error) {
397	if err := r.init(); err != nil {
398		return nil, err
399	}
400	names := map[string]string{}
401	for _, path := range importPaths {
402		_, packageDir := r.findPackage(path)
403		if packageDir == "" {
404			continue
405		}
406		name, err := packageDirToName(packageDir)
407		if err != nil {
408			continue
409		}
410		names[path] = name
411	}
412	return names, nil
413}
414
// scan reports packages to callback: first everything already present in the
// two directory caches, then whatever a walk of the not-yet-scanned roots
// adds to them. It returns when the walk finishes or ctx is done, whichever
// comes first. The only error it returns is a failure to initialize the
// resolver; cancellation is not reported as an error.
func (r *ModuleResolver) scan(ctx context.Context, callback *scanCallback) error {
	if err := r.init(); err != nil {
		return err
	}

	// processDir feeds one cached directory through the callback stages,
	// stopping as soon as a stage declines the package or a step fails.
	processDir := func(info directoryPackageInfo) {
		// Skip this directory if we were not able to get the package information successfully.
		if scanned, err := info.reachedStatus(directoryScanned); !scanned || err != nil {
			return
		}
		pkg, err := r.canonicalize(info)
		if err != nil {
			return
		}

		if !callback.dirFound(pkg) {
			return
		}
		pkg.packageName, err = r.cachePackageName(info)
		if err != nil {
			return
		}

		if !callback.packageNameLoaded(pkg) {
			return
		}
		_, exports, err := r.loadExports(ctx, pkg, false)
		if err != nil {
			return
		}
		callback.exportsLoaded(pkg, exports)
	}

	// Start processing everything in the cache, and listen for the new stuff
	// we discover in the walk below.
	stop1 := r.moduleCacheCache.ScanAndListen(ctx, processDir)
	defer stop1()
	stop2 := r.otherCache.ScanAndListen(ctx, processDir)
	defer stop2()

	// We assume cached directories are fully cached, including all their
	// children, and have not changed. We can skip them.
	skip := func(root gopathwalk.Root, dir string) bool {
		info, ok := r.cacheLoad(dir)
		if !ok {
			return false
		}
		// This directory can be skipped as long as we have already scanned it.
		// Packages with errors will continue to have errors, so there is no need
		// to rescan them.
		packageScanned, _ := info.reachedStatus(directoryScanned)
		return packageScanned
	}

	// Add anything new to the cache, and process it if we're still listening.
	add := func(root gopathwalk.Root, dir string) {
		r.cacheStore(r.scanDirForPackage(root, dir))
	}

	// r.roots and the callback are not necessarily safe to use in the
	// goroutine below. Process them eagerly.
	roots := filterRoots(r.roots, callback.rootFound)
	// We can't cancel walks, because we need them to finish to have a usable
	// cache. Instead, run them in a separate goroutine and detach.
	scanDone := make(chan struct{})
	go func() {
		// Wait for the scan semaphore, giving up if ctx is done first.
		select {
		case <-ctx.Done():
			return
		case <-r.scanSema:
		}
		defer func() { r.scanSema <- struct{}{} }()
		// We have the lock on r.scannedRoots, and no other scans can run.
		for _, root := range roots {
			// Cancellation is only honored between roots; a root whose walk
			// has started is always walked to completion. scanDone is not
			// closed on this path, the caller is released by ctx instead.
			if ctx.Err() != nil {
				return
			}

			if r.scannedRoots[root] {
				continue
			}
			gopathwalk.WalkSkip([]gopathwalk.Root{root}, add, skip, gopathwalk.Options{Logf: r.env.Logf, ModulesEnabled: true})
			r.scannedRoots[root] = true
		}
		close(scanDone)
	}()
	// Return on whichever happens first; the goroutine above may outlive
	// this call.
	select {
	case <-ctx.Done():
	case <-scanDone:
	}
	return nil
}
507
508func (r *ModuleResolver) scoreImportPath(ctx context.Context, path string) float64 {
509	if _, ok := stdlib[path]; ok {
510		return MaxRelevance
511	}
512	mod, _ := r.findPackage(path)
513	return modRelevance(mod)
514}
515
516func modRelevance(mod *gocommand.ModuleJSON) float64 {
517	var relevance float64
518	switch {
519	case mod == nil: // out of scope
520		return MaxRelevance - 4
521	case mod.Indirect:
522		relevance = MaxRelevance - 3
523	case !mod.Main:
524		relevance = MaxRelevance - 2
525	default:
526		relevance = MaxRelevance - 1 // main module ties with stdlib
527	}
528
529	_, versionString, ok := module.SplitPathVersion(mod.Path)
530	if ok {
531		index := strings.Index(versionString, "v")
532		if index == -1 {
533			return relevance
534		}
535		if versionNumber, err := strconv.ParseFloat(versionString[index+1:], 64); err == nil {
536			relevance += versionNumber / 1000
537		}
538	}
539
540	return relevance
541}
542
543// canonicalize gets the result of canonicalizing the packages using the results
544// of initializing the resolver from 'go list -m'.
545func (r *ModuleResolver) canonicalize(info directoryPackageInfo) (*pkg, error) {
546	// Packages in GOROOT are already canonical, regardless of the std/cmd modules.
547	if info.rootType == gopathwalk.RootGOROOT {
548		return &pkg{
549			importPathShort: info.nonCanonicalImportPath,
550			dir:             info.dir,
551			packageName:     path.Base(info.nonCanonicalImportPath),
552			relevance:       MaxRelevance,
553		}, nil
554	}
555
556	importPath := info.nonCanonicalImportPath
557	mod := r.findModuleByDir(info.dir)
558	// Check if the directory is underneath a module that's in scope.
559	if mod != nil {
560		// It is. If dir is the target of a replace directive,
561		// our guessed import path is wrong. Use the real one.
562		if mod.Dir == info.dir {
563			importPath = mod.Path
564		} else {
565			dirInMod := info.dir[len(mod.Dir)+len("/"):]
566			importPath = path.Join(mod.Path, filepath.ToSlash(dirInMod))
567		}
568	} else if !strings.HasPrefix(importPath, info.moduleName) {
569		// The module's name doesn't match the package's import path. It
570		// probably needs a replace directive we don't have.
571		return nil, fmt.Errorf("package in %q is not valid without a replace statement", info.dir)
572	}
573
574	res := &pkg{
575		importPathShort: importPath,
576		dir:             info.dir,
577		relevance:       modRelevance(mod),
578	}
579	// We may have discovered a package that has a different version
580	// in scope already. Canonicalize to that one if possible.
581	if _, canonicalDir := r.findPackage(importPath); canonicalDir != "" {
582		res.dir = canonicalDir
583	}
584	return res, nil
585}
586
587func (r *ModuleResolver) loadExports(ctx context.Context, pkg *pkg, includeTest bool) (string, []string, error) {
588	if err := r.init(); err != nil {
589		return "", nil, err
590	}
591	if info, ok := r.cacheLoad(pkg.dir); ok && !includeTest {
592		return r.cacheExports(ctx, r.env, info)
593	}
594	return loadExportsFromFiles(ctx, r.env, pkg.dir, includeTest)
595}
596
597func (r *ModuleResolver) scanDirForPackage(root gopathwalk.Root, dir string) directoryPackageInfo {
598	subdir := ""
599	if dir != root.Path {
600		subdir = dir[len(root.Path)+len("/"):]
601	}
602	importPath := filepath.ToSlash(subdir)
603	if strings.HasPrefix(importPath, "vendor/") {
604		// Only enter vendor directories if they're explicitly requested as a root.
605		return directoryPackageInfo{
606			status: directoryScanned,
607			err:    fmt.Errorf("unwanted vendor directory"),
608		}
609	}
610	switch root.Type {
611	case gopathwalk.RootCurrentModule:
612		importPath = path.Join(r.main.Path, filepath.ToSlash(subdir))
613	case gopathwalk.RootModuleCache:
614		matches := modCacheRegexp.FindStringSubmatch(subdir)
615		if len(matches) == 0 {
616			return directoryPackageInfo{
617				status: directoryScanned,
618				err:    fmt.Errorf("invalid module cache path: %v", subdir),
619			}
620		}
621		modPath, err := module.UnescapePath(filepath.ToSlash(matches[1]))
622		if err != nil {
623			if r.env.Logf != nil {
624				r.env.Logf("decoding module cache path %q: %v", subdir, err)
625			}
626			return directoryPackageInfo{
627				status: directoryScanned,
628				err:    fmt.Errorf("decoding module cache path %q: %v", subdir, err),
629			}
630		}
631		importPath = path.Join(modPath, filepath.ToSlash(matches[3]))
632	}
633
634	modDir, modName := r.modInfo(dir)
635	result := directoryPackageInfo{
636		status:                 directoryScanned,
637		dir:                    dir,
638		rootType:               root.Type,
639		nonCanonicalImportPath: importPath,
640		moduleDir:              modDir,
641		moduleName:             modName,
642	}
643	if root.Type == gopathwalk.RootGOROOT {
644		// stdlib packages are always in scope, despite the confusing go.mod
645		return result
646	}
647	return result
648}
649
// modCacheRegexp splits a path in a module cache into module, module version, and package.
// A successful FindStringSubmatch yields four elements: the whole match,
// the text before the "@", the version (the characters after the "@" up to
// the next slash or backslash), and the remainder of the path.
var modCacheRegexp = regexp.MustCompile(`(.*)@([^/\\]*)(.*)`)

// Byte sequences that modulePath looks for in go.mod text.
var (
	slashSlash = []byte("//")     // start of a line comment
	moduleStr  = []byte("module") // keyword of the module directive
)
657
658// modulePath returns the module path from the gomod file text.
659// If it cannot find a module path, it returns an empty string.
660// It is tolerant of unrelated problems in the go.mod file.
661//
662// Copied from cmd/go/internal/modfile.
663func modulePath(mod []byte) string {
664	for len(mod) > 0 {
665		line := mod
666		mod = nil
667		if i := bytes.IndexByte(line, '\n'); i >= 0 {
668			line, mod = line[:i], line[i+1:]
669		}
670		if i := bytes.Index(line, slashSlash); i >= 0 {
671			line = line[:i]
672		}
673		line = bytes.TrimSpace(line)
674		if !bytes.HasPrefix(line, moduleStr) {
675			continue
676		}
677		line = line[len(moduleStr):]
678		n := len(line)
679		line = bytes.TrimSpace(line)
680		if len(line) == n || len(line) == 0 {
681			continue
682		}
683
684		if line[0] == '"' || line[0] == '`' {
685			p, err := strconv.Unquote(string(line))
686			if err != nil {
687				return "" // malformed quoted string or multiline module path
688			}
689			return p
690		}
691
692		return string(line)
693	}
694	return "" // missing module path
695}
696