1// Copyright 2019 The Go Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style
3// license that can be found in the LICENSE file.
4
5package cache
6
7import (
8	"bytes"
9	"context"
10	"go/ast"
11	"go/parser"
12	"go/scanner"
13	"go/token"
14	"reflect"
15
16	"golang.org/x/tools/internal/lsp/protocol"
17	"golang.org/x/tools/internal/lsp/source"
18	"golang.org/x/tools/internal/lsp/telemetry"
19	"golang.org/x/tools/internal/memoize"
20	"golang.org/x/tools/internal/span"
21	"golang.org/x/tools/internal/telemetry/log"
22	"golang.org/x/tools/internal/telemetry/trace"
23	errors "golang.org/x/xerrors"
24)
25
26// Limits the number of parallel parser calls per process.
27var parseLimit = make(chan struct{}, 20)
28
29// parseKey uniquely identifies a parsed Go file.
30type parseKey struct {
31	file source.FileIdentity
32	mode source.ParseMode
33}
34
35type parseGoHandle struct {
36	handle *memoize.Handle
37	file   source.FileHandle
38	mode   source.ParseMode
39}
40
41type parseGoData struct {
42	memoize.NoCopy
43
44	ast        *ast.File
45	parseError error // errors associated with parsing the file
46	mapper     *protocol.ColumnMapper
47	err        error
48}
49
50func (c *cache) ParseGoHandle(fh source.FileHandle, mode source.ParseMode) source.ParseGoHandle {
51	key := parseKey{
52		file: fh.Identity(),
53		mode: mode,
54	}
55	fset := c.fset
56	h := c.store.Bind(key, func(ctx context.Context) interface{} {
57		data := &parseGoData{}
58		data.ast, data.mapper, data.parseError, data.err = parseGo(ctx, fset, fh, mode)
59		return data
60	})
61	return &parseGoHandle{
62		handle: h,
63		file:   fh,
64		mode:   mode,
65	}
66}
67
68func (pgh *parseGoHandle) String() string {
69	return pgh.File().Identity().URI.Filename()
70}
71
72func (pgh *parseGoHandle) File() source.FileHandle {
73	return pgh.file
74}
75
76func (pgh *parseGoHandle) Mode() source.ParseMode {
77	return pgh.mode
78}
79
80func (pgh *parseGoHandle) Parse(ctx context.Context) (*ast.File, *protocol.ColumnMapper, error, error) {
81	v := pgh.handle.Get(ctx)
82	if v == nil {
83		return nil, nil, nil, errors.Errorf("no parsed file for %s", pgh.File().Identity().URI)
84	}
85	data := v.(*parseGoData)
86	return data.ast, data.mapper, data.parseError, data.err
87}
88
89func (pgh *parseGoHandle) Cached() (*ast.File, *protocol.ColumnMapper, error, error) {
90	v := pgh.handle.Cached()
91	if v == nil {
92		return nil, nil, nil, errors.Errorf("no cached AST for %s", pgh.file.Identity().URI)
93	}
94	data := v.(*parseGoData)
95	return data.ast, data.mapper, data.parseError, data.err
96}
97
98func hashParseKey(ph source.ParseGoHandle) string {
99	b := bytes.NewBuffer(nil)
100	b.WriteString(ph.File().Identity().String())
101	b.WriteString(string(ph.Mode()))
102	return hashContents(b.Bytes())
103}
104
105func hashParseKeys(phs []source.ParseGoHandle) string {
106	b := bytes.NewBuffer(nil)
107	for _, ph := range phs {
108		b.WriteString(hashParseKey(ph))
109	}
110	return hashContents(b.Bytes())
111}
112
113func parseGo(ctx context.Context, fset *token.FileSet, fh source.FileHandle, mode source.ParseMode) (file *ast.File, mapper *protocol.ColumnMapper, parseError error, err error) {
114	ctx, done := trace.StartSpan(ctx, "cache.parseGo", telemetry.File.Of(fh.Identity().URI.Filename()))
115	defer done()
116
117	if fh.Identity().Kind != source.Go {
118		return nil, nil, nil, errors.Errorf("cannot parse non-Go file %s", fh.Identity().URI)
119	}
120	buf, _, err := fh.Read(ctx)
121	if err != nil {
122		return nil, nil, nil, err
123	}
124	parseLimit <- struct{}{}
125	defer func() { <-parseLimit }()
126	parserMode := parser.AllErrors | parser.ParseComments
127	if mode == source.ParseHeader {
128		parserMode = parser.ImportsOnly | parser.ParseComments
129	}
130	file, parseError = parser.ParseFile(fset, fh.Identity().URI.Filename(), buf, parserMode)
131	var tok *token.File
132	if file != nil {
133		// Fix any badly parsed parts of the AST.
134		tok = fset.File(file.Pos())
135		if tok == nil {
136			return nil, nil, nil, errors.Errorf("successfully parsed but no token.File for %s (%v)", fh.Identity().URI, parseError)
137		}
138		if mode == source.ParseExported {
139			trimAST(file)
140		}
141		if err := fixAST(ctx, file, tok, buf); err != nil {
142			log.Error(ctx, "failed to fix AST", err)
143		}
144	}
145	if file == nil {
146		// If the file is nil only due to parse errors,
147		// the parse errors are the actual errors.
148		err := parseError
149		if err == nil {
150			err = errors.Errorf("no AST for %s", fh.Identity().URI)
151		}
152		return nil, nil, parseError, err
153	}
154	m := &protocol.ColumnMapper{
155		URI:       fh.Identity().URI,
156		Converter: span.NewTokenConverter(fset, tok),
157		Content:   buf,
158	}
159
160	return file, m, parseError, nil
161}
162
163// trimAST clears any part of the AST not relevant to type checking
164// expressions at pos.
165func trimAST(file *ast.File) {
166	ast.Inspect(file, func(n ast.Node) bool {
167		if n == nil {
168			return false
169		}
170		switch n := n.(type) {
171		case *ast.FuncDecl:
172			n.Body = nil
173		case *ast.BlockStmt:
174			n.List = nil
175		case *ast.CaseClause:
176			n.Body = nil
177		case *ast.CommClause:
178			n.Body = nil
179		case *ast.CompositeLit:
180			// Leave elts in place for [...]T
181			// array literals, because they can
182			// affect the expression's type.
183			if !isEllipsisArray(n.Type) {
184				n.Elts = nil
185			}
186		}
187		return true
188	})
189}
190
191func isEllipsisArray(n ast.Expr) bool {
192	at, ok := n.(*ast.ArrayType)
193	if !ok {
194		return false
195	}
196	_, ok = at.Len.(*ast.Ellipsis)
197	return ok
198}
199
200// fixAST inspects the AST and potentially modifies any *ast.BadStmts so that it can be
201// type-checked more effectively.
202func fixAST(ctx context.Context, n ast.Node, tok *token.File, src []byte) error {
203	var err error
204	walkASTWithParent(n, func(n, parent ast.Node) bool {
205		switch n := n.(type) {
206		case *ast.BadStmt:
207			err = fixDeferOrGoStmt(n, parent, tok, src) // don't shadow err
208			if err == nil {
209				// Recursively fix in our fixed node.
210				err = fixAST(ctx, parent, tok, src)
211			} else {
212				err = errors.Errorf("unable to parse defer or go from *ast.BadStmt: %v", err)
213			}
214			return false
215		case *ast.BadExpr:
216			// Don't propagate this error since *ast.BadExpr is very common
217			// and it is only sometimes due to array types. Errors from here
218			// are expected and not actionable in general.
219			if fixArrayType(n, parent, tok, src) == nil {
220				// Recursively fix in our fixed node.
221				err = fixAST(ctx, parent, tok, src)
222				return false
223			}
224			return false
225		case *ast.SelectorExpr:
226			// Fix cases where a keyword prefix results in a phantom "_" selector, e.g.:
227			//
228			//   foo.var<> // want to complete to "foo.variance"
229			//
230			fixPhantomSelector(n, tok, src)
231			return true
232		default:
233			return true
234		}
235	})
236
237	return err
238}
239
240// walkASTWithParent walks the AST rooted at n. The semantics are
241// similar to ast.Inspect except it does not call f(nil).
242func walkASTWithParent(n ast.Node, f func(n ast.Node, parent ast.Node) bool) {
243	var ancestors []ast.Node
244	ast.Inspect(n, func(n ast.Node) (recurse bool) {
245		defer func() {
246			if recurse {
247				ancestors = append(ancestors, n)
248			}
249		}()
250
251		if n == nil {
252			ancestors = ancestors[:len(ancestors)-1]
253			return false
254		}
255
256		var parent ast.Node
257		if len(ancestors) > 0 {
258			parent = ancestors[len(ancestors)-1]
259		}
260
261		return f(n, parent)
262	})
263}
264
265// fixPhantomSelector tries to fix selector expressions with phantom
266// "_" selectors. In particular, we check if the selector is a
267// keyword, and if so we swap in an *ast.Ident with the keyword text. For example:
268//
269// foo.var
270//
271// yields a "_" selector instead of "var" since "var" is a keyword.
272func fixPhantomSelector(sel *ast.SelectorExpr, tok *token.File, src []byte) {
273	if !isPhantomUnderscore(sel.Sel, tok, src) {
274		return
275	}
276
277	maybeKeyword := readKeyword(sel.Sel.Pos(), tok, src)
278	if maybeKeyword == "" {
279		return
280	}
281
282	replaceNode(sel, sel.Sel, &ast.Ident{
283		Name:    maybeKeyword,
284		NamePos: sel.Sel.Pos(),
285	})
286}
287
288// isPhantomUnderscore reports whether the given ident is a phantom
289// underscore. The parser sometimes inserts phantom underscores when
290// it encounters otherwise unparseable situations.
291func isPhantomUnderscore(id *ast.Ident, tok *token.File, src []byte) bool {
292	if id == nil || id.Name != "_" {
293		return false
294	}
295
296	// Phantom underscore means the underscore is not actually in the
297	// program text.
298	offset := tok.Offset(id.Pos())
299	return len(src) <= offset || src[offset] != '_'
300}
301
302// readKeyword reads the keyword starting at pos, if any.
303func readKeyword(pos token.Pos, tok *token.File, src []byte) string {
304	var kwBytes []byte
305	for i := tok.Offset(pos); i < len(src); i++ {
306		// Use a simplified identifier check since keywords are always lowercase ASCII.
307		if src[i] < 'a' || src[i] > 'z' {
308			break
309		}
310		kwBytes = append(kwBytes, src[i])
311
312		// Stop search at arbitrarily chosen too-long-for-a-keyword length.
313		if len(kwBytes) > 15 {
314			return ""
315		}
316	}
317
318	if kw := string(kwBytes); token.Lookup(kw).IsKeyword() {
319		return kw
320	}
321
322	return ""
323}
324
325// fixArrayType tries to parse an *ast.BadExpr into an *ast.ArrayType.
326// go/parser often turns lone array types like "[]int" into BadExprs
327// if it isn't expecting a type.
328func fixArrayType(bad *ast.BadExpr, parent ast.Node, tok *token.File, src []byte) error {
329	// Our expected input is a bad expression that looks like "[]someExpr".
330
331	from := bad.Pos()
332	to := bad.End()
333
334	if !from.IsValid() || !to.IsValid() {
335		return errors.Errorf("invalid BadExpr from/to: %d/%d", from, to)
336	}
337
338	exprBytes := make([]byte, 0, int(to-from)+3)
339	// Avoid doing tok.Offset(to) since that panics if badExpr ends at EOF.
340	exprBytes = append(exprBytes, src[tok.Offset(from):tok.Offset(to-1)+1]...)
341	exprBytes = bytes.TrimSpace(exprBytes)
342
343	// If our expression ends in "]" (e.g. "[]"), add a phantom selector
344	// so we can complete directly after the "[]".
345	if len(exprBytes) > 0 && exprBytes[len(exprBytes)-1] == ']' {
346		exprBytes = append(exprBytes, '_')
347	}
348
349	// Add "{}" to turn our ArrayType into a CompositeLit. This is to
350	// handle the case of "[...]int" where we must make it a composite
351	// literal to be parseable.
352	exprBytes = append(exprBytes, '{', '}')
353
354	expr, err := parseExpr(from, exprBytes)
355	if err != nil {
356		return err
357	}
358
359	cl, _ := expr.(*ast.CompositeLit)
360	if cl == nil {
361		return errors.Errorf("expr not compLit (%T)", expr)
362	}
363
364	at, _ := cl.Type.(*ast.ArrayType)
365	if at == nil {
366		return errors.Errorf("compLit type not array (%T)", cl.Type)
367	}
368
369	if !replaceNode(parent, bad, at) {
370		return errors.Errorf("couldn't replace array type")
371	}
372
373	return nil
374}
375
376// fixDeferOrGoStmt tries to parse an *ast.BadStmt into a defer or a go statement.
377//
378// go/parser packages a statement of the form "defer x." as an *ast.BadStmt because
379// it does not include a call expression. This means that go/types skips type-checking
380// this statement entirely, and we can't use the type information when completing.
381// Here, we try to generate a fake *ast.DeferStmt or *ast.GoStmt to put into the AST,
382// instead of the *ast.BadStmt.
383func fixDeferOrGoStmt(bad *ast.BadStmt, parent ast.Node, tok *token.File, src []byte) error {
384	// Check if we have a bad statement containing either a "go" or "defer".
385	s := &scanner.Scanner{}
386	s.Init(tok, src, nil, 0)
387
388	var (
389		pos token.Pos
390		tkn token.Token
391	)
392	for {
393		if tkn == token.EOF {
394			return errors.Errorf("reached the end of the file")
395		}
396		if pos >= bad.From {
397			break
398		}
399		pos, tkn, _ = s.Scan()
400	}
401
402	var stmt ast.Stmt
403	switch tkn {
404	case token.DEFER:
405		stmt = &ast.DeferStmt{
406			Defer: pos,
407		}
408	case token.GO:
409		stmt = &ast.GoStmt{
410			Go: pos,
411		}
412	default:
413		return errors.Errorf("no defer or go statement found")
414	}
415
416	var (
417		from, to, last   token.Pos
418		lastToken        token.Token
419		braceDepth       int
420		phantomSelectors []token.Pos
421	)
422FindTo:
423	for {
424		to, tkn, _ = s.Scan()
425
426		if from == token.NoPos {
427			from = to
428		}
429
430		switch tkn {
431		case token.EOF:
432			break FindTo
433		case token.SEMICOLON:
434			// If we aren't in nested braces, end of statement means
435			// end of expression.
436			if braceDepth == 0 {
437				break FindTo
438			}
439		case token.LBRACE:
440			braceDepth++
441		}
442
443		// This handles the common dangling selector case. For example in
444		//
445		// defer fmt.
446		// y := 1
447		//
448		// we notice the dangling period and end our expression.
449		//
450		// If the previous token was a "." and we are looking at a "}",
451		// the period is likely a dangling selector and needs a phantom
452		// "_". Likewise if the current token is on a different line than
453		// the period, the period is likely a dangling selector.
454		if lastToken == token.PERIOD && (tkn == token.RBRACE || tok.Line(to) > tok.Line(last)) {
455			// Insert phantom "_" selector after the dangling ".".
456			phantomSelectors = append(phantomSelectors, last+1)
457			// If we aren't in a block then end the expression after the ".".
458			if braceDepth == 0 {
459				to = last + 1
460				break
461			}
462		}
463
464		lastToken = tkn
465		last = to
466
467		switch tkn {
468		case token.RBRACE:
469			braceDepth--
470			if braceDepth <= 0 {
471				if braceDepth == 0 {
472					// +1 to include the "}" itself.
473					to += 1
474				}
475				break FindTo
476			}
477		}
478	}
479
480	if !from.IsValid() || tok.Offset(from) >= len(src) {
481		return errors.Errorf("invalid from position")
482	}
483
484	if !to.IsValid() || tok.Offset(to) >= len(src) {
485		return errors.Errorf("invalid to position %d", to)
486	}
487
488	// Insert any phantom selectors needed to prevent dangling "." from messing
489	// up the AST.
490	exprBytes := make([]byte, 0, int(to-from)+len(phantomSelectors))
491	for i, b := range src[tok.Offset(from):tok.Offset(to)] {
492		if len(phantomSelectors) > 0 && from+token.Pos(i) == phantomSelectors[0] {
493			exprBytes = append(exprBytes, '_')
494			phantomSelectors = phantomSelectors[1:]
495		}
496		exprBytes = append(exprBytes, b)
497	}
498
499	if len(phantomSelectors) > 0 {
500		exprBytes = append(exprBytes, '_')
501	}
502
503	expr, err := parseExpr(from, exprBytes)
504	if err != nil {
505		return err
506	}
507
508	// Package the expression into a fake *ast.CallExpr and re-insert
509	// into the function.
510	call := &ast.CallExpr{
511		Fun:    expr,
512		Lparen: to,
513		Rparen: to,
514	}
515
516	switch stmt := stmt.(type) {
517	case *ast.DeferStmt:
518		stmt.Call = call
519	case *ast.GoStmt:
520		stmt.Call = call
521	}
522
523	if !replaceNode(parent, bad, stmt) {
524		return errors.Errorf("couldn't replace CallExpr")
525	}
526
527	return nil
528}
529
530// parseStmt parses the statement in src and updates its position to
531// start at pos.
532func parseStmt(pos token.Pos, src []byte) (ast.Stmt, error) {
533	// Wrap our expression to make it a valid Go file we can pass to ParseFile.
534	fileSrc := bytes.Join([][]byte{
535		[]byte("package fake;func _(){"),
536		src,
537		[]byte("}"),
538	}, nil)
539
540	// Use ParseFile instead of ParseExpr because ParseFile has
541	// best-effort behavior, whereas ParseExpr fails hard on any error.
542	fakeFile, err := parser.ParseFile(token.NewFileSet(), "", fileSrc, 0)
543	if fakeFile == nil {
544		return nil, errors.Errorf("error reading fake file source: %v", err)
545	}
546
547	// Extract our expression node from inside the fake file.
548	if len(fakeFile.Decls) == 0 {
549		return nil, errors.Errorf("error parsing fake file: %v", err)
550	}
551
552	fakeDecl, _ := fakeFile.Decls[0].(*ast.FuncDecl)
553	if fakeDecl == nil || len(fakeDecl.Body.List) == 0 {
554		return nil, errors.Errorf("no statement in %s: %v", src, err)
555	}
556
557	stmt := fakeDecl.Body.List[0]
558
559	// parser.ParseFile returns undefined positions.
560	// Adjust them for the current file.
561	offsetPositions(stmt, pos-1-(stmt.Pos()-1))
562
563	return stmt, nil
564}
565
566// parseExpr parses the expression in src and updates its position to
567// start at pos.
568func parseExpr(pos token.Pos, src []byte) (ast.Expr, error) {
569	stmt, err := parseStmt(pos, src)
570	if err != nil {
571		return nil, err
572	}
573
574	exprStmt, ok := stmt.(*ast.ExprStmt)
575	if !ok {
576		return nil, errors.Errorf("no expr in %s: %v", src, err)
577	}
578
579	return exprStmt.X, nil
580}
581
582var tokenPosType = reflect.TypeOf(token.NoPos)
583
584// offsetPositions applies an offset to the positions in an ast.Node.
585func offsetPositions(n ast.Node, offset token.Pos) {
586	ast.Inspect(n, func(n ast.Node) bool {
587		if n == nil {
588			return false
589		}
590
591		v := reflect.ValueOf(n).Elem()
592
593		switch v.Kind() {
594		case reflect.Struct:
595			for i := 0; i < v.NumField(); i++ {
596				f := v.Field(i)
597				if f.Type() != tokenPosType {
598					continue
599				}
600
601				if !f.CanSet() {
602					continue
603				}
604
605				f.SetInt(f.Int() + int64(offset))
606			}
607		}
608
609		return true
610	})
611}
612
613// replaceNode updates parent's child oldChild to be newChild. It
614// retuns whether it replaced successfully.
615func replaceNode(parent, oldChild, newChild ast.Node) bool {
616	if parent == nil || oldChild == nil || newChild == nil {
617		return false
618	}
619
620	parentVal := reflect.ValueOf(parent).Elem()
621	if parentVal.Kind() != reflect.Struct {
622		return false
623	}
624
625	newChildVal := reflect.ValueOf(newChild)
626
627	tryReplace := func(v reflect.Value) bool {
628		if !v.CanSet() || !v.CanInterface() {
629			return false
630		}
631
632		// If the existing value is oldChild, we found our child. Make
633		// sure our newChild is assignable and then make the swap.
634		if v.Interface() == oldChild && newChildVal.Type().AssignableTo(v.Type()) {
635			v.Set(newChildVal)
636			return true
637		}
638
639		return false
640	}
641
642	// Loop over parent's struct fields.
643	for i := 0; i < parentVal.NumField(); i++ {
644		f := parentVal.Field(i)
645
646		switch f.Kind() {
647		// Check interface and pointer fields.
648		case reflect.Interface, reflect.Ptr:
649			if tryReplace(f) {
650				return true
651			}
652
653		// Search through any slice fields.
654		case reflect.Slice:
655			for i := 0; i < f.Len(); i++ {
656				if tryReplace(f.Index(i)) {
657					return true
658				}
659			}
660		}
661	}
662
663	return false
664}
665