// Copyright 2019 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package cache

import (
	"bytes"
	"context"
	"go/ast"
	"go/parser"
	"go/scanner"
	"go/token"
	"reflect"

	"golang.org/x/tools/internal/lsp/protocol"
	"golang.org/x/tools/internal/lsp/source"
	"golang.org/x/tools/internal/lsp/telemetry"
	"golang.org/x/tools/internal/memoize"
	"golang.org/x/tools/internal/span"
	"golang.org/x/tools/internal/telemetry/log"
	"golang.org/x/tools/internal/telemetry/trace"
	errors "golang.org/x/xerrors"
)

// Limits the number of parallel parser calls per process.
// Acquire a slot by sending; release it by receiving.
var parseLimit = make(chan struct{}, 20)

// parseKey uniquely identifies a parsed Go file.
type parseKey struct {
	file source.FileIdentity
	mode source.ParseMode
}

// parseGoHandle is the cache's implementation of source.ParseGoHandle.
// handle memoizes the result of parsing file in the given mode.
type parseGoHandle struct {
	handle *memoize.Handle
	file   source.FileHandle
	mode   source.ParseMode
}

// parseGoData is the value type stored in a parseGoHandle's memoize.Handle.
type parseGoData struct {
	memoize.NoCopy

	ast        *ast.File
	parseError error // errors associated with parsing the file
	mapper     *protocol.ColumnMapper
	err        error // set when no usable AST could be produced (e.g. read failure, non-Go file)
}

// ParseGoHandle returns a handle whose value is fh parsed in the given mode.
// Results are memoized in the cache's store, keyed by file identity and mode.
func (c *cache) ParseGoHandle(fh source.FileHandle, mode source.ParseMode) source.ParseGoHandle {
	key := parseKey{
		file: fh.Identity(),
		mode: mode,
	}
	fset := c.fset
	h := c.store.Bind(key, func(ctx context.Context) interface{} {
		data := &parseGoData{}
		data.ast, data.mapper, data.parseError, data.err = parseGo(ctx, fset, fh, mode)
		return data
	})
	return &parseGoHandle{
		handle: h,
		file:   fh,
		mode:   mode,
	}
}

// String returns the handle's file name, for debugging.
func (pgh *parseGoHandle) String() string {
	return pgh.File().Identity().URI.Filename()
}

// File returns the underlying file handle.
func (pgh *parseGoHandle) File() source.FileHandle {
	return pgh.file
}

// Mode returns the parse mode this handle was created with.
func (pgh *parseGoHandle) Mode() source.ParseMode {
	return pgh.mode
}

// Parse returns the parsed AST, a column mapper for position conversion,
// any (non-fatal) parse errors, and a fatal error. It computes the parse,
// or waits for an in-flight computation, as needed.
func (pgh *parseGoHandle) Parse(ctx context.Context) (*ast.File, *protocol.ColumnMapper, error, error) {
	v := pgh.handle.Get(ctx)
	if v == nil {
		return nil, nil, nil, errors.Errorf("no parsed file for %s", pgh.File().Identity().URI)
	}
	data := v.(*parseGoData)
	return data.ast, data.mapper, data.parseError, data.err
}

// Cached returns the memoized parse result if it has already been computed;
// it does not trigger a parse.
func (pgh *parseGoHandle) Cached() (*ast.File, *protocol.ColumnMapper, error, error) {
	v := pgh.handle.Cached()
	if v == nil {
		return nil, nil, nil, errors.Errorf("no cached AST for %s", pgh.file.Identity().URI)
	}
	data := v.(*parseGoData)
	return data.ast, data.mapper, data.parseError, data.err
}

// hashParseKey returns a hash covering ph's file identity and parse mode.
func hashParseKey(ph source.ParseGoHandle) string {
	b := bytes.NewBuffer(nil)
	b.WriteString(ph.File().Identity().String())
	// NOTE(review): string(ph.Mode()) converts the mode integer to a rune,
	// not its decimal representation (go vet flags this pattern). Distinct
	// small mode values still map to distinct bytes, so keys stay unique,
	// but fmt/strconv would be clearer — confirm before changing, since it
	// alters the in-memory hash values.
	b.WriteString(string(ph.Mode()))
	return hashContents(b.Bytes())
}

// hashParseKeys returns a hash covering the parse keys of phs, in order.
func hashParseKeys(phs []source.ParseGoHandle) string {
	b := bytes.NewBuffer(nil)
	for _, ph := range phs {
		b.WriteString(hashParseKey(ph))
	}
	return hashContents(b.Bytes())
}

// parseGo reads fh and parses its content as a Go file. It returns the
// (possibly fixed-up) AST, a column mapper, non-fatal parse errors, and a
// fatal error.
func parseGo(ctx context.Context, fset *token.FileSet, fh source.FileHandle, mode source.ParseMode) (file *ast.File, mapper *protocol.ColumnMapper, parseError error, err error) {
	ctx, done := trace.StartSpan(ctx, "cache.parseGo", telemetry.File.Of(fh.Identity().URI.Filename()))
	defer done()

	if fh.Identity().Kind != source.Go {
		return nil, nil, nil, errors.Errorf("cannot parse non-Go file %s", fh.Identity().URI)
	}
	buf, _, err := fh.Read(ctx)
	if err != nil {
		return nil, nil, nil, err
	}
	// Throttle the number of concurrent parses (see parseLimit).
	parseLimit <- struct{}{}
	defer func() { <-parseLimit }()
	parserMode := parser.AllErrors | parser.ParseComments
	if mode == source.ParseHeader {
		// Header mode only needs the import block (plus comments).
		parserMode = parser.ImportsOnly | parser.ParseComments
	}
	file, parseError = parser.ParseFile(fset, fh.Identity().URI.Filename(), buf, parserMode)
	var tok *token.File
	if file != nil {
		// Fix any badly parsed parts of the AST.
		tok = fset.File(file.Pos())
		if tok == nil {
			return nil, nil, nil, errors.Errorf("successfully parsed but no token.File for %s (%v)", fh.Identity().URI, parseError)
		}
		if mode == source.ParseExported {
			trimAST(file)
		}
		// Best effort: a failed fix still leaves a usable (if degraded) AST.
		if err := fixAST(ctx, file, tok, buf); err != nil {
			log.Error(ctx, "failed to fix AST", err)
		}
	}
	if file == nil {
		// If the file is nil only due to parse errors,
		// the parse errors are the actual errors.
		err := parseError
		if err == nil {
			err = errors.Errorf("no AST for %s", fh.Identity().URI)
		}
		return nil, nil, parseError, err
	}
	m := &protocol.ColumnMapper{
		URI:       fh.Identity().URI,
		Converter: span.NewTokenConverter(fset, tok),
		Content:   buf,
	}

	return file, m, parseError, nil
}

// trimAST clears any part of the AST not relevant to type checking
// expressions at pos: function bodies, statement lists, and composite
// literal contents (except [...]T array literals, whose elements
// determine the array's length and hence its type).
func trimAST(file *ast.File) {
	ast.Inspect(file, func(n ast.Node) bool {
		if n == nil {
			return false
		}
		switch n := n.(type) {
		case *ast.FuncDecl:
			n.Body = nil
		case *ast.BlockStmt:
			n.List = nil
		case *ast.CaseClause:
			n.Body = nil
		case *ast.CommClause:
			n.Body = nil
		case *ast.CompositeLit:
			// Leave elts in place for [...]T
			// array literals, because they can
			// affect the expression's type.
			if !isEllipsisArray(n.Type) {
				n.Elts = nil
			}
		}
		return true
	})
}

// isEllipsisArray reports whether n is an array type whose length is
// given by an ellipsis, e.g. [...]int.
func isEllipsisArray(n ast.Expr) bool {
	at, ok := n.(*ast.ArrayType)
	if !ok {
		return false
	}
	_, ok = at.Len.(*ast.Ellipsis)
	return ok
}

// fixAST inspects the AST and potentially modifies any *ast.BadStmts so that it can be
// type-checked more effectively.
func fixAST(ctx context.Context, n ast.Node, tok *token.File, src []byte) error {
	var err error
	walkASTWithParent(n, func(n, parent ast.Node) bool {
		switch n := n.(type) {
		case *ast.BadStmt:
			err = fixDeferOrGoStmt(n, parent, tok, src) // don't shadow err
			if err == nil {
				// Recursively fix in our fixed node.
				err = fixAST(ctx, parent, tok, src)
			} else {
				err = errors.Errorf("unable to parse defer or go from *ast.BadStmt: %v", err)
			}
			return false
		case *ast.BadExpr:
			// Don't propagate this error since *ast.BadExpr is very common
			// and it is only sometimes due to array types. Errors from here
			// are expected and not actionable in general.
			if fixArrayType(n, parent, tok, src) == nil {
				// Recursively fix in our fixed node.
				err = fixAST(ctx, parent, tok, src)
				return false
			}
			return false
		case *ast.SelectorExpr:
			// Fix cases where a keyword prefix results in a phantom "_" selector, e.g.:
			//
			// foo.var<> // want to complete to "foo.variance"
			//
			fixPhantomSelector(n, tok, src)
			return true
		default:
			return true
		}
	})

	return err
}

// walkASTWithParent walks the AST rooted at n. The semantics are
// similar to ast.Inspect except it does not call f(nil); f also
// receives the current node's parent (nil at the root).
func walkASTWithParent(n ast.Node, f func(n ast.Node, parent ast.Node) bool) {
	var ancestors []ast.Node
	ast.Inspect(n, func(n ast.Node) (recurse bool) {
		// Push n onto the ancestor stack only when we are about to
		// recurse into its children (i.e. when we return true below).
		defer func() {
			if recurse {
				ancestors = append(ancestors, n)
			}
		}()

		if n == nil {
			// ast.Inspect passes nil when it finishes a subtree;
			// pop the completed node off the stack.
			ancestors = ancestors[:len(ancestors)-1]
			return false
		}

		var parent ast.Node
		if len(ancestors) > 0 {
			parent = ancestors[len(ancestors)-1]
		}

		return f(n, parent)
	})
}

// fixPhantomSelector tries to fix selector expressions with phantom
// "_" selectors. In particular, we check if the selector is a
// keyword, and if so we swap in an *ast.Ident with the keyword text. For example:
//
// foo.var
//
// yields a "_" selector instead of "var" since "var" is a keyword.
func fixPhantomSelector(sel *ast.SelectorExpr, tok *token.File, src []byte) {
	if !isPhantomUnderscore(sel.Sel, tok, src) {
		return
	}

	maybeKeyword := readKeyword(sel.Sel.Pos(), tok, src)
	if maybeKeyword == "" {
		return
	}

	// Swap the parser-invented "_" for an identifier carrying the
	// keyword text, keeping the original position.
	replaceNode(sel, sel.Sel, &ast.Ident{
		Name:    maybeKeyword,
		NamePos: sel.Sel.Pos(),
	})
}

// isPhantomUnderscore reports whether the given ident is a phantom
// underscore. The parser sometimes inserts phantom underscores when
// it encounters otherwise unparseable situations.
func isPhantomUnderscore(id *ast.Ident, tok *token.File, src []byte) bool {
	if id == nil || id.Name != "_" {
		return false
	}

	// Phantom underscore means the underscore is not actually in the
	// program text.
	offset := tok.Offset(id.Pos())
	return len(src) <= offset || src[offset] != '_'
}

// readKeyword reads the keyword starting at pos, if any. It returns ""
// when the text at pos is not a Go keyword.
func readKeyword(pos token.Pos, tok *token.File, src []byte) string {
	var kwBytes []byte
	for i := tok.Offset(pos); i < len(src); i++ {
		// Use a simplified identifier check since keywords are always lowercase ASCII.
		if src[i] < 'a' || src[i] > 'z' {
			break
		}
		kwBytes = append(kwBytes, src[i])

		// Stop search at arbitrarily chosen too-long-for-a-keyword length.
		if len(kwBytes) > 15 {
			return ""
		}
	}

	if kw := string(kwBytes); token.Lookup(kw).IsKeyword() {
		return kw
	}

	return ""
}

// fixArrayType tries to parse an *ast.BadExpr into an *ast.ArrayType.
// go/parser often turns lone array types like "[]int" into BadExprs
// if it isn't expecting a type.
func fixArrayType(bad *ast.BadExpr, parent ast.Node, tok *token.File, src []byte) error {
	// Our expected input is a bad expression that looks like "[]someExpr".

	from := bad.Pos()
	to := bad.End()

	if !from.IsValid() || !to.IsValid() {
		return errors.Errorf("invalid BadExpr from/to: %d/%d", from, to)
	}

	// +3 leaves room for the phantom "_" and "{}" appended below.
	exprBytes := make([]byte, 0, int(to-from)+3)
	// Avoid doing tok.Offset(to) since that panics if badExpr ends at EOF.
	exprBytes = append(exprBytes, src[tok.Offset(from):tok.Offset(to-1)+1]...)
	exprBytes = bytes.TrimSpace(exprBytes)

	// If our expression ends in "]" (e.g. "[]"), add a phantom selector
	// so we can complete directly after the "[]".
	if len(exprBytes) > 0 && exprBytes[len(exprBytes)-1] == ']' {
		exprBytes = append(exprBytes, '_')
	}

	// Add "{}" to turn our ArrayType into a CompositeLit. This is to
	// handle the case of "[...]int" where we must make it a composite
	// literal to be parseable.
	exprBytes = append(exprBytes, '{', '}')

	expr, err := parseExpr(from, exprBytes)
	if err != nil {
		return err
	}

	cl, _ := expr.(*ast.CompositeLit)
	if cl == nil {
		return errors.Errorf("expr not compLit (%T)", expr)
	}

	at, _ := cl.Type.(*ast.ArrayType)
	if at == nil {
		return errors.Errorf("compLit type not array (%T)", cl.Type)
	}

	// Splice the recovered array type into the parent in place of the BadExpr.
	if !replaceNode(parent, bad, at) {
		return errors.Errorf("couldn't replace array type")
	}

	return nil
}

// fixDeferOrGoStmt tries to parse an *ast.BadStmt into a defer or a go statement.
//
// go/parser packages a statement of the form "defer x." as an *ast.BadStmt because
// it does not include a call expression. This means that go/types skips type-checking
// this statement entirely, and we can't use the type information when completing.
// Here, we try to generate a fake *ast.DeferStmt or *ast.GoStmt to put into the AST,
// instead of the *ast.BadStmt.
func fixDeferOrGoStmt(bad *ast.BadStmt, parent ast.Node, tok *token.File, src []byte) error {
	// Check if we have a bad statement containing either a "go" or "defer".
	s := &scanner.Scanner{}
	s.Init(tok, src, nil, 0)

	// Scan forward until we reach the token at (or after) the BadStmt's start.
	var (
		pos token.Pos
		tkn token.Token
	)
	for {
		if tkn == token.EOF {
			return errors.Errorf("reached the end of the file")
		}
		if pos >= bad.From {
			break
		}
		pos, tkn, _ = s.Scan()
	}

	var stmt ast.Stmt
	switch tkn {
	case token.DEFER:
		stmt = &ast.DeferStmt{
			Defer: pos,
		}
	case token.GO:
		stmt = &ast.GoStmt{
			Go: pos,
		}
	default:
		return errors.Errorf("no defer or go statement found")
	}

	// Scan the expression that follows the keyword, tracking where it ends
	// (to) and where phantom "_" selectors must be inserted.
	var (
		from, to, last   token.Pos
		lastToken        token.Token
		braceDepth       int
		phantomSelectors []token.Pos
	)
FindTo:
	for {
		to, tkn, _ = s.Scan()

		if from == token.NoPos {
			from = to
		}

		switch tkn {
		case token.EOF:
			break FindTo
		case token.SEMICOLON:
			// If we aren't in nested braces, end of statement means
			// end of expression.
			if braceDepth == 0 {
				break FindTo
			}
		case token.LBRACE:
			braceDepth++
		}

		// This handles the common dangling selector case. For example in
		//
		// defer fmt.
		// y := 1
		//
		// we notice the dangling period and end our expression.
		//
		// If the previous token was a "." and we are looking at a "}",
		// the period is likely a dangling selector and needs a phantom
		// "_". Likewise if the current token is on a different line than
		// the period, the period is likely a dangling selector.
		if lastToken == token.PERIOD && (tkn == token.RBRACE || tok.Line(to) > tok.Line(last)) {
			// Insert phantom "_" selector after the dangling ".".
			phantomSelectors = append(phantomSelectors, last+1)
			// If we aren't in a block then end the expression after the ".".
			if braceDepth == 0 {
				to = last + 1
				break
			}
		}

		lastToken = tkn
		last = to

		switch tkn {
		case token.RBRACE:
			braceDepth--
			if braceDepth <= 0 {
				if braceDepth == 0 {
					// +1 to include the "}" itself.
					to += 1
				}
				break FindTo
			}
		}
	}

	if !from.IsValid() || tok.Offset(from) >= len(src) {
		return errors.Errorf("invalid from position")
	}

	if !to.IsValid() || tok.Offset(to) >= len(src) {
		return errors.Errorf("invalid to position %d", to)
	}

	// Insert any phantom selectors needed to prevent dangling "." from messing
	// up the AST.
	exprBytes := make([]byte, 0, int(to-from)+len(phantomSelectors))
	for i, b := range src[tok.Offset(from):tok.Offset(to)] {
		if len(phantomSelectors) > 0 && from+token.Pos(i) == phantomSelectors[0] {
			exprBytes = append(exprBytes, '_')
			phantomSelectors = phantomSelectors[1:]
		}
		exprBytes = append(exprBytes, b)
	}

	// Any remaining phantom selector belongs at the very end of the expression.
	if len(phantomSelectors) > 0 {
		exprBytes = append(exprBytes, '_')
	}

	expr, err := parseExpr(from, exprBytes)
	if err != nil {
		return err
	}

	// Package the expression into a fake *ast.CallExpr and re-insert
	// into the function.
	call := &ast.CallExpr{
		Fun:    expr,
		Lparen: to,
		Rparen: to,
	}

	switch stmt := stmt.(type) {
	case *ast.DeferStmt:
		stmt.Call = call
	case *ast.GoStmt:
		stmt.Call = call
	}

	if !replaceNode(parent, bad, stmt) {
		return errors.Errorf("couldn't replace CallExpr")
	}

	return nil
}

// parseStmt parses the statement in src and updates its position to
// start at pos.
func parseStmt(pos token.Pos, src []byte) (ast.Stmt, error) {
	// Wrap our expression to make it a valid Go file we can pass to ParseFile.
	fileSrc := bytes.Join([][]byte{
		[]byte("package fake;func _(){"),
		src,
		[]byte("}"),
	}, nil)

	// Use ParseFile instead of ParseExpr because ParseFile has
	// best-effort behavior, whereas ParseExpr fails hard on any error.
	fakeFile, err := parser.ParseFile(token.NewFileSet(), "", fileSrc, 0)
	if fakeFile == nil {
		return nil, errors.Errorf("error reading fake file source: %v", err)
	}

	// Extract our expression node from inside the fake file.
	if len(fakeFile.Decls) == 0 {
		return nil, errors.Errorf("error parsing fake file: %v", err)
	}

	fakeDecl, _ := fakeFile.Decls[0].(*ast.FuncDecl)
	if fakeDecl == nil || len(fakeDecl.Body.List) == 0 {
		return nil, errors.Errorf("no statement in %s: %v", src, err)
	}

	stmt := fakeDecl.Body.List[0]

	// parser.ParseFile returns undefined positions.
	// Adjust them for the current file.
	offsetPositions(stmt, pos-1-(stmt.Pos()-1))

	return stmt, nil
}

// parseExpr parses the expression in src and updates its position to
// start at pos.
func parseExpr(pos token.Pos, src []byte) (ast.Expr, error) {
	stmt, err := parseStmt(pos, src)
	if err != nil {
		return nil, err
	}

	exprStmt, ok := stmt.(*ast.ExprStmt)
	if !ok {
		return nil, errors.Errorf("no expr in %s: %v", src, err)
	}

	return exprStmt.X, nil
}

// tokenPosType is the reflect.Type of token.Pos (token.NoPos is its zero value).
var tokenPosType = reflect.TypeOf(token.NoPos)

// offsetPositions applies an offset to the positions in an ast.Node.
585func offsetPositions(n ast.Node, offset token.Pos) { 586 ast.Inspect(n, func(n ast.Node) bool { 587 if n == nil { 588 return false 589 } 590 591 v := reflect.ValueOf(n).Elem() 592 593 switch v.Kind() { 594 case reflect.Struct: 595 for i := 0; i < v.NumField(); i++ { 596 f := v.Field(i) 597 if f.Type() != tokenPosType { 598 continue 599 } 600 601 if !f.CanSet() { 602 continue 603 } 604 605 f.SetInt(f.Int() + int64(offset)) 606 } 607 } 608 609 return true 610 }) 611} 612 613// replaceNode updates parent's child oldChild to be newChild. It 614// retuns whether it replaced successfully. 615func replaceNode(parent, oldChild, newChild ast.Node) bool { 616 if parent == nil || oldChild == nil || newChild == nil { 617 return false 618 } 619 620 parentVal := reflect.ValueOf(parent).Elem() 621 if parentVal.Kind() != reflect.Struct { 622 return false 623 } 624 625 newChildVal := reflect.ValueOf(newChild) 626 627 tryReplace := func(v reflect.Value) bool { 628 if !v.CanSet() || !v.CanInterface() { 629 return false 630 } 631 632 // If the existing value is oldChild, we found our child. Make 633 // sure our newChild is assignable and then make the swap. 634 if v.Interface() == oldChild && newChildVal.Type().AssignableTo(v.Type()) { 635 v.Set(newChildVal) 636 return true 637 } 638 639 return false 640 } 641 642 // Loop over parent's struct fields. 643 for i := 0; i < parentVal.NumField(); i++ { 644 f := parentVal.Field(i) 645 646 switch f.Kind() { 647 // Check interface and pointer fields. 648 case reflect.Interface, reflect.Ptr: 649 if tryReplace(f) { 650 return true 651 } 652 653 // Search through any slice fields. 654 case reflect.Slice: 655 for i := 0; i < f.Len(); i++ { 656 if tryReplace(f.Index(i)) { 657 return true 658 } 659 } 660 } 661 } 662 663 return false 664} 665