1// Copyright 2016 The Go Authors. All rights reserved. 2// Use of this source code is governed by a BSD-style 3// license that can be found in the LICENSE file. 4 5package pipeline 6 7import ( 8 "bytes" 9 "fmt" 10 "go/ast" 11 "go/constant" 12 "go/format" 13 "go/token" 14 "go/types" 15 "path/filepath" 16 "strings" 17 "unicode" 18 "unicode/utf8" 19 20 fmtparser "golang.org/x/text/internal/format" 21 "golang.org/x/tools/go/callgraph" 22 "golang.org/x/tools/go/callgraph/cha" 23 "golang.org/x/tools/go/loader" 24 "golang.org/x/tools/go/ssa" 25 "golang.org/x/tools/go/ssa/ssautil" 26) 27 28const debug = false 29 30// TODO: 31// - merge information into existing files 32// - handle different file formats (PO, XLIFF) 33// - handle features (gender, plural) 34// - message rewriting 35 36// - `msg:"etc"` tags 37 38// Extract extracts all strings form the package defined in Config. 39func Extract(c *Config) (*State, error) { 40 x, err := newExtracter(c) 41 if err != nil { 42 return nil, wrap(err, "") 43 } 44 45 x.seedEndpoints() 46 x.extractMessages() 47 48 return &State{ 49 Config: *c, 50 program: x.iprog, 51 Extracted: Messages{ 52 Language: c.SourceLanguage, 53 Messages: x.messages, 54 }, 55 }, nil 56} 57 58type extracter struct { 59 conf loader.Config 60 iprog *loader.Program 61 prog *ssa.Program 62 callGraph *callgraph.Graph 63 64 // Calls and other expressions to collect. 65 globals map[token.Pos]*constData 66 funcs map[token.Pos]*callData 67 messages []Message 68} 69 70func newExtracter(c *Config) (x *extracter, err error) { 71 x = &extracter{ 72 conf: loader.Config{}, 73 globals: map[token.Pos]*constData{}, 74 funcs: map[token.Pos]*callData{}, 75 } 76 77 x.iprog, err = loadPackages(&x.conf, c.Packages) 78 if err != nil { 79 return nil, wrap(err, "") 80 } 81 82 x.prog = ssautil.CreateProgram(x.iprog, ssa.GlobalDebug|ssa.BareInits) 83 x.prog.Build() 84 85 x.callGraph = cha.CallGraph(x.prog) 86 87 return x, nil 88} 89 90func (x *extracter) globalData(pos token.Pos) *constData { 91 cd := x.globals[pos] 92 if cd == nil { 93 cd = &constData{} 94 x.globals[pos] = cd 95 } 96 return cd 97} 98 99func (x *extracter) seedEndpoints() { 100 pkg := x.prog.Package(x.iprog.Package("golang.org/x/text/message").Pkg) 101 typ := types.NewPointer(pkg.Type("Printer").Type()) 102 103 x.processGlobalVars() 104 105 x.handleFunc(x.prog.LookupMethod(typ, pkg.Pkg, "Printf"), &callData{ 106 formatPos: 1, 107 argPos: 2, 108 isMethod: true, 109 }) 110 x.handleFunc(x.prog.LookupMethod(typ, pkg.Pkg, "Sprintf"), &callData{ 111 formatPos: 1, 112 argPos: 2, 113 isMethod: true, 114 }) 115 x.handleFunc(x.prog.LookupMethod(typ, pkg.Pkg, "Fprintf"), &callData{ 116 formatPos: 2, 117 argPos: 3, 118 isMethod: true, 119 }) 120} 121 122// processGlobalVars finds string constants that are assigned to global 123// variables. 124func (x *extracter) processGlobalVars() { 125 for _, p := range x.prog.AllPackages() { 126 m, ok := p.Members["init"] 127 if !ok { 128 continue 129 } 130 for _, b := range m.(*ssa.Function).Blocks { 131 for _, i := range b.Instrs { 132 s, ok := i.(*ssa.Store) 133 if !ok { 134 continue 135 } 136 a, ok := s.Addr.(*ssa.Global) 137 if !ok { 138 continue 139 } 140 t := a.Type() 141 for { 142 p, ok := t.(*types.Pointer) 143 if !ok { 144 break 145 } 146 t = p.Elem() 147 } 148 if b, ok := t.(*types.Basic); !ok || b.Kind() != types.String { 149 continue 150 } 151 x.visitInit(a, s.Val) 152 } 153 } 154 } 155} 156 157type constData struct { 158 call *callData // to provide a signature for the constants 159 values []constVal 160 others []token.Pos // Assigned to other global data. 161} 162 163func (d *constData) visit(x *extracter, f func(c constant.Value)) { 164 for _, v := range d.values { 165 f(v.value) 166 } 167 for _, p := range d.others { 168 if od, ok := x.globals[p]; ok { 169 od.visit(x, f) 170 } 171 } 172} 173 174type constVal struct { 175 value constant.Value 176 pos token.Pos 177} 178 179type callData struct { 180 call ssa.CallInstruction 181 expr *ast.CallExpr 182 formats []constant.Value 183 184 callee *callData 185 isMethod bool 186 formatPos int 187 argPos int // varargs at this position in the call 188 argTypes []int // arguments extractable from this position 189} 190 191func (c *callData) callFormatPos() int { 192 c = c.callee 193 if c.isMethod { 194 return c.formatPos - 1 195 } 196 return c.formatPos 197} 198 199func (c *callData) callArgsStart() int { 200 c = c.callee 201 if c.isMethod { 202 return c.argPos - 1 203 } 204 return c.argPos 205} 206 207func (c *callData) Pos() token.Pos { return c.call.Pos() } 208func (c *callData) Pkg() *types.Package { return c.call.Parent().Pkg.Pkg } 209 210func (x *extracter) handleFunc(f *ssa.Function, fd *callData) { 211 for _, e := range x.callGraph.Nodes[f].In { 212 if e.Pos() == 0 { 213 continue 214 } 215 216 call := e.Site 217 caller := x.funcs[call.Pos()] 218 if caller != nil { 219 // TODO: theoretically a format string could be passed to multiple 220 // arguments of a function. Support this eventually. 221 continue 222 } 223 x.debug(call, "CALL", f.String()) 224 225 caller = &callData{ 226 call: call, 227 callee: fd, 228 formatPos: -1, 229 argPos: -1, 230 } 231 // Offset by one if we are invoking an interface method. 232 offset := 0 233 if call.Common().IsInvoke() { 234 offset = -1 235 } 236 x.funcs[call.Pos()] = caller 237 if fd.argPos >= 0 { 238 x.visitArgs(caller, call.Common().Args[fd.argPos+offset]) 239 } 240 x.visitFormats(caller, call.Common().Args[fd.formatPos+offset]) 241 } 242} 243 244type posser interface { 245 Pos() token.Pos 246 Parent() *ssa.Function 247} 248 249func (x *extracter) debug(v posser, header string, args ...interface{}) { 250 if debug { 251 pos := "" 252 if p := v.Parent(); p != nil { 253 pos = posString(&x.conf, p.Package().Pkg, v.Pos()) 254 } 255 if header != "CALL" && header != "INSERT" { 256 header = " " + header 257 } 258 fmt.Printf("%-32s%-10s%-15T ", pos+fmt.Sprintf("@%d", v.Pos()), header, v) 259 for _, a := range args { 260 fmt.Printf(" %v", a) 261 } 262 fmt.Println() 263 } 264} 265 266// visitInit evaluates and collects values assigned to global variables in an 267// init function. 268func (x *extracter) visitInit(global *ssa.Global, v ssa.Value) { 269 if v == nil { 270 return 271 } 272 x.debug(v, "GLOBAL", v) 273 274 switch v := v.(type) { 275 case *ssa.Phi: 276 for _, e := range v.Edges { 277 x.visitInit(global, e) 278 } 279 280 case *ssa.Const: 281 // Only record strings with letters. 282 if str := constant.StringVal(v.Value); isMsg(str) { 283 cd := x.globalData(global.Pos()) 284 cd.values = append(cd.values, constVal{v.Value, v.Pos()}) 285 } 286 // TODO: handle %m-directive. 287 288 case *ssa.Global: 289 cd := x.globalData(global.Pos()) 290 cd.others = append(cd.others, v.Pos()) 291 292 case *ssa.FieldAddr, *ssa.Field: 293 // TODO: mark field index v.Field of v.X.Type() for extraction. extract 294 // an example args as to give parameters for the translator. 295 296 case *ssa.Slice: 297 if v.Low == nil && v.High == nil && v.Max == nil { 298 x.visitInit(global, v.X) 299 } 300 301 case *ssa.Alloc: 302 if ref := v.Referrers(); ref == nil { 303 for _, r := range *ref { 304 values := []ssa.Value{} 305 for _, o := range r.Operands(nil) { 306 if o == nil || *o == v { 307 continue 308 } 309 values = append(values, *o) 310 } 311 // TODO: return something different if we care about multiple 312 // values as well. 313 if len(values) == 1 { 314 x.visitInit(global, values[0]) 315 } 316 } 317 } 318 319 case ssa.Instruction: 320 rands := v.Operands(nil) 321 if len(rands) == 1 && rands[0] != nil { 322 x.visitInit(global, *rands[0]) 323 } 324 } 325 return 326} 327 328// visitFormats finds the original source of the value. The returned index is 329// position of the argument if originated from a function argument or -1 330// otherwise. 331func (x *extracter) visitFormats(call *callData, v ssa.Value) { 332 if v == nil { 333 return 334 } 335 x.debug(v, "VALUE", v) 336 337 switch v := v.(type) { 338 case *ssa.Phi: 339 for _, e := range v.Edges { 340 x.visitFormats(call, e) 341 } 342 343 case *ssa.Const: 344 // Only record strings with letters. 345 if isMsg(constant.StringVal(v.Value)) { 346 x.debug(call.call, "FORMAT", v.Value.ExactString()) 347 call.formats = append(call.formats, v.Value) 348 } 349 // TODO: handle %m-directive. 350 351 case *ssa.Global: 352 x.globalData(v.Pos()).call = call 353 354 case *ssa.FieldAddr, *ssa.Field: 355 // TODO: mark field index v.Field of v.X.Type() for extraction. extract 356 // an example args as to give parameters for the translator. 357 358 case *ssa.Slice: 359 if v.Low == nil && v.High == nil && v.Max == nil { 360 x.visitFormats(call, v.X) 361 } 362 363 case *ssa.Parameter: 364 // TODO: handle the function for the index parameter. 365 f := v.Parent() 366 for i, p := range f.Params { 367 if p == v { 368 if call.formatPos < 0 { 369 call.formatPos = i 370 // TODO: is there a better way to detect this is calling 371 // a method rather than a function? 372 call.isMethod = len(f.Params) > f.Signature.Params().Len() 373 x.handleFunc(v.Parent(), call) 374 } else if debug && i != call.formatPos { 375 // TODO: support this. 376 fmt.Printf("WARNING:%s: format string passed to arg %d and %d\n", 377 posString(&x.conf, call.Pkg(), call.Pos()), 378 call.formatPos, i) 379 } 380 } 381 } 382 383 case *ssa.Alloc: 384 if ref := v.Referrers(); ref == nil { 385 for _, r := range *ref { 386 values := []ssa.Value{} 387 for _, o := range r.Operands(nil) { 388 if o == nil || *o == v { 389 continue 390 } 391 values = append(values, *o) 392 } 393 // TODO: return something different if we care about multiple 394 // values as well. 395 if len(values) == 1 { 396 x.visitFormats(call, values[0]) 397 } 398 } 399 } 400 401 // TODO: 402 // case *ssa.Index: 403 // // Get all values in the array if applicable 404 // case *ssa.IndexAddr: 405 // // Get all values in the slice or *array if applicable. 406 // case *ssa.Lookup: 407 // // Get all values in the map if applicable. 408 409 case *ssa.FreeVar: 410 // TODO: find the link between free variables and parameters: 411 // 412 // func freeVar(p *message.Printer, str string) { 413 // fn := func(p *message.Printer) { 414 // p.Printf(str) 415 // } 416 // fn(p) 417 // } 418 419 case ssa.Instruction: 420 rands := v.Operands(nil) 421 if len(rands) == 1 && rands[0] != nil { 422 x.visitFormats(call, *rands[0]) 423 } 424 case *ssa.Call: 425 } 426} 427 428// Note: a function may have an argument marked as both format and passthrough. 429 430// visitArgs collects information on arguments. For wrapped functions it will 431// just determine the position of the variable args slice. 432func (x *extracter) visitArgs(fd *callData, v ssa.Value) { 433 if v == nil { 434 return 435 } 436 x.debug(v, "ARGV", v) 437 switch v := v.(type) { 438 439 case *ssa.Slice: 440 if v.Low == nil && v.High == nil && v.Max == nil { 441 x.visitArgs(fd, v.X) 442 } 443 444 case *ssa.Parameter: 445 // TODO: handle the function for the index parameter. 446 f := v.Parent() 447 for i, p := range f.Params { 448 if p == v { 449 fd.argPos = i 450 } 451 } 452 453 case *ssa.Alloc: 454 if ref := v.Referrers(); ref == nil { 455 for _, r := range *ref { 456 values := []ssa.Value{} 457 for _, o := range r.Operands(nil) { 458 if o == nil || *o == v { 459 continue 460 } 461 values = append(values, *o) 462 } 463 // TODO: return something different if we care about 464 // multiple values as well. 465 if len(values) == 1 { 466 x.visitArgs(fd, values[0]) 467 } 468 } 469 } 470 471 case ssa.Instruction: 472 rands := v.Operands(nil) 473 if len(rands) == 1 && rands[0] != nil { 474 x.visitArgs(fd, *rands[0]) 475 } 476 } 477} 478 479// print returns Go syntax for the specified node. 480func (x *extracter) print(n ast.Node) string { 481 var buf bytes.Buffer 482 format.Node(&buf, x.conf.Fset, n) 483 return buf.String() 484} 485 486type packageExtracter struct { 487 f *ast.File 488 x *extracter 489 info *loader.PackageInfo 490 cmap ast.CommentMap 491} 492 493func (px packageExtracter) getComment(n ast.Node) string { 494 cs := px.cmap.Filter(n).Comments() 495 if len(cs) > 0 { 496 return strings.TrimSpace(cs[0].Text()) 497 } 498 return "" 499} 500 501func (x *extracter) extractMessages() { 502 prog := x.iprog 503 files := []packageExtracter{} 504 for _, info := range x.iprog.AllPackages { 505 for _, f := range info.Files { 506 // Associate comments with nodes. 507 px := packageExtracter{ 508 f, x, info, 509 ast.NewCommentMap(prog.Fset, f, f.Comments), 510 } 511 files = append(files, px) 512 } 513 } 514 for _, px := range files { 515 ast.Inspect(px.f, func(n ast.Node) bool { 516 switch v := n.(type) { 517 case *ast.CallExpr: 518 if d := x.funcs[v.Lparen]; d != nil { 519 d.expr = v 520 } 521 } 522 return true 523 }) 524 } 525 for _, px := range files { 526 ast.Inspect(px.f, func(n ast.Node) bool { 527 switch v := n.(type) { 528 case *ast.CallExpr: 529 return px.handleCall(v) 530 case *ast.ValueSpec: 531 return px.handleGlobal(v) 532 } 533 return true 534 }) 535 } 536} 537 538func (px packageExtracter) handleGlobal(spec *ast.ValueSpec) bool { 539 comment := px.getComment(spec) 540 541 for _, ident := range spec.Names { 542 data, ok := px.x.globals[ident.Pos()] 543 if !ok { 544 continue 545 } 546 name := ident.Name 547 var arguments []argument 548 if data.call != nil { 549 arguments = px.getArguments(data.call) 550 } else if !strings.HasPrefix(name, "msg") && !strings.HasPrefix(name, "Msg") { 551 continue 552 } 553 data.visit(px.x, func(c constant.Value) { 554 px.addMessage(spec.Pos(), []string{name}, c, comment, arguments) 555 }) 556 } 557 558 return true 559} 560 561func (px packageExtracter) handleCall(call *ast.CallExpr) bool { 562 x := px.x 563 data := x.funcs[call.Lparen] 564 if data == nil || len(data.formats) == 0 { 565 return true 566 } 567 if data.expr != call { 568 panic("invariant `data.call != call` failed") 569 } 570 x.debug(data.call, "INSERT", data.formats) 571 572 argn := data.callFormatPos() 573 if argn >= len(call.Args) { 574 return true 575 } 576 format := call.Args[argn] 577 578 arguments := px.getArguments(data) 579 580 comment := "" 581 key := []string{} 582 if ident, ok := format.(*ast.Ident); ok { 583 key = append(key, ident.Name) 584 if v, ok := ident.Obj.Decl.(*ast.ValueSpec); ok && v.Comment != nil { 585 // TODO: get comment above ValueSpec as well 586 comment = v.Comment.Text() 587 } 588 } 589 if c := px.getComment(call.Args[0]); c != "" { 590 comment = c 591 } 592 593 formats := data.formats 594 for _, c := range formats { 595 px.addMessage(call.Lparen, key, c, comment, arguments) 596 } 597 return true 598} 599 600func (px packageExtracter) getArguments(data *callData) []argument { 601 arguments := []argument{} 602 x := px.x 603 info := px.info 604 if data.callArgsStart() >= 0 { 605 args := data.expr.Args[data.callArgsStart():] 606 for i, arg := range args { 607 expr := x.print(arg) 608 val := "" 609 if v := info.Types[arg].Value; v != nil { 610 val = v.ExactString() 611 switch arg.(type) { 612 case *ast.BinaryExpr, *ast.UnaryExpr: 613 expr = val 614 } 615 } 616 arguments = append(arguments, argument{ 617 ArgNum: i + 1, 618 Type: info.Types[arg].Type.String(), 619 UnderlyingType: info.Types[arg].Type.Underlying().String(), 620 Expr: expr, 621 Value: val, 622 Comment: px.getComment(arg), 623 Position: posString(&x.conf, info.Pkg, arg.Pos()), 624 // TODO report whether it implements 625 // interfaces plural.Interface, 626 // gender.Interface. 627 }) 628 } 629 } 630 return arguments 631} 632 633func (px packageExtracter) addMessage( 634 pos token.Pos, 635 key []string, 636 c constant.Value, 637 comment string, 638 arguments []argument) { 639 x := px.x 640 fmtMsg := constant.StringVal(c) 641 642 ph := placeholders{index: map[string]string{}} 643 644 trimmed, _, _ := trimWS(fmtMsg) 645 646 p := fmtparser.Parser{} 647 simArgs := make([]interface{}, len(arguments)) 648 for i, v := range arguments { 649 simArgs[i] = v 650 } 651 msg := "" 652 p.Reset(simArgs) 653 for p.SetFormat(trimmed); p.Scan(); { 654 name := "" 655 var arg *argument 656 switch p.Status { 657 case fmtparser.StatusText: 658 msg += p.Text() 659 continue 660 case fmtparser.StatusSubstitution, 661 fmtparser.StatusBadWidthSubstitution, 662 fmtparser.StatusBadPrecSubstitution: 663 arguments[p.ArgNum-1].used = true 664 arg = &arguments[p.ArgNum-1] 665 name = getID(arg) 666 case fmtparser.StatusBadArgNum, fmtparser.StatusMissingArg: 667 arg = &argument{ 668 ArgNum: p.ArgNum, 669 Position: posString(&x.conf, px.info.Pkg, pos), 670 } 671 name, arg.UnderlyingType = verbToPlaceholder(p.Text(), p.ArgNum) 672 } 673 sub := p.Text() 674 if !p.HasIndex { 675 r, sz := utf8.DecodeLastRuneInString(sub) 676 sub = fmt.Sprintf("%s[%d]%c", sub[:len(sub)-sz], p.ArgNum, r) 677 } 678 msg += fmt.Sprintf("{%s}", ph.addArg(arg, name, sub)) 679 } 680 key = append(key, msg) 681 682 // Add additional Placeholders that can be used in translations 683 // that are not present in the string. 684 for _, arg := range arguments { 685 if arg.used { 686 continue 687 } 688 ph.addArg(&arg, getID(&arg), fmt.Sprintf("%%[%d]v", arg.ArgNum)) 689 } 690 691 x.messages = append(x.messages, Message{ 692 ID: key, 693 Key: fmtMsg, 694 Message: Text{Msg: msg}, 695 // TODO(fix): this doesn't get the before comment. 696 Comment: comment, 697 Placeholders: ph.slice, 698 Position: posString(&x.conf, px.info.Pkg, pos), 699 }) 700} 701 702func posString(conf *loader.Config, pkg *types.Package, pos token.Pos) string { 703 p := conf.Fset.Position(pos) 704 file := fmt.Sprintf("%s:%d:%d", filepath.Base(p.Filename), p.Line, p.Column) 705 return filepath.Join(pkg.Path(), file) 706} 707 708func getID(arg *argument) string { 709 s := getLastComponent(arg.Expr) 710 s = strip(s) 711 s = strings.Replace(s, " ", "", -1) 712 // For small variable names, use user-defined types for more info. 713 if len(s) <= 2 && arg.UnderlyingType != arg.Type { 714 s = getLastComponent(arg.Type) 715 } 716 return strings.Title(s) 717} 718 719// strip is a dirty hack to convert function calls to placeholder IDs. 720func strip(s string) string { 721 s = strings.Map(func(r rune) rune { 722 if unicode.IsSpace(r) || r == '-' { 723 return '_' 724 } 725 if !unicode.In(r, unicode.Letter, unicode.Mark, unicode.Number) { 726 return -1 727 } 728 return r 729 }, s) 730 // Strip "Get" from getter functions. 731 if strings.HasPrefix(s, "Get") || strings.HasPrefix(s, "get") { 732 if len(s) > len("get") { 733 r, _ := utf8.DecodeRuneInString(s) 734 if !unicode.In(r, unicode.Ll, unicode.M) { // not lower or mark 735 s = s[len("get"):] 736 } 737 } 738 } 739 return s 740} 741 742// verbToPlaceholder gives a name for a placeholder based on the substitution 743// verb. This is only to be used if there is otherwise no other type information 744// available. 745func verbToPlaceholder(sub string, pos int) (name, underlying string) { 746 r, _ := utf8.DecodeLastRuneInString(sub) 747 name = fmt.Sprintf("Arg_%d", pos) 748 switch r { 749 case 's', 'q': 750 underlying = "string" 751 case 'd': 752 name = "Integer" 753 underlying = "int" 754 case 'e', 'f', 'g': 755 name = "Number" 756 underlying = "float64" 757 case 'm': 758 name = "Message" 759 underlying = "string" 760 default: 761 underlying = "interface{}" 762 } 763 return name, underlying 764} 765 766type placeholders struct { 767 index map[string]string 768 slice []Placeholder 769} 770 771func (p *placeholders) addArg(arg *argument, name, sub string) (id string) { 772 id = name 773 alt, ok := p.index[id] 774 for i := 1; ok && alt != sub; i++ { 775 id = fmt.Sprintf("%s_%d", name, i) 776 alt, ok = p.index[id] 777 } 778 p.index[id] = sub 779 p.slice = append(p.slice, Placeholder{ 780 ID: id, 781 String: sub, 782 Type: arg.Type, 783 UnderlyingType: arg.UnderlyingType, 784 ArgNum: arg.ArgNum, 785 Expr: arg.Expr, 786 Comment: arg.Comment, 787 }) 788 return id 789} 790 791func getLastComponent(s string) string { 792 return s[1+strings.LastIndexByte(s, '.'):] 793} 794 795// isMsg returns whether s should be translated. 796func isMsg(s string) bool { 797 // TODO: parse as format string and omit strings that contain letters 798 // coming from format verbs. 799 for _, r := range s { 800 if unicode.In(r, unicode.L) { 801 return true 802 } 803 } 804 return false 805} 806