1// Copyright 2016 The Go Authors. All rights reserved. 2// Use of this source code is governed by a BSD-style 3// license that can be found in the LICENSE file. 4 5package pipeline 6 7import ( 8 "bytes" 9 "fmt" 10 "go/ast" 11 "go/constant" 12 "go/format" 13 "go/token" 14 "go/types" 15 "path" 16 "path/filepath" 17 "strings" 18 "unicode" 19 "unicode/utf8" 20 21 fmtparser "golang.org/x/text/internal/format" 22 "golang.org/x/tools/go/loader" 23) 24 25// TODO: 26// - merge information into existing files 27// - handle different file formats (PO, XLIFF) 28// - handle features (gender, plural) 29// - message rewriting 30 31// Extract extracts all strings form the package defined in Config. 32func Extract(c *Config) (*Locale, error) { 33 conf := loader.Config{} 34 prog, err := loadPackages(&conf, c.Packages) 35 if err != nil { 36 return nil, wrap(err, "") 37 } 38 39 // print returns Go syntax for the specified node. 40 print := func(n ast.Node) string { 41 var buf bytes.Buffer 42 format.Node(&buf, conf.Fset, n) 43 return buf.String() 44 } 45 46 var messages []Message 47 48 for _, info := range prog.AllPackages { 49 for _, f := range info.Files { 50 // Associate comments with nodes. 51 cmap := ast.NewCommentMap(prog.Fset, f, f.Comments) 52 getComment := func(n ast.Node) string { 53 cs := cmap.Filter(n).Comments() 54 if len(cs) > 0 { 55 return strings.TrimSpace(cs[0].Text()) 56 } 57 return "" 58 } 59 60 // Find function calls. 61 ast.Inspect(f, func(n ast.Node) bool { 62 call, ok := n.(*ast.CallExpr) 63 if !ok { 64 return true 65 } 66 67 // Skip calls of functions other than 68 // (*message.Printer).{Sp,Fp,P}rintf. 69 sel, ok := call.Fun.(*ast.SelectorExpr) 70 if !ok { 71 return true 72 } 73 meth := info.Selections[sel] 74 if meth == nil || meth.Kind() != types.MethodVal { 75 return true 76 } 77 // TODO: remove cheap hack and check if the type either 78 // implements some interface or is specifically of type 79 // "golang.org/x/text/message".Printer. 80 m, ok := extractFuncs[path.Base(meth.Recv().String())] 81 if !ok { 82 return true 83 } 84 85 fmtType, ok := m[meth.Obj().Name()] 86 if !ok { 87 return true 88 } 89 // argn is the index of the format string. 90 argn := fmtType.arg 91 if argn >= len(call.Args) { 92 return true 93 } 94 95 args := call.Args[fmtType.arg:] 96 97 fmtMsg, ok := msgStr(info, args[0]) 98 if !ok { 99 // TODO: identify the type of the format argument. If it 100 // is not a string, multiple keys may be defined. 101 return true 102 } 103 comment := "" 104 key := []string{} 105 if ident, ok := args[0].(*ast.Ident); ok { 106 key = append(key, ident.Name) 107 if v, ok := ident.Obj.Decl.(*ast.ValueSpec); ok && v.Comment != nil { 108 // TODO: get comment above ValueSpec as well 109 comment = v.Comment.Text() 110 } 111 } 112 113 arguments := []argument{} 114 args = args[1:] 115 simArgs := make([]interface{}, len(args)) 116 for i, arg := range args { 117 expr := print(arg) 118 val := "" 119 if v := info.Types[arg].Value; v != nil { 120 val = v.ExactString() 121 simArgs[i] = val 122 switch arg.(type) { 123 case *ast.BinaryExpr, *ast.UnaryExpr: 124 expr = val 125 } 126 } 127 arguments = append(arguments, argument{ 128 ArgNum: i + 1, 129 Type: info.Types[arg].Type.String(), 130 UnderlyingType: info.Types[arg].Type.Underlying().String(), 131 Expr: expr, 132 Value: val, 133 Comment: getComment(arg), 134 Position: posString(conf, info, arg.Pos()), 135 // TODO report whether it implements 136 // interfaces plural.Interface, 137 // gender.Interface. 138 }) 139 } 140 msg := "" 141 142 ph := placeholders{index: map[string]string{}} 143 144 p := fmtparser.Parser{} 145 p.Reset(simArgs) 146 for p.SetFormat(fmtMsg); p.Scan(); { 147 switch p.Status { 148 case fmtparser.StatusText: 149 msg += p.Text() 150 case fmtparser.StatusSubstitution, 151 fmtparser.StatusBadWidthSubstitution, 152 fmtparser.StatusBadPrecSubstitution: 153 arguments[p.ArgNum-1].used = true 154 arg := arguments[p.ArgNum-1] 155 sub := p.Text() 156 if !p.HasIndex { 157 r, sz := utf8.DecodeLastRuneInString(sub) 158 sub = fmt.Sprintf("%s[%d]%c", sub[:len(sub)-sz], p.ArgNum, r) 159 } 160 msg += fmt.Sprintf("{%s}", ph.addArg(&arg, sub)) 161 } 162 } 163 key = append(key, msg) 164 165 // Add additional Placeholders that can be used in translations 166 // that are not present in the string. 167 for _, arg := range arguments { 168 if arg.used { 169 continue 170 } 171 ph.addArg(&arg, fmt.Sprintf("%%[%d]v", arg.ArgNum)) 172 } 173 174 if c := getComment(call.Args[0]); c != "" { 175 comment = c 176 } 177 178 messages = append(messages, Message{ 179 ID: key, 180 Key: fmtMsg, 181 Message: Text{Msg: msg}, 182 // TODO(fix): this doesn't get the before comment. 183 Comment: comment, 184 Placeholders: ph.slice, 185 Position: posString(conf, info, call.Lparen), 186 }) 187 return true 188 }) 189 } 190 } 191 192 out := &Locale{ 193 Language: c.SourceLanguage, 194 Messages: messages, 195 } 196 return out, nil 197} 198 199func posString(conf loader.Config, info *loader.PackageInfo, pos token.Pos) string { 200 p := conf.Fset.Position(pos) 201 file := fmt.Sprintf("%s:%d:%d", filepath.Base(p.Filename), p.Line, p.Column) 202 return filepath.Join(info.Pkg.Path(), file) 203} 204 205// extractFuncs indicates the types and methods for which to extract strings, 206// and which argument to extract. 207// TODO: use the types in conf.Import("golang.org/x/text/message") to extract 208// the correct instances. 209var extractFuncs = map[string]map[string]extractType{ 210 // TODO: Printer -> *golang.org/x/text/message.Printer 211 "message.Printer": { 212 "Printf": extractType{arg: 0, format: true}, 213 "Sprintf": extractType{arg: 0, format: true}, 214 "Fprintf": extractType{arg: 1, format: true}, 215 216 "Lookup": extractType{arg: 0}, 217 }, 218} 219 220type extractType struct { 221 // format indicates if the next arg is a formatted string or whether to 222 // concatenate all arguments 223 format bool 224 // arg indicates the position of the argument to extract. 225 arg int 226} 227 228func getID(arg *argument) string { 229 s := getLastComponent(arg.Expr) 230 s = strip(s) 231 s = strings.Replace(s, " ", "", -1) 232 // For small variable names, use user-defined types for more info. 233 if len(s) <= 2 && arg.UnderlyingType != arg.Type { 234 s = getLastComponent(arg.Type) 235 } 236 return strings.Title(s) 237} 238 239// strip is a dirty hack to convert function calls to placeholder IDs. 240func strip(s string) string { 241 s = strings.Map(func(r rune) rune { 242 if unicode.IsSpace(r) || r == '-' { 243 return '_' 244 } 245 if !unicode.In(r, unicode.Letter, unicode.Mark) { 246 return -1 247 } 248 return r 249 }, s) 250 // Strip "Get" from getter functions. 251 if strings.HasPrefix(s, "Get") || strings.HasPrefix(s, "get") { 252 if len(s) > len("get") { 253 r, _ := utf8.DecodeRuneInString(s) 254 if !unicode.In(r, unicode.Ll, unicode.M) { // not lower or mark 255 s = s[len("get"):] 256 } 257 } 258 } 259 return s 260} 261 262type placeholders struct { 263 index map[string]string 264 slice []Placeholder 265} 266 267func (p *placeholders) addArg(arg *argument, sub string) (id string) { 268 id = getID(arg) 269 id1 := id 270 alt, ok := p.index[id1] 271 for i := 1; ok && alt != sub; i++ { 272 id1 = fmt.Sprintf("%s_%d", id, i) 273 alt, ok = p.index[id1] 274 } 275 p.index[id1] = sub 276 p.slice = append(p.slice, Placeholder{ 277 ID: id1, 278 String: sub, 279 Type: arg.Type, 280 UnderlyingType: arg.UnderlyingType, 281 ArgNum: arg.ArgNum, 282 Expr: arg.Expr, 283 Comment: arg.Comment, 284 }) 285 return id1 286} 287 288func getLastComponent(s string) string { 289 return s[1+strings.LastIndexByte(s, '.'):] 290} 291 292func msgStr(info *loader.PackageInfo, e ast.Expr) (s string, ok bool) { 293 v := info.Types[e].Value 294 if v == nil || v.Kind() != constant.String { 295 return "", false 296 } 297 s = constant.StringVal(v) 298 // Only record strings with letters. 299 for _, r := range s { 300 if unicode.In(r, unicode.L) { 301 return s, true 302 } 303 } 304 return "", false 305} 306