1// Copyright 2016 The Go Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style
3// license that can be found in the LICENSE file.
4
5package pipeline
6
7import (
8	"bytes"
9	"fmt"
10	"go/ast"
11	"go/constant"
12	"go/format"
13	"go/token"
14	"go/types"
15	"path"
16	"path/filepath"
17	"strings"
18	"unicode"
19	"unicode/utf8"
20
21	fmtparser "golang.org/x/text/internal/format"
22	"golang.org/x/tools/go/loader"
23)
24
25// TODO:
26// - merge information into existing files
27// - handle different file formats (PO, XLIFF)
28// - handle features (gender, plural)
29// - message rewriting
30
31// Extract extracts all strings form the package defined in Config.
32func Extract(c *Config) (*Locale, error) {
33	conf := loader.Config{}
34	prog, err := loadPackages(&conf, c.Packages)
35	if err != nil {
36		return nil, wrap(err, "")
37	}
38
39	// print returns Go syntax for the specified node.
40	print := func(n ast.Node) string {
41		var buf bytes.Buffer
42		format.Node(&buf, conf.Fset, n)
43		return buf.String()
44	}
45
46	var messages []Message
47
48	for _, info := range prog.AllPackages {
49		for _, f := range info.Files {
50			// Associate comments with nodes.
51			cmap := ast.NewCommentMap(prog.Fset, f, f.Comments)
52			getComment := func(n ast.Node) string {
53				cs := cmap.Filter(n).Comments()
54				if len(cs) > 0 {
55					return strings.TrimSpace(cs[0].Text())
56				}
57				return ""
58			}
59
60			// Find function calls.
61			ast.Inspect(f, func(n ast.Node) bool {
62				call, ok := n.(*ast.CallExpr)
63				if !ok {
64					return true
65				}
66
67				// Skip calls of functions other than
68				// (*message.Printer).{Sp,Fp,P}rintf.
69				sel, ok := call.Fun.(*ast.SelectorExpr)
70				if !ok {
71					return true
72				}
73				meth := info.Selections[sel]
74				if meth == nil || meth.Kind() != types.MethodVal {
75					return true
76				}
77				// TODO: remove cheap hack and check if the type either
78				// implements some interface or is specifically of type
79				// "golang.org/x/text/message".Printer.
80				m, ok := extractFuncs[path.Base(meth.Recv().String())]
81				if !ok {
82					return true
83				}
84
85				fmtType, ok := m[meth.Obj().Name()]
86				if !ok {
87					return true
88				}
89				// argn is the index of the format string.
90				argn := fmtType.arg
91				if argn >= len(call.Args) {
92					return true
93				}
94
95				args := call.Args[fmtType.arg:]
96
97				fmtMsg, ok := msgStr(info, args[0])
98				if !ok {
99					// TODO: identify the type of the format argument. If it
100					// is not a string, multiple keys may be defined.
101					return true
102				}
103				comment := ""
104				key := []string{}
105				if ident, ok := args[0].(*ast.Ident); ok {
106					key = append(key, ident.Name)
107					if v, ok := ident.Obj.Decl.(*ast.ValueSpec); ok && v.Comment != nil {
108						// TODO: get comment above ValueSpec as well
109						comment = v.Comment.Text()
110					}
111				}
112
113				arguments := []argument{}
114				args = args[1:]
115				simArgs := make([]interface{}, len(args))
116				for i, arg := range args {
117					expr := print(arg)
118					val := ""
119					if v := info.Types[arg].Value; v != nil {
120						val = v.ExactString()
121						simArgs[i] = val
122						switch arg.(type) {
123						case *ast.BinaryExpr, *ast.UnaryExpr:
124							expr = val
125						}
126					}
127					arguments = append(arguments, argument{
128						ArgNum:         i + 1,
129						Type:           info.Types[arg].Type.String(),
130						UnderlyingType: info.Types[arg].Type.Underlying().String(),
131						Expr:           expr,
132						Value:          val,
133						Comment:        getComment(arg),
134						Position:       posString(conf, info, arg.Pos()),
135						// TODO report whether it implements
136						// interfaces plural.Interface,
137						// gender.Interface.
138					})
139				}
140				msg := ""
141
142				ph := placeholders{index: map[string]string{}}
143
144				p := fmtparser.Parser{}
145				p.Reset(simArgs)
146				for p.SetFormat(fmtMsg); p.Scan(); {
147					switch p.Status {
148					case fmtparser.StatusText:
149						msg += p.Text()
150					case fmtparser.StatusSubstitution,
151						fmtparser.StatusBadWidthSubstitution,
152						fmtparser.StatusBadPrecSubstitution:
153						arguments[p.ArgNum-1].used = true
154						arg := arguments[p.ArgNum-1]
155						sub := p.Text()
156						if !p.HasIndex {
157							r, sz := utf8.DecodeLastRuneInString(sub)
158							sub = fmt.Sprintf("%s[%d]%c", sub[:len(sub)-sz], p.ArgNum, r)
159						}
160						msg += fmt.Sprintf("{%s}", ph.addArg(&arg, sub))
161					}
162				}
163				key = append(key, msg)
164
165				// Add additional Placeholders that can be used in translations
166				// that are not present in the string.
167				for _, arg := range arguments {
168					if arg.used {
169						continue
170					}
171					ph.addArg(&arg, fmt.Sprintf("%%[%d]v", arg.ArgNum))
172				}
173
174				if c := getComment(call.Args[0]); c != "" {
175					comment = c
176				}
177
178				messages = append(messages, Message{
179					ID:      key,
180					Key:     fmtMsg,
181					Message: Text{Msg: msg},
182					// TODO(fix): this doesn't get the before comment.
183					Comment:      comment,
184					Placeholders: ph.slice,
185					Position:     posString(conf, info, call.Lparen),
186				})
187				return true
188			})
189		}
190	}
191
192	out := &Locale{
193		Language: c.SourceLanguage,
194		Messages: messages,
195	}
196	return out, nil
197}
198
199func posString(conf loader.Config, info *loader.PackageInfo, pos token.Pos) string {
200	p := conf.Fset.Position(pos)
201	file := fmt.Sprintf("%s:%d:%d", filepath.Base(p.Filename), p.Line, p.Column)
202	return filepath.Join(info.Pkg.Path(), file)
203}
204
205// extractFuncs indicates the types and methods for which to extract strings,
206// and which argument to extract.
207// TODO: use the types in conf.Import("golang.org/x/text/message") to extract
208// the correct instances.
209var extractFuncs = map[string]map[string]extractType{
210	// TODO: Printer -> *golang.org/x/text/message.Printer
211	"message.Printer": {
212		"Printf":  extractType{arg: 0, format: true},
213		"Sprintf": extractType{arg: 0, format: true},
214		"Fprintf": extractType{arg: 1, format: true},
215
216		"Lookup": extractType{arg: 0},
217	},
218}
219
// extractType describes how a string is extracted from a call to one of the
// methods listed in extractFuncs.
//
// format indicates if the next arg is a formatted string or whether to
// concatenate all arguments. arg indicates the position of the argument to
// extract; it is used as a zero-based index into the call's argument list.
type extractType struct {
	format bool // argument is a format string
	arg    int  // index of the argument to extract
}
227
228func getID(arg *argument) string {
229	s := getLastComponent(arg.Expr)
230	s = strip(s)
231	s = strings.Replace(s, " ", "", -1)
232	// For small variable names, use user-defined types for more info.
233	if len(s) <= 2 && arg.UnderlyingType != arg.Type {
234		s = getLastComponent(arg.Type)
235	}
236	return strings.Title(s)
237}
238
// strip is a dirty hack to convert function calls to placeholder IDs.
//
// Whitespace and '-' are replaced by '_', and every other rune that is not a
// letter or a mark is dropped. A leading "Get" or "get" is then removed when
// it is followed by a rune that is neither a lowercase letter nor a mark, so
// "GetName" and "getName" both yield "Name" while "Getter" and "getter" are
// left untouched.
func strip(s string) string {
	s = strings.Map(func(r rune) rune {
		if unicode.IsSpace(r) || r == '-' {
			return '_'
		}
		if !unicode.In(r, unicode.Letter, unicode.Mark) {
			return -1
		}
		return r
	}, s)
	// Strip "Get" from getter functions.
	if strings.HasPrefix(s, "Get") || strings.HasPrefix(s, "get") {
		rest := s[len("get"):]
		if rest != "" {
			// Look at the rune that follows the prefix, not at the prefix
			// itself: a lowercase letter or mark there means "get" is just
			// the start of a longer word.
			r, _ := utf8.DecodeRuneInString(rest)
			if !unicode.In(r, unicode.Ll, unicode.M) { // not lower or mark
				s = rest
			}
		}
	}
	return s
}
261
262type placeholders struct {
263	index map[string]string
264	slice []Placeholder
265}
266
267func (p *placeholders) addArg(arg *argument, sub string) (id string) {
268	id = getID(arg)
269	id1 := id
270	alt, ok := p.index[id1]
271	for i := 1; ok && alt != sub; i++ {
272		id1 = fmt.Sprintf("%s_%d", id, i)
273		alt, ok = p.index[id1]
274	}
275	p.index[id1] = sub
276	p.slice = append(p.slice, Placeholder{
277		ID:             id1,
278		String:         sub,
279		Type:           arg.Type,
280		UnderlyingType: arg.UnderlyingType,
281		ArgNum:         arg.ArgNum,
282		Expr:           arg.Expr,
283		Comment:        arg.Comment,
284	})
285	return id1
286}
287
// getLastComponent returns the part of s after its last '.', or s itself when
// it contains no '.'.
func getLastComponent(s string) string {
	if dot := strings.LastIndexByte(s, '.'); dot >= 0 {
		return s[dot+1:]
	}
	return s
}
291
292func msgStr(info *loader.PackageInfo, e ast.Expr) (s string, ok bool) {
293	v := info.Types[e].Value
294	if v == nil || v.Kind() != constant.String {
295		return "", false
296	}
297	s = constant.StringVal(v)
298	// Only record strings with letters.
299	for _, r := range s {
300		if unicode.In(r, unicode.L) {
301			return s, true
302		}
303	}
304	return "", false
305}
306