1// Copyright 2017 The Go Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style
3// license that can be found in the LICENSE file.
4
5// Package pipeline provides tools for creating translation pipelines.
6//
7// NOTE: UNDER DEVELOPMENT. API MAY CHANGE.
8package pipeline
9
10import (
11	"bytes"
12	"encoding/json"
13	"fmt"
14	"go/build"
15	"go/parser"
16	"io/ioutil"
17	"log"
18	"os"
19	"path/filepath"
20	"regexp"
21	"strings"
22	"text/template"
23	"unicode"
24
25	"golang.org/x/text/internal"
26	"golang.org/x/text/language"
27	"golang.org/x/text/runes"
28	"golang.org/x/tools/go/loader"
29)
30
// File names and extensions used for gotext JSON files.
const (
	// gotextSuffix is the extension shared by gotext JSON files. It is the
	// fallback extension for generated out files and is part of the default
	// pattern for incoming translation files.
	gotextSuffix = "gotext.json"

	// extractFile is the base name "extracted.gotext.json".
	// NOTE(review): presumably the file that receives extracted messages; it
	// is not referenced in this part of the file.
	extractFile = "extracted." + gotextSuffix

	// outFile is the base name "out.gotext.json".
	// NOTE(review): presumably the default per-language out file; it is not
	// referenced in this part of the file.
	outFile = "out." + gotextSuffix
)
36
// Config contains configuration for the translation pipeline.
type Config struct {
	// Supported indicates the languages for which data should be generated.
	// The default is to support all locales for which there are matching
	// translation files. Merge always adds the languages of the imported
	// translation files to this list.
	Supported []language.Tag

	// --- Extraction

	// SourceLanguage is the language of the messages in the source code.
	// Import uses it as the initial language when walking Dir, and Merge
	// copies the source message as the translation for this language when no
	// translation is available.
	SourceLanguage language.Tag

	// Packages presumably lists the packages from which to extract messages;
	// it is not referenced in this part of the file. TODO confirm.
	Packages []string

	// --- File structure

	// Dir is the root dir for all operations. If empty, "./locales" is used.
	Dir string

	// TranslationsPattern is a regular expression to match incoming translation
	// files. These files may appear in any directory rooted at Dir. If empty,
	// files whose name contains ".gotext.json" are matched.
	// The language for the translation files is determined as follows:
	//   1. From the Language field in the file.
	//   2. If not present, from a valid language tag in the filename, separated
	//      by dots (e.g. "en-US.json" or "incoming.pt_PT.xmb").
	//   3. If not present, from the closest subdirectory in which the file
	//      is contained that parses as a valid language tag.
	TranslationsPattern string

	// OutPattern defines the location for translation files for a certain
	// language. It is a text/template that may refer to .Dir, .Language and
	// .Ext. The default is "{{.Dir}}/{{.Language}}/out.{{.Ext}}"
	OutPattern string

	// Format defines the file format for generated translation files.
	// The default is XMB. Alternatives are GetText, XLIFF, L20n, GoText.
	// NOTE(review): in this part of the file Format is only used as the
	// fallback for Ext, and files are always written as JSON; confirm the
	// stated default.
	Format string

	// Ext is the extension used for {{.Ext}} in OutPattern. If empty, Format
	// is used, and if that is empty as well, "gotext.json".
	Ext string

	// TODO:
	// Actions are additional actions to be performed after the initial extract
	// and merge.
	// Actions []struct {
	// 	Name    string
	// 	Options map[string]string
	// }

	// --- Generation

	// GenFile may be in a different package. If it is not defined, it will
	// be written to stdout.
	GenFile string

	// GenPackage is the package or relative path into which to generate the
	// file. If not specified it is relative to the current directory.
	GenPackage string

	// DeclareVar defines a variable to which to assign the generated Catalog.
	DeclareVar string

	// SetDefault determines whether to assign the generated Catalog to
	// message.DefaultCatalog. The default for this is true if DeclareVar is
	// not defined, false otherwise.
	SetDefault bool

	// TODO:
	// - Printf-style configuration
	// - Template-style configuration
	// - Extraction options
	// - Rewrite options
	// - Generation options
}
108
109// Operations:
110// - extract:       get the strings
// - disambiguate:  find messages with the same key, but possibly different meanings.
112// - create out:    create a list of messages that need translations
113// - load trans:    load the list of current translations
114// - merge:         assign list of translations as done
115// - (action)expand:    analyze features and create example sentences for each version.
116// - (action)googletrans:   pre-populate messages with automatic translations.
117// - (action)export:    send out messages somewhere non-standard
118// - (action)import:    load messages from somewhere non-standard
119// - vet program:   don't pass "foo" + var + "bar" strings. Not using funcs for translated strings.
120// - vet trans:     coverage: all translations/ all features.
121// - generate:      generate Go code
122
// State holds all accumulated information on translations during processing.
//
// In this file, Import appends to Translations, Merge builds Messages from
// Extracted and Translations, and Export writes Messages to files.
type State struct {
	// Config is the pipeline configuration consulted by the operations.
	Config Config

	// Package is presumably the package being processed; it is not referenced
	// in this part of the file. TODO confirm.
	Package string
	// program is presumably the loaded program from which messages are
	// extracted; it is not referenced in this part of the file. TODO confirm.
	program *loader.Program

	// Extracted holds the extracted messages; Merge reads Extracted.Messages.
	// It is serialized under the JSON key "messages".
	Extracted Messages `json:"messages"`

	// Messages includes all messages for which there need to be translations.
	// Duplicates may be eliminated. Generation will be done from these messages
	// (usually after merging). Merge appends one entry per language and panics
	// if Messages is already non-nil.
	Messages []Messages

	// Translations are incoming translations for the application messages.
	// Import appends one entry per translation file it reads.
	Translations []Messages
}
140
141func (s *State) dir() string {
142	if d := s.Config.Dir; d != "" {
143		return d
144	}
145	return "./locales"
146}
147
148func outPattern(s *State) (string, error) {
149	c := s.Config
150	pat := c.OutPattern
151	if pat == "" {
152		pat = "{{.Dir}}/{{.Language}}/out.{{.Ext}}"
153	}
154
155	ext := c.Ext
156	if ext == "" {
157		ext = c.Format
158	}
159	if ext == "" {
160		ext = gotextSuffix
161	}
162	t, err := template.New("").Parse(pat)
163	if err != nil {
164		return "", wrap(err, "error parsing template")
165	}
166	buf := bytes.Buffer{}
167	err = t.Execute(&buf, map[string]string{
168		"Dir":      s.dir(),
169		"Language": "%s",
170		"Ext":      ext,
171	})
172	return filepath.FromSlash(buf.String()), wrap(err, "incorrect OutPattern")
173}
174
175var transRE = regexp.MustCompile(`.*\.` + gotextSuffix)
176
177// Import loads existing translation files.
178func (s *State) Import() error {
179	outPattern, err := outPattern(s)
180	if err != nil {
181		return err
182	}
183	re := transRE
184	if pat := s.Config.TranslationsPattern; pat != "" {
185		if re, err = regexp.Compile(pat); err != nil {
186			return wrapf(err, "error parsing regexp %q", s.Config.TranslationsPattern)
187		}
188	}
189	x := importer{s, outPattern, re}
190	return x.walkImport(s.dir(), s.Config.SourceLanguage)
191}
192
// importer carries the values shared by the recursive directory walk that
// Import performs to collect translation files.
type importer struct {
	// state receives the parsed files (appended to state.Translations).
	state      *State
	// outPattern is a fmt format string taking the language as its argument;
	// files whose path equals the formatted result are skipped.
	outPattern string
	// transFile selects, by file name, which files are read as translations.
	transFile  *regexp.Regexp
}
198
199func (i *importer) walkImport(path string, tag language.Tag) error {
200	files, err := ioutil.ReadDir(path)
201	if err != nil {
202		return nil
203	}
204	for _, f := range files {
205		name := f.Name()
206		tag := tag
207		if f.IsDir() {
208			if t, err := language.Parse(name); err == nil {
209				tag = t
210			}
211			// We ignore errors
212			if err := i.walkImport(filepath.Join(path, name), tag); err != nil {
213				return err
214			}
215			continue
216		}
217		for _, l := range strings.Split(name, ".") {
218			if t, err := language.Parse(l); err == nil {
219				tag = t
220			}
221		}
222		file := filepath.Join(path, name)
223		// TODO: Should we skip files that match output files?
224		if fmt.Sprintf(i.outPattern, tag) == file {
225			continue
226		}
227		// TODO: handle different file formats.
228		if !i.transFile.MatchString(name) {
229			continue
230		}
231		b, err := ioutil.ReadFile(file)
232		if err != nil {
233			return wrap(err, "read file failed")
234		}
235		var translations Messages
236		if err := json.Unmarshal(b, &translations); err != nil {
237			return wrap(err, "parsing translation file failed")
238		}
239		i.state.Translations = append(i.state.Translations, translations)
240	}
241	return nil
242}
243
244// Merge merges the extracted messages with the existing translations.
245func (s *State) Merge() error {
246	if s.Messages != nil {
247		panic("already merged")
248	}
249	// Create an index for each unique message.
250	// Duplicates are okay as long as the substitution arguments are okay as
251	// well.
252	// Top-level messages are okay to appear in multiple substitution points.
253
254	// Collect key equivalence.
255	msgs := []*Message{}
256	keyToIDs := map[string]*Message{}
257	for _, m := range s.Extracted.Messages {
258		m := m
259		if prev, ok := keyToIDs[m.Key]; ok {
260			if err := checkEquivalence(&m, prev); err != nil {
261				warnf("Key %q matches conflicting messages: %v and %v", m.Key, prev.ID, m.ID)
262				// TODO: track enough information so that the rewriter can
263				// suggest/disambiguate messages.
264			}
265			// TODO: add position to message.
266			continue
267		}
268		i := len(msgs)
269		msgs = append(msgs, &m)
270		keyToIDs[m.Key] = msgs[i]
271	}
272
273	// Messages with different keys may still refer to the same translated
274	// message (e.g. different whitespace). Filter these.
275	idMap := map[string]bool{}
276	filtered := []*Message{}
277	for _, m := range msgs {
278		found := false
279		for _, id := range m.ID {
280			found = found || idMap[id]
281		}
282		if !found {
283			filtered = append(filtered, m)
284		}
285		for _, id := range m.ID {
286			idMap[id] = true
287		}
288	}
289
290	// Build index of translations.
291	translations := map[language.Tag]map[string]Message{}
292	languages := append([]language.Tag{}, s.Config.Supported...)
293
294	for _, t := range s.Translations {
295		tag := t.Language
296		if _, ok := translations[tag]; !ok {
297			translations[tag] = map[string]Message{}
298			languages = append(languages, tag)
299		}
300		for _, m := range t.Messages {
301			if !m.Translation.IsEmpty() {
302				for _, id := range m.ID {
303					if _, ok := translations[tag][id]; ok {
304						warnf("Duplicate translation in locale %q for message %q", tag, id)
305					}
306					translations[tag][id] = m
307				}
308			}
309		}
310	}
311	languages = internal.UniqueTags(languages)
312
313	for _, tag := range languages {
314		ms := Messages{Language: tag}
315		for _, orig := range filtered {
316			m := *orig
317			m.Key = ""
318			m.Position = ""
319
320			for _, id := range m.ID {
321				if t, ok := translations[tag][id]; ok {
322					m.Translation = t.Translation
323					if t.TranslatorComment != "" {
324						m.TranslatorComment = t.TranslatorComment
325						m.Fuzzy = t.Fuzzy
326					}
327					break
328				}
329			}
330			if tag == s.Config.SourceLanguage && m.Translation.IsEmpty() {
331				m.Translation = m.Message
332				if m.TranslatorComment == "" {
333					m.TranslatorComment = "Copied from source."
334					m.Fuzzy = true
335				}
336			}
337			// TODO: if translation is empty: pre-expand based on available
338			// linguistic features. This may also be done as a plugin.
339			ms.Messages = append(ms.Messages, m)
340		}
341		s.Messages = append(s.Messages, ms)
342	}
343	return nil
344}
345
346// Export writes out the messages to translation out files.
347func (s *State) Export() error {
348	path, err := outPattern(s)
349	if err != nil {
350		return wrap(err, "export failed")
351	}
352	for _, out := range s.Messages {
353		// TODO: inject translations from existing files to avoid retranslation.
354		data, err := json.MarshalIndent(out, "", "    ")
355		if err != nil {
356			return wrap(err, "JSON marshal failed")
357		}
358		file := fmt.Sprintf(path, out.Language)
359		if err := os.MkdirAll(filepath.Dir(file), 0755); err != nil {
360			return wrap(err, "dir create failed")
361		}
362		if err := ioutil.WriteFile(file, data, 0644); err != nil {
363			return wrap(err, "write failed")
364		}
365	}
366	return nil
367}
368
369var (
370	ws    = runes.In(unicode.White_Space).Contains
371	notWS = runes.NotIn(unicode.White_Space).Contains
372)
373
374func trimWS(s string) (trimmed, leadWS, trailWS string) {
375	trimmed = strings.TrimRightFunc(s, ws)
376	trailWS = s[len(trimmed):]
377	if i := strings.IndexFunc(trimmed, notWS); i > 0 {
378		leadWS = trimmed[:i]
379		trimmed = trimmed[i:]
380	}
381	return trimmed, leadWS, trailWS
382}
383
// Error helpers. They are declared as variables rather than functions.
//
// NOTE: The command line tool already prefixes with "gotext:".
var (
	// wrap returns nil for a nil err; otherwise it returns a new error whose
	// text is "msg: err".
	wrap = func(err error, msg string) error {
		if err != nil {
			return fmt.Errorf("%s: %v", msg, err)
		}
		return nil
	}

	// wrapf is like wrap, with the message built from a format and arguments.
	// The message is only formatted when err is non-nil.
	wrapf = func(err error, msg string, args ...interface{}) error {
		if err != nil {
			return wrap(err, fmt.Sprintf(msg, args...))
		}
		return nil
	}

	// errorf creates a new error from a format and arguments.
	errorf = fmt.Errorf
)
400
// warnf reports a non-fatal problem. The message is formatted like
// fmt.Printf and written through the standard logger of the log package.
func warnf(format string, args ...interface{}) {
	// TODO: don't log.
	log.Printf(format, args...)
}
405
406func loadPackages(conf *loader.Config, args []string) (*loader.Program, error) {
407	if len(args) == 0 {
408		args = []string{"."}
409	}
410
411	conf.Build = &build.Default
412	conf.ParserMode = parser.ParseComments
413
414	// Use the initial packages from the command line.
415	args, err := conf.FromArgs(args, false)
416	if err != nil {
417		return nil, wrap(err, "loading packages failed")
418	}
419
420	// Load, parse and type-check the whole program.
421	return conf.Load()
422}
423