// Copyright 2013 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

// +build ignore

// This tool generates types for the various XML formats of CLDR.
8package main
9
10import (
11	"archive/zip"
12	"bytes"
13	"encoding/xml"
14	"flag"
15	"fmt"
16	"io"
17	"io/ioutil"
18	"log"
19	"os"
20	"regexp"
21	"strings"
22
23	"golang.org/x/text/internal/gen"
24)
25
// outputFile is the value of the -output flag: the name of the Go file that
// main hands to gen.WriteGoFile. It defaults to "xml.go".
var outputFile = flag.String("output", "xml.go", "output file name")
27
28func main() {
29	flag.Parse()
30
31	r := gen.OpenCLDRCoreZip()
32	buffer, err := ioutil.ReadAll(r)
33	if err != nil {
34		log.Fatal("Could not read zip file")
35	}
36	r.Close()
37	z, err := zip.NewReader(bytes.NewReader(buffer), int64(len(buffer)))
38	if err != nil {
39		log.Fatalf("Could not read zip archive: %v", err)
40	}
41
42	var buf bytes.Buffer
43
44	version := gen.CLDRVersion()
45
46	for _, dtd := range files {
47		for _, f := range z.File {
48			if strings.HasSuffix(f.Name, dtd.file+".dtd") {
49				r, err := f.Open()
50				failOnError(err)
51
52				b := makeBuilder(&buf, dtd)
53				b.parseDTD(r)
54				b.resolve(b.index[dtd.top[0]])
55				b.write()
56				if b.version != "" && version != b.version {
57					println(f.Name)
58					log.Fatalf("main: inconsistent versions: found %s; want %s", b.version, version)
59				}
60				break
61			}
62		}
63	}
64	fmt.Fprintln(&buf, "// Version is the version of CLDR from which the XML definitions are generated.")
65	fmt.Fprintf(&buf, "const Version = %q\n", version)
66
67	gen.WriteGoFile(*outputFile, "cldr", buf.Bytes())
68}
69
// failOnError does nothing when err is nil. Otherwise it writes the error to
// standard error, prefixed with the file and line of the caller, and exits
// the program with status 1.
func failOnError(err error) {
	if err == nil {
		return
	}
	// Call depth 2 attributes the message to the caller of failOnError
	// rather than to this helper.
	log.New(os.Stderr, "", log.Lshortfile).Output(2, err.Error())
	os.Exit(1)
}
76
// dtd holds the configuration for generating types from one DTD file.
type dtd struct {
	file string   // base file name; matched against archive entries as file+".dtd"
	root string   // Go type name used for the first element listed in top
	top  []string // elements that get their own named type; top[0] is the root element

	skipElem    []string // hard-coded or deprecated elements, left out of the generated structs
	skipAttr    []string // attributes to exclude from the generated structs
	predefined  []string // hard-coded elements exist of the form <name>Elem
	forceRepeat []string // elements to make slices despite DTD
}
88
89var files = []dtd{
90	{
91		file: "ldmlBCP47",
92		root: "LDMLBCP47",
93		top:  []string{"ldmlBCP47"},
94		skipElem: []string{
95			"cldrVersion", // deprecated, not used
96		},
97	},
98	{
99		file: "ldmlSupplemental",
100		root: "SupplementalData",
101		top:  []string{"supplementalData"},
102		skipElem: []string{
103			"cldrVersion", // deprecated, not used
104		},
105		forceRepeat: []string{
106			"plurals", // data defined in plurals.xml and ordinals.xml
107		},
108	},
109	{
110		file: "ldml",
111		root: "LDML",
112		top: []string{
113			"ldml", "collation", "calendar", "timeZoneNames", "localeDisplayNames", "numbers",
114		},
115		skipElem: []string{
116			"cp",       // not used anywhere
117			"special",  // not used anywhere
118			"fallback", // deprecated, not used
119			"alias",    // in Common
120			"default",  // in Common
121		},
122		skipAttr: []string{
123			"hiraganaQuarternary", // typo in DTD, correct version included as well
124		},
125		predefined: []string{"rules"},
126	},
127}
128
// comments maps an XML element name to the doc comment that write emits
// verbatim in front of the Go type generated for that element. Each value
// starts and ends with a newline so that the comment sits on its own lines.
var comments = map[string]string{
	"ldmlBCP47": `
// LDMLBCP47 holds information on allowable values for various variables in LDML.
`,
	"supplementalData": `
// SupplementalData holds information relevant for internationalization
// and proper use of CLDR, but that is not contained in the locale hierarchy.
`,
	"ldml": `
// LDML is the top-level type for locale-specific data.
`,
	"collation": `
// Collation contains rules that specify a certain sort-order,
// as a tailoring of the root order.
// The parsed rules are obtained by passing a RuleProcessor to Collation's
// Process method.
`,
	"calendar": `
// Calendar specifies the fields used for formatting and parsing dates and times.
// The month and quarter names are identified numerically, starting at 1.
// The day (of the week) names are identified with short strings, since there is
// no universally-accepted numeric designation.
`,
	"dates": `
// Dates contains information regarding the format and parsing of dates and times.
`,
	"localeDisplayNames": `
// LocaleDisplayNames specifies localized display names for scripts, languages,
// countries, currencies, and variants.
`,
	"numbers": `
// Numbers supplies information for formatting and parsing numbers and currencies.
`,
}
163
164type element struct {
165	name      string // XML element name
166	category  string // elements contained by this element
167	signature string // category + attrKey*
168
169	attr []*attribute // attributes supported by this element.
170	sub  []struct {   // parsed and evaluated sub elements of this element.
171		e      *element
172		repeat bool // true if the element needs to be a slice
173	}
174
175	resolved bool // prevent multiple resolutions of this element.
176}
177
// attribute describes one attribute recorded from an ATTLIST declaration.
type attribute struct {
	name string   // XML attribute name
	key  string   // text of the ATTLIST declaration following the element name
	list []string // tokens that reToken finds in the enumerated value list, if any

	tag string // Go tag
}
185
// Regular expressions used by parseDTD to take DTD directives apart.
var (
	// reHead captures the directive keyword (group 1) and the element name
	// it applies to (group 2).
	reHead = regexp.MustCompile(` *(\w+) +([\w\-]+)`)
	// reAttr matches the remainder of an ATTLIST directive: attribute name
	// (1), a plain type (2) or a parenthesized value list (3), an optional
	// #KEYWORD (4) with an optional quoted value (5), and an optional
	// trailing quoted string (6).
	// NOTE(review): the class in group 5, [\.\d+], has no quantifier, so it
	// matches exactly one character; [\.\d]+ may have been intended - confirm
	// before changing, since it decides what ends up in builder.version.
	reAttr = regexp.MustCompile(` *(\w+) *(?:(\w+)|\(([\w\- \|]+)\)) *(?:#([A-Z]*) *(?:\"([\.\d+])\")?)? *("[\w\-:]*")?`)
	// reElem matches the remainder of an ELEMENT directive and captures its
	// content model: EMPTY, ANY, or a parenthesized list with an optional
	// quantifier.
	reElem = regexp.MustCompile(`^ *(EMPTY|ANY|\(.*\)[\*\+\?]?) *$`)
	// reToken is applied to the enumerated value list of an attribute.
	// NOTE(review): as written it matches one word character followed by a
	// hyphen; [\w\-]+ may have been intended - confirm.
	reToken = regexp.MustCompile(`\w\-`)
)
192
193// builder is used to read in the DTD files from CLDR and generate Go code
194// to be used with the encoding/xml package.
195type builder struct {
196	w       io.Writer
197	index   map[string]*element
198	elem    []*element
199	info    dtd
200	version string
201}
202
203func makeBuilder(w io.Writer, d dtd) builder {
204	return builder{
205		w:     w,
206		index: make(map[string]*element),
207		elem:  []*element{},
208		info:  d,
209	}
210}
211
212// parseDTD parses a DTD file.
213func (b *builder) parseDTD(r io.Reader) {
214	for d := xml.NewDecoder(r); ; {
215		t, err := d.Token()
216		if t == nil {
217			break
218		}
219		failOnError(err)
220		dir, ok := t.(xml.Directive)
221		if !ok {
222			continue
223		}
224		m := reHead.FindSubmatch(dir)
225		dir = dir[len(m[0]):]
226		ename := string(m[2])
227		el, elementFound := b.index[ename]
228		switch string(m[1]) {
229		case "ELEMENT":
230			if elementFound {
231				log.Fatal("parseDTD: duplicate entry for element %q", ename)
232			}
233			m := reElem.FindSubmatch(dir)
234			if m == nil {
235				log.Fatalf("parseDTD: invalid element %q", string(dir))
236			}
237			if len(m[0]) != len(dir) {
238				log.Fatal("parseDTD: invalid element %q", string(dir), len(dir), len(m[0]), string(m[0]))
239			}
240			s := string(m[1])
241			el = &element{
242				name:     ename,
243				category: s,
244			}
245			b.index[ename] = el
246		case "ATTLIST":
247			if !elementFound {
248				log.Fatalf("parseDTD: unknown element %q", ename)
249			}
250			s := string(dir)
251			m := reAttr.FindStringSubmatch(s)
252			if m == nil {
253				log.Fatal(fmt.Errorf("parseDTD: invalid attribute %q", string(dir)))
254			}
255			if m[4] == "FIXED" {
256				b.version = m[5]
257			} else {
258				switch m[1] {
259				case "draft", "references", "alt", "validSubLocales", "standard" /* in Common */ :
260				case "type", "choice":
261				default:
262					el.attr = append(el.attr, &attribute{
263						name: m[1],
264						key:  s,
265						list: reToken.FindAllString(m[3], -1),
266					})
267					el.signature = fmt.Sprintf("%s=%s+%s", el.signature, m[1], m[2])
268				}
269			}
270		}
271	}
272}
273
// reCat matches the next token of an element's content model: leading
// separators (space, comma, '|') followed by an optional parenthesis or
// name (group 1) with an optional quantifier (group 2). Every part is
// optional, so the pattern also matches the empty string.
var reCat = regexp.MustCompile(`[ ,\|]*(?:(\(|\)|\#?[\w_-]+)([\*\+\?]?))?`)
275
276// resolve takes a parsed element and converts it into structured data
277// that can be used to generate the XML code.
278func (b *builder) resolve(e *element) {
279	if e.resolved {
280		return
281	}
282	b.elem = append(b.elem, e)
283	e.resolved = true
284	s := e.category
285	found := make(map[string]bool)
286	sequenceStart := []int{}
287	for len(s) > 0 {
288		m := reCat.FindStringSubmatch(s)
289		if m == nil {
290			log.Fatalf("%s: invalid category string %q", e.name, s)
291		}
292		repeat := m[2] == "*" || m[2] == "+" || in(b.info.forceRepeat, m[1])
293		switch m[1] {
294		case "":
295		case "(":
296			sequenceStart = append(sequenceStart, len(e.sub))
297		case ")":
298			if len(sequenceStart) == 0 {
299				log.Fatalf("%s: unmatched closing parenthesis", e.name)
300			}
301			for i := sequenceStart[len(sequenceStart)-1]; i < len(e.sub); i++ {
302				e.sub[i].repeat = e.sub[i].repeat || repeat
303			}
304			sequenceStart = sequenceStart[:len(sequenceStart)-1]
305		default:
306			if in(b.info.skipElem, m[1]) {
307			} else if sub, ok := b.index[m[1]]; ok {
308				if !found[sub.name] {
309					e.sub = append(e.sub, struct {
310						e      *element
311						repeat bool
312					}{sub, repeat})
313					found[sub.name] = true
314					b.resolve(sub)
315				}
316			} else if m[1] == "#PCDATA" || m[1] == "ANY" {
317			} else if m[1] != "EMPTY" {
318				log.Fatalf("resolve:%s: element %q not found", e.name, m[1])
319			}
320		}
321		s = s[len(m[0]):]
322	}
323}
324
// in reports whether s is contained in set.
func in(set []string, s string) bool {
	for _, v := range set {
		if v == s {
			return true
		}
	}
	return false
}
334
// repl turns the word separators '-' and '_' into spaces so that
// strings.Title treats the parts as separate words.
var repl = strings.NewReplacer("-", " ", "_", " ")

// title converts an XML name to a Go identifier: the first character and
// each character following '-', '_' or a space are put in title case, and
// all occurrences of these separators are removed.
func title(s string) string {
	words := strings.Title(repl.Replace(s))
	return strings.Replace(words, " ", "", -1)
}
342
343// writeElem generates Go code for a single element, recursively.
344func (b *builder) writeElem(tab int, e *element) {
345	p := func(f string, x ...interface{}) {
346		f = strings.Replace(f, "\n", "\n"+strings.Repeat("\t", tab), -1)
347		fmt.Fprintf(b.w, f, x...)
348	}
349	if len(e.sub) == 0 && len(e.attr) == 0 {
350		p("Common")
351		return
352	}
353	p("struct {")
354	tab++
355	p("\nCommon")
356	for _, attr := range e.attr {
357		if !in(b.info.skipAttr, attr.name) {
358			p("\n%s string `xml:\"%s,attr\"`", title(attr.name), attr.name)
359		}
360	}
361	for _, sub := range e.sub {
362		if in(b.info.predefined, sub.e.name) {
363			p("\n%sElem", sub.e.name)
364			continue
365		}
366		if in(b.info.skipElem, sub.e.name) {
367			continue
368		}
369		p("\n%s ", title(sub.e.name))
370		if sub.repeat {
371			p("[]")
372		}
373		p("*")
374		if in(b.info.top, sub.e.name) {
375			p(title(sub.e.name))
376		} else {
377			b.writeElem(tab, sub.e)
378		}
379		p(" `xml:\"%s\"`", sub.e.name)
380	}
381	tab--
382	p("\n}")
383}
384
385// write generates the Go XML code.
386func (b *builder) write() {
387	for i, name := range b.info.top {
388		e := b.index[name]
389		if e != nil {
390			fmt.Fprintf(b.w, comments[name])
391			name := title(e.name)
392			if i == 0 {
393				name = b.info.root
394			}
395			fmt.Fprintf(b.w, "type %s ", name)
396			b.writeElem(0, e)
397			fmt.Fprint(b.w, "\n")
398		}
399	}
400}
401