1// Copyright 2016 The Go Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style
3// license that can be found in the LICENSE file.
4
5// +build ignore
6
7package main
8
9import (
10	"log"
11	"strings"
12	"unicode"
13
14	"golang.org/x/text/internal/gen"
15	"golang.org/x/text/internal/ucd"
16)
17
// snippet records where one rune's name is stored inside the package-level
// data buffer, which holds the names laid end to end.
type snippet struct {
	// offset and length delimit the name: data[offset : offset+length].
	offset, length int

	// s is the text of the name itself.
	s string
}
24
25func makeTable0EntryDirect(rOffset, rLength, dOffset, dLength int) uint64 {
26	if rOffset >= 1<<bitsRuneOffset {
27		log.Fatalf("makeTable0EntryDirect: rOffset %d is too large", rOffset)
28	}
29	if rLength >= 1<<bitsRuneLength {
30		log.Fatalf("makeTable0EntryDirect: rLength %d is too large", rLength)
31	}
32	if dOffset >= 1<<bitsDataOffset {
33		log.Fatalf("makeTable0EntryDirect: dOffset %d is too large", dOffset)
34	}
35	if dLength >= 1<<bitsRuneLength {
36		log.Fatalf("makeTable0EntryDirect: dLength %d is too large", dLength)
37	}
38	return uint64(rOffset)<<shiftRuneOffset |
39		uint64(rLength)<<shiftRuneLength |
40		uint64(dOffset)<<shiftDataOffset |
41		uint64(dLength)<<shiftDataLength |
42		1 // Direct bit.
43}
44
45func makeTable0EntryIndirect(rOffset, rLength, dBase, t1Offset int) uint64 {
46	if rOffset >= 1<<bitsRuneOffset {
47		log.Fatalf("makeTable0EntryIndirect: rOffset %d is too large", rOffset)
48	}
49	if rLength >= 1<<bitsRuneLength {
50		log.Fatalf("makeTable0EntryIndirect: rLength %d is too large", rLength)
51	}
52	if dBase >= 1<<bitsDataBase {
53		log.Fatalf("makeTable0EntryIndirect: dBase %d is too large", dBase)
54	}
55	if t1Offset >= 1<<bitsTable1Offset {
56		log.Fatalf("makeTable0EntryIndirect: t1Offset %d is too large", t1Offset)
57	}
58	return uint64(rOffset)<<shiftRuneOffset |
59		uint64(rLength)<<shiftRuneLength |
60		uint64(dBase)<<shiftDataBase |
61		uint64(t1Offset)<<shiftTable1Offset |
62		0 // Direct bit.
63}
64
// makeTable1Entry narrows x to the 16-bit element type of table1. A value
// outside [0, 0xffff] cannot be represented and aborts the generator.
func makeTable1Entry(x int) uint16 {
	const maxEntry = 1<<16 - 1
	if 0 <= x && x <= maxEntry {
		return uint16(x)
	}
	log.Fatalf("makeTable1Entry: entry %d is out of range", x)
	return 0 // Not reached: log.Fatalf exits the process.
}
71
72var (
73	data     []byte
74	snippets = make([]snippet, 1+unicode.MaxRune)
75)
76
77func main() {
78	gen.Init()
79
80	names, counts := parse()
81	appendRepeatNames(names, counts)
82	appendUniqueNames(names, counts)
83
84	table0, table1 := makeTables()
85
86	gen.Repackage("gen_bits.go", "bits.go", "runenames")
87
88	w := gen.NewCodeWriter()
89	w.WriteVar("table0", table0)
90	w.WriteVar("table1", table1)
91	w.WriteConst("data", string(data))
92	w.WriteGoFile("tables.go", "runenames")
93}
94
95func parse() (names []string, counts map[string]int) {
96	names = make([]string, 1+unicode.MaxRune)
97	counts = map[string]int{}
98	ucd.Parse(gen.OpenUCDFile("UnicodeData.txt"), func(p *ucd.Parser) {
99		r, s := p.Rune(0), p.String(ucd.Name)
100		if s == "" {
101			return
102		}
103		if s[0] == '<' {
104			const first = ", First>"
105			if i := strings.Index(s, first); i >= 0 {
106				s = s[:i] + ">"
107			}
108		}
109		names[r] = s
110		counts[s]++
111	})
112	return names, counts
113}
114
115func appendRepeatNames(names []string, counts map[string]int) {
116	alreadySeen := map[string]snippet{}
117	for r, s := range names {
118		if s == "" || counts[s] == 1 {
119			continue
120		}
121		if s[0] != '<' {
122			log.Fatalf("Repeated name %q does not start with a '<'", s)
123		}
124
125		if z, ok := alreadySeen[s]; ok {
126			snippets[r] = z
127			continue
128		}
129
130		z := snippet{
131			offset: len(data),
132			length: len(s),
133			s:      s,
134		}
135		data = append(data, s...)
136		snippets[r] = z
137		alreadySeen[s] = z
138	}
139}
140
141func appendUniqueNames(names []string, counts map[string]int) {
142	for r, s := range names {
143		if s == "" || counts[s] != 1 {
144			continue
145		}
146		if s[0] == '<' {
147			log.Fatalf("Unique name %q starts with a '<'", s)
148		}
149
150		z := snippet{
151			offset: len(data),
152			length: len(s),
153			s:      s,
154		}
155		data = append(data, s...)
156		snippets[r] = z
157	}
158}
159
160func makeTables() (table0 []uint64, table1 []uint16) {
161	for i := 0; i < len(snippets); {
162		zi := snippets[i]
163		if zi == (snippet{}) {
164			i++
165			continue
166		}
167
168		// Look for repeat names. If we have one, we only need a table0 entry.
169		j := i + 1
170		for ; j < len(snippets) && zi == snippets[j]; j++ {
171		}
172		if j > i+1 {
173			table0 = append(table0, makeTable0EntryDirect(i, j-i, zi.offset, zi.length))
174			i = j
175			continue
176		}
177
178		// Otherwise, we have a run of unique names. We need one table0 entry
179		// and two or more table1 entries.
180		base := zi.offset &^ (1<<dataBaseUnit - 1)
181		t1Offset := len(table1) + 1
182		table1 = append(table1, makeTable1Entry(zi.offset-base))
183		table1 = append(table1, makeTable1Entry(zi.offset+zi.length-base))
184		for ; j < len(snippets) && snippets[j] != (snippet{}); j++ {
185			zj := snippets[j]
186			if data[zj.offset] == '<' {
187				break
188			}
189			table1 = append(table1, makeTable1Entry(zj.offset+zj.length-base))
190		}
191		table0 = append(table0, makeTable0EntryIndirect(i, j-i, base>>dataBaseUnit, t1Offset))
192		i = j
193	}
194	return table0, table1
195}
196