1// Copyright 2014 The Go Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style
3// license that can be found in the LICENSE file.
4
5package cases
6
7import "golang.org/x/text/transform"
8
9// A context is used for iterating over source bytes, fetching case info and
10// writing to a destination buffer.
11//
12// Casing operations may need more than one rune of context to decide how a rune
13// should be cased. Casing implementations should call checkpoint on context
14// whenever it is known to be safe to return the runes processed so far.
15//
16// It is recommended for implementations to not allow for more than 30 case
17// ignorables as lookahead (analogous to the limit in norm) and to use state if
18// unbounded lookahead is needed for cased runes.
type context struct {
	// dst is the buffer that output is written to, starting at pDst, and src
	// is the input that is scanned, starting at pSrc.
	dst, src []byte
	// atEOF reports that no further input follows src. When it is set, next
	// consumes an incomplete rune at the end of src one byte at a time
	// instead of recording transform.ErrShortSrc.
	atEOF    bool

	pDst int // pDst points past the last written rune in dst.
	pSrc int // pSrc points to the start of the currently scanned rune.

	// checkpoints safe to return in Transform, where nDst <= pDst and nSrc <= pSrc.
	nDst, nSrc int
	// err holds the error recorded while scanning or writing. Once it is
	// non-nil, next returns false and checkpoint no longer moves nDst and nSrc.
	err        error

	// sz and info describe the rune at pSrc and are set by next. next advances
	// pSrc by sz before reading, so a sz of 0 makes it read the same position
	// again.
	sz   int  // size of current rune
	info info // case information of currently scanned rune

	// State preserved across calls to Transform.
	isMidWord bool // false if next cased letter needs to be title-cased.
}
36
// Reset clears isMidWord, the only state that is preserved across calls to
// Transform, so that the next cased letter is again treated as one that needs
// to be title-cased.
func (c *context) Reset() {
	c.isMidWord = false
}
40
41// ret returns the return values for the Transform method. It checks whether
42// there were insufficient bytes in src to complete and introduces an error
43// accordingly, if necessary.
44func (c *context) ret() (nDst, nSrc int, err error) {
45	if c.err != nil || c.nSrc == len(c.src) {
46		return c.nDst, c.nSrc, c.err
47	}
48	// This point is only reached by mappers if there was no short destination
49	// buffer. This means that the source buffer was exhausted and that c.sz was
50	// set to 0 by next.
51	if c.atEOF && c.pSrc == len(c.src) {
52		return c.pDst, c.pSrc, nil
53	}
54	return c.nDst, c.nSrc, transform.ErrShortSrc
55}
56
57// retSpan returns the return values for the Span method. It checks whether
58// there were insufficient bytes in src to complete and introduces an error
59// accordingly, if necessary.
60func (c *context) retSpan() (n int, err error) {
61	_, nSrc, err := c.ret()
62	return nSrc, err
63}
64
65// checkpoint sets the return value buffer points for Transform to the current
66// positions.
67func (c *context) checkpoint() {
68	if c.err == nil {
69		c.nDst, c.nSrc = c.pDst, c.pSrc+c.sz
70	}
71}
72
// unreadRune causes the last rune read by next to be reread on the next
// invocation of next. Only one unreadRune may be called after a call to next.
//
// It works by setting sz to 0: next advances pSrc by sz before it reads, so
// the following call to next stays at the current position.
func (c *context) unreadRune() {
	c.sz = 0
}
78
79func (c *context) next() bool {
80	c.pSrc += c.sz
81	if c.pSrc == len(c.src) || c.err != nil {
82		c.info, c.sz = 0, 0
83		return false
84	}
85	v, sz := trie.lookup(c.src[c.pSrc:])
86	c.info, c.sz = info(v), sz
87	if c.sz == 0 {
88		if c.atEOF {
89			// A zero size means we have an incomplete rune. If we are atEOF,
90			// this means it is an illegal rune, which we will consume one
91			// byte at a time.
92			c.sz = 1
93		} else {
94			c.err = transform.ErrShortSrc
95			return false
96		}
97	}
98	return true
99}
100
101// writeBytes adds bytes to dst.
102func (c *context) writeBytes(b []byte) bool {
103	if len(c.dst)-c.pDst < len(b) {
104		c.err = transform.ErrShortDst
105		return false
106	}
107	// This loop is faster than using copy.
108	for _, ch := range b {
109		c.dst[c.pDst] = ch
110		c.pDst++
111	}
112	return true
113}
114
115// writeString writes the given string to dst.
116func (c *context) writeString(s string) bool {
117	if len(c.dst)-c.pDst < len(s) {
118		c.err = transform.ErrShortDst
119		return false
120	}
121	// This loop is faster than using copy.
122	for i := 0; i < len(s); i++ {
123		c.dst[c.pDst] = s[i]
124		c.pDst++
125	}
126	return true
127}
128
129// copy writes the current rune to dst.
130func (c *context) copy() bool {
131	return c.writeBytes(c.src[c.pSrc : c.pSrc+c.sz])
132}
133
134// copyXOR copies the current rune to dst and modifies it by applying the XOR
135// pattern of the case info. It is the responsibility of the caller to ensure
136// that this is a rune with a XOR pattern defined.
137func (c *context) copyXOR() bool {
138	if !c.copy() {
139		return false
140	}
141	if c.info&xorIndexBit == 0 {
142		// Fast path for 6-bit XOR pattern, which covers most cases.
143		c.dst[c.pDst-1] ^= byte(c.info >> xorShift)
144	} else {
145		// Interpret XOR bits as an index.
146		// TODO: test performance for unrolling this loop. Verify that we have
147		// at least two bytes and at most three.
148		idx := c.info >> xorShift
149		for p := c.pDst - 1; ; p-- {
150			c.dst[p] ^= xorData[idx]
151			idx--
152			if xorData[idx] == 0 {
153				break
154			}
155		}
156	}
157	return true
158}
159
160// hasPrefix returns true if src[pSrc:] starts with the given string.
161func (c *context) hasPrefix(s string) bool {
162	b := c.src[c.pSrc:]
163	if len(b) < len(s) {
164		return false
165	}
166	for i, c := range b[:len(s)] {
167		if c != s[i] {
168			return false
169		}
170	}
171	return true
172}
173
174// caseType returns an info with only the case bits, normalized to either
175// cLower, cUpper, cTitle or cUncased.
176func (c *context) caseType() info {
177	cm := c.info & 0x7
178	if cm < 4 {
179		return cm
180	}
181	if cm >= cXORCase {
182		// xor the last bit of the rune with the case type bits.
183		b := c.src[c.pSrc+c.sz-1]
184		return info(b&1) ^ cm&0x3
185	}
186	if cm == cIgnorableCased {
187		return cLower
188	}
189	return cUncased
190}
191
192// lower writes the lowercase version of the current rune to dst.
193func lower(c *context) bool {
194	ct := c.caseType()
195	if c.info&hasMappingMask == 0 || ct == cLower {
196		return c.copy()
197	}
198	if c.info&exceptionBit == 0 {
199		return c.copyXOR()
200	}
201	e := exceptions[c.info>>exceptionShift:]
202	offset := 2 + e[0]&lengthMask // size of header + fold string
203	if nLower := (e[1] >> lengthBits) & lengthMask; nLower != noChange {
204		return c.writeString(e[offset : offset+nLower])
205	}
206	return c.copy()
207}
208
209func isLower(c *context) bool {
210	ct := c.caseType()
211	if c.info&hasMappingMask == 0 || ct == cLower {
212		return true
213	}
214	if c.info&exceptionBit == 0 {
215		c.err = transform.ErrEndOfSpan
216		return false
217	}
218	e := exceptions[c.info>>exceptionShift:]
219	if nLower := (e[1] >> lengthBits) & lengthMask; nLower != noChange {
220		c.err = transform.ErrEndOfSpan
221		return false
222	}
223	return true
224}
225
226// upper writes the uppercase version of the current rune to dst.
227func upper(c *context) bool {
228	ct := c.caseType()
229	if c.info&hasMappingMask == 0 || ct == cUpper {
230		return c.copy()
231	}
232	if c.info&exceptionBit == 0 {
233		return c.copyXOR()
234	}
235	e := exceptions[c.info>>exceptionShift:]
236	offset := 2 + e[0]&lengthMask // size of header + fold string
237	// Get length of first special case mapping.
238	n := (e[1] >> lengthBits) & lengthMask
239	if ct == cTitle {
240		// The first special case mapping is for lower. Set n to the second.
241		if n == noChange {
242			n = 0
243		}
244		n, e = e[1]&lengthMask, e[n:]
245	}
246	if n != noChange {
247		return c.writeString(e[offset : offset+n])
248	}
249	return c.copy()
250}
251
252// isUpper writes the isUppercase version of the current rune to dst.
253func isUpper(c *context) bool {
254	ct := c.caseType()
255	if c.info&hasMappingMask == 0 || ct == cUpper {
256		return true
257	}
258	if c.info&exceptionBit == 0 {
259		c.err = transform.ErrEndOfSpan
260		return false
261	}
262	e := exceptions[c.info>>exceptionShift:]
263	// Get length of first special case mapping.
264	n := (e[1] >> lengthBits) & lengthMask
265	if ct == cTitle {
266		n = e[1] & lengthMask
267	}
268	if n != noChange {
269		c.err = transform.ErrEndOfSpan
270		return false
271	}
272	return true
273}
274
275// title writes the title case version of the current rune to dst.
276func title(c *context) bool {
277	ct := c.caseType()
278	if c.info&hasMappingMask == 0 || ct == cTitle {
279		return c.copy()
280	}
281	if c.info&exceptionBit == 0 {
282		if ct == cLower {
283			return c.copyXOR()
284		}
285		return c.copy()
286	}
287	// Get the exception data.
288	e := exceptions[c.info>>exceptionShift:]
289	offset := 2 + e[0]&lengthMask // size of header + fold string
290
291	nFirst := (e[1] >> lengthBits) & lengthMask
292	if nTitle := e[1] & lengthMask; nTitle != noChange {
293		if nFirst != noChange {
294			e = e[nFirst:]
295		}
296		return c.writeString(e[offset : offset+nTitle])
297	}
298	if ct == cLower && nFirst != noChange {
299		// Use the uppercase version instead.
300		return c.writeString(e[offset : offset+nFirst])
301	}
302	// Already in correct case.
303	return c.copy()
304}
305
306// isTitle reports whether the current rune is in title case.
307func isTitle(c *context) bool {
308	ct := c.caseType()
309	if c.info&hasMappingMask == 0 || ct == cTitle {
310		return true
311	}
312	if c.info&exceptionBit == 0 {
313		if ct == cLower {
314			c.err = transform.ErrEndOfSpan
315			return false
316		}
317		return true
318	}
319	// Get the exception data.
320	e := exceptions[c.info>>exceptionShift:]
321	if nTitle := e[1] & lengthMask; nTitle != noChange {
322		c.err = transform.ErrEndOfSpan
323		return false
324	}
325	nFirst := (e[1] >> lengthBits) & lengthMask
326	if ct == cLower && nFirst != noChange {
327		c.err = transform.ErrEndOfSpan
328		return false
329	}
330	return true
331}
332
333// foldFull writes the foldFull version of the current rune to dst.
334func foldFull(c *context) bool {
335	if c.info&hasMappingMask == 0 {
336		return c.copy()
337	}
338	ct := c.caseType()
339	if c.info&exceptionBit == 0 {
340		if ct != cLower || c.info&inverseFoldBit != 0 {
341			return c.copyXOR()
342		}
343		return c.copy()
344	}
345	e := exceptions[c.info>>exceptionShift:]
346	n := e[0] & lengthMask
347	if n == 0 {
348		if ct == cLower {
349			return c.copy()
350		}
351		n = (e[1] >> lengthBits) & lengthMask
352	}
353	return c.writeString(e[2 : 2+n])
354}
355
356// isFoldFull reports whether the current run is mapped to foldFull
357func isFoldFull(c *context) bool {
358	if c.info&hasMappingMask == 0 {
359		return true
360	}
361	ct := c.caseType()
362	if c.info&exceptionBit == 0 {
363		if ct != cLower || c.info&inverseFoldBit != 0 {
364			c.err = transform.ErrEndOfSpan
365			return false
366		}
367		return true
368	}
369	e := exceptions[c.info>>exceptionShift:]
370	n := e[0] & lengthMask
371	if n == 0 && ct == cLower {
372		return true
373	}
374	c.err = transform.ErrEndOfSpan
375	return false
376}
377