1// Code generated by running "go run gen.go -core" in golang.org/x/text. DO NOT EDIT.
2
3// Copyright 2013 The Go Authors. All rights reserved.
4// Use of this source code is governed by a BSD-style
5// license that can be found in the LICENSE file.
6
7// Package transform provides reader and writer wrappers that transform the
8// bytes passing through as well as various transformations. Example
9// transformations provided by other packages include normalization and
10// conversion between character sets.
11package transform // import "golang_org/x/text/transform"
12
13import (
14	"bytes"
15	"errors"
16	"io"
17	"unicode/utf8"
18)
19
20var (
21	// ErrShortDst means that the destination buffer was too short to
22	// receive all of the transformed bytes.
23	ErrShortDst = errors.New("transform: short destination buffer")
24
25	// ErrShortSrc means that the source buffer has insufficient data to
26	// complete the transformation.
27	ErrShortSrc = errors.New("transform: short source buffer")
28
29	// ErrEndOfSpan means that the input and output (the transformed input)
30	// are not identical.
31	ErrEndOfSpan = errors.New("transform: input and output are not identical")
32
33	// errInconsistentByteCount means that Transform returned success (nil
34	// error) but also returned nSrc inconsistent with the src argument.
35	errInconsistentByteCount = errors.New("transform: inconsistent byte count returned")
36
37	// errShortInternal means that an internal buffer is not large enough
38	// to make progress and the Transform operation must be aborted.
39	errShortInternal = errors.New("transform: short internal buffer")
40)
41
42// Transformer transforms bytes.
43type Transformer interface {
44	// Transform writes to dst the transformed bytes read from src, and
45	// returns the number of dst bytes written and src bytes read. The
46	// atEOF argument tells whether src represents the last bytes of the
47	// input.
48	//
49	// Callers should always process the nDst bytes produced and account
50	// for the nSrc bytes consumed before considering the error err.
51	//
52	// A nil error means that all of the transformed bytes (whether freshly
53	// transformed from src or left over from previous Transform calls)
54	// were written to dst. A nil error can be returned regardless of
55	// whether atEOF is true. If err is nil then nSrc must equal len(src);
56	// the converse is not necessarily true.
57	//
58	// ErrShortDst means that dst was too short to receive all of the
59	// transformed bytes. ErrShortSrc means that src had insufficient data
60	// to complete the transformation. If both conditions apply, then
61	// either error may be returned. Other than the error conditions listed
62	// here, implementations are free to report other errors that arise.
63	Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error)
64
65	// Reset resets the state and allows a Transformer to be reused.
66	Reset()
67}
68
69// SpanningTransformer extends the Transformer interface with a Span method
70// that determines how much of the input already conforms to the Transformer.
71type SpanningTransformer interface {
72	Transformer
73
74	// Span returns a position in src such that transforming src[:n] results in
75	// identical output src[:n] for these bytes. It does not necessarily return
76	// the largest such n. The atEOF argument tells whether src represents the
77	// last bytes of the input.
78	//
79	// Callers should always account for the n bytes consumed before
80	// considering the error err.
81	//
82	// A nil error means that all input bytes are known to be identical to the
83	// output produced by the Transformer. A nil error can be be returned
84	// regardless of whether atEOF is true. If err is nil, then then n must
85	// equal len(src); the converse is not necessarily true.
86	//
87	// ErrEndOfSpan means that the Transformer output may differ from the
88	// input after n bytes. Note that n may be len(src), meaning that the output
89	// would contain additional bytes after otherwise identical output.
90	// ErrShortSrc means that src had insufficient data to determine whether the
91	// remaining bytes would change. Other than the error conditions listed
92	// here, implementations are free to report other errors that arise.
93	//
94	// Calling Span can modify the Transformer state as a side effect. In
95	// effect, it does the transformation just as calling Transform would, only
96	// without copying to a destination buffer and only up to a point it can
97	// determine the input and output bytes are the same. This is obviously more
98	// limited than calling Transform, but can be more efficient in terms of
99	// copying and allocating buffers. Calls to Span and Transform may be
100	// interleaved.
101	Span(src []byte, atEOF bool) (n int, err error)
102}
103
104// NopResetter can be embedded by implementations of Transformer to add a nop
105// Reset method.
106type NopResetter struct{}
107
108// Reset implements the Reset method of the Transformer interface.
109func (NopResetter) Reset() {}
110
111// Reader wraps another io.Reader by transforming the bytes read.
112type Reader struct {
113	r   io.Reader
114	t   Transformer
115	err error
116
117	// dst[dst0:dst1] contains bytes that have been transformed by t but
118	// not yet copied out via Read.
119	dst        []byte
120	dst0, dst1 int
121
122	// src[src0:src1] contains bytes that have been read from r but not
123	// yet transformed through t.
124	src        []byte
125	src0, src1 int
126
127	// transformComplete is whether the transformation is complete,
128	// regardless of whether or not it was successful.
129	transformComplete bool
130}
131
132const defaultBufSize = 4096
133
134// NewReader returns a new Reader that wraps r by transforming the bytes read
135// via t. It calls Reset on t.
136func NewReader(r io.Reader, t Transformer) *Reader {
137	t.Reset()
138	return &Reader{
139		r:   r,
140		t:   t,
141		dst: make([]byte, defaultBufSize),
142		src: make([]byte, defaultBufSize),
143	}
144}
145
146// Read implements the io.Reader interface.
147func (r *Reader) Read(p []byte) (int, error) {
148	n, err := 0, error(nil)
149	for {
150		// Copy out any transformed bytes and return the final error if we are done.
151		if r.dst0 != r.dst1 {
152			n = copy(p, r.dst[r.dst0:r.dst1])
153			r.dst0 += n
154			if r.dst0 == r.dst1 && r.transformComplete {
155				return n, r.err
156			}
157			return n, nil
158		} else if r.transformComplete {
159			return 0, r.err
160		}
161
162		// Try to transform some source bytes, or to flush the transformer if we
163		// are out of source bytes. We do this even if r.r.Read returned an error.
164		// As the io.Reader documentation says, "process the n > 0 bytes returned
165		// before considering the error".
166		if r.src0 != r.src1 || r.err != nil {
167			r.dst0 = 0
168			r.dst1, n, err = r.t.Transform(r.dst, r.src[r.src0:r.src1], r.err == io.EOF)
169			r.src0 += n
170
171			switch {
172			case err == nil:
173				if r.src0 != r.src1 {
174					r.err = errInconsistentByteCount
175				}
176				// The Transform call was successful; we are complete if we
177				// cannot read more bytes into src.
178				r.transformComplete = r.err != nil
179				continue
180			case err == ErrShortDst && (r.dst1 != 0 || n != 0):
181				// Make room in dst by copying out, and try again.
182				continue
183			case err == ErrShortSrc && r.src1-r.src0 != len(r.src) && r.err == nil:
184				// Read more bytes into src via the code below, and try again.
185			default:
186				r.transformComplete = true
187				// The reader error (r.err) takes precedence over the
188				// transformer error (err) unless r.err is nil or io.EOF.
189				if r.err == nil || r.err == io.EOF {
190					r.err = err
191				}
192				continue
193			}
194		}
195
196		// Move any untransformed source bytes to the start of the buffer
197		// and read more bytes.
198		if r.src0 != 0 {
199			r.src0, r.src1 = 0, copy(r.src, r.src[r.src0:r.src1])
200		}
201		n, r.err = r.r.Read(r.src[r.src1:])
202		r.src1 += n
203	}
204}
205
206// TODO: implement ReadByte (and ReadRune??).
207
208// Writer wraps another io.Writer by transforming the bytes read.
209// The user needs to call Close to flush unwritten bytes that may
210// be buffered.
211type Writer struct {
212	w   io.Writer
213	t   Transformer
214	dst []byte
215
216	// src[:n] contains bytes that have not yet passed through t.
217	src []byte
218	n   int
219}
220
221// NewWriter returns a new Writer that wraps w by transforming the bytes written
222// via t. It calls Reset on t.
223func NewWriter(w io.Writer, t Transformer) *Writer {
224	t.Reset()
225	return &Writer{
226		w:   w,
227		t:   t,
228		dst: make([]byte, defaultBufSize),
229		src: make([]byte, defaultBufSize),
230	}
231}
232
233// Write implements the io.Writer interface. If there are not enough
234// bytes available to complete a Transform, the bytes will be buffered
235// for the next write. Call Close to convert the remaining bytes.
236func (w *Writer) Write(data []byte) (n int, err error) {
237	src := data
238	if w.n > 0 {
239		// Append bytes from data to the last remainder.
240		// TODO: limit the amount copied on first try.
241		n = copy(w.src[w.n:], data)
242		w.n += n
243		src = w.src[:w.n]
244	}
245	for {
246		nDst, nSrc, err := w.t.Transform(w.dst, src, false)
247		if _, werr := w.w.Write(w.dst[:nDst]); werr != nil {
248			return n, werr
249		}
250		src = src[nSrc:]
251		if w.n == 0 {
252			n += nSrc
253		} else if len(src) <= n {
254			// Enough bytes from w.src have been consumed. We make src point
255			// to data instead to reduce the copying.
256			w.n = 0
257			n -= len(src)
258			src = data[n:]
259			if n < len(data) && (err == nil || err == ErrShortSrc) {
260				continue
261			}
262		}
263		switch err {
264		case ErrShortDst:
265			// This error is okay as long as we are making progress.
266			if nDst > 0 || nSrc > 0 {
267				continue
268			}
269		case ErrShortSrc:
270			if len(src) < len(w.src) {
271				m := copy(w.src, src)
272				// If w.n > 0, bytes from data were already copied to w.src and n
273				// was already set to the number of bytes consumed.
274				if w.n == 0 {
275					n += m
276				}
277				w.n = m
278				err = nil
279			} else if nDst > 0 || nSrc > 0 {
280				// Not enough buffer to store the remainder. Keep processing as
281				// long as there is progress. Without this case, transforms that
282				// require a lookahead larger than the buffer may result in an
283				// error. This is not something one may expect to be common in
284				// practice, but it may occur when buffers are set to small
285				// sizes during testing.
286				continue
287			}
288		case nil:
289			if w.n > 0 {
290				err = errInconsistentByteCount
291			}
292		}
293		return n, err
294	}
295}
296
297// Close implements the io.Closer interface.
298func (w *Writer) Close() error {
299	src := w.src[:w.n]
300	for {
301		nDst, nSrc, err := w.t.Transform(w.dst, src, true)
302		if _, werr := w.w.Write(w.dst[:nDst]); werr != nil {
303			return werr
304		}
305		if err != ErrShortDst {
306			return err
307		}
308		src = src[nSrc:]
309	}
310}
311
312type nop struct{ NopResetter }
313
314func (nop) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
315	n := copy(dst, src)
316	if n < len(src) {
317		err = ErrShortDst
318	}
319	return n, n, err
320}
321
322func (nop) Span(src []byte, atEOF bool) (n int, err error) {
323	return len(src), nil
324}
325
326type discard struct{ NopResetter }
327
328func (discard) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
329	return 0, len(src), nil
330}
331
332var (
333	// Discard is a Transformer for which all Transform calls succeed
334	// by consuming all bytes and writing nothing.
335	Discard Transformer = discard{}
336
337	// Nop is a SpanningTransformer that copies src to dst.
338	Nop SpanningTransformer = nop{}
339)
340
341// chain is a sequence of links. A chain with N Transformers has N+1 links and
342// N+1 buffers. Of those N+1 buffers, the first and last are the src and dst
343// buffers given to chain.Transform and the middle N-1 buffers are intermediate
344// buffers owned by the chain. The i'th link transforms bytes from the i'th
345// buffer chain.link[i].b at read offset chain.link[i].p to the i+1'th buffer
346// chain.link[i+1].b at write offset chain.link[i+1].n, for i in [0, N).
347type chain struct {
348	link []link
349	err  error
350	// errStart is the index at which the error occurred plus 1. Processing
351	// errStart at this level at the next call to Transform. As long as
352	// errStart > 0, chain will not consume any more source bytes.
353	errStart int
354}
355
356func (c *chain) fatalError(errIndex int, err error) {
357	if i := errIndex + 1; i > c.errStart {
358		c.errStart = i
359		c.err = err
360	}
361}
362
363type link struct {
364	t Transformer
365	// b[p:n] holds the bytes to be transformed by t.
366	b []byte
367	p int
368	n int
369}
370
371func (l *link) src() []byte {
372	return l.b[l.p:l.n]
373}
374
375func (l *link) dst() []byte {
376	return l.b[l.n:]
377}
378
379// Chain returns a Transformer that applies t in sequence.
380func Chain(t ...Transformer) Transformer {
381	if len(t) == 0 {
382		return nop{}
383	}
384	c := &chain{link: make([]link, len(t)+1)}
385	for i, tt := range t {
386		c.link[i].t = tt
387	}
388	// Allocate intermediate buffers.
389	b := make([][defaultBufSize]byte, len(t)-1)
390	for i := range b {
391		c.link[i+1].b = b[i][:]
392	}
393	return c
394}
395
396// Reset resets the state of Chain. It calls Reset on all the Transformers.
397func (c *chain) Reset() {
398	for i, l := range c.link {
399		if l.t != nil {
400			l.t.Reset()
401		}
402		c.link[i].p, c.link[i].n = 0, 0
403	}
404}
405
406// TODO: make chain use Span (is going to be fun to implement!)
407
408// Transform applies the transformers of c in sequence.
409func (c *chain) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
410	// Set up src and dst in the chain.
411	srcL := &c.link[0]
412	dstL := &c.link[len(c.link)-1]
413	srcL.b, srcL.p, srcL.n = src, 0, len(src)
414	dstL.b, dstL.n = dst, 0
415	var lastFull, needProgress bool // for detecting progress
416
417	// i is the index of the next Transformer to apply, for i in [low, high].
418	// low is the lowest index for which c.link[low] may still produce bytes.
419	// high is the highest index for which c.link[high] has a Transformer.
420	// The error returned by Transform determines whether to increase or
421	// decrease i. We try to completely fill a buffer before converting it.
422	for low, i, high := c.errStart, c.errStart, len(c.link)-2; low <= i && i <= high; {
423		in, out := &c.link[i], &c.link[i+1]
424		nDst, nSrc, err0 := in.t.Transform(out.dst(), in.src(), atEOF && low == i)
425		out.n += nDst
426		in.p += nSrc
427		if i > 0 && in.p == in.n {
428			in.p, in.n = 0, 0
429		}
430		needProgress, lastFull = lastFull, false
431		switch err0 {
432		case ErrShortDst:
433			// Process the destination buffer next. Return if we are already
434			// at the high index.
435			if i == high {
436				return dstL.n, srcL.p, ErrShortDst
437			}
438			if out.n != 0 {
439				i++
440				// If the Transformer at the next index is not able to process any
441				// source bytes there is nothing that can be done to make progress
442				// and the bytes will remain unprocessed. lastFull is used to
443				// detect this and break out of the loop with a fatal error.
444				lastFull = true
445				continue
446			}
447			// The destination buffer was too small, but is completely empty.
448			// Return a fatal error as this transformation can never complete.
449			c.fatalError(i, errShortInternal)
450		case ErrShortSrc:
451			if i == 0 {
452				// Save ErrShortSrc in err. All other errors take precedence.
453				err = ErrShortSrc
454				break
455			}
456			// Source bytes were depleted before filling up the destination buffer.
457			// Verify we made some progress, move the remaining bytes to the errStart
458			// and try to get more source bytes.
459			if needProgress && nSrc == 0 || in.n-in.p == len(in.b) {
460				// There were not enough source bytes to proceed while the source
461				// buffer cannot hold any more bytes. Return a fatal error as this
462				// transformation can never complete.
463				c.fatalError(i, errShortInternal)
464				break
465			}
466			// in.b is an internal buffer and we can make progress.
467			in.p, in.n = 0, copy(in.b, in.src())
468			fallthrough
469		case nil:
470			// if i == low, we have depleted the bytes at index i or any lower levels.
471			// In that case we increase low and i. In all other cases we decrease i to
472			// fetch more bytes before proceeding to the next index.
473			if i > low {
474				i--
475				continue
476			}
477		default:
478			c.fatalError(i, err0)
479		}
480		// Exhausted level low or fatal error: increase low and continue
481		// to process the bytes accepted so far.
482		i++
483		low = i
484	}
485
486	// If c.errStart > 0, this means we found a fatal error.  We will clear
487	// all upstream buffers. At this point, no more progress can be made
488	// downstream, as Transform would have bailed while handling ErrShortDst.
489	if c.errStart > 0 {
490		for i := 1; i < c.errStart; i++ {
491			c.link[i].p, c.link[i].n = 0, 0
492		}
493		err, c.errStart, c.err = c.err, 0, nil
494	}
495	return dstL.n, srcL.p, err
496}
497
498// Deprecated: use runes.Remove instead.
499func RemoveFunc(f func(r rune) bool) Transformer {
500	return removeF(f)
501}
502
503type removeF func(r rune) bool
504
505func (removeF) Reset() {}
506
507// Transform implements the Transformer interface.
508func (t removeF) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
509	for r, sz := rune(0), 0; len(src) > 0; src = src[sz:] {
510
511		if r = rune(src[0]); r < utf8.RuneSelf {
512			sz = 1
513		} else {
514			r, sz = utf8.DecodeRune(src)
515
516			if sz == 1 {
517				// Invalid rune.
518				if !atEOF && !utf8.FullRune(src) {
519					err = ErrShortSrc
520					break
521				}
522				// We replace illegal bytes with RuneError. Not doing so might
523				// otherwise turn a sequence of invalid UTF-8 into valid UTF-8.
524				// The resulting byte sequence may subsequently contain runes
525				// for which t(r) is true that were passed unnoticed.
526				if !t(r) {
527					if nDst+3 > len(dst) {
528						err = ErrShortDst
529						break
530					}
531					nDst += copy(dst[nDst:], "\uFFFD")
532				}
533				nSrc++
534				continue
535			}
536		}
537
538		if !t(r) {
539			if nDst+sz > len(dst) {
540				err = ErrShortDst
541				break
542			}
543			nDst += copy(dst[nDst:], src[:sz])
544		}
545		nSrc += sz
546	}
547	return
548}
549
550// grow returns a new []byte that is longer than b, and copies the first n bytes
551// of b to the start of the new slice.
552func grow(b []byte, n int) []byte {
553	m := len(b)
554	if m <= 32 {
555		m = 64
556	} else if m <= 256 {
557		m *= 2
558	} else {
559		m += m >> 1
560	}
561	buf := make([]byte, m)
562	copy(buf, b[:n])
563	return buf
564}
565
566const initialBufSize = 128
567
568// String returns a string with the result of converting s[:n] using t, where
569// n <= len(s). If err == nil, n will be len(s). It calls Reset on t.
570func String(t Transformer, s string) (result string, n int, err error) {
571	t.Reset()
572	if s == "" {
573		// Fast path for the common case for empty input. Results in about a
574		// 86% reduction of running time for BenchmarkStringLowerEmpty.
575		if _, _, err := t.Transform(nil, nil, true); err == nil {
576			return "", 0, nil
577		}
578	}
579
580	// Allocate only once. Note that both dst and src escape when passed to
581	// Transform.
582	buf := [2 * initialBufSize]byte{}
583	dst := buf[:initialBufSize:initialBufSize]
584	src := buf[initialBufSize : 2*initialBufSize]
585
586	// The input string s is transformed in multiple chunks (starting with a
587	// chunk size of initialBufSize). nDst and nSrc are per-chunk (or
588	// per-Transform-call) indexes, pDst and pSrc are overall indexes.
589	nDst, nSrc := 0, 0
590	pDst, pSrc := 0, 0
591
592	// pPrefix is the length of a common prefix: the first pPrefix bytes of the
593	// result will equal the first pPrefix bytes of s. It is not guaranteed to
594	// be the largest such value, but if pPrefix, len(result) and len(s) are
595	// all equal after the final transform (i.e. calling Transform with atEOF
596	// being true returned nil error) then we don't need to allocate a new
597	// result string.
598	pPrefix := 0
599	for {
600		// Invariant: pDst == pPrefix && pSrc == pPrefix.
601
602		n := copy(src, s[pSrc:])
603		nDst, nSrc, err = t.Transform(dst, src[:n], pSrc+n == len(s))
604		pDst += nDst
605		pSrc += nSrc
606
607		// TODO:  let transformers implement an optional Spanner interface, akin
608		// to norm's QuickSpan. This would even allow us to avoid any allocation.
609		if !bytes.Equal(dst[:nDst], src[:nSrc]) {
610			break
611		}
612		pPrefix = pSrc
613		if err == ErrShortDst {
614			// A buffer can only be short if a transformer modifies its input.
615			break
616		} else if err == ErrShortSrc {
617			if nSrc == 0 {
618				// No progress was made.
619				break
620			}
621			// Equal so far and !atEOF, so continue checking.
622		} else if err != nil || pPrefix == len(s) {
623			return string(s[:pPrefix]), pPrefix, err
624		}
625	}
626	// Post-condition: pDst == pPrefix + nDst && pSrc == pPrefix + nSrc.
627
628	// We have transformed the first pSrc bytes of the input s to become pDst
629	// transformed bytes. Those transformed bytes are discontiguous: the first
630	// pPrefix of them equal s[:pPrefix] and the last nDst of them equal
631	// dst[:nDst]. We copy them around, into a new dst buffer if necessary, so
632	// that they become one contiguous slice: dst[:pDst].
633	if pPrefix != 0 {
634		newDst := dst
635		if pDst > len(newDst) {
636			newDst = make([]byte, len(s)+nDst-nSrc)
637		}
638		copy(newDst[pPrefix:pDst], dst[:nDst])
639		copy(newDst[:pPrefix], s[:pPrefix])
640		dst = newDst
641	}
642
643	// Prevent duplicate Transform calls with atEOF being true at the end of
644	// the input. Also return if we have an unrecoverable error.
645	if (err == nil && pSrc == len(s)) ||
646		(err != nil && err != ErrShortDst && err != ErrShortSrc) {
647		return string(dst[:pDst]), pSrc, err
648	}
649
650	// Transform the remaining input, growing dst and src buffers as necessary.
651	for {
652		n := copy(src, s[pSrc:])
653		nDst, nSrc, err := t.Transform(dst[pDst:], src[:n], pSrc+n == len(s))
654		pDst += nDst
655		pSrc += nSrc
656
657		// If we got ErrShortDst or ErrShortSrc, do not grow as long as we can
658		// make progress. This may avoid excessive allocations.
659		if err == ErrShortDst {
660			if nDst == 0 {
661				dst = grow(dst, pDst)
662			}
663		} else if err == ErrShortSrc {
664			if nSrc == 0 {
665				src = grow(src, 0)
666			}
667		} else if err != nil || pSrc == len(s) {
668			return string(dst[:pDst]), pSrc, err
669		}
670	}
671}
672
673// Bytes returns a new byte slice with the result of converting b[:n] using t,
674// where n <= len(b). If err == nil, n will be len(b). It calls Reset on t.
675func Bytes(t Transformer, b []byte) (result []byte, n int, err error) {
676	return doAppend(t, 0, make([]byte, len(b)), b)
677}
678
679// Append appends the result of converting src[:n] using t to dst, where
680// n <= len(src), If err == nil, n will be len(src). It calls Reset on t.
681func Append(t Transformer, dst, src []byte) (result []byte, n int, err error) {
682	if len(dst) == cap(dst) {
683		n := len(src) + len(dst) // It is okay for this to be 0.
684		b := make([]byte, n)
685		dst = b[:copy(b, dst)]
686	}
687	return doAppend(t, len(dst), dst[:cap(dst)], src)
688}
689
690func doAppend(t Transformer, pDst int, dst, src []byte) (result []byte, n int, err error) {
691	t.Reset()
692	pSrc := 0
693	for {
694		nDst, nSrc, err := t.Transform(dst[pDst:], src[pSrc:], true)
695		pDst += nDst
696		pSrc += nSrc
697		if err != ErrShortDst {
698			return dst[:pDst], pSrc, err
699		}
700
701		// Grow the destination buffer, but do not grow as long as we can make
702		// progress. This may avoid excessive allocations.
703		if nDst == 0 {
704			dst = grow(dst, pDst)
705		}
706	}
707}
708