1// Copyright 2015 The Go Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style
3// license that can be found in the LICENSE file.
4
5//go:generate go run gen.go gen_trieval.go gen_ranges.go
6
7// Package bidi contains functionality for bidirectional text support.
8//
9// See https://www.unicode.org/reports/tr9.
10//
11// NOTE: UNDER CONSTRUCTION. This API may change in backwards incompatible ways
12// and without notice.
13package bidi // import "golang.org/x/text/unicode/bidi"
14
15// TODO
16// - Transformer for reordering?
17// - Transformer (validator, really) for Bidi Rule.
18
19import (
20	"bytes"
21)
22
23// This API tries to avoid dealing with embedding levels for now. Under the hood
24// these will be computed, but the question is to which extent the user should
25// know they exist. We should at some point allow the user to specify an
26// embedding hierarchy, though.
27
28// A Direction indicates the overall flow of text.
29type Direction int
30
31const (
32	// LeftToRight indicates the text contains no right-to-left characters and
33	// that either there are some left-to-right characters or the option
34	// DefaultDirection(LeftToRight) was passed.
35	LeftToRight Direction = iota
36
37	// RightToLeft indicates the text contains no left-to-right characters and
38	// that either there are some right-to-left characters or the option
39	// DefaultDirection(RightToLeft) was passed.
40	RightToLeft
41
42	// Mixed indicates text contains both left-to-right and right-to-left
43	// characters.
44	Mixed
45
46	// Neutral means that text contains no left-to-right and right-to-left
47	// characters and that no default direction has been set.
48	Neutral
49)
50
51type options struct {
52	defaultDirection Direction
53}
54
55// An Option is an option for Bidi processing.
56type Option func(*options)
57
58// ICU allows the user to define embedding levels. This may be used, for example,
59// to use hierarchical structure of markup languages to define embeddings.
60// The following option may be a way to expose this functionality in this API.
61// // LevelFunc sets a function that associates nesting levels with the given text.
62// // The levels function will be called with monotonically increasing values for p.
63// func LevelFunc(levels func(p int) int) Option {
64// 	panic("unimplemented")
65// }
66
67// DefaultDirection sets the default direction for a Paragraph. The direction is
68// overridden if the text contains directional characters.
69func DefaultDirection(d Direction) Option {
70	return func(opts *options) {
71		opts.defaultDirection = d
72	}
73}
74
75// A Paragraph holds a single Paragraph for Bidi processing.
76type Paragraph struct {
77	p          []byte
78	o          Ordering
79	opts       []Option
80	types      []Class
81	pairTypes  []bracketType
82	pairValues []rune
83	runes      []rune
84	options    options
85}
86
87// Initialize the p.pairTypes, p.pairValues and p.types from the input previously
88// set by p.SetBytes() or p.SetString(). Also limit the input up to (and including) a paragraph
89// separator (bidi class B).
90//
91// The function p.Order() needs these values to be set, so this preparation could be postponed.
92// But since the SetBytes and SetStrings functions return the length of the input up to the paragraph
93// separator, the whole input needs to be processed anyway and should not be done twice.
94//
95// The function has the same return values as SetBytes() / SetString()
96func (p *Paragraph) prepareInput() (n int, err error) {
97	p.runes = bytes.Runes(p.p)
98	bytecount := 0
99	// clear slices from previous SetString or SetBytes
100	p.pairTypes = nil
101	p.pairValues = nil
102	p.types = nil
103
104	for _, r := range p.runes {
105		props, i := LookupRune(r)
106		bytecount += i
107		cls := props.Class()
108		if cls == B {
109			return bytecount, nil
110		}
111		p.types = append(p.types, cls)
112		if props.IsOpeningBracket() {
113			p.pairTypes = append(p.pairTypes, bpOpen)
114			p.pairValues = append(p.pairValues, r)
115		} else if props.IsBracket() {
116			// this must be a closing bracket,
117			// since IsOpeningBracket is not true
118			p.pairTypes = append(p.pairTypes, bpClose)
119			p.pairValues = append(p.pairValues, r)
120		} else {
121			p.pairTypes = append(p.pairTypes, bpNone)
122			p.pairValues = append(p.pairValues, 0)
123		}
124	}
125	return bytecount, nil
126}
127
128// SetBytes configures p for the given paragraph text. It replaces text
129// previously set by SetBytes or SetString. If b contains a paragraph separator
130// it will only process the first paragraph and report the number of bytes
131// consumed from b including this separator. Error may be non-nil if options are
132// given.
133func (p *Paragraph) SetBytes(b []byte, opts ...Option) (n int, err error) {
134	p.p = b
135	p.opts = opts
136	return p.prepareInput()
137}
138
139// SetString configures s for the given paragraph text. It replaces text
140// previously set by SetBytes or SetString. If s contains a paragraph separator
141// it will only process the first paragraph and report the number of bytes
142// consumed from s including this separator. Error may be non-nil if options are
143// given.
144func (p *Paragraph) SetString(s string, opts ...Option) (n int, err error) {
145	p.p = []byte(s)
146	p.opts = opts
147	return p.prepareInput()
148}
149
150// IsLeftToRight reports whether the principle direction of rendering for this
151// paragraphs is left-to-right. If this returns false, the principle direction
152// of rendering is right-to-left.
153func (p *Paragraph) IsLeftToRight() bool {
154	return p.Direction() == LeftToRight
155}
156
157// Direction returns the direction of the text of this paragraph.
158//
159// The direction may be LeftToRight, RightToLeft, Mixed, or Neutral.
160func (p *Paragraph) Direction() Direction {
161	return p.o.Direction()
162}
163
164// TODO: what happens if the position is > len(input)? This should return an error.
165
166// RunAt reports the Run at the given position of the input text.
167//
168// This method can be used for computing line breaks on paragraphs.
169func (p *Paragraph) RunAt(pos int) Run {
170	c := 0
171	runNumber := 0
172	for i, r := range p.o.runes {
173		c += len(r)
174		if pos < c {
175			runNumber = i
176		}
177	}
178	return p.o.Run(runNumber)
179}
180
181func calculateOrdering(levels []level, runes []rune) Ordering {
182	var curDir Direction
183
184	prevDir := Neutral
185	prevI := 0
186
187	o := Ordering{}
188	// lvl = 0,2,4,...: left to right
189	// lvl = 1,3,5,...: right to left
190	for i, lvl := range levels {
191		if lvl%2 == 0 {
192			curDir = LeftToRight
193		} else {
194			curDir = RightToLeft
195		}
196		if curDir != prevDir {
197			if i > 0 {
198				o.runes = append(o.runes, runes[prevI:i])
199				o.directions = append(o.directions, prevDir)
200				o.startpos = append(o.startpos, prevI)
201			}
202			prevI = i
203			prevDir = curDir
204		}
205	}
206	o.runes = append(o.runes, runes[prevI:])
207	o.directions = append(o.directions, prevDir)
208	o.startpos = append(o.startpos, prevI)
209	return o
210}
211
212// Order computes the visual ordering of all the runs in a Paragraph.
213func (p *Paragraph) Order() (Ordering, error) {
214	if len(p.types) == 0 {
215		return Ordering{}, nil
216	}
217
218	for _, fn := range p.opts {
219		fn(&p.options)
220	}
221	lvl := level(-1)
222	if p.options.defaultDirection == RightToLeft {
223		lvl = 1
224	}
225	para, err := newParagraph(p.types, p.pairTypes, p.pairValues, lvl)
226	if err != nil {
227		return Ordering{}, err
228	}
229
230	levels := para.getLevels([]int{len(p.types)})
231
232	p.o = calculateOrdering(levels, p.runes)
233	return p.o, nil
234}
235
236// Line computes the visual ordering of runs for a single line starting and
237// ending at the given positions in the original text.
238func (p *Paragraph) Line(start, end int) (Ordering, error) {
239	lineTypes := p.types[start:end]
240	para, err := newParagraph(lineTypes, p.pairTypes[start:end], p.pairValues[start:end], -1)
241	if err != nil {
242		return Ordering{}, err
243	}
244	levels := para.getLevels([]int{len(lineTypes)})
245	o := calculateOrdering(levels, p.runes[start:end])
246	return o, nil
247}
248
249// An Ordering holds the computed visual order of runs of a Paragraph. Calling
250// SetBytes or SetString on the originating Paragraph invalidates an Ordering.
251// The methods of an Ordering should only be called by one goroutine at a time.
252type Ordering struct {
253	runes      [][]rune
254	directions []Direction
255	startpos   []int
256}
257
258// Direction reports the directionality of the runs.
259//
260// The direction may be LeftToRight, RightToLeft, Mixed, or Neutral.
261func (o *Ordering) Direction() Direction {
262	return o.directions[0]
263}
264
265// NumRuns returns the number of runs.
266func (o *Ordering) NumRuns() int {
267	return len(o.runes)
268}
269
270// Run returns the ith run within the ordering.
271func (o *Ordering) Run(i int) Run {
272	r := Run{
273		runes:     o.runes[i],
274		direction: o.directions[i],
275		startpos:  o.startpos[i],
276	}
277	return r
278}
279
280// TODO: perhaps with options.
281// // Reorder creates a reader that reads the runes in visual order per character.
282// // Modifiers remain after the runes they modify.
283// func (l *Runs) Reorder() io.Reader {
284// 	panic("unimplemented")
285// }
286
287// A Run is a continuous sequence of characters of a single direction.
288type Run struct {
289	runes     []rune
290	direction Direction
291	startpos  int
292}
293
294// String returns the text of the run in its original order.
295func (r *Run) String() string {
296	return string(r.runes)
297}
298
299// Bytes returns the text of the run in its original order.
300func (r *Run) Bytes() []byte {
301	return []byte(r.String())
302}
303
304// TODO: methods for
305// - Display order
306// - headers and footers
307// - bracket replacement.
308
309// Direction reports the direction of the run.
310func (r *Run) Direction() Direction {
311	return r.direction
312}
313
314// Pos returns the position of the Run within the text passed to SetBytes or SetString of the
315// originating Paragraph value.
316func (r *Run) Pos() (start, end int) {
317	return r.startpos, r.startpos + len(r.runes) - 1
318}
319
320// AppendReverse reverses the order of characters of in, appends them to out,
321// and returns the result. Modifiers will still follow the runes they modify.
322// Brackets are replaced with their counterparts.
323func AppendReverse(out, in []byte) []byte {
324	ret := make([]byte, len(in)+len(out))
325	copy(ret, out)
326	inRunes := bytes.Runes(in)
327
328	for i, r := range inRunes {
329		prop, _ := LookupRune(r)
330		if prop.IsBracket() {
331			inRunes[i] = prop.reverseBracket(r)
332		}
333	}
334
335	for i, j := 0, len(inRunes)-1; i < j; i, j = i+1, j-1 {
336		inRunes[i], inRunes[j] = inRunes[j], inRunes[i]
337	}
338	copy(ret[len(out):], string(inRunes))
339
340	return ret
341}
342
343// ReverseString reverses the order of characters in s and returns a new string.
344// Modifiers will still follow the runes they modify. Brackets are replaced with
345// their counterparts.
346func ReverseString(s string) string {
347	input := []rune(s)
348	li := len(input)
349	ret := make([]rune, li)
350	for i, r := range input {
351		prop, _ := LookupRune(r)
352		if prop.IsBracket() {
353			ret[li-i-1] = prop.reverseBracket(r)
354		} else {
355			ret[li-i-1] = r
356		}
357	}
358	return string(ret)
359}
360