1// Copyright 2015 The Go Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style
3// license that can be found in the LICENSE file.
4
5package bidi
6
7import "log"
8
9// This implementation is a port based on the reference implementation found at:
10// http://www.unicode.org/Public/PROGRAMS/BidiReferenceJava/
11//
12// described in Unicode Bidirectional Algorithm (UAX #9).
13//
14// Input:
15// There are two levels of input to the algorithm, since clients may prefer to
16// supply some information from out-of-band sources rather than relying on the
17// default behavior.
18//
19// - Bidi class array
20// - Bidi class array, with externally supplied base line direction
21//
22// Output:
23// Output is separated into several stages:
24//
25//  - levels array over entire paragraph
26//  - reordering array over entire paragraph
27//  - levels array over line
28//  - reordering array over line
29//
30// Note that for conformance to the Unicode Bidirectional Algorithm,
31// implementations are only required to generate correct reordering and
32// character directionality (odd or even levels) over a line. Generating
33// identical level arrays over a line is not required. Bidi explicit format
34// codes (LRE, RLE, LRO, RLO, PDF) and BN can be assigned arbitrary levels and
35// positions as long as the rest of the input is properly reordered.
36//
37// As the algorithm is defined to operate on a single paragraph at a time, this
38// implementation is written to handle single paragraphs. Thus rule P1 is
39// presumed by this implementation-- the data provided to the implementation is
40// assumed to be a single paragraph, and either contains no 'B' codes, or a
41// single 'B' code at the end of the input. 'B' is allowed as input to
42// illustrate how the algorithm assigns it a level.
43//
44// Also note that rules L3 and L4 depend on the rendering engine that uses the
45// result of the bidi algorithm. This implementation assumes that the rendering
46// engine expects combining marks in visual order (e.g. to the left of their
47// base character in RTL runs) and that it adjusts the glyphs used to render
48// mirrored characters that are in RTL runs so that they render appropriately.
49
50// level is the embedding level of a character. Even embedding levels indicate
51// left-to-right order and odd levels indicate right-to-left order. The special
52// level of -1 is reserved for undefined order.
53type level int8
54
55const implicitLevel level = -1
56
57// in returns if x is equal to any of the values in set.
58func (c Class) in(set ...Class) bool {
59	for _, s := range set {
60		if c == s {
61			return true
62		}
63	}
64	return false
65}
66
67// A paragraph contains the state of a paragraph.
68type paragraph struct {
69	initialTypes []Class
70
71	// Arrays of properties needed for paired bracket evaluation in N0
72	pairTypes  []bracketType // paired Bracket types for paragraph
73	pairValues []rune        // rune for opening bracket or pbOpen and pbClose; 0 for pbNone
74
75	embeddingLevel level // default: = implicitLevel;
76
77	// at the paragraph levels
78	resultTypes  []Class
79	resultLevels []level
80
81	// Index of matching PDI for isolate initiator characters. For other
82	// characters, the value of matchingPDI will be set to -1. For isolate
83	// initiators with no matching PDI, matchingPDI will be set to the length of
84	// the input string.
85	matchingPDI []int
86
87	// Index of matching isolate initiator for PDI characters. For other
88	// characters, and for PDIs with no matching isolate initiator, the value of
89	// matchingIsolateInitiator will be set to -1.
90	matchingIsolateInitiator []int
91}
92
93// newParagraph initializes a paragraph. The user needs to supply a few arrays
94// corresponding to the preprocessed text input. The types correspond to the
95// Unicode BiDi classes for each rune. pairTypes indicates the bracket type for
96// each rune. pairValues provides a unique bracket class identifier for each
97// rune (suggested is the rune of the open bracket for opening and matching
98// close brackets, after normalization). The embedding levels are optional, but
99// may be supplied to encode embedding levels of styled text.
100//
101// TODO: return an error.
102func newParagraph(types []Class, pairTypes []bracketType, pairValues []rune, levels level) *paragraph {
103	validateTypes(types)
104	validatePbTypes(pairTypes)
105	validatePbValues(pairValues, pairTypes)
106	validateParagraphEmbeddingLevel(levels)
107
108	p := &paragraph{
109		initialTypes:   append([]Class(nil), types...),
110		embeddingLevel: levels,
111
112		pairTypes:  pairTypes,
113		pairValues: pairValues,
114
115		resultTypes: append([]Class(nil), types...),
116	}
117	p.run()
118	return p
119}
120
121func (p *paragraph) Len() int { return len(p.initialTypes) }
122
123// The algorithm. Does not include line-based processing (Rules L1, L2).
124// These are applied later in the line-based phase of the algorithm.
125func (p *paragraph) run() {
126	p.determineMatchingIsolates()
127
128	// 1) determining the paragraph level
129	// Rule P1 is the requirement for entering this algorithm.
130	// Rules P2, P3.
131	// If no externally supplied paragraph embedding level, use default.
132	if p.embeddingLevel == implicitLevel {
133		p.embeddingLevel = p.determineParagraphEmbeddingLevel(0, p.Len())
134	}
135
136	// Initialize result levels to paragraph embedding level.
137	p.resultLevels = make([]level, p.Len())
138	setLevels(p.resultLevels, p.embeddingLevel)
139
140	// 2) Explicit levels and directions
141	// Rules X1-X8.
142	p.determineExplicitEmbeddingLevels()
143
144	// Rule X9.
145	// We do not remove the embeddings, the overrides, the PDFs, and the BNs
146	// from the string explicitly. But they are not copied into isolating run
147	// sequences when they are created, so they are removed for all
148	// practical purposes.
149
150	// Rule X10.
151	// Run remainder of algorithm one isolating run sequence at a time
152	for _, seq := range p.determineIsolatingRunSequences() {
153		// 3) resolving weak types
154		// Rules W1-W7.
155		seq.resolveWeakTypes()
156
157		// 4a) resolving paired brackets
158		// Rule N0
159		resolvePairedBrackets(seq)
160
161		// 4b) resolving neutral types
162		// Rules N1-N3.
163		seq.resolveNeutralTypes()
164
165		// 5) resolving implicit embedding levels
166		// Rules I1, I2.
167		seq.resolveImplicitLevels()
168
169		// Apply the computed levels and types
170		seq.applyLevelsAndTypes()
171	}
172
173	// Assign appropriate levels to 'hide' LREs, RLEs, LROs, RLOs, PDFs, and
174	// BNs. This is for convenience, so the resulting level array will have
175	// a value for every character.
176	p.assignLevelsToCharactersRemovedByX9()
177}
178
179// determineMatchingIsolates determines the matching PDI for each isolate
180// initiator and vice versa.
181//
182// Definition BD9.
183//
184// At the end of this function:
185//
186//  - The member variable matchingPDI is set to point to the index of the
187//    matching PDI character for each isolate initiator character. If there is
188//    no matching PDI, it is set to the length of the input text. For other
189//    characters, it is set to -1.
190//  - The member variable matchingIsolateInitiator is set to point to the
191//    index of the matching isolate initiator character for each PDI character.
192//    If there is no matching isolate initiator, or the character is not a PDI,
193//    it is set to -1.
194func (p *paragraph) determineMatchingIsolates() {
195	p.matchingPDI = make([]int, p.Len())
196	p.matchingIsolateInitiator = make([]int, p.Len())
197
198	for i := range p.matchingIsolateInitiator {
199		p.matchingIsolateInitiator[i] = -1
200	}
201
202	for i := range p.matchingPDI {
203		p.matchingPDI[i] = -1
204
205		if t := p.resultTypes[i]; t.in(LRI, RLI, FSI) {
206			depthCounter := 1
207			for j := i + 1; j < p.Len(); j++ {
208				if u := p.resultTypes[j]; u.in(LRI, RLI, FSI) {
209					depthCounter++
210				} else if u == PDI {
211					if depthCounter--; depthCounter == 0 {
212						p.matchingPDI[i] = j
213						p.matchingIsolateInitiator[j] = i
214						break
215					}
216				}
217			}
218			if p.matchingPDI[i] == -1 {
219				p.matchingPDI[i] = p.Len()
220			}
221		}
222	}
223}
224
225// determineParagraphEmbeddingLevel reports the resolved paragraph direction of
226// the substring limited by the given range [start, end).
227//
228// Determines the paragraph level based on rules P2, P3. This is also used
229// in rule X5c to find if an FSI should resolve to LRI or RLI.
230func (p *paragraph) determineParagraphEmbeddingLevel(start, end int) level {
231	var strongType Class = unknownClass
232
233	// Rule P2.
234	for i := start; i < end; i++ {
235		if t := p.resultTypes[i]; t.in(L, AL, R) {
236			strongType = t
237			break
238		} else if t.in(FSI, LRI, RLI) {
239			i = p.matchingPDI[i] // skip over to the matching PDI
240			if i > end {
241				log.Panic("assert (i <= end)")
242			}
243		}
244	}
245	// Rule P3.
246	switch strongType {
247	case unknownClass: // none found
248		// default embedding level when no strong types found is 0.
249		return 0
250	case L:
251		return 0
252	default: // AL, R
253		return 1
254	}
255}
256
257const maxDepth = 125
258
259// This stack will store the embedding levels and override and isolated
260// statuses
261type directionalStatusStack struct {
262	stackCounter        int
263	embeddingLevelStack [maxDepth + 1]level
264	overrideStatusStack [maxDepth + 1]Class
265	isolateStatusStack  [maxDepth + 1]bool
266}
267
268func (s *directionalStatusStack) empty()     { s.stackCounter = 0 }
269func (s *directionalStatusStack) pop()       { s.stackCounter-- }
270func (s *directionalStatusStack) depth() int { return s.stackCounter }
271
272func (s *directionalStatusStack) push(level level, overrideStatus Class, isolateStatus bool) {
273	s.embeddingLevelStack[s.stackCounter] = level
274	s.overrideStatusStack[s.stackCounter] = overrideStatus
275	s.isolateStatusStack[s.stackCounter] = isolateStatus
276	s.stackCounter++
277}
278
279func (s *directionalStatusStack) lastEmbeddingLevel() level {
280	return s.embeddingLevelStack[s.stackCounter-1]
281}
282
283func (s *directionalStatusStack) lastDirectionalOverrideStatus() Class {
284	return s.overrideStatusStack[s.stackCounter-1]
285}
286
287func (s *directionalStatusStack) lastDirectionalIsolateStatus() bool {
288	return s.isolateStatusStack[s.stackCounter-1]
289}
290
291// Determine explicit levels using rules X1 - X8
292func (p *paragraph) determineExplicitEmbeddingLevels() {
293	var stack directionalStatusStack
294	var overflowIsolateCount, overflowEmbeddingCount, validIsolateCount int
295
296	// Rule X1.
297	stack.push(p.embeddingLevel, ON, false)
298
299	for i, t := range p.resultTypes {
300		// Rules X2, X3, X4, X5, X5a, X5b, X5c
301		switch t {
302		case RLE, LRE, RLO, LRO, RLI, LRI, FSI:
303			isIsolate := t.in(RLI, LRI, FSI)
304			isRTL := t.in(RLE, RLO, RLI)
305
306			// override if this is an FSI that resolves to RLI
307			if t == FSI {
308				isRTL = (p.determineParagraphEmbeddingLevel(i+1, p.matchingPDI[i]) == 1)
309			}
310			if isIsolate {
311				p.resultLevels[i] = stack.lastEmbeddingLevel()
312				if stack.lastDirectionalOverrideStatus() != ON {
313					p.resultTypes[i] = stack.lastDirectionalOverrideStatus()
314				}
315			}
316
317			var newLevel level
318			if isRTL {
319				// least greater odd
320				newLevel = (stack.lastEmbeddingLevel() + 1) | 1
321			} else {
322				// least greater even
323				newLevel = (stack.lastEmbeddingLevel() + 2) &^ 1
324			}
325
326			if newLevel <= maxDepth && overflowIsolateCount == 0 && overflowEmbeddingCount == 0 {
327				if isIsolate {
328					validIsolateCount++
329				}
330				// Push new embedding level, override status, and isolated
331				// status.
332				// No check for valid stack counter, since the level check
333				// suffices.
334				switch t {
335				case LRO:
336					stack.push(newLevel, L, isIsolate)
337				case RLO:
338					stack.push(newLevel, R, isIsolate)
339				default:
340					stack.push(newLevel, ON, isIsolate)
341				}
342				// Not really part of the spec
343				if !isIsolate {
344					p.resultLevels[i] = newLevel
345				}
346			} else {
347				// This is an invalid explicit formatting character,
348				// so apply the "Otherwise" part of rules X2-X5b.
349				if isIsolate {
350					overflowIsolateCount++
351				} else { // !isIsolate
352					if overflowIsolateCount == 0 {
353						overflowEmbeddingCount++
354					}
355				}
356			}
357
358		// Rule X6a
359		case PDI:
360			if overflowIsolateCount > 0 {
361				overflowIsolateCount--
362			} else if validIsolateCount == 0 {
363				// do nothing
364			} else {
365				overflowEmbeddingCount = 0
366				for !stack.lastDirectionalIsolateStatus() {
367					stack.pop()
368				}
369				stack.pop()
370				validIsolateCount--
371			}
372			p.resultLevels[i] = stack.lastEmbeddingLevel()
373
374		// Rule X7
375		case PDF:
376			// Not really part of the spec
377			p.resultLevels[i] = stack.lastEmbeddingLevel()
378
379			if overflowIsolateCount > 0 {
380				// do nothing
381			} else if overflowEmbeddingCount > 0 {
382				overflowEmbeddingCount--
383			} else if !stack.lastDirectionalIsolateStatus() && stack.depth() >= 2 {
384				stack.pop()
385			}
386
387		case B: // paragraph separator.
388			// Rule X8.
389
390			// These values are reset for clarity, in this implementation B
391			// can only occur as the last code in the array.
392			stack.empty()
393			overflowIsolateCount = 0
394			overflowEmbeddingCount = 0
395			validIsolateCount = 0
396			p.resultLevels[i] = p.embeddingLevel
397
398		default:
399			p.resultLevels[i] = stack.lastEmbeddingLevel()
400			if stack.lastDirectionalOverrideStatus() != ON {
401				p.resultTypes[i] = stack.lastDirectionalOverrideStatus()
402			}
403		}
404	}
405}
406
407type isolatingRunSequence struct {
408	p *paragraph
409
410	indexes []int // indexes to the original string
411
412	types          []Class // type of each character using the index
413	resolvedLevels []level // resolved levels after application of rules
414	level          level
415	sos, eos       Class
416}
417
418func (i *isolatingRunSequence) Len() int { return len(i.indexes) }
419
420func maxLevel(a, b level) level {
421	if a > b {
422		return a
423	}
424	return b
425}
426
427// Rule X10, second bullet: Determine the start-of-sequence (sos) and end-of-sequence (eos) types,
428// 			 either L or R, for each isolating run sequence.
429func (p *paragraph) isolatingRunSequence(indexes []int) *isolatingRunSequence {
430	length := len(indexes)
431	types := make([]Class, length)
432	for i, x := range indexes {
433		types[i] = p.resultTypes[x]
434	}
435
436	// assign level, sos and eos
437	prevChar := indexes[0] - 1
438	for prevChar >= 0 && isRemovedByX9(p.initialTypes[prevChar]) {
439		prevChar--
440	}
441	prevLevel := p.embeddingLevel
442	if prevChar >= 0 {
443		prevLevel = p.resultLevels[prevChar]
444	}
445
446	var succLevel level
447	lastType := types[length-1]
448	if lastType.in(LRI, RLI, FSI) {
449		succLevel = p.embeddingLevel
450	} else {
451		// the first character after the end of run sequence
452		limit := indexes[length-1] + 1
453		for ; limit < p.Len() && isRemovedByX9(p.initialTypes[limit]); limit++ {
454
455		}
456		succLevel = p.embeddingLevel
457		if limit < p.Len() {
458			succLevel = p.resultLevels[limit]
459		}
460	}
461	level := p.resultLevels[indexes[0]]
462	return &isolatingRunSequence{
463		p:       p,
464		indexes: indexes,
465		types:   types,
466		level:   level,
467		sos:     typeForLevel(maxLevel(prevLevel, level)),
468		eos:     typeForLevel(maxLevel(succLevel, level)),
469	}
470}
471
472// Resolving weak types Rules W1-W7.
473//
474// Note that some weak types (EN, AN) remain after this processing is
475// complete.
476func (s *isolatingRunSequence) resolveWeakTypes() {
477
478	// on entry, only these types remain
479	s.assertOnly(L, R, AL, EN, ES, ET, AN, CS, B, S, WS, ON, NSM, LRI, RLI, FSI, PDI)
480
481	// Rule W1.
482	// Changes all NSMs.
483	preceedingCharacterType := s.sos
484	for i, t := range s.types {
485		if t == NSM {
486			s.types[i] = preceedingCharacterType
487		} else {
488			if t.in(LRI, RLI, FSI, PDI) {
489				preceedingCharacterType = ON
490			}
491			preceedingCharacterType = t
492		}
493	}
494
495	// Rule W2.
496	// EN does not change at the start of the run, because sos != AL.
497	for i, t := range s.types {
498		if t == EN {
499			for j := i - 1; j >= 0; j-- {
500				if t := s.types[j]; t.in(L, R, AL) {
501					if t == AL {
502						s.types[i] = AN
503					}
504					break
505				}
506			}
507		}
508	}
509
510	// Rule W3.
511	for i, t := range s.types {
512		if t == AL {
513			s.types[i] = R
514		}
515	}
516
517	// Rule W4.
518	// Since there must be values on both sides for this rule to have an
519	// effect, the scan skips the first and last value.
520	//
521	// Although the scan proceeds left to right, and changes the type
522	// values in a way that would appear to affect the computations
523	// later in the scan, there is actually no problem. A change in the
524	// current value can only affect the value to its immediate right,
525	// and only affect it if it is ES or CS. But the current value can
526	// only change if the value to its right is not ES or CS. Thus
527	// either the current value will not change, or its change will have
528	// no effect on the remainder of the analysis.
529
530	for i := 1; i < s.Len()-1; i++ {
531		t := s.types[i]
532		if t == ES || t == CS {
533			prevSepType := s.types[i-1]
534			succSepType := s.types[i+1]
535			if prevSepType == EN && succSepType == EN {
536				s.types[i] = EN
537			} else if s.types[i] == CS && prevSepType == AN && succSepType == AN {
538				s.types[i] = AN
539			}
540		}
541	}
542
543	// Rule W5.
544	for i, t := range s.types {
545		if t == ET {
546			// locate end of sequence
547			runStart := i
548			runEnd := s.findRunLimit(runStart, ET)
549
550			// check values at ends of sequence
551			t := s.sos
552			if runStart > 0 {
553				t = s.types[runStart-1]
554			}
555			if t != EN {
556				t = s.eos
557				if runEnd < len(s.types) {
558					t = s.types[runEnd]
559				}
560			}
561			if t == EN {
562				setTypes(s.types[runStart:runEnd], EN)
563			}
564			// continue at end of sequence
565			i = runEnd
566		}
567	}
568
569	// Rule W6.
570	for i, t := range s.types {
571		if t.in(ES, ET, CS) {
572			s.types[i] = ON
573		}
574	}
575
576	// Rule W7.
577	for i, t := range s.types {
578		if t == EN {
579			// set default if we reach start of run
580			prevStrongType := s.sos
581			for j := i - 1; j >= 0; j-- {
582				t = s.types[j]
583				if t == L || t == R { // AL's have been changed to R
584					prevStrongType = t
585					break
586				}
587			}
588			if prevStrongType == L {
589				s.types[i] = L
590			}
591		}
592	}
593}
594
595// 6) resolving neutral types Rules N1-N2.
596func (s *isolatingRunSequence) resolveNeutralTypes() {
597
598	// on entry, only these types can be in resultTypes
599	s.assertOnly(L, R, EN, AN, B, S, WS, ON, RLI, LRI, FSI, PDI)
600
601	for i, t := range s.types {
602		switch t {
603		case WS, ON, B, S, RLI, LRI, FSI, PDI:
604			// find bounds of run of neutrals
605			runStart := i
606			runEnd := s.findRunLimit(runStart, B, S, WS, ON, RLI, LRI, FSI, PDI)
607
608			// determine effective types at ends of run
609			var leadType, trailType Class
610
611			// Note that the character found can only be L, R, AN, or
612			// EN.
613			if runStart == 0 {
614				leadType = s.sos
615			} else {
616				leadType = s.types[runStart-1]
617				if leadType.in(AN, EN) {
618					leadType = R
619				}
620			}
621			if runEnd == len(s.types) {
622				trailType = s.eos
623			} else {
624				trailType = s.types[runEnd]
625				if trailType.in(AN, EN) {
626					trailType = R
627				}
628			}
629
630			var resolvedType Class
631			if leadType == trailType {
632				// Rule N1.
633				resolvedType = leadType
634			} else {
635				// Rule N2.
636				// Notice the embedding level of the run is used, not
637				// the paragraph embedding level.
638				resolvedType = typeForLevel(s.level)
639			}
640
641			setTypes(s.types[runStart:runEnd], resolvedType)
642
643			// skip over run of (former) neutrals
644			i = runEnd
645		}
646	}
647}
648
649func setLevels(levels []level, newLevel level) {
650	for i := range levels {
651		levels[i] = newLevel
652	}
653}
654
655func setTypes(types []Class, newType Class) {
656	for i := range types {
657		types[i] = newType
658	}
659}
660
661// 7) resolving implicit embedding levels Rules I1, I2.
662func (s *isolatingRunSequence) resolveImplicitLevels() {
663
664	// on entry, only these types can be in resultTypes
665	s.assertOnly(L, R, EN, AN)
666
667	s.resolvedLevels = make([]level, len(s.types))
668	setLevels(s.resolvedLevels, s.level)
669
670	if (s.level & 1) == 0 { // even level
671		for i, t := range s.types {
672			// Rule I1.
673			if t == L {
674				// no change
675			} else if t == R {
676				s.resolvedLevels[i] += 1
677			} else { // t == AN || t == EN
678				s.resolvedLevels[i] += 2
679			}
680		}
681	} else { // odd level
682		for i, t := range s.types {
683			// Rule I2.
684			if t == R {
685				// no change
686			} else { // t == L || t == AN || t == EN
687				s.resolvedLevels[i] += 1
688			}
689		}
690	}
691}
692
693// Applies the levels and types resolved in rules W1-I2 to the
694// resultLevels array.
695func (s *isolatingRunSequence) applyLevelsAndTypes() {
696	for i, x := range s.indexes {
697		s.p.resultTypes[x] = s.types[i]
698		s.p.resultLevels[x] = s.resolvedLevels[i]
699	}
700}
701
702// Return the limit of the run consisting only of the types in validSet
703// starting at index. This checks the value at index, and will return
704// index if that value is not in validSet.
705func (s *isolatingRunSequence) findRunLimit(index int, validSet ...Class) int {
706loop:
707	for ; index < len(s.types); index++ {
708		t := s.types[index]
709		for _, valid := range validSet {
710			if t == valid {
711				continue loop
712			}
713		}
714		return index // didn't find a match in validSet
715	}
716	return len(s.types)
717}
718
719// Algorithm validation. Assert that all values in types are in the
720// provided set.
721func (s *isolatingRunSequence) assertOnly(codes ...Class) {
722loop:
723	for i, t := range s.types {
724		for _, c := range codes {
725			if t == c {
726				continue loop
727			}
728		}
729		log.Panicf("invalid bidi code %s present in assertOnly at position %d", t, s.indexes[i])
730	}
731}
732
733// determineLevelRuns returns an array of level runs. Each level run is
734// described as an array of indexes into the input string.
735//
736// Determines the level runs. Rule X9 will be applied in determining the
737// runs, in the way that makes sure the characters that are supposed to be
738// removed are not included in the runs.
739func (p *paragraph) determineLevelRuns() [][]int {
740	run := []int{}
741	allRuns := [][]int{}
742	currentLevel := implicitLevel
743
744	for i := range p.initialTypes {
745		if !isRemovedByX9(p.initialTypes[i]) {
746			if p.resultLevels[i] != currentLevel {
747				// we just encountered a new run; wrap up last run
748				if currentLevel >= 0 { // only wrap it up if there was a run
749					allRuns = append(allRuns, run)
750					run = nil
751				}
752				// Start new run
753				currentLevel = p.resultLevels[i]
754			}
755			run = append(run, i)
756		}
757	}
758	// Wrap up the final run, if any
759	if len(run) > 0 {
760		allRuns = append(allRuns, run)
761	}
762	return allRuns
763}
764
765// Definition BD13. Determine isolating run sequences.
766func (p *paragraph) determineIsolatingRunSequences() []*isolatingRunSequence {
767	levelRuns := p.determineLevelRuns()
768
769	// Compute the run that each character belongs to
770	runForCharacter := make([]int, p.Len())
771	for i, run := range levelRuns {
772		for _, index := range run {
773			runForCharacter[index] = i
774		}
775	}
776
777	sequences := []*isolatingRunSequence{}
778
779	var currentRunSequence []int
780
781	for _, run := range levelRuns {
782		first := run[0]
783		if p.initialTypes[first] != PDI || p.matchingIsolateInitiator[first] == -1 {
784			currentRunSequence = nil
785			// int run = i;
786			for {
787				// Copy this level run into currentRunSequence
788				currentRunSequence = append(currentRunSequence, run...)
789
790				last := currentRunSequence[len(currentRunSequence)-1]
791				lastT := p.initialTypes[last]
792				if lastT.in(LRI, RLI, FSI) && p.matchingPDI[last] != p.Len() {
793					run = levelRuns[runForCharacter[p.matchingPDI[last]]]
794				} else {
795					break
796				}
797			}
798			sequences = append(sequences, p.isolatingRunSequence(currentRunSequence))
799		}
800	}
801	return sequences
802}
803
804// Assign level information to characters removed by rule X9. This is for
805// ease of relating the level information to the original input data. Note
806// that the levels assigned to these codes are arbitrary, they're chosen so
807// as to avoid breaking level runs.
808func (p *paragraph) assignLevelsToCharactersRemovedByX9() {
809	for i, t := range p.initialTypes {
810		if t.in(LRE, RLE, LRO, RLO, PDF, BN) {
811			p.resultTypes[i] = t
812			p.resultLevels[i] = -1
813		}
814	}
815	// now propagate forward the levels information (could have
816	// propagated backward, the main thing is not to introduce a level
817	// break where one doesn't already exist).
818
819	if p.resultLevels[0] == -1 {
820		p.resultLevels[0] = p.embeddingLevel
821	}
822	for i := 1; i < len(p.initialTypes); i++ {
823		if p.resultLevels[i] == -1 {
824			p.resultLevels[i] = p.resultLevels[i-1]
825		}
826	}
827	// Embedding information is for informational purposes only so need not be
828	// adjusted.
829}
830
831//
832// Output
833//
834
835// getLevels computes levels array breaking lines at offsets in linebreaks.
836// Rule L1.
837//
838// The linebreaks array must include at least one value. The values must be
839// in strictly increasing order (no duplicates) between 1 and the length of
840// the text, inclusive. The last value must be the length of the text.
841func (p *paragraph) getLevels(linebreaks []int) []level {
842	// Note that since the previous processing has removed all
843	// P, S, and WS values from resultTypes, the values referred to
844	// in these rules are the initial types, before any processing
845	// has been applied (including processing of overrides).
846	//
847	// This example implementation has reinserted explicit format codes
848	// and BN, in order that the levels array correspond to the
849	// initial text. Their final placement is not normative.
850	// These codes are treated like WS in this implementation,
851	// so they don't interrupt sequences of WS.
852
853	validateLineBreaks(linebreaks, p.Len())
854
855	result := append([]level(nil), p.resultLevels...)
856
857	// don't worry about linebreaks since if there is a break within
858	// a series of WS values preceding S, the linebreak itself
859	// causes the reset.
860	for i, t := range p.initialTypes {
861		if t.in(B, S) {
862			// Rule L1, clauses one and two.
863			result[i] = p.embeddingLevel
864
865			// Rule L1, clause three.
866			for j := i - 1; j >= 0; j-- {
867				if isWhitespace(p.initialTypes[j]) { // including format codes
868					result[j] = p.embeddingLevel
869				} else {
870					break
871				}
872			}
873		}
874	}
875
876	// Rule L1, clause four.
877	start := 0
878	for _, limit := range linebreaks {
879		for j := limit - 1; j >= start; j-- {
880			if isWhitespace(p.initialTypes[j]) { // including format codes
881				result[j] = p.embeddingLevel
882			} else {
883				break
884			}
885		}
886		start = limit
887	}
888
889	return result
890}
891
892// getReordering returns the reordering of lines from a visual index to a
893// logical index for line breaks at the given offsets.
894//
895// Lines are concatenated from left to right. So for example, the fifth
896// character from the left on the third line is
897//
898// 		getReordering(linebreaks)[linebreaks[1] + 4]
899//
900// (linebreaks[1] is the position after the last character of the second
901// line, which is also the index of the first character on the third line,
902// and adding four gets the fifth character from the left).
903//
904// The linebreaks array must include at least one value. The values must be
905// in strictly increasing order (no duplicates) between 1 and the length of
906// the text, inclusive. The last value must be the length of the text.
907func (p *paragraph) getReordering(linebreaks []int) []int {
908	validateLineBreaks(linebreaks, p.Len())
909
910	return computeMultilineReordering(p.getLevels(linebreaks), linebreaks)
911}
912
913// Return multiline reordering array for a given level array. Reordering
914// does not occur across a line break.
915func computeMultilineReordering(levels []level, linebreaks []int) []int {
916	result := make([]int, len(levels))
917
918	start := 0
919	for _, limit := range linebreaks {
920		tempLevels := make([]level, limit-start)
921		copy(tempLevels, levels[start:])
922
923		for j, order := range computeReordering(tempLevels) {
924			result[start+j] = order + start
925		}
926		start = limit
927	}
928	return result
929}
930
931// Return reordering array for a given level array. This reorders a single
932// line. The reordering is a visual to logical map. For example, the
933// leftmost char is string.charAt(order[0]). Rule L2.
934func computeReordering(levels []level) []int {
935	result := make([]int, len(levels))
936	// initialize order
937	for i := range result {
938		result[i] = i
939	}
940
941	// locate highest level found on line.
942	// Note the rules say text, but no reordering across line bounds is
943	// performed, so this is sufficient.
944	highestLevel := level(0)
945	lowestOddLevel := level(maxDepth + 2)
946	for _, level := range levels {
947		if level > highestLevel {
948			highestLevel = level
949		}
950		if level&1 != 0 && level < lowestOddLevel {
951			lowestOddLevel = level
952		}
953	}
954
955	for level := highestLevel; level >= lowestOddLevel; level-- {
956		for i := 0; i < len(levels); i++ {
957			if levels[i] >= level {
958				// find range of text at or above this level
959				start := i
960				limit := i + 1
961				for limit < len(levels) && levels[limit] >= level {
962					limit++
963				}
964
965				for j, k := start, limit-1; j < k; j, k = j+1, k-1 {
966					result[j], result[k] = result[k], result[j]
967				}
968				// skip to end of level run
969				i = limit
970			}
971		}
972	}
973
974	return result
975}
976
977// isWhitespace reports whether the type is considered a whitespace type for the
978// line break rules.
979func isWhitespace(c Class) bool {
980	switch c {
981	case LRE, RLE, LRO, RLO, PDF, LRI, RLI, FSI, PDI, BN, WS:
982		return true
983	}
984	return false
985}
986
987// isRemovedByX9 reports whether the type is one of the types removed in X9.
988func isRemovedByX9(c Class) bool {
989	switch c {
990	case LRE, RLE, LRO, RLO, PDF, BN:
991		return true
992	}
993	return false
994}
995
996// typeForLevel reports the strong type (L or R) corresponding to the level.
997func typeForLevel(level level) Class {
998	if (level & 0x1) == 0 {
999		return L
1000	}
1001	return R
1002}
1003
1004// TODO: change validation to not panic
1005
1006func validateTypes(types []Class) {
1007	if len(types) == 0 {
1008		log.Panic("types is null")
1009	}
1010	for i, t := range types[:len(types)-1] {
1011		if t == B {
1012			log.Panicf("B type before end of paragraph at index: %d", i)
1013		}
1014	}
1015}
1016
1017func validateParagraphEmbeddingLevel(embeddingLevel level) {
1018	if embeddingLevel != implicitLevel &&
1019		embeddingLevel != 0 &&
1020		embeddingLevel != 1 {
1021		log.Panicf("illegal paragraph embedding level: %d", embeddingLevel)
1022	}
1023}
1024
1025func validateLineBreaks(linebreaks []int, textLength int) {
1026	prev := 0
1027	for i, next := range linebreaks {
1028		if next <= prev {
1029			log.Panicf("bad linebreak: %d at index: %d", next, i)
1030		}
1031		prev = next
1032	}
1033	if prev != textLength {
1034		log.Panicf("last linebreak was %d, want %d", prev, textLength)
1035	}
1036}
1037
1038func validatePbTypes(pairTypes []bracketType) {
1039	if len(pairTypes) == 0 {
1040		log.Panic("pairTypes is null")
1041	}
1042	for i, pt := range pairTypes {
1043		switch pt {
1044		case bpNone, bpOpen, bpClose:
1045		default:
1046			log.Panicf("illegal pairType value at %d: %v", i, pairTypes[i])
1047		}
1048	}
1049}
1050
1051func validatePbValues(pairValues []rune, pairTypes []bracketType) {
1052	if pairValues == nil {
1053		log.Panic("pairValues is null")
1054	}
1055	if len(pairTypes) != len(pairValues) {
1056		log.Panic("pairTypes is different length from pairValues")
1057	}
1058}
1059