1// Copyright 2015 The Go Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style
3// license that can be found in the LICENSE file.
4
5package bidi
6
7import (
8	"fmt"
9	"log"
10)
11
12// This implementation is a port based on the reference implementation found at:
13// https://www.unicode.org/Public/PROGRAMS/BidiReferenceJava/
14//
15// described in Unicode Bidirectional Algorithm (UAX #9).
16//
17// Input:
18// There are two levels of input to the algorithm, since clients may prefer to
19// supply some information from out-of-band sources rather than relying on the
20// default behavior.
21//
22// - Bidi class array
23// - Bidi class array, with externally supplied base line direction
24//
25// Output:
26// Output is separated into several stages:
27//
28//  - levels array over entire paragraph
29//  - reordering array over entire paragraph
30//  - levels array over line
31//  - reordering array over line
32//
33// Note that for conformance to the Unicode Bidirectional Algorithm,
34// implementations are only required to generate correct reordering and
35// character directionality (odd or even levels) over a line. Generating
36// identical level arrays over a line is not required. Bidi explicit format
37// codes (LRE, RLE, LRO, RLO, PDF) and BN can be assigned arbitrary levels and
38// positions as long as the rest of the input is properly reordered.
39//
40// As the algorithm is defined to operate on a single paragraph at a time, this
41// implementation is written to handle single paragraphs. Thus rule P1 is
42// presumed by this implementation-- the data provided to the implementation is
43// assumed to be a single paragraph, and either contains no 'B' codes, or a
44// single 'B' code at the end of the input. 'B' is allowed as input to
45// illustrate how the algorithm assigns it a level.
46//
47// Also note that rules L3 and L4 depend on the rendering engine that uses the
48// result of the bidi algorithm. This implementation assumes that the rendering
49// engine expects combining marks in visual order (e.g. to the left of their
50// base character in RTL runs) and that it adjusts the glyphs used to render
51// mirrored characters that are in RTL runs so that they render appropriately.
52
53// level is the embedding level of a character. Even embedding levels indicate
54// left-to-right order and odd levels indicate right-to-left order. The special
55// level of -1 is reserved for undefined order.
56type level int8
57
58const implicitLevel level = -1
59
60// in returns if x is equal to any of the values in set.
61func (c Class) in(set ...Class) bool {
62	for _, s := range set {
63		if c == s {
64			return true
65		}
66	}
67	return false
68}
69
70// A paragraph contains the state of a paragraph.
71type paragraph struct {
72	initialTypes []Class
73
74	// Arrays of properties needed for paired bracket evaluation in N0
75	pairTypes  []bracketType // paired Bracket types for paragraph
76	pairValues []rune        // rune for opening bracket or pbOpen and pbClose; 0 for pbNone
77
78	embeddingLevel level // default: = implicitLevel;
79
80	// at the paragraph levels
81	resultTypes  []Class
82	resultLevels []level
83
84	// Index of matching PDI for isolate initiator characters. For other
85	// characters, the value of matchingPDI will be set to -1. For isolate
86	// initiators with no matching PDI, matchingPDI will be set to the length of
87	// the input string.
88	matchingPDI []int
89
90	// Index of matching isolate initiator for PDI characters. For other
91	// characters, and for PDIs with no matching isolate initiator, the value of
92	// matchingIsolateInitiator will be set to -1.
93	matchingIsolateInitiator []int
94}
95
96// newParagraph initializes a paragraph. The user needs to supply a few arrays
97// corresponding to the preprocessed text input. The types correspond to the
98// Unicode BiDi classes for each rune. pairTypes indicates the bracket type for
99// each rune. pairValues provides a unique bracket class identifier for each
100// rune (suggested is the rune of the open bracket for opening and matching
101// close brackets, after normalization). The embedding levels are optional, but
102// may be supplied to encode embedding levels of styled text.
103func newParagraph(types []Class, pairTypes []bracketType, pairValues []rune, levels level) (*paragraph, error) {
104	var err error
105	if err = validateTypes(types); err != nil {
106		return nil, err
107	}
108	if err = validatePbTypes(pairTypes); err != nil {
109		return nil, err
110	}
111	if err = validatePbValues(pairValues, pairTypes); err != nil {
112		return nil, err
113	}
114	if err = validateParagraphEmbeddingLevel(levels); err != nil {
115		return nil, err
116	}
117
118	p := &paragraph{
119		initialTypes:   append([]Class(nil), types...),
120		embeddingLevel: levels,
121
122		pairTypes:  pairTypes,
123		pairValues: pairValues,
124
125		resultTypes: append([]Class(nil), types...),
126	}
127	p.run()
128	return p, nil
129}
130
131func (p *paragraph) Len() int { return len(p.initialTypes) }
132
133// The algorithm. Does not include line-based processing (Rules L1, L2).
134// These are applied later in the line-based phase of the algorithm.
135func (p *paragraph) run() {
136	p.determineMatchingIsolates()
137
138	// 1) determining the paragraph level
139	// Rule P1 is the requirement for entering this algorithm.
140	// Rules P2, P3.
141	// If no externally supplied paragraph embedding level, use default.
142	if p.embeddingLevel == implicitLevel {
143		p.embeddingLevel = p.determineParagraphEmbeddingLevel(0, p.Len())
144	}
145
146	// Initialize result levels to paragraph embedding level.
147	p.resultLevels = make([]level, p.Len())
148	setLevels(p.resultLevels, p.embeddingLevel)
149
150	// 2) Explicit levels and directions
151	// Rules X1-X8.
152	p.determineExplicitEmbeddingLevels()
153
154	// Rule X9.
155	// We do not remove the embeddings, the overrides, the PDFs, and the BNs
156	// from the string explicitly. But they are not copied into isolating run
157	// sequences when they are created, so they are removed for all
158	// practical purposes.
159
160	// Rule X10.
161	// Run remainder of algorithm one isolating run sequence at a time
162	for _, seq := range p.determineIsolatingRunSequences() {
163		// 3) resolving weak types
164		// Rules W1-W7.
165		seq.resolveWeakTypes()
166
167		// 4a) resolving paired brackets
168		// Rule N0
169		resolvePairedBrackets(seq)
170
171		// 4b) resolving neutral types
172		// Rules N1-N3.
173		seq.resolveNeutralTypes()
174
175		// 5) resolving implicit embedding levels
176		// Rules I1, I2.
177		seq.resolveImplicitLevels()
178
179		// Apply the computed levels and types
180		seq.applyLevelsAndTypes()
181	}
182
183	// Assign appropriate levels to 'hide' LREs, RLEs, LROs, RLOs, PDFs, and
184	// BNs. This is for convenience, so the resulting level array will have
185	// a value for every character.
186	p.assignLevelsToCharactersRemovedByX9()
187}
188
189// determineMatchingIsolates determines the matching PDI for each isolate
190// initiator and vice versa.
191//
192// Definition BD9.
193//
194// At the end of this function:
195//
196//  - The member variable matchingPDI is set to point to the index of the
197//    matching PDI character for each isolate initiator character. If there is
198//    no matching PDI, it is set to the length of the input text. For other
199//    characters, it is set to -1.
200//  - The member variable matchingIsolateInitiator is set to point to the
201//    index of the matching isolate initiator character for each PDI character.
202//    If there is no matching isolate initiator, or the character is not a PDI,
203//    it is set to -1.
204func (p *paragraph) determineMatchingIsolates() {
205	p.matchingPDI = make([]int, p.Len())
206	p.matchingIsolateInitiator = make([]int, p.Len())
207
208	for i := range p.matchingIsolateInitiator {
209		p.matchingIsolateInitiator[i] = -1
210	}
211
212	for i := range p.matchingPDI {
213		p.matchingPDI[i] = -1
214
215		if t := p.resultTypes[i]; t.in(LRI, RLI, FSI) {
216			depthCounter := 1
217			for j := i + 1; j < p.Len(); j++ {
218				if u := p.resultTypes[j]; u.in(LRI, RLI, FSI) {
219					depthCounter++
220				} else if u == PDI {
221					if depthCounter--; depthCounter == 0 {
222						p.matchingPDI[i] = j
223						p.matchingIsolateInitiator[j] = i
224						break
225					}
226				}
227			}
228			if p.matchingPDI[i] == -1 {
229				p.matchingPDI[i] = p.Len()
230			}
231		}
232	}
233}
234
235// determineParagraphEmbeddingLevel reports the resolved paragraph direction of
236// the substring limited by the given range [start, end).
237//
238// Determines the paragraph level based on rules P2, P3. This is also used
239// in rule X5c to find if an FSI should resolve to LRI or RLI.
240func (p *paragraph) determineParagraphEmbeddingLevel(start, end int) level {
241	var strongType Class = unknownClass
242
243	// Rule P2.
244	for i := start; i < end; i++ {
245		if t := p.resultTypes[i]; t.in(L, AL, R) {
246			strongType = t
247			break
248		} else if t.in(FSI, LRI, RLI) {
249			i = p.matchingPDI[i] // skip over to the matching PDI
250			if i > end {
251				log.Panic("assert (i <= end)")
252			}
253		}
254	}
255	// Rule P3.
256	switch strongType {
257	case unknownClass: // none found
258		// default embedding level when no strong types found is 0.
259		return 0
260	case L:
261		return 0
262	default: // AL, R
263		return 1
264	}
265}
266
267const maxDepth = 125
268
269// This stack will store the embedding levels and override and isolated
270// statuses
271type directionalStatusStack struct {
272	stackCounter        int
273	embeddingLevelStack [maxDepth + 1]level
274	overrideStatusStack [maxDepth + 1]Class
275	isolateStatusStack  [maxDepth + 1]bool
276}
277
278func (s *directionalStatusStack) empty()     { s.stackCounter = 0 }
279func (s *directionalStatusStack) pop()       { s.stackCounter-- }
280func (s *directionalStatusStack) depth() int { return s.stackCounter }
281
282func (s *directionalStatusStack) push(level level, overrideStatus Class, isolateStatus bool) {
283	s.embeddingLevelStack[s.stackCounter] = level
284	s.overrideStatusStack[s.stackCounter] = overrideStatus
285	s.isolateStatusStack[s.stackCounter] = isolateStatus
286	s.stackCounter++
287}
288
289func (s *directionalStatusStack) lastEmbeddingLevel() level {
290	return s.embeddingLevelStack[s.stackCounter-1]
291}
292
293func (s *directionalStatusStack) lastDirectionalOverrideStatus() Class {
294	return s.overrideStatusStack[s.stackCounter-1]
295}
296
297func (s *directionalStatusStack) lastDirectionalIsolateStatus() bool {
298	return s.isolateStatusStack[s.stackCounter-1]
299}
300
301// Determine explicit levels using rules X1 - X8
302func (p *paragraph) determineExplicitEmbeddingLevels() {
303	var stack directionalStatusStack
304	var overflowIsolateCount, overflowEmbeddingCount, validIsolateCount int
305
306	// Rule X1.
307	stack.push(p.embeddingLevel, ON, false)
308
309	for i, t := range p.resultTypes {
310		// Rules X2, X3, X4, X5, X5a, X5b, X5c
311		switch t {
312		case RLE, LRE, RLO, LRO, RLI, LRI, FSI:
313			isIsolate := t.in(RLI, LRI, FSI)
314			isRTL := t.in(RLE, RLO, RLI)
315
316			// override if this is an FSI that resolves to RLI
317			if t == FSI {
318				isRTL = (p.determineParagraphEmbeddingLevel(i+1, p.matchingPDI[i]) == 1)
319			}
320			if isIsolate {
321				p.resultLevels[i] = stack.lastEmbeddingLevel()
322				if stack.lastDirectionalOverrideStatus() != ON {
323					p.resultTypes[i] = stack.lastDirectionalOverrideStatus()
324				}
325			}
326
327			var newLevel level
328			if isRTL {
329				// least greater odd
330				newLevel = (stack.lastEmbeddingLevel() + 1) | 1
331			} else {
332				// least greater even
333				newLevel = (stack.lastEmbeddingLevel() + 2) &^ 1
334			}
335
336			if newLevel <= maxDepth && overflowIsolateCount == 0 && overflowEmbeddingCount == 0 {
337				if isIsolate {
338					validIsolateCount++
339				}
340				// Push new embedding level, override status, and isolated
341				// status.
342				// No check for valid stack counter, since the level check
343				// suffices.
344				switch t {
345				case LRO:
346					stack.push(newLevel, L, isIsolate)
347				case RLO:
348					stack.push(newLevel, R, isIsolate)
349				default:
350					stack.push(newLevel, ON, isIsolate)
351				}
352				// Not really part of the spec
353				if !isIsolate {
354					p.resultLevels[i] = newLevel
355				}
356			} else {
357				// This is an invalid explicit formatting character,
358				// so apply the "Otherwise" part of rules X2-X5b.
359				if isIsolate {
360					overflowIsolateCount++
361				} else { // !isIsolate
362					if overflowIsolateCount == 0 {
363						overflowEmbeddingCount++
364					}
365				}
366			}
367
368		// Rule X6a
369		case PDI:
370			if overflowIsolateCount > 0 {
371				overflowIsolateCount--
372			} else if validIsolateCount == 0 {
373				// do nothing
374			} else {
375				overflowEmbeddingCount = 0
376				for !stack.lastDirectionalIsolateStatus() {
377					stack.pop()
378				}
379				stack.pop()
380				validIsolateCount--
381			}
382			p.resultLevels[i] = stack.lastEmbeddingLevel()
383
384		// Rule X7
385		case PDF:
386			// Not really part of the spec
387			p.resultLevels[i] = stack.lastEmbeddingLevel()
388
389			if overflowIsolateCount > 0 {
390				// do nothing
391			} else if overflowEmbeddingCount > 0 {
392				overflowEmbeddingCount--
393			} else if !stack.lastDirectionalIsolateStatus() && stack.depth() >= 2 {
394				stack.pop()
395			}
396
397		case B: // paragraph separator.
398			// Rule X8.
399
400			// These values are reset for clarity, in this implementation B
401			// can only occur as the last code in the array.
402			stack.empty()
403			overflowIsolateCount = 0
404			overflowEmbeddingCount = 0
405			validIsolateCount = 0
406			p.resultLevels[i] = p.embeddingLevel
407
408		default:
409			p.resultLevels[i] = stack.lastEmbeddingLevel()
410			if stack.lastDirectionalOverrideStatus() != ON {
411				p.resultTypes[i] = stack.lastDirectionalOverrideStatus()
412			}
413		}
414	}
415}
416
417type isolatingRunSequence struct {
418	p *paragraph
419
420	indexes []int // indexes to the original string
421
422	types          []Class // type of each character using the index
423	resolvedLevels []level // resolved levels after application of rules
424	level          level
425	sos, eos       Class
426}
427
428func (i *isolatingRunSequence) Len() int { return len(i.indexes) }
429
430func maxLevel(a, b level) level {
431	if a > b {
432		return a
433	}
434	return b
435}
436
437// Rule X10, second bullet: Determine the start-of-sequence (sos) and end-of-sequence (eos) types,
438// 			 either L or R, for each isolating run sequence.
439func (p *paragraph) isolatingRunSequence(indexes []int) *isolatingRunSequence {
440	length := len(indexes)
441	types := make([]Class, length)
442	for i, x := range indexes {
443		types[i] = p.resultTypes[x]
444	}
445
446	// assign level, sos and eos
447	prevChar := indexes[0] - 1
448	for prevChar >= 0 && isRemovedByX9(p.initialTypes[prevChar]) {
449		prevChar--
450	}
451	prevLevel := p.embeddingLevel
452	if prevChar >= 0 {
453		prevLevel = p.resultLevels[prevChar]
454	}
455
456	var succLevel level
457	lastType := types[length-1]
458	if lastType.in(LRI, RLI, FSI) {
459		succLevel = p.embeddingLevel
460	} else {
461		// the first character after the end of run sequence
462		limit := indexes[length-1] + 1
463		for ; limit < p.Len() && isRemovedByX9(p.initialTypes[limit]); limit++ {
464
465		}
466		succLevel = p.embeddingLevel
467		if limit < p.Len() {
468			succLevel = p.resultLevels[limit]
469		}
470	}
471	level := p.resultLevels[indexes[0]]
472	return &isolatingRunSequence{
473		p:       p,
474		indexes: indexes,
475		types:   types,
476		level:   level,
477		sos:     typeForLevel(maxLevel(prevLevel, level)),
478		eos:     typeForLevel(maxLevel(succLevel, level)),
479	}
480}
481
482// Resolving weak types Rules W1-W7.
483//
484// Note that some weak types (EN, AN) remain after this processing is
485// complete.
486func (s *isolatingRunSequence) resolveWeakTypes() {
487
488	// on entry, only these types remain
489	s.assertOnly(L, R, AL, EN, ES, ET, AN, CS, B, S, WS, ON, NSM, LRI, RLI, FSI, PDI)
490
491	// Rule W1.
492	// Changes all NSMs.
493	precedingCharacterType := s.sos
494	for i, t := range s.types {
495		if t == NSM {
496			s.types[i] = precedingCharacterType
497		} else {
498			if t.in(LRI, RLI, FSI, PDI) {
499				precedingCharacterType = ON
500			}
501			precedingCharacterType = t
502		}
503	}
504
505	// Rule W2.
506	// EN does not change at the start of the run, because sos != AL.
507	for i, t := range s.types {
508		if t == EN {
509			for j := i - 1; j >= 0; j-- {
510				if t := s.types[j]; t.in(L, R, AL) {
511					if t == AL {
512						s.types[i] = AN
513					}
514					break
515				}
516			}
517		}
518	}
519
520	// Rule W3.
521	for i, t := range s.types {
522		if t == AL {
523			s.types[i] = R
524		}
525	}
526
527	// Rule W4.
528	// Since there must be values on both sides for this rule to have an
529	// effect, the scan skips the first and last value.
530	//
531	// Although the scan proceeds left to right, and changes the type
532	// values in a way that would appear to affect the computations
533	// later in the scan, there is actually no problem. A change in the
534	// current value can only affect the value to its immediate right,
535	// and only affect it if it is ES or CS. But the current value can
536	// only change if the value to its right is not ES or CS. Thus
537	// either the current value will not change, or its change will have
538	// no effect on the remainder of the analysis.
539
540	for i := 1; i < s.Len()-1; i++ {
541		t := s.types[i]
542		if t == ES || t == CS {
543			prevSepType := s.types[i-1]
544			succSepType := s.types[i+1]
545			if prevSepType == EN && succSepType == EN {
546				s.types[i] = EN
547			} else if s.types[i] == CS && prevSepType == AN && succSepType == AN {
548				s.types[i] = AN
549			}
550		}
551	}
552
553	// Rule W5.
554	for i, t := range s.types {
555		if t == ET {
556			// locate end of sequence
557			runStart := i
558			runEnd := s.findRunLimit(runStart, ET)
559
560			// check values at ends of sequence
561			t := s.sos
562			if runStart > 0 {
563				t = s.types[runStart-1]
564			}
565			if t != EN {
566				t = s.eos
567				if runEnd < len(s.types) {
568					t = s.types[runEnd]
569				}
570			}
571			if t == EN {
572				setTypes(s.types[runStart:runEnd], EN)
573			}
574			// continue at end of sequence
575			i = runEnd
576		}
577	}
578
579	// Rule W6.
580	for i, t := range s.types {
581		if t.in(ES, ET, CS) {
582			s.types[i] = ON
583		}
584	}
585
586	// Rule W7.
587	for i, t := range s.types {
588		if t == EN {
589			// set default if we reach start of run
590			prevStrongType := s.sos
591			for j := i - 1; j >= 0; j-- {
592				t = s.types[j]
593				if t == L || t == R { // AL's have been changed to R
594					prevStrongType = t
595					break
596				}
597			}
598			if prevStrongType == L {
599				s.types[i] = L
600			}
601		}
602	}
603}
604
605// 6) resolving neutral types Rules N1-N2.
606func (s *isolatingRunSequence) resolveNeutralTypes() {
607
608	// on entry, only these types can be in resultTypes
609	s.assertOnly(L, R, EN, AN, B, S, WS, ON, RLI, LRI, FSI, PDI)
610
611	for i, t := range s.types {
612		switch t {
613		case WS, ON, B, S, RLI, LRI, FSI, PDI:
614			// find bounds of run of neutrals
615			runStart := i
616			runEnd := s.findRunLimit(runStart, B, S, WS, ON, RLI, LRI, FSI, PDI)
617
618			// determine effective types at ends of run
619			var leadType, trailType Class
620
621			// Note that the character found can only be L, R, AN, or
622			// EN.
623			if runStart == 0 {
624				leadType = s.sos
625			} else {
626				leadType = s.types[runStart-1]
627				if leadType.in(AN, EN) {
628					leadType = R
629				}
630			}
631			if runEnd == len(s.types) {
632				trailType = s.eos
633			} else {
634				trailType = s.types[runEnd]
635				if trailType.in(AN, EN) {
636					trailType = R
637				}
638			}
639
640			var resolvedType Class
641			if leadType == trailType {
642				// Rule N1.
643				resolvedType = leadType
644			} else {
645				// Rule N2.
646				// Notice the embedding level of the run is used, not
647				// the paragraph embedding level.
648				resolvedType = typeForLevel(s.level)
649			}
650
651			setTypes(s.types[runStart:runEnd], resolvedType)
652
653			// skip over run of (former) neutrals
654			i = runEnd
655		}
656	}
657}
658
659func setLevels(levels []level, newLevel level) {
660	for i := range levels {
661		levels[i] = newLevel
662	}
663}
664
665func setTypes(types []Class, newType Class) {
666	for i := range types {
667		types[i] = newType
668	}
669}
670
671// 7) resolving implicit embedding levels Rules I1, I2.
672func (s *isolatingRunSequence) resolveImplicitLevels() {
673
674	// on entry, only these types can be in resultTypes
675	s.assertOnly(L, R, EN, AN)
676
677	s.resolvedLevels = make([]level, len(s.types))
678	setLevels(s.resolvedLevels, s.level)
679
680	if (s.level & 1) == 0 { // even level
681		for i, t := range s.types {
682			// Rule I1.
683			if t == L {
684				// no change
685			} else if t == R {
686				s.resolvedLevels[i] += 1
687			} else { // t == AN || t == EN
688				s.resolvedLevels[i] += 2
689			}
690		}
691	} else { // odd level
692		for i, t := range s.types {
693			// Rule I2.
694			if t == R {
695				// no change
696			} else { // t == L || t == AN || t == EN
697				s.resolvedLevels[i] += 1
698			}
699		}
700	}
701}
702
703// Applies the levels and types resolved in rules W1-I2 to the
704// resultLevels array.
705func (s *isolatingRunSequence) applyLevelsAndTypes() {
706	for i, x := range s.indexes {
707		s.p.resultTypes[x] = s.types[i]
708		s.p.resultLevels[x] = s.resolvedLevels[i]
709	}
710}
711
712// Return the limit of the run consisting only of the types in validSet
713// starting at index. This checks the value at index, and will return
714// index if that value is not in validSet.
715func (s *isolatingRunSequence) findRunLimit(index int, validSet ...Class) int {
716loop:
717	for ; index < len(s.types); index++ {
718		t := s.types[index]
719		for _, valid := range validSet {
720			if t == valid {
721				continue loop
722			}
723		}
724		return index // didn't find a match in validSet
725	}
726	return len(s.types)
727}
728
729// Algorithm validation. Assert that all values in types are in the
730// provided set.
731func (s *isolatingRunSequence) assertOnly(codes ...Class) {
732loop:
733	for i, t := range s.types {
734		for _, c := range codes {
735			if t == c {
736				continue loop
737			}
738		}
739		log.Panicf("invalid bidi code %v present in assertOnly at position %d", t, s.indexes[i])
740	}
741}
742
743// determineLevelRuns returns an array of level runs. Each level run is
744// described as an array of indexes into the input string.
745//
746// Determines the level runs. Rule X9 will be applied in determining the
747// runs, in the way that makes sure the characters that are supposed to be
748// removed are not included in the runs.
749func (p *paragraph) determineLevelRuns() [][]int {
750	run := []int{}
751	allRuns := [][]int{}
752	currentLevel := implicitLevel
753
754	for i := range p.initialTypes {
755		if !isRemovedByX9(p.initialTypes[i]) {
756			if p.resultLevels[i] != currentLevel {
757				// we just encountered a new run; wrap up last run
758				if currentLevel >= 0 { // only wrap it up if there was a run
759					allRuns = append(allRuns, run)
760					run = nil
761				}
762				// Start new run
763				currentLevel = p.resultLevels[i]
764			}
765			run = append(run, i)
766		}
767	}
768	// Wrap up the final run, if any
769	if len(run) > 0 {
770		allRuns = append(allRuns, run)
771	}
772	return allRuns
773}
774
775// Definition BD13. Determine isolating run sequences.
776func (p *paragraph) determineIsolatingRunSequences() []*isolatingRunSequence {
777	levelRuns := p.determineLevelRuns()
778
779	// Compute the run that each character belongs to
780	runForCharacter := make([]int, p.Len())
781	for i, run := range levelRuns {
782		for _, index := range run {
783			runForCharacter[index] = i
784		}
785	}
786
787	sequences := []*isolatingRunSequence{}
788
789	var currentRunSequence []int
790
791	for _, run := range levelRuns {
792		first := run[0]
793		if p.initialTypes[first] != PDI || p.matchingIsolateInitiator[first] == -1 {
794			currentRunSequence = nil
795			// int run = i;
796			for {
797				// Copy this level run into currentRunSequence
798				currentRunSequence = append(currentRunSequence, run...)
799
800				last := currentRunSequence[len(currentRunSequence)-1]
801				lastT := p.initialTypes[last]
802				if lastT.in(LRI, RLI, FSI) && p.matchingPDI[last] != p.Len() {
803					run = levelRuns[runForCharacter[p.matchingPDI[last]]]
804				} else {
805					break
806				}
807			}
808			sequences = append(sequences, p.isolatingRunSequence(currentRunSequence))
809		}
810	}
811	return sequences
812}
813
814// Assign level information to characters removed by rule X9. This is for
815// ease of relating the level information to the original input data. Note
816// that the levels assigned to these codes are arbitrary, they're chosen so
817// as to avoid breaking level runs.
818func (p *paragraph) assignLevelsToCharactersRemovedByX9() {
819	for i, t := range p.initialTypes {
820		if t.in(LRE, RLE, LRO, RLO, PDF, BN) {
821			p.resultTypes[i] = t
822			p.resultLevels[i] = -1
823		}
824	}
825	// now propagate forward the levels information (could have
826	// propagated backward, the main thing is not to introduce a level
827	// break where one doesn't already exist).
828
829	if p.resultLevels[0] == -1 {
830		p.resultLevels[0] = p.embeddingLevel
831	}
832	for i := 1; i < len(p.initialTypes); i++ {
833		if p.resultLevels[i] == -1 {
834			p.resultLevels[i] = p.resultLevels[i-1]
835		}
836	}
837	// Embedding information is for informational purposes only so need not be
838	// adjusted.
839}
840
841//
842// Output
843//
844
845// getLevels computes levels array breaking lines at offsets in linebreaks.
846// Rule L1.
847//
848// The linebreaks array must include at least one value. The values must be
849// in strictly increasing order (no duplicates) between 1 and the length of
850// the text, inclusive. The last value must be the length of the text.
851func (p *paragraph) getLevels(linebreaks []int) []level {
852	// Note that since the previous processing has removed all
853	// P, S, and WS values from resultTypes, the values referred to
854	// in these rules are the initial types, before any processing
855	// has been applied (including processing of overrides).
856	//
857	// This example implementation has reinserted explicit format codes
858	// and BN, in order that the levels array correspond to the
859	// initial text. Their final placement is not normative.
860	// These codes are treated like WS in this implementation,
861	// so they don't interrupt sequences of WS.
862
863	validateLineBreaks(linebreaks, p.Len())
864
865	result := append([]level(nil), p.resultLevels...)
866
867	// don't worry about linebreaks since if there is a break within
868	// a series of WS values preceding S, the linebreak itself
869	// causes the reset.
870	for i, t := range p.initialTypes {
871		if t.in(B, S) {
872			// Rule L1, clauses one and two.
873			result[i] = p.embeddingLevel
874
875			// Rule L1, clause three.
876			for j := i - 1; j >= 0; j-- {
877				if isWhitespace(p.initialTypes[j]) { // including format codes
878					result[j] = p.embeddingLevel
879				} else {
880					break
881				}
882			}
883		}
884	}
885
886	// Rule L1, clause four.
887	start := 0
888	for _, limit := range linebreaks {
889		for j := limit - 1; j >= start; j-- {
890			if isWhitespace(p.initialTypes[j]) { // including format codes
891				result[j] = p.embeddingLevel
892			} else {
893				break
894			}
895		}
896		start = limit
897	}
898
899	return result
900}
901
902// getReordering returns the reordering of lines from a visual index to a
903// logical index for line breaks at the given offsets.
904//
905// Lines are concatenated from left to right. So for example, the fifth
906// character from the left on the third line is
907//
908// 		getReordering(linebreaks)[linebreaks[1] + 4]
909//
910// (linebreaks[1] is the position after the last character of the second
911// line, which is also the index of the first character on the third line,
912// and adding four gets the fifth character from the left).
913//
914// The linebreaks array must include at least one value. The values must be
915// in strictly increasing order (no duplicates) between 1 and the length of
916// the text, inclusive. The last value must be the length of the text.
917func (p *paragraph) getReordering(linebreaks []int) []int {
918	validateLineBreaks(linebreaks, p.Len())
919
920	return computeMultilineReordering(p.getLevels(linebreaks), linebreaks)
921}
922
923// Return multiline reordering array for a given level array. Reordering
924// does not occur across a line break.
925func computeMultilineReordering(levels []level, linebreaks []int) []int {
926	result := make([]int, len(levels))
927
928	start := 0
929	for _, limit := range linebreaks {
930		tempLevels := make([]level, limit-start)
931		copy(tempLevels, levels[start:])
932
933		for j, order := range computeReordering(tempLevels) {
934			result[start+j] = order + start
935		}
936		start = limit
937	}
938	return result
939}
940
941// Return reordering array for a given level array. This reorders a single
942// line. The reordering is a visual to logical map. For example, the
943// leftmost char is string.charAt(order[0]). Rule L2.
944func computeReordering(levels []level) []int {
945	result := make([]int, len(levels))
946	// initialize order
947	for i := range result {
948		result[i] = i
949	}
950
951	// locate highest level found on line.
952	// Note the rules say text, but no reordering across line bounds is
953	// performed, so this is sufficient.
954	highestLevel := level(0)
955	lowestOddLevel := level(maxDepth + 2)
956	for _, level := range levels {
957		if level > highestLevel {
958			highestLevel = level
959		}
960		if level&1 != 0 && level < lowestOddLevel {
961			lowestOddLevel = level
962		}
963	}
964
965	for level := highestLevel; level >= lowestOddLevel; level-- {
966		for i := 0; i < len(levels); i++ {
967			if levels[i] >= level {
968				// find range of text at or above this level
969				start := i
970				limit := i + 1
971				for limit < len(levels) && levels[limit] >= level {
972					limit++
973				}
974
975				for j, k := start, limit-1; j < k; j, k = j+1, k-1 {
976					result[j], result[k] = result[k], result[j]
977				}
978				// skip to end of level run
979				i = limit
980			}
981		}
982	}
983
984	return result
985}
986
987// isWhitespace reports whether the type is considered a whitespace type for the
988// line break rules.
989func isWhitespace(c Class) bool {
990	switch c {
991	case LRE, RLE, LRO, RLO, PDF, LRI, RLI, FSI, PDI, BN, WS:
992		return true
993	}
994	return false
995}
996
997// isRemovedByX9 reports whether the type is one of the types removed in X9.
998func isRemovedByX9(c Class) bool {
999	switch c {
1000	case LRE, RLE, LRO, RLO, PDF, BN:
1001		return true
1002	}
1003	return false
1004}
1005
1006// typeForLevel reports the strong type (L or R) corresponding to the level.
1007func typeForLevel(level level) Class {
1008	if (level & 0x1) == 0 {
1009		return L
1010	}
1011	return R
1012}
1013
1014func validateTypes(types []Class) error {
1015	if len(types) == 0 {
1016		return fmt.Errorf("types is null")
1017	}
1018	for i, t := range types[:len(types)-1] {
1019		if t == B {
1020			return fmt.Errorf("B type before end of paragraph at index: %d", i)
1021		}
1022	}
1023	return nil
1024}
1025
1026func validateParagraphEmbeddingLevel(embeddingLevel level) error {
1027	if embeddingLevel != implicitLevel &&
1028		embeddingLevel != 0 &&
1029		embeddingLevel != 1 {
1030		return fmt.Errorf("illegal paragraph embedding level: %d", embeddingLevel)
1031	}
1032	return nil
1033}
1034
1035func validateLineBreaks(linebreaks []int, textLength int) error {
1036	prev := 0
1037	for i, next := range linebreaks {
1038		if next <= prev {
1039			return fmt.Errorf("bad linebreak: %d at index: %d", next, i)
1040		}
1041		prev = next
1042	}
1043	if prev != textLength {
1044		return fmt.Errorf("last linebreak was %d, want %d", prev, textLength)
1045	}
1046	return nil
1047}
1048
1049func validatePbTypes(pairTypes []bracketType) error {
1050	if len(pairTypes) == 0 {
1051		return fmt.Errorf("pairTypes is null")
1052	}
1053	for i, pt := range pairTypes {
1054		switch pt {
1055		case bpNone, bpOpen, bpClose:
1056		default:
1057			return fmt.Errorf("illegal pairType value at %d: %v", i, pairTypes[i])
1058		}
1059	}
1060	return nil
1061}
1062
1063func validatePbValues(pairValues []rune, pairTypes []bracketType) error {
1064	if pairValues == nil {
1065		return fmt.Errorf("pairValues is null")
1066	}
1067	if len(pairTypes) != len(pairValues) {
1068		return fmt.Errorf("pairTypes is different length from pairValues")
1069	}
1070	return nil
1071}
1072