1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 /*
4 ******************************************************************************
5 *
6 *   Copyright (C) 1999-2015, International Business Machines
7 *   Corporation and others.  All Rights Reserved.
8 *
9 ******************************************************************************
10 *   file name:  ubidi.c
11 *   encoding:   UTF-8
12 *   tab size:   8 (not used)
13 *   indentation:4
14 *
15 *   created on: 1999jul27
16 *   created by: Markus W. Scherer, updated by Matitiahu Allouche
17 *
18 */
19 
20 #include "cmemory.h"
21 #include "unicode/utypes.h"
22 #include "unicode/ustring.h"
23 #include "unicode/uchar.h"
24 #include "unicode/ubidi.h"
25 #include "unicode/utf16.h"
26 #include "ubidi_props.h"
27 #include "ubidiimp.h"
28 #include "uassert.h"
29 
30 /*
31  * General implementation notes:
32  *
33  * Throughout the implementation, there are comments like (W2) that refer to
34  * rules of the BiDi algorithm, in this example to the second rule of the
35  * resolution of weak types.
36  *
37  * For handling surrogate pairs, where two UChar's form one "abstract" (or UTF-32)
38  * character according to UTF-16, the second UChar gets the directional property of
39  * the entire character assigned, while the first one gets a BN, a boundary
40  * neutral, type, which is ignored by most of the algorithm according to
41  * rule (X9) and the implementation suggestions of the BiDi algorithm.
42  *
43  * Later, adjustWSLevels() will set the level for each BN to that of the
44  * following character (UChar), which results in surrogate pairs getting the
45  * same level on each of their surrogates.
46  *
47  * In a UTF-8 implementation, the same thing could be done: the last byte of
48  * a multi-byte sequence would get the "real" property, while all previous
49  * bytes of that sequence would get BN.
50  *
51  * It is not possible to assign all those parts of a character the same real
52  * property because this would fail in the resolution of weak types with rules
53  * that look at immediately surrounding types.
54  *
55  * As a related topic, this implementation does not remove Boundary Neutral
56  * types from the input, but ignores them wherever this is relevant.
57  * For example, the loop for the resolution of the weak types reads
58  * types until it finds a non-BN.
59  * Also, explicit embedding codes are neither changed into BN nor removed.
60  * They are only treated the same way real BNs are.
61  * As stated before, adjustWSLevels() takes care of them at the end.
62  * For the purpose of conformance, the levels of all these codes
63  * do not matter.
64  *
65  * Note that this implementation modifies the dirProps
66  * after the initial setup, when applying X5c (replace FSI by LRI or RLI),
67  * X6, N0 (replace paired brackets by L or R).
68  *
69  * In this implementation, the resolution of weak types (W1 to W6),
70  * neutrals (N1 and N2), and the assignment of the resolved level (In)
71  * are all done in one single loop, in resolveImplicitLevels().
72  * Changes of dirProp values are done on the fly, without writing
73  * them back to the dirProps array.
74  *
75  *
 * This implementation contains code that bypasses steps of the
 * algorithm that are not needed for the specific paragraph,
 * in order to speed up the most common cases considerably,
 * such as text that is entirely LTR, or RTL text without numbers.
80  *
81  * Most of this is done by setting a bit for each directional property
82  * in a flags variable and later checking for whether there are
83  * any LTR characters or any RTL characters, or both, whether
84  * there are any explicit embedding codes, etc.
85  *
86  * If the (Xn) steps are performed, then the flags are re-evaluated,
87  * because they will then not contain the embedding codes any more
88  * and will be adjusted for override codes, so that subsequently
89  * more bypassing may be possible than what the initial flags suggested.
90  *
91  * If the text is not mixed-directional, then the
92  * algorithm steps for the weak type resolution are not performed,
93  * and all levels are set to the paragraph level.
94  *
95  * If there are no explicit embedding codes, then the (Xn) steps
96  * are not performed.
97  *
98  * If embedding levels are supplied as a parameter, then all
99  * explicit embedding codes are ignored, and the (Xn) steps
100  * are not performed.
101  *
102  * White Space types could get the level of the run they belong to,
103  * and are checked with a test of (flags&MASK_EMBEDDING) to
104  * consider if the paragraph direction should be considered in
105  * the flags variable.
106  *
107  * If there are no White Space types in the paragraph, then
108  * (L1) is not necessary in adjustWSLevels().
109  */
110 
111 /* to avoid some conditional statements, use tiny constant arrays */
112 static const Flags flagLR[2]={ DIRPROP_FLAG(L), DIRPROP_FLAG(R) };
113 static const Flags flagE[2]={ DIRPROP_FLAG(LRE), DIRPROP_FLAG(RLE) };
114 static const Flags flagO[2]={ DIRPROP_FLAG(LRO), DIRPROP_FLAG(RLO) };
115 
116 #define DIRPROP_FLAG_LR(level) flagLR[(level)&1]
117 #define DIRPROP_FLAG_E(level)  flagE[(level)&1]
118 #define DIRPROP_FLAG_O(level)  flagO[(level)&1]
119 
120 #define DIR_FROM_STRONG(strong) ((strong)==L ? L : R)
121 
122 #define NO_OVERRIDE(level)  ((level)&~UBIDI_LEVEL_OVERRIDE)
123 
124 /* UBiDi object management -------------------------------------------------- */
125 
126 U_CAPI UBiDi * U_EXPORT2
ubidi_open(void)127 ubidi_open(void)
128 {
129     UErrorCode errorCode=U_ZERO_ERROR;
130     return ubidi_openSized(0, 0, &errorCode);
131 }
132 
133 U_CAPI UBiDi * U_EXPORT2
ubidi_openSized(int32_t maxLength,int32_t maxRunCount,UErrorCode * pErrorCode)134 ubidi_openSized(int32_t maxLength, int32_t maxRunCount, UErrorCode *pErrorCode) {
135     UBiDi *pBiDi;
136 
137     /* check the argument values */
138     if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
139         return NULL;
140     } else if(maxLength<0 || maxRunCount<0) {
141         *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
142         return NULL;    /* invalid arguments */
143     }
144 
145     /* allocate memory for the object */
146     pBiDi=(UBiDi *)uprv_malloc(sizeof(UBiDi));
147     if(pBiDi==NULL) {
148         *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
149         return NULL;
150     }
151 
152     /* reset the object, all pointers NULL, all flags FALSE, all sizes 0 */
153     uprv_memset(pBiDi, 0, sizeof(UBiDi));
154 
155     /* allocate memory for arrays as requested */
156     if(maxLength>0) {
157         if( !getInitialDirPropsMemory(pBiDi, maxLength) ||
158             !getInitialLevelsMemory(pBiDi, maxLength)
159         ) {
160             *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
161         }
162     } else {
163         pBiDi->mayAllocateText=TRUE;
164     }
165 
166     if(maxRunCount>0) {
167         if(maxRunCount==1) {
168             /* use simpleRuns[] */
169             pBiDi->runsSize=sizeof(Run);
170         } else if(!getInitialRunsMemory(pBiDi, maxRunCount)) {
171             *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
172         }
173     } else {
174         pBiDi->mayAllocateRuns=TRUE;
175     }
176 
177     if(U_SUCCESS(*pErrorCode)) {
178         return pBiDi;
179     } else {
180         ubidi_close(pBiDi);
181         return NULL;
182     }
183 }
184 
185 /*
186  * We are allowed to allocate memory if memory==NULL or
187  * mayAllocate==TRUE for each array that we need.
188  * We also try to grow memory as needed if we
189  * allocate it.
190  *
191  * Assume sizeNeeded>0.
192  * If *pMemory!=NULL, then assume *pSize>0.
193  *
194  * ### this realloc() may unnecessarily copy the old data,
195  * which we know we don't need any more;
196  * is this the best way to do this??
197  */
198 U_CFUNC UBool
ubidi_getMemory(BidiMemoryForAllocation * bidiMem,int32_t * pSize,UBool mayAllocate,int32_t sizeNeeded)199 ubidi_getMemory(BidiMemoryForAllocation *bidiMem, int32_t *pSize, UBool mayAllocate, int32_t sizeNeeded) {
200     void **pMemory = (void **)bidiMem;
201     /* check for existing memory */
202     if(*pMemory==NULL) {
203         /* we need to allocate memory */
204         if(mayAllocate && (*pMemory=uprv_malloc(sizeNeeded))!=NULL) {
205             *pSize=sizeNeeded;
206             return TRUE;
207         } else {
208             return FALSE;
209         }
210     } else {
211         if(sizeNeeded<=*pSize) {
212             /* there is already enough memory */
213             return TRUE;
214         }
215         else if(!mayAllocate) {
216             /* not enough memory, and we must not allocate */
217             return FALSE;
218         } else {
219             /* we try to grow */
220             void *memory;
221             /* in most cases, we do not need the copy-old-data part of
222              * realloc, but it is needed when adding runs using getRunsMemory()
223              * in setParaRunsOnly()
224              */
225             if((memory=uprv_realloc(*pMemory, sizeNeeded))!=NULL) {
226                 *pMemory=memory;
227                 *pSize=sizeNeeded;
228                 return TRUE;
229             } else {
230                 /* we failed to grow */
231                 return FALSE;
232             }
233         }
234     }
235 }
236 
237 U_CAPI void U_EXPORT2
ubidi_close(UBiDi * pBiDi)238 ubidi_close(UBiDi *pBiDi) {
239     if(pBiDi!=NULL) {
240         pBiDi->pParaBiDi=NULL;          /* in case one tries to reuse this block */
241         if(pBiDi->dirPropsMemory!=NULL) {
242             uprv_free(pBiDi->dirPropsMemory);
243         }
244         if(pBiDi->levelsMemory!=NULL) {
245             uprv_free(pBiDi->levelsMemory);
246         }
247         if(pBiDi->openingsMemory!=NULL) {
248             uprv_free(pBiDi->openingsMemory);
249         }
250         if(pBiDi->parasMemory!=NULL) {
251             uprv_free(pBiDi->parasMemory);
252         }
253         if(pBiDi->runsMemory!=NULL) {
254             uprv_free(pBiDi->runsMemory);
255         }
256         if(pBiDi->isolatesMemory!=NULL) {
257             uprv_free(pBiDi->isolatesMemory);
258         }
259         if(pBiDi->insertPoints.points!=NULL) {
260             uprv_free(pBiDi->insertPoints.points);
261         }
262 
263         uprv_free(pBiDi);
264     }
265 }
266 
267 /* set to approximate "inverse BiDi" ---------------------------------------- */
268 
269 U_CAPI void U_EXPORT2
ubidi_setInverse(UBiDi * pBiDi,UBool isInverse)270 ubidi_setInverse(UBiDi *pBiDi, UBool isInverse) {
271     if(pBiDi!=NULL) {
272         pBiDi->isInverse=isInverse;
273         pBiDi->reorderingMode = isInverse ? UBIDI_REORDER_INVERSE_NUMBERS_AS_L
274                                           : UBIDI_REORDER_DEFAULT;
275     }
276 }
277 
278 U_CAPI UBool U_EXPORT2
ubidi_isInverse(UBiDi * pBiDi)279 ubidi_isInverse(UBiDi *pBiDi) {
280     if(pBiDi!=NULL) {
281         return pBiDi->isInverse;
282     } else {
283         return FALSE;
284     }
285 }
286 
287 /* FOOD FOR THOUGHT: currently the reordering modes are a mixture of
288  * algorithm for direct BiDi, algorithm for inverse BiDi and the bizarre
289  * concept of RUNS_ONLY which is a double operation.
290  * It could be advantageous to divide this into 3 concepts:
291  * a) Operation: direct / inverse / RUNS_ONLY
292  * b) Direct algorithm: default / NUMBERS_SPECIAL / GROUP_NUMBERS_WITH_R
293  * c) Inverse algorithm: default / INVERSE_LIKE_DIRECT / NUMBERS_SPECIAL
294  * This would allow combinations not possible today like RUNS_ONLY with
295  * NUMBERS_SPECIAL.
296  * Also allow to set INSERT_MARKS for the direct step of RUNS_ONLY and
297  * REMOVE_CONTROLS for the inverse step.
298  * Not all combinations would be supported, and probably not all do make sense.
299  * This would need to document which ones are supported and what are the
300  * fallbacks for unsupported combinations.
301  */
302 U_CAPI void U_EXPORT2
ubidi_setReorderingMode(UBiDi * pBiDi,UBiDiReorderingMode reorderingMode)303 ubidi_setReorderingMode(UBiDi *pBiDi, UBiDiReorderingMode reorderingMode) {
304     if ((pBiDi!=NULL) && (reorderingMode >= UBIDI_REORDER_DEFAULT)
305                         && (reorderingMode < UBIDI_REORDER_COUNT)) {
306         pBiDi->reorderingMode = reorderingMode;
307         pBiDi->isInverse = (UBool)(reorderingMode == UBIDI_REORDER_INVERSE_NUMBERS_AS_L);
308     }
309 }
310 
311 U_CAPI UBiDiReorderingMode U_EXPORT2
ubidi_getReorderingMode(UBiDi * pBiDi)312 ubidi_getReorderingMode(UBiDi *pBiDi) {
313     if (pBiDi!=NULL) {
314         return pBiDi->reorderingMode;
315     } else {
316         return UBIDI_REORDER_DEFAULT;
317     }
318 }
319 
320 U_CAPI void U_EXPORT2
ubidi_setReorderingOptions(UBiDi * pBiDi,uint32_t reorderingOptions)321 ubidi_setReorderingOptions(UBiDi *pBiDi, uint32_t reorderingOptions) {
322     if (reorderingOptions & UBIDI_OPTION_REMOVE_CONTROLS) {
323         reorderingOptions&=~UBIDI_OPTION_INSERT_MARKS;
324     }
325     if (pBiDi!=NULL) {
326         pBiDi->reorderingOptions=reorderingOptions;
327     }
328 }
329 
330 U_CAPI uint32_t U_EXPORT2
ubidi_getReorderingOptions(UBiDi * pBiDi)331 ubidi_getReorderingOptions(UBiDi *pBiDi) {
332     if (pBiDi!=NULL) {
333         return pBiDi->reorderingOptions;
334     } else {
335         return 0;
336     }
337 }
338 
339 U_CAPI UBiDiDirection U_EXPORT2
ubidi_getBaseDirection(const UChar * text,int32_t length)340 ubidi_getBaseDirection(const UChar *text,
341 int32_t length){
342 
343     int32_t i;
344     UChar32 uchar;
345     UCharDirection dir;
346 
347     if( text==NULL || length<-1 ){
348         return UBIDI_NEUTRAL;
349     }
350 
351     if(length==-1) {
352         length=u_strlen(text);
353     }
354 
355     for( i = 0 ; i < length; ) {
356         /* i is incremented by U16_NEXT */
357         U16_NEXT(text, i, length, uchar);
358         dir = u_charDirection(uchar);
359         if( dir == U_LEFT_TO_RIGHT )
360                 return UBIDI_LTR;
361         if( dir == U_RIGHT_TO_LEFT || dir ==U_RIGHT_TO_LEFT_ARABIC )
362                 return UBIDI_RTL;
363     }
364     return UBIDI_NEUTRAL;
365 }
366 
367 /* perform (P2)..(P3) ------------------------------------------------------- */
368 
369 /**
370  * Returns the directionality of the first strong character
371  * after the last B in prologue, if any.
372  * Requires prologue!=null.
373  */
374 static DirProp
firstL_R_AL(UBiDi * pBiDi)375 firstL_R_AL(UBiDi *pBiDi) {
376     const UChar *text=pBiDi->prologue;
377     int32_t length=pBiDi->proLength;
378     int32_t i;
379     UChar32 uchar;
380     DirProp dirProp, result=ON;
381     for(i=0; i<length; ) {
382         /* i is incremented by U16_NEXT */
383         U16_NEXT(text, i, length, uchar);
384         dirProp=(DirProp)ubidi_getCustomizedClass(pBiDi, uchar);
385         if(result==ON) {
386             if(dirProp==L || dirProp==R || dirProp==AL) {
387                 result=dirProp;
388             }
389         } else {
390             if(dirProp==B) {
391                 result=ON;
392             }
393         }
394     }
395     return result;
396 }
397 
398 /*
399  * Check that there are enough entries in the array pointed to by pBiDi->paras
400  */
401 static UBool
checkParaCount(UBiDi * pBiDi)402 checkParaCount(UBiDi *pBiDi) {
403     int32_t count=pBiDi->paraCount;
404     if(pBiDi->paras==pBiDi->simpleParas) {
405         if(count<=SIMPLE_PARAS_COUNT)
406             return TRUE;
407         if(!getInitialParasMemory(pBiDi, SIMPLE_PARAS_COUNT * 2))
408             return FALSE;
409         pBiDi->paras=pBiDi->parasMemory;
410         uprv_memcpy(pBiDi->parasMemory, pBiDi->simpleParas, SIMPLE_PARAS_COUNT * sizeof(Para));
411         return TRUE;
412     }
413     if(!getInitialParasMemory(pBiDi, count * 2))
414         return FALSE;
415     pBiDi->paras=pBiDi->parasMemory;
416     return TRUE;
417 }
418 
419 /*
420  * Get the directional properties for the text, calculate the flags bit-set, and
421  * determine the paragraph level if necessary (in pBiDi->paras[i].level).
422  * FSI initiators are also resolved and their dirProp replaced with LRI or RLI.
423  * When encountering an FSI, it is initially replaced with an LRI, which is the
424  * default. Only if a strong R or AL is found within its scope will the LRI be
425  * replaced by an RLI.
426  */
427 static UBool
getDirProps(UBiDi * pBiDi)428 getDirProps(UBiDi *pBiDi) {
429     const UChar *text=pBiDi->text;
430     DirProp *dirProps=pBiDi->dirPropsMemory;    /* pBiDi->dirProps is const */
431 
432     int32_t i=0, originalLength=pBiDi->originalLength;
433     Flags flags=0;      /* collect all directionalities in the text */
434     UChar32 uchar;
435     DirProp dirProp=0, defaultParaLevel=0;  /* initialize to avoid compiler warnings */
436     UBool isDefaultLevel=IS_DEFAULT_LEVEL(pBiDi->paraLevel);
437     /* for inverse BiDi, the default para level is set to RTL if there is a
438        strong R or AL character at either end of the text                            */
439     UBool isDefaultLevelInverse=isDefaultLevel && (UBool)
440             (pBiDi->reorderingMode==UBIDI_REORDER_INVERSE_LIKE_DIRECT ||
441              pBiDi->reorderingMode==UBIDI_REORDER_INVERSE_FOR_NUMBERS_SPECIAL);
442     int32_t lastArabicPos=-1;
443     int32_t controlCount=0;
444     UBool removeBiDiControls = (UBool)(pBiDi->reorderingOptions &
445                                        UBIDI_OPTION_REMOVE_CONTROLS);
446 
447     enum State {
448          NOT_SEEKING_STRONG,            /* 0: not contextual paraLevel, not after FSI */
449          SEEKING_STRONG_FOR_PARA,       /* 1: looking for first strong char in para */
450          SEEKING_STRONG_FOR_FSI,        /* 2: looking for first strong after FSI */
451          LOOKING_FOR_PDI                /* 3: found strong after FSI, looking for PDI */
452     };
453     State state;
454     DirProp lastStrong=ON;              /* for default level & inverse BiDi */
455     /* The following stacks are used to manage isolate sequences. Those
456        sequences may be nested, but obviously never more deeply than the
457        maximum explicit embedding level.
458        lastStack is the index of the last used entry in the stack. A value of -1
459        means that there is no open isolate sequence.
460        lastStack is reset to -1 on paragraph boundaries. */
461     /* The following stack contains the position of the initiator of
462        each open isolate sequence */
463     int32_t isolateStartStack[UBIDI_MAX_EXPLICIT_LEVEL+1];
464     /* The following stack contains the last known state before
465        encountering the initiator of an isolate sequence */
466     State  previousStateStack[UBIDI_MAX_EXPLICIT_LEVEL+1];
467     int32_t stackLast=-1;
468 
469     if(pBiDi->reorderingOptions & UBIDI_OPTION_STREAMING)
470         pBiDi->length=0;
471     defaultParaLevel=pBiDi->paraLevel&1;
472     if(isDefaultLevel) {
473         pBiDi->paras[0].level=defaultParaLevel;
474         lastStrong=defaultParaLevel;
475         if(pBiDi->proLength>0 &&                    /* there is a prologue */
476            (dirProp=firstL_R_AL(pBiDi))!=ON) {  /* with a strong character */
477             if(dirProp==L)
478                 pBiDi->paras[0].level=0;    /* set the default para level */
479             else
480                 pBiDi->paras[0].level=1;    /* set the default para level */
481             state=NOT_SEEKING_STRONG;
482         } else {
483             state=SEEKING_STRONG_FOR_PARA;
484         }
485     } else {
486         pBiDi->paras[0].level=pBiDi->paraLevel;
487         state=NOT_SEEKING_STRONG;
488     }
489     /* count paragraphs and determine the paragraph level (P2..P3) */
490     /*
491      * see comment in ubidi.h:
492      * the UBIDI_DEFAULT_XXX values are designed so that
493      * their bit 0 alone yields the intended default
494      */
495     for( /* i=0 above */ ; i<originalLength; ) {
496         /* i is incremented by U16_NEXT */
497         U16_NEXT(text, i, originalLength, uchar);
498         flags|=DIRPROP_FLAG(dirProp=(DirProp)ubidi_getCustomizedClass(pBiDi, uchar));
499         dirProps[i-1]=dirProp;
500         if(uchar>0xffff) {  /* set the lead surrogate's property to BN */
501             flags|=DIRPROP_FLAG(BN);
502             dirProps[i-2]=BN;
503         }
504         if(removeBiDiControls && IS_BIDI_CONTROL_CHAR(uchar))
505             controlCount++;
506         if(dirProp==L) {
507             if(state==SEEKING_STRONG_FOR_PARA) {
508                 pBiDi->paras[pBiDi->paraCount-1].level=0;
509                 state=NOT_SEEKING_STRONG;
510             }
511             else if(state==SEEKING_STRONG_FOR_FSI) {
512                 if(stackLast<=UBIDI_MAX_EXPLICIT_LEVEL) {
513                     /* no need for next statement, already set by default */
514                     /* dirProps[isolateStartStack[stackLast]]=LRI; */
515                     flags|=DIRPROP_FLAG(LRI);
516                 }
517                 state=LOOKING_FOR_PDI;
518             }
519             lastStrong=L;
520             continue;
521         }
522         if(dirProp==R || dirProp==AL) {
523             if(state==SEEKING_STRONG_FOR_PARA) {
524                 pBiDi->paras[pBiDi->paraCount-1].level=1;
525                 state=NOT_SEEKING_STRONG;
526             }
527             else if(state==SEEKING_STRONG_FOR_FSI) {
528                 if(stackLast<=UBIDI_MAX_EXPLICIT_LEVEL) {
529                     dirProps[isolateStartStack[stackLast]]=RLI;
530                     flags|=DIRPROP_FLAG(RLI);
531                 }
532                 state=LOOKING_FOR_PDI;
533             }
534             lastStrong=R;
535             if(dirProp==AL)
536                 lastArabicPos=i-1;
537             continue;
538         }
539         if(dirProp>=FSI && dirProp<=RLI) {  /* FSI, LRI or RLI */
540             stackLast++;
541             if(stackLast<=UBIDI_MAX_EXPLICIT_LEVEL) {
542                 isolateStartStack[stackLast]=i-1;
543                 previousStateStack[stackLast]=state;
544             }
545             if(dirProp==FSI) {
546                 dirProps[i-1]=LRI;      /* default if no strong char */
547                 state=SEEKING_STRONG_FOR_FSI;
548             }
549             else
550                 state=LOOKING_FOR_PDI;
551             continue;
552         }
553         if(dirProp==PDI) {
554             if(state==SEEKING_STRONG_FOR_FSI) {
555                 if(stackLast<=UBIDI_MAX_EXPLICIT_LEVEL) {
556                     /* no need for next statement, already set by default */
557                     /* dirProps[isolateStartStack[stackLast]]=LRI; */
558                     flags|=DIRPROP_FLAG(LRI);
559                 }
560             }
561             if(stackLast>=0) {
562                 if(stackLast<=UBIDI_MAX_EXPLICIT_LEVEL)
563                     state=previousStateStack[stackLast];
564                 stackLast--;
565             }
566             continue;
567         }
568         if(dirProp==B) {
569             if(i<originalLength && uchar==CR && text[i]==LF) /* do nothing on the CR */
570                 continue;
571             pBiDi->paras[pBiDi->paraCount-1].limit=i;
572             if(isDefaultLevelInverse && lastStrong==R)
573                 pBiDi->paras[pBiDi->paraCount-1].level=1;
574             if(pBiDi->reorderingOptions & UBIDI_OPTION_STREAMING) {
575                 /* When streaming, we only process whole paragraphs
576                    thus some updates are only done on paragraph boundaries */
577                 pBiDi->length=i;        /* i is index to next character */
578                 pBiDi->controlCount=controlCount;
579             }
580             if(i<originalLength) {              /* B not last char in text */
581                 pBiDi->paraCount++;
582                 if(checkParaCount(pBiDi)==FALSE)    /* not enough memory for a new para entry */
583                     return FALSE;
584                 if(isDefaultLevel) {
585                     pBiDi->paras[pBiDi->paraCount-1].level=defaultParaLevel;
586                     state=SEEKING_STRONG_FOR_PARA;
587                     lastStrong=defaultParaLevel;
588                 } else {
589                     pBiDi->paras[pBiDi->paraCount-1].level=pBiDi->paraLevel;
590                     state=NOT_SEEKING_STRONG;
591                 }
592                 stackLast=-1;
593             }
594             continue;
595         }
596     }
597     /* Ignore still open isolate sequences with overflow */
598     if(stackLast>UBIDI_MAX_EXPLICIT_LEVEL) {
599         stackLast=UBIDI_MAX_EXPLICIT_LEVEL;
600         state=SEEKING_STRONG_FOR_FSI;   /* to be on the safe side */
601     }
602     /* Resolve direction of still unresolved open FSI sequences */
603     while(stackLast>=0) {
604         if(state==SEEKING_STRONG_FOR_FSI) {
605             /* no need for next statement, already set by default */
606             /* dirProps[isolateStartStack[stackLast]]=LRI; */
607             flags|=DIRPROP_FLAG(LRI);
608             break;
609         }
610         state=previousStateStack[stackLast];
611         stackLast--;
612     }
613     /* When streaming, ignore text after the last paragraph separator */
614     if(pBiDi->reorderingOptions & UBIDI_OPTION_STREAMING) {
615         if(pBiDi->length<originalLength)
616             pBiDi->paraCount--;
617     } else {
618         pBiDi->paras[pBiDi->paraCount-1].limit=originalLength;
619         pBiDi->controlCount=controlCount;
620     }
621     /* For inverse bidi, default para direction is RTL if there is
622        a strong R or AL at either end of the paragraph */
623     if(isDefaultLevelInverse && lastStrong==R) {
624         pBiDi->paras[pBiDi->paraCount-1].level=1;
625     }
626     if(isDefaultLevel) {
627         pBiDi->paraLevel=static_cast<UBiDiLevel>(pBiDi->paras[0].level);
628     }
629     /* The following is needed to resolve the text direction for default level
630        paragraphs containing no strong character */
631     for(i=0; i<pBiDi->paraCount; i++)
632         flags|=DIRPROP_FLAG_LR(pBiDi->paras[i].level);
633 
634     if(pBiDi->orderParagraphsLTR && (flags&DIRPROP_FLAG(B))) {
635         flags|=DIRPROP_FLAG(L);
636     }
637     pBiDi->flags=flags;
638     pBiDi->lastArabicPos=lastArabicPos;
639     return TRUE;
640 }
641 
642 /* determine the paragraph level at position index */
643 U_CFUNC UBiDiLevel
ubidi_getParaLevelAtIndex(const UBiDi * pBiDi,int32_t pindex)644 ubidi_getParaLevelAtIndex(const UBiDi *pBiDi, int32_t pindex) {
645     int32_t i;
646     for(i=0; i<pBiDi->paraCount; i++)
647         if(pindex<pBiDi->paras[i].limit)
648             break;
649     if(i>=pBiDi->paraCount)
650         i=pBiDi->paraCount-1;
651     return (UBiDiLevel)(pBiDi->paras[i].level);
652 }
653 
654 /* Functions for handling paired brackets ----------------------------------- */
655 
656 /* In the isoRuns array, the first entry is used for text outside of any
657    isolate sequence.  Higher entries are used for each more deeply nested
658    isolate sequence. isoRunLast is the index of the last used entry.  The
659    openings array is used to note the data of opening brackets not yet
660    matched by a closing bracket, or matched but still susceptible to change
661    level.
662    Each isoRun entry contains the index of the first and
663    one-after-last openings entries for pending opening brackets it
664    contains.  The next openings entry to use is the one-after-last of the
665    most deeply nested isoRun entry.
666    isoRun entries also contain their current embedding level and the last
667    encountered strong character, since these will be needed to resolve
668    the level of paired brackets.  */
669 
670 static void
bracketInit(UBiDi * pBiDi,BracketData * bd)671 bracketInit(UBiDi *pBiDi, BracketData *bd) {
672     bd->pBiDi=pBiDi;
673     bd->isoRunLast=0;
674     bd->isoRuns[0].start=0;
675     bd->isoRuns[0].limit=0;
676     bd->isoRuns[0].level=GET_PARALEVEL(pBiDi, 0);
677     UBiDiLevel t = GET_PARALEVEL(pBiDi, 0) & 1;
678     bd->isoRuns[0].lastStrong = bd->isoRuns[0].lastBase = t;
679     bd->isoRuns[0].contextDir = (UBiDiDirection)t;
680     bd->isoRuns[0].contextPos=0;
681     if(pBiDi->openingsMemory) {
682         bd->openings=pBiDi->openingsMemory;
683         bd->openingsCount=pBiDi->openingsSize / sizeof(Opening);
684     } else {
685         bd->openings=bd->simpleOpenings;
686         bd->openingsCount=SIMPLE_OPENINGS_COUNT;
687     }
688     bd->isNumbersSpecial=bd->pBiDi->reorderingMode==UBIDI_REORDER_NUMBERS_SPECIAL ||
689                          bd->pBiDi->reorderingMode==UBIDI_REORDER_INVERSE_FOR_NUMBERS_SPECIAL;
690 }
691 
692 /* paragraph boundary */
693 static void
bracketProcessB(BracketData * bd,UBiDiLevel level)694 bracketProcessB(BracketData *bd, UBiDiLevel level) {
695     bd->isoRunLast=0;
696     bd->isoRuns[0].limit=0;
697     bd->isoRuns[0].level=level;
698     bd->isoRuns[0].lastStrong=bd->isoRuns[0].lastBase=level&1;
699     bd->isoRuns[0].contextDir=(UBiDiDirection)(level&1);
700     bd->isoRuns[0].contextPos=0;
701 }
702 
703 /* LRE, LRO, RLE, RLO, PDF */
704 static void
bracketProcessBoundary(BracketData * bd,int32_t lastCcPos,UBiDiLevel contextLevel,UBiDiLevel embeddingLevel)705 bracketProcessBoundary(BracketData *bd, int32_t lastCcPos,
706                        UBiDiLevel contextLevel, UBiDiLevel embeddingLevel) {
707     IsoRun *pLastIsoRun=&bd->isoRuns[bd->isoRunLast];
708     DirProp *dirProps=bd->pBiDi->dirProps;
709     if(DIRPROP_FLAG(dirProps[lastCcPos])&MASK_ISO)  /* after an isolate */
710         return;
711     if(NO_OVERRIDE(embeddingLevel)>NO_OVERRIDE(contextLevel))   /* not a PDF */
712         contextLevel=embeddingLevel;
713     pLastIsoRun->limit=pLastIsoRun->start;
714     pLastIsoRun->level=embeddingLevel;
715     pLastIsoRun->lastStrong=pLastIsoRun->lastBase=contextLevel&1;
716     pLastIsoRun->contextDir=(UBiDiDirection)(contextLevel&1);
717     pLastIsoRun->contextPos=(UBiDiDirection)lastCcPos;
718 }
719 
720 /* LRI or RLI */
721 static void
bracketProcessLRI_RLI(BracketData * bd,UBiDiLevel level)722 bracketProcessLRI_RLI(BracketData *bd, UBiDiLevel level) {
723     IsoRun *pLastIsoRun=&bd->isoRuns[bd->isoRunLast];
724     int16_t lastLimit;
725     pLastIsoRun->lastBase=ON;
726     lastLimit=pLastIsoRun->limit;
727     bd->isoRunLast++;
728     pLastIsoRun++;
729     pLastIsoRun->start=pLastIsoRun->limit=lastLimit;
730     pLastIsoRun->level=level;
731     pLastIsoRun->lastStrong=pLastIsoRun->lastBase=level&1;
732     pLastIsoRun->contextDir=(UBiDiDirection)(level&1);
733     pLastIsoRun->contextPos=0;
734 }
735 
736 /* PDI */
737 static void
bracketProcessPDI(BracketData * bd)738 bracketProcessPDI(BracketData *bd) {
739     IsoRun *pLastIsoRun;
740     bd->isoRunLast--;
741     pLastIsoRun=&bd->isoRuns[bd->isoRunLast];
742     pLastIsoRun->lastBase=ON;
743 }
744 
745 /* newly found opening bracket: create an openings entry */
746 static UBool                            /* return TRUE if success */
bracketAddOpening(BracketData * bd,UChar match,int32_t position)747 bracketAddOpening(BracketData *bd, UChar match, int32_t position) {
748     IsoRun *pLastIsoRun=&bd->isoRuns[bd->isoRunLast];
749     Opening *pOpening;
750     if(pLastIsoRun->limit>=bd->openingsCount) {  /* no available new entry */
751         UBiDi *pBiDi=bd->pBiDi;
752         if(!getInitialOpeningsMemory(pBiDi, pLastIsoRun->limit * 2))
753             return FALSE;
754         if(bd->openings==bd->simpleOpenings)
755             uprv_memcpy(pBiDi->openingsMemory, bd->simpleOpenings,
756                         SIMPLE_OPENINGS_COUNT * sizeof(Opening));
757         bd->openings=pBiDi->openingsMemory;     /* may have changed */
758         bd->openingsCount=pBiDi->openingsSize / sizeof(Opening);
759     }
760     pOpening=&bd->openings[pLastIsoRun->limit];
761     pOpening->position=position;
762     pOpening->match=match;
763     pOpening->contextDir=pLastIsoRun->contextDir;
764     pOpening->contextPos=pLastIsoRun->contextPos;
765     pOpening->flags=0;
766     pLastIsoRun->limit++;
767     return TRUE;
768 }
769 
770 /* change N0c1 to N0c2 when a preceding bracket is assigned the embedding level */
771 static void
fixN0c(BracketData * bd,int32_t openingIndex,int32_t newPropPosition,DirProp newProp)772 fixN0c(BracketData *bd, int32_t openingIndex, int32_t newPropPosition, DirProp newProp) {
773     /* This function calls itself recursively */
774     IsoRun *pLastIsoRun=&bd->isoRuns[bd->isoRunLast];
775     Opening *qOpening;
776     DirProp *dirProps=bd->pBiDi->dirProps;
777     int32_t k, openingPosition, closingPosition;
778     for(k=openingIndex+1, qOpening=&bd->openings[k]; k<pLastIsoRun->limit; k++, qOpening++) {
779         if(qOpening->match>=0)      /* not an N0c match */
780             continue;
781         if(newPropPosition<qOpening->contextPos)
782             break;
783         if(newPropPosition>=qOpening->position)
784             continue;
785         if(newProp==qOpening->contextDir)
786             break;
787         openingPosition=qOpening->position;
788         dirProps[openingPosition]=newProp;
789         closingPosition=-(qOpening->match);
790         dirProps[closingPosition]=newProp;
791         qOpening->match=0;                      /* prevent further changes */
792         fixN0c(bd, k, openingPosition, newProp);
793         fixN0c(bd, k, closingPosition, newProp);
794     }
795 }
796 
797 /* process closing bracket */
798 static DirProp              /* return L or R if N0b or N0c, ON if N0d */
bracketProcessClosing(BracketData * bd,int32_t openIdx,int32_t position)799 bracketProcessClosing(BracketData *bd, int32_t openIdx, int32_t position) {
800     IsoRun *pLastIsoRun=&bd->isoRuns[bd->isoRunLast];
801     Opening *pOpening, *qOpening;
802     UBiDiDirection direction;
803     UBool stable;
804     DirProp newProp;
805     pOpening=&bd->openings[openIdx];
806     direction=(UBiDiDirection)(pLastIsoRun->level&1);
807     stable=TRUE;            /* assume stable until proved otherwise */
808 
809     /* The stable flag is set when brackets are paired and their
810        level is resolved and cannot be changed by what will be
811        found later in the source string.
812        An unstable match can occur only when applying N0c, where
813        the resolved level depends on the preceding context, and
814        this context may be affected by text occurring later.
815        Example: RTL paragraph containing:  abc[(latin) HEBREW]
816        When the closing parenthesis is encountered, it appears
817        that N0c1 must be applied since 'abc' sets an opposite
818        direction context and both parentheses receive level 2.
819        However, when the closing square bracket is processed,
820        N0b applies because of 'HEBREW' being included within the
821        brackets, thus the square brackets are treated like R and
822        receive level 1. However, this changes the preceding
823        context of the opening parenthesis, and it now appears
824        that N0c2 must be applied to the parentheses rather than
825        N0c1. */
826 
827     if((direction==0 && pOpening->flags&FOUND_L) ||
828        (direction==1 && pOpening->flags&FOUND_R)) {                         /* N0b */
829         newProp=static_cast<DirProp>(direction);
830     }
831     else if(pOpening->flags&(FOUND_L|FOUND_R)) {                            /* N0c */
832         /* it is stable if there is no containing pair or in
833            conditions too complicated and not worth checking */
834         stable=(openIdx==pLastIsoRun->start);
835         if(direction!=pOpening->contextDir)
836             newProp= static_cast<DirProp>(pOpening->contextDir);           /* N0c1 */
837         else
838             newProp= static_cast<DirProp>(direction);                      /* N0c2 */
839     } else {
840         /* forget this and any brackets nested within this pair */
841         pLastIsoRun->limit= static_cast<uint16_t>(openIdx);
842         return ON;                                                          /* N0d */
843     }
844     bd->pBiDi->dirProps[pOpening->position]=newProp;
845     bd->pBiDi->dirProps[position]=newProp;
846     /* Update nested N0c pairs that may be affected */
847     fixN0c(bd, openIdx, pOpening->position, newProp);
848     if(stable) {
849         pLastIsoRun->limit= static_cast<uint16_t>(openIdx); /* forget any brackets nested within this pair */
850         /* remove lower located synonyms if any */
851         while(pLastIsoRun->limit>pLastIsoRun->start &&
852               bd->openings[pLastIsoRun->limit-1].position==pOpening->position)
853             pLastIsoRun->limit--;
854     } else {
855         int32_t k;
856         pOpening->match=-position;
857         /* neutralize lower located synonyms if any */
858         k=openIdx-1;
859         while(k>=pLastIsoRun->start &&
860               bd->openings[k].position==pOpening->position)
861             bd->openings[k--].match=0;
862         /* neutralize any unmatched opening between the current pair;
863            this will also neutralize higher located synonyms if any */
864         for(k=openIdx+1; k<pLastIsoRun->limit; k++) {
865             qOpening=&bd->openings[k];
866             if(qOpening->position>=position)
867                 break;
868             if(qOpening->match>0)
869                 qOpening->match=0;
870         }
871     }
872     return newProp;
873 }
874 
875 /* handle strong characters, digits and candidates for closing brackets */
876 static UBool                            /* return TRUE if success */
bracketProcessChar(BracketData * bd,int32_t position)877 bracketProcessChar(BracketData *bd, int32_t position) {
878     IsoRun *pLastIsoRun=&bd->isoRuns[bd->isoRunLast];
879     DirProp *dirProps, dirProp, newProp;
880     UBiDiLevel level;
881     dirProps=bd->pBiDi->dirProps;
882     dirProp=dirProps[position];
883     if(dirProp==ON) {
884         UChar c, match;
885         int32_t idx;
886         /* First see if it is a matching closing bracket. Hopefully, this is
887            more efficient than checking if it is a closing bracket at all */
888         c=bd->pBiDi->text[position];
889         for(idx=pLastIsoRun->limit-1; idx>=pLastIsoRun->start; idx--) {
890             if(bd->openings[idx].match!=c)
891                 continue;
892             /* We have a match */
893             newProp=bracketProcessClosing(bd, idx, position);
894             if(newProp==ON) {           /* N0d */
895                 c=0;        /* prevent handling as an opening */
896                 break;
897             }
898             pLastIsoRun->lastBase=ON;
899             pLastIsoRun->contextDir=(UBiDiDirection)newProp;
900             pLastIsoRun->contextPos=position;
901             level=bd->pBiDi->levels[position];
902             if(level&UBIDI_LEVEL_OVERRIDE) {    /* X4, X5 */
903                 uint16_t flag;
904                 int32_t i;
905                 newProp=level&1;
906                 pLastIsoRun->lastStrong=newProp;
907                 flag=DIRPROP_FLAG(newProp);
908                 for(i=pLastIsoRun->start; i<idx; i++)
909                     bd->openings[i].flags|=flag;
910                 /* matching brackets are not overridden by LRO/RLO */
911                 bd->pBiDi->levels[position]&=~UBIDI_LEVEL_OVERRIDE;
912             }
913             /* matching brackets are not overridden by LRO/RLO */
914             bd->pBiDi->levels[bd->openings[idx].position]&=~UBIDI_LEVEL_OVERRIDE;
915             return TRUE;
916         }
917         /* We get here only if the ON character is not a matching closing
918            bracket or it is a case of N0d */
919         /* Now see if it is an opening bracket */
920         if(c)
921             match= static_cast<UChar>(u_getBidiPairedBracket(c));    /* get the matching char */
922         else
923             match=0;
924         if(match!=c &&                  /* has a matching char */
925            ubidi_getPairedBracketType(c)==U_BPT_OPEN) { /* opening bracket */
926             /* special case: process synonyms
927                create an opening entry for each synonym */
928             if(match==0x232A) {     /* RIGHT-POINTING ANGLE BRACKET */
929                 if(!bracketAddOpening(bd, 0x3009, position))
930                     return FALSE;
931             }
932             else if(match==0x3009) {         /* RIGHT ANGLE BRACKET */
933                 if(!bracketAddOpening(bd, 0x232A, position))
934                     return FALSE;
935             }
936             if(!bracketAddOpening(bd, match, position))
937                 return FALSE;
938         }
939     }
940     level=bd->pBiDi->levels[position];
941     if(level&UBIDI_LEVEL_OVERRIDE) {    /* X4, X5 */
942         newProp=level&1;
943         if(dirProp!=S && dirProp!=WS && dirProp!=ON)
944             dirProps[position]=newProp;
945         pLastIsoRun->lastBase=newProp;
946         pLastIsoRun->lastStrong=newProp;
947         pLastIsoRun->contextDir=(UBiDiDirection)newProp;
948         pLastIsoRun->contextPos=position;
949     }
950     else if(dirProp<=R || dirProp==AL) {
951         newProp= static_cast<DirProp>(DIR_FROM_STRONG(dirProp));
952         pLastIsoRun->lastBase=dirProp;
953         pLastIsoRun->lastStrong=dirProp;
954         pLastIsoRun->contextDir=(UBiDiDirection)newProp;
955         pLastIsoRun->contextPos=position;
956     }
957     else if(dirProp==EN) {
958         pLastIsoRun->lastBase=EN;
959         if(pLastIsoRun->lastStrong==L) {
960             newProp=L;                  /* W7 */
961             if(!bd->isNumbersSpecial)
962                 dirProps[position]=ENL;
963             pLastIsoRun->contextDir=(UBiDiDirection)L;
964             pLastIsoRun->contextPos=position;
965         }
966         else {
967             newProp=R;                  /* N0 */
968             if(pLastIsoRun->lastStrong==AL)
969                 dirProps[position]=AN;  /* W2 */
970             else
971                 dirProps[position]=ENR;
972             pLastIsoRun->contextDir=(UBiDiDirection)R;
973             pLastIsoRun->contextPos=position;
974         }
975     }
976     else if(dirProp==AN) {
977         newProp=R;                      /* N0 */
978         pLastIsoRun->lastBase=AN;
979         pLastIsoRun->contextDir=(UBiDiDirection)R;
980         pLastIsoRun->contextPos=position;
981     }
982     else if(dirProp==NSM) {
983         /* if the last real char was ON, change NSM to ON so that it
984            will stay ON even if the last real char is a bracket which
985            may be changed to L or R */
986         newProp=pLastIsoRun->lastBase;
987         if(newProp==ON)
988             dirProps[position]=newProp;
989     }
990     else {
991         newProp=dirProp;
992         pLastIsoRun->lastBase=dirProp;
993     }
994     if(newProp<=R || newProp==AL) {
995         int32_t i;
996         uint16_t flag=DIRPROP_FLAG(DIR_FROM_STRONG(newProp));
997         for(i=pLastIsoRun->start; i<pLastIsoRun->limit; i++)
998             if(position>bd->openings[i].position)
999                 bd->openings[i].flags|=flag;
1000     }
1001     return TRUE;
1002 }
1003 
1004 /* perform (X1)..(X9) ------------------------------------------------------- */
1005 
1006 /* determine if the text is mixed-directional or single-directional */
1007 static UBiDiDirection
directionFromFlags(UBiDi * pBiDi)1008 directionFromFlags(UBiDi *pBiDi) {
1009     Flags flags=pBiDi->flags;
1010     /* if the text contains AN and neutrals, then some neutrals may become RTL */
1011     if(!(flags&MASK_RTL || ((flags&DIRPROP_FLAG(AN)) && (flags&MASK_POSSIBLE_N)))) {
1012         return UBIDI_LTR;
1013     } else if(!(flags&MASK_LTR)) {
1014         return UBIDI_RTL;
1015     } else {
1016         return UBIDI_MIXED;
1017     }
1018 }
1019 
1020 /*
1021  * Resolve the explicit levels as specified by explicit embedding codes.
1022  * Recalculate the flags to have them reflect the real properties
1023  * after taking the explicit embeddings into account.
1024  *
1025  * The BiDi algorithm is designed to result in the same behavior whether embedding
1026  * levels are externally specified (from "styled text", supposedly the preferred
1027  * method) or set by explicit embedding codes (LRx, RLx, PDF, FSI, PDI) in the plain text.
1028  * That is why (X9) instructs to remove all not-isolate explicit codes (and BN).
1029  * However, in a real implementation, the removal of these codes and their index
1030  * positions in the plain text is undesirable since it would result in
1031  * reallocated, reindexed text.
1032  * Instead, this implementation leaves the codes in there and just ignores them
1033  * in the subsequent processing.
1034  * In order to get the same reordering behavior, positions with a BN or a not-isolate
1035  * explicit embedding code just get the same level assigned as the last "real"
1036  * character.
1037  *
1038  * Some implementations, not this one, then overwrite some of these
1039  * directionality properties at "real" same-level-run boundaries by
1040  * L or R codes so that the resolution of weak types can be performed on the
1041  * entire paragraph at once instead of having to parse it once more and
1042  * perform that resolution on same-level-runs.
1043  * This limits the scope of the implicit rules in effectively
1044  * the same way as the run limits.
1045  *
1046  * Instead, this implementation does not modify these codes, except for
1047  * paired brackets whose properties (ON) may be replaced by L or R.
1048  * On one hand, the paragraph has to be scanned for same-level-runs, but
1049  * on the other hand, this saves another loop to reset these codes,
1050  * or saves making and modifying a copy of dirProps[].
1051  *
1052  *
1053  * Note that (Pn) and (Xn) changed significantly from version 4 of the BiDi algorithm.
1054  *
1055  *
1056  * Handling the stack of explicit levels (Xn):
1057  *
1058  * With the BiDi stack of explicit levels, as pushed with each
1059  * LRE, RLE, LRO, RLO, LRI, RLI and FSI and popped with each PDF and PDI,
1060  * the explicit level must never exceed UBIDI_MAX_EXPLICIT_LEVEL.
1061  *
1062  * In order to have a correct push-pop semantics even in the case of overflows,
1063  * overflow counters and a valid isolate counter are used as described in UAX#9
1064  * section 3.3.2 "Explicit Levels and Directions".
1065  *
1066  * This implementation assumes that UBIDI_MAX_EXPLICIT_LEVEL is odd.
1067  *
1068  * Returns normally the direction; -1 if there was a memory shortage
1069  *
1070  */
1071 static UBiDiDirection
resolveExplicitLevels(UBiDi * pBiDi,UErrorCode * pErrorCode)1072 resolveExplicitLevels(UBiDi *pBiDi, UErrorCode *pErrorCode) {
1073     DirProp *dirProps=pBiDi->dirProps;
1074     UBiDiLevel *levels=pBiDi->levels;
1075     const UChar *text=pBiDi->text;
1076 
1077     int32_t i=0, length=pBiDi->length;
1078     Flags flags=pBiDi->flags;       /* collect all directionalities in the text */
1079     DirProp dirProp;
1080     UBiDiLevel level=GET_PARALEVEL(pBiDi, 0);
1081     UBiDiDirection direction;
1082     pBiDi->isolateCount=0;
1083 
1084     if(U_FAILURE(*pErrorCode)) { return UBIDI_LTR; }
1085 
1086     /* determine if the text is mixed-directional or single-directional */
1087     direction=directionFromFlags(pBiDi);
1088 
1089     /* we may not need to resolve any explicit levels */
1090     if((direction!=UBIDI_MIXED)) {
1091         /* not mixed directionality: levels don't matter - trailingWSStart will be 0 */
1092         return direction;
1093     }
1094     if(pBiDi->reorderingMode > UBIDI_REORDER_LAST_LOGICAL_TO_VISUAL) {
1095         /* inverse BiDi: mixed, but all characters are at the same embedding level */
1096         /* set all levels to the paragraph level */
1097         int32_t paraIndex, start, limit;
1098         for(paraIndex=0; paraIndex<pBiDi->paraCount; paraIndex++) {
1099             if(paraIndex==0)
1100                 start=0;
1101             else
1102                 start=pBiDi->paras[paraIndex-1].limit;
1103             limit=pBiDi->paras[paraIndex].limit;
1104             level= static_cast<UBiDiLevel>(pBiDi->paras[paraIndex].level);
1105             for(i=start; i<limit; i++)
1106                 levels[i]=level;
1107         }
1108         return direction;   /* no bracket matching for inverse BiDi */
1109     }
1110     if(!(flags&(MASK_EXPLICIT|MASK_ISO))) {
1111         /* no embeddings, set all levels to the paragraph level */
1112         /* we still have to perform bracket matching */
1113         int32_t paraIndex, start, limit;
1114         BracketData bracketData;
1115         bracketInit(pBiDi, &bracketData);
1116         for(paraIndex=0; paraIndex<pBiDi->paraCount; paraIndex++) {
1117             if(paraIndex==0)
1118                 start=0;
1119             else
1120                 start=pBiDi->paras[paraIndex-1].limit;
1121             limit=pBiDi->paras[paraIndex].limit;
1122             level= static_cast<UBiDiLevel>(pBiDi->paras[paraIndex].level);
1123             for(i=start; i<limit; i++) {
1124                 levels[i]=level;
1125                 dirProp=dirProps[i];
1126                 if(dirProp==BN)
1127                     continue;
1128                 if(dirProp==B) {
1129                     if((i+1)<length) {
1130                         if(text[i]==CR && text[i+1]==LF)
1131                             continue;   /* skip CR when followed by LF */
1132                         bracketProcessB(&bracketData, level);
1133                     }
1134                     continue;
1135                 }
1136                 if(!bracketProcessChar(&bracketData, i)) {
1137                     *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
1138                     return UBIDI_LTR;
1139                 }
1140             }
1141         }
1142         return direction;
1143     }
1144     {
1145         /* continue to perform (Xn) */
1146 
1147         /* (X1) level is set for all codes, embeddingLevel keeps track of the push/pop operations */
1148         /* both variables may carry the UBIDI_LEVEL_OVERRIDE flag to indicate the override status */
1149         UBiDiLevel embeddingLevel=level, newLevel;
1150         UBiDiLevel previousLevel=level;     /* previous level for regular (not CC) characters */
1151         int32_t lastCcPos=0;                /* index of last effective LRx,RLx, PDx */
1152 
1153         /* The following stack remembers the embedding level and the ISOLATE flag of level runs.
1154            stackLast points to its current entry. */
1155         uint16_t stack[UBIDI_MAX_EXPLICIT_LEVEL+2];   /* we never push anything >=UBIDI_MAX_EXPLICIT_LEVEL
1156                                                         but we need one more entry as base */
1157         uint32_t stackLast=0;
1158         int32_t overflowIsolateCount=0;
1159         int32_t overflowEmbeddingCount=0;
1160         int32_t validIsolateCount=0;
1161         BracketData bracketData;
1162         bracketInit(pBiDi, &bracketData);
1163         stack[0]=level;     /* initialize base entry to para level, no override, no isolate */
1164 
1165         /* recalculate the flags */
1166         flags=0;
1167 
1168         for(i=0; i<length; ++i) {
1169             dirProp=dirProps[i];
1170             switch(dirProp) {
1171             case LRE:
1172             case RLE:
1173             case LRO:
1174             case RLO:
1175                 /* (X2, X3, X4, X5) */
1176                 flags|=DIRPROP_FLAG(BN);
1177                 levels[i]=previousLevel;
1178                 if (dirProp==LRE || dirProp==LRO)
1179                     /* least greater even level */
1180                     newLevel=(UBiDiLevel)((embeddingLevel+2)&~(UBIDI_LEVEL_OVERRIDE|1));
1181                 else
1182                     /* least greater odd level */
1183                     newLevel=(UBiDiLevel)((NO_OVERRIDE(embeddingLevel)+1)|1);
1184                 if(newLevel<=UBIDI_MAX_EXPLICIT_LEVEL && overflowIsolateCount==0 &&
1185                                                          overflowEmbeddingCount==0) {
1186                     lastCcPos=i;
1187                     embeddingLevel=newLevel;
1188                     if(dirProp==LRO || dirProp==RLO)
1189                         embeddingLevel|=UBIDI_LEVEL_OVERRIDE;
1190                     stackLast++;
1191                     stack[stackLast]=embeddingLevel;
1192                     /* we don't need to set UBIDI_LEVEL_OVERRIDE off for LRE and RLE
1193                        since this has already been done for newLevel which is
1194                        the source for embeddingLevel.
1195                      */
1196                 } else {
1197                     if(overflowIsolateCount==0)
1198                         overflowEmbeddingCount++;
1199                 }
1200                 break;
1201             case PDF:
1202                 /* (X7) */
1203                 flags|=DIRPROP_FLAG(BN);
1204                 levels[i]=previousLevel;
1205                 /* handle all the overflow cases first */
1206                 if(overflowIsolateCount) {
1207                     break;
1208                 }
1209                 if(overflowEmbeddingCount) {
1210                     overflowEmbeddingCount--;
1211                     break;
1212                 }
1213                 if(stackLast>0 && stack[stackLast]<ISOLATE) {   /* not an isolate entry */
1214                     lastCcPos=i;
1215                     stackLast--;
1216                     embeddingLevel=(UBiDiLevel)stack[stackLast];
1217                 }
1218                 break;
1219             case LRI:
1220             case RLI:
1221                 flags|=(DIRPROP_FLAG(ON)|DIRPROP_FLAG_LR(embeddingLevel));
1222                 levels[i]=NO_OVERRIDE(embeddingLevel);
1223                 if(NO_OVERRIDE(embeddingLevel)!=NO_OVERRIDE(previousLevel)) {
1224                     bracketProcessBoundary(&bracketData, lastCcPos,
1225                                            previousLevel, embeddingLevel);
1226                     flags|=DIRPROP_FLAG_MULTI_RUNS;
1227                 }
1228                 previousLevel=embeddingLevel;
1229                 /* (X5a, X5b) */
1230                 if(dirProp==LRI)
1231                     /* least greater even level */
1232                     newLevel=(UBiDiLevel)((embeddingLevel+2)&~(UBIDI_LEVEL_OVERRIDE|1));
1233                 else
1234                     /* least greater odd level */
1235                     newLevel=(UBiDiLevel)((NO_OVERRIDE(embeddingLevel)+1)|1);
1236                 if(newLevel<=UBIDI_MAX_EXPLICIT_LEVEL && overflowIsolateCount==0 &&
1237                                                          overflowEmbeddingCount==0) {
1238                     flags|=DIRPROP_FLAG(dirProp);
1239                     lastCcPos=i;
1240                     validIsolateCount++;
1241                     if(validIsolateCount>pBiDi->isolateCount)
1242                         pBiDi->isolateCount=validIsolateCount;
1243                     embeddingLevel=newLevel;
1244                     /* we can increment stackLast without checking because newLevel
1245                        will exceed UBIDI_MAX_EXPLICIT_LEVEL before stackLast overflows */
1246                     stackLast++;
1247                     stack[stackLast]=embeddingLevel+ISOLATE;
1248                     bracketProcessLRI_RLI(&bracketData, embeddingLevel);
1249                 } else {
1250                     /* make it WS so that it is handled by adjustWSLevels() */
1251                     dirProps[i]=WS;
1252                     overflowIsolateCount++;
1253                 }
1254                 break;
1255             case PDI:
1256                 if(NO_OVERRIDE(embeddingLevel)!=NO_OVERRIDE(previousLevel)) {
1257                     bracketProcessBoundary(&bracketData, lastCcPos,
1258                                            previousLevel, embeddingLevel);
1259                     flags|=DIRPROP_FLAG_MULTI_RUNS;
1260                 }
1261                 /* (X6a) */
1262                 if(overflowIsolateCount) {
1263                     overflowIsolateCount--;
1264                     /* make it WS so that it is handled by adjustWSLevels() */
1265                     dirProps[i]=WS;
1266                 }
1267                 else if(validIsolateCount) {
1268                     flags|=DIRPROP_FLAG(PDI);
1269                     lastCcPos=i;
1270                     overflowEmbeddingCount=0;
1271                     while(stack[stackLast]<ISOLATE) /* pop embedding entries */
1272                         stackLast--;                /* until the last isolate entry */
1273                     stackLast--;                    /* pop also the last isolate entry */
1274                     validIsolateCount--;
1275                     bracketProcessPDI(&bracketData);
1276                 } else
1277                     /* make it WS so that it is handled by adjustWSLevels() */
1278                     dirProps[i]=WS;
1279                 embeddingLevel=(UBiDiLevel)stack[stackLast]&~ISOLATE;
1280                 flags|=(DIRPROP_FLAG(ON)|DIRPROP_FLAG_LR(embeddingLevel));
1281                 previousLevel=embeddingLevel;
1282                 levels[i]=NO_OVERRIDE(embeddingLevel);
1283                 break;
1284             case B:
1285                 flags|=DIRPROP_FLAG(B);
1286                 levels[i]=GET_PARALEVEL(pBiDi, i);
1287                 if((i+1)<length) {
1288                     if(text[i]==CR && text[i+1]==LF)
1289                         break;          /* skip CR when followed by LF */
1290                     overflowEmbeddingCount=overflowIsolateCount=0;
1291                     validIsolateCount=0;
1292                     stackLast=0;
1293                     previousLevel=embeddingLevel=GET_PARALEVEL(pBiDi, i+1);
1294                     stack[0]=embeddingLevel; /* initialize base entry to para level, no override, no isolate */
1295                     bracketProcessB(&bracketData, embeddingLevel);
1296                 }
1297                 break;
1298             case BN:
1299                 /* BN, LRE, RLE, and PDF are supposed to be removed (X9) */
1300                 /* they will get their levels set correctly in adjustWSLevels() */
1301                 levels[i]=previousLevel;
1302                 flags|=DIRPROP_FLAG(BN);
1303                 break;
1304             default:
1305                 /* all other types are normal characters and get the "real" level */
1306                 if(NO_OVERRIDE(embeddingLevel)!=NO_OVERRIDE(previousLevel)) {
1307                     bracketProcessBoundary(&bracketData, lastCcPos,
1308                                            previousLevel, embeddingLevel);
1309                     flags|=DIRPROP_FLAG_MULTI_RUNS;
1310                     if(embeddingLevel&UBIDI_LEVEL_OVERRIDE)
1311                         flags|=DIRPROP_FLAG_O(embeddingLevel);
1312                     else
1313                         flags|=DIRPROP_FLAG_E(embeddingLevel);
1314                 }
1315                 previousLevel=embeddingLevel;
1316                 levels[i]=embeddingLevel;
1317                 if(!bracketProcessChar(&bracketData, i))
1318                     return (UBiDiDirection)-1;
1319                 /* the dirProp may have been changed in bracketProcessChar() */
1320                 flags|=DIRPROP_FLAG(dirProps[i]);
1321                 break;
1322             }
1323         }
1324         if(flags&MASK_EMBEDDING)
1325             flags|=DIRPROP_FLAG_LR(pBiDi->paraLevel);
1326         if(pBiDi->orderParagraphsLTR && (flags&DIRPROP_FLAG(B)))
1327             flags|=DIRPROP_FLAG(L);
1328         /* again, determine if the text is mixed-directional or single-directional */
1329         pBiDi->flags=flags;
1330         direction=directionFromFlags(pBiDi);
1331     }
1332     return direction;
1333 }
1334 
1335 /*
1336  * Use a pre-specified embedding levels array:
1337  *
1338  * Adjust the directional properties for overrides (->LEVEL_OVERRIDE),
1339  * ignore all explicit codes (X9),
1340  * and check all the preset levels.
1341  *
1342  * Recalculate the flags to have them reflect the real properties
1343  * after taking the explicit embeddings into account.
1344  */
1345 static UBiDiDirection
checkExplicitLevels(UBiDi * pBiDi,UErrorCode * pErrorCode)1346 checkExplicitLevels(UBiDi *pBiDi, UErrorCode *pErrorCode) {
1347     DirProp *dirProps=pBiDi->dirProps;
1348     UBiDiLevel *levels=pBiDi->levels;
1349     int32_t isolateCount=0;
1350 
1351     int32_t length=pBiDi->length;
1352     Flags flags=0;  /* collect all directionalities in the text */
1353     pBiDi->isolateCount=0;
1354 
1355     int32_t currentParaIndex = 0;
1356     int32_t currentParaLimit = pBiDi->paras[0].limit;
1357     int32_t currentParaLevel = pBiDi->paraLevel;
1358 
1359     for(int32_t i=0; i<length; ++i) {
1360         UBiDiLevel level=levels[i];
1361         DirProp dirProp=dirProps[i];
1362         if(dirProp==LRI || dirProp==RLI) {
1363             isolateCount++;
1364             if(isolateCount>pBiDi->isolateCount)
1365                 pBiDi->isolateCount=isolateCount;
1366         }
1367         else if(dirProp==PDI)
1368             isolateCount--;
1369         else if(dirProp==B)
1370             isolateCount=0;
1371 
1372         // optimized version of  int32_t currentParaLevel = GET_PARALEVEL(pBiDi, i);
1373         if (pBiDi->defaultParaLevel != 0 &&
1374                 i == currentParaLimit && (currentParaIndex + 1) < pBiDi->paraCount) {
1375             currentParaLevel = pBiDi->paras[++currentParaIndex].level;
1376             currentParaLimit = pBiDi->paras[currentParaIndex].limit;
1377         }
1378 
1379         UBiDiLevel overrideFlag = level & UBIDI_LEVEL_OVERRIDE;
1380         level &= ~UBIDI_LEVEL_OVERRIDE;
1381         if (level < currentParaLevel || UBIDI_MAX_EXPLICIT_LEVEL < level) {
1382             if (level == 0) {
1383                 if (dirProp == B) {
1384                     // Paragraph separators are ok with explicit level 0.
1385                     // Prevents reordering of paragraphs.
1386                 } else {
1387                     // Treat explicit level 0 as a wildcard for the paragraph level.
1388                     // Avoid making the caller guess what the paragraph level would be.
1389                     level = (UBiDiLevel)currentParaLevel;
1390                     levels[i] = level | overrideFlag;
1391                 }
1392             } else {
1393                 // 1 <= level < currentParaLevel or UBIDI_MAX_EXPLICIT_LEVEL < level
1394                 /* level out of bounds */
1395                 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
1396                 return UBIDI_LTR;
1397             }
1398         }
1399         if (overrideFlag != 0) {
1400             /* keep the override flag in levels[i] but adjust the flags */
1401             flags|=DIRPROP_FLAG_O(level);
1402         } else {
1403             /* set the flags */
1404             flags|=DIRPROP_FLAG_E(level)|DIRPROP_FLAG(dirProp);
1405         }
1406     }
1407     if(flags&MASK_EMBEDDING)
1408         flags|=DIRPROP_FLAG_LR(pBiDi->paraLevel);
1409     /* determine if the text is mixed-directional or single-directional */
1410     pBiDi->flags=flags;
1411     return directionFromFlags(pBiDi);
1412 }
1413 
1414 /******************************************************************
1415  The Properties state machine table
1416 *******************************************************************
1417 
1418  All table cells are 8 bits:
1419       bits 0..4:  next state
1420       bits 5..7:  action to perform (if > 0)
1421 
1422  Cells may be of format "n" where n represents the next state
1423  (except for the rightmost column).
1424  Cells may also be of format "s(x,y)" where x represents an action
1425  to perform and y represents the next state.
1426 
1427 *******************************************************************
1428  Definitions and type for properties state table
1429 *******************************************************************
1430 */
/* Width of one impTabProps row: 15 property columns plus the Res column. */
#define IMPTABPROPS_COLUMNS 16
/* Index of the Res (reduced property) column, the last one of each row. */
#define IMPTABPROPS_RES (IMPTABPROPS_COLUMNS - 1)
/* Bits 0..4 of a cell: the next state. */
#define GET_STATEPROPS(cell) ((cell)&0x1f)
/* Bits 5..7 of a cell: the action to perform, 0 meaning no action. */
#define GET_ACTIONPROPS(cell) ((cell)>>5)
/* Packs an action and a next state into one 8-bit cell.
 * Both arguments are parenthesized so that an argument which is itself an
 * expression (e.g. a conditional) cannot change the meaning of the expansion.
 */
#define s(action, newState) ((uint8_t)((newState)+((action)<<5)))
1436 
/* Column index in impTabProps for each directional property; the comment row
 * inside the initializer names the property each entry belongs to.
 * Several properties share one column (for instance WS, ON, FSI, LRI, RLI and
 * PDI all map to column 4, the ON column).
 * NOTE(review): presumably indexed by the DirProp value of a character - the
 * lookup itself is outside this section; confirm in resolveImplicitLevels().
 */
static const uint8_t groupProp[] =          /* dirProp regrouped */
{
/*  L   R   EN  ES  ET  AN  CS  B   S   WS  ON  LRE LRO AL  RLE RLO PDF NSM BN  FSI LRI RLI PDI ENL ENR */
    0,  1,  2,  7,  8,  3,  9,  6,  5,  4,  4,  10, 10, 12, 10, 10, 10, 11, 10, 4,  4,  4,  4,  13, 14
};
/* Reduced properties: the values stored in the Res column of impTabProps and,
 * in the same order, the column indexes L..B of the levels state tables.
 */
enum { DirProp_L=0, DirProp_R=1, DirProp_EN=2, DirProp_AN=3, DirProp_ON=4, DirProp_S=5, DirProp_B=6 }; /* reduced dirProp */
1443 
1444 /******************************************************************
1445 
1446       PROPERTIES  STATE  TABLE
1447 
1448  In table impTabProps,
1449       - the ON column regroups ON and WS, FSI, RLI, LRI and PDI
1450       - the BN column regroups BN, LRE, RLE, LRO, RLO, PDF
1451       - the Res column is the reduced property assigned to a run
1452 
1453  Action 1: process current run1, init new run1
1454         2: init new run2
1455         3: process run1, process run2, init new run1
1456         4: process run1, set run1=run2, init new run2
1457 
1458  Notes:
1459   1) This table is used in resolveImplicitLevels().
1460   2) This table triggers actions when there is a change in the Bidi
1461      property of incoming characters (action 1).
  3) Most such property sequences are processed immediately (in
     fact, passed to processPropertySeq()).
1464   4) However, numbers are assembled as one sequence. This means
1465      that undefined situations (like CS following digits, until
1466      it is known if the next char will be a digit) are held until
1467      following chars define them.
1468      Example: digits followed by CS, then comes another CS or ON;
1469               the digits will be processed, then the CS assigned
1470               as the start of an ON sequence (action 3).
1471   5) There are cases where more than one sequence must be
1472      processed, for instance digits followed by CS followed by L:
1473      the digits must be processed as one sequence, and the CS
1474      must be processed as an ON sequence, all this before starting
1475      assembling chars for the opening L sequence.
1476 
1477 
1478 */
/* One row per state, one column per groupProp[] class; the last column
 * (index IMPTABPROPS_RES) holds the reduced property of a run for that state.
 * A cell written s(a,n) packs action a with next state n; a plain n is a
 * next state without action. Decode with GET_ACTIONPROPS / GET_STATEPROPS.
 */
static const uint8_t impTabProps[][IMPTABPROPS_COLUMNS] =
{
/*                        L ,     R ,    EN ,    AN ,    ON ,     S ,     B ,    ES ,    ET ,    CS ,    BN ,   NSM ,    AL ,   ENL ,   ENR , Res */
/* 0 Init        */ {     1 ,     2 ,     4 ,     5 ,     7 ,    15 ,    17 ,     7 ,     9 ,     7 ,     0 ,     7 ,     3 ,    18 ,    21 , DirProp_ON },
/* 1 L           */ {     1 , s(1,2), s(1,4), s(1,5), s(1,7),s(1,15),s(1,17), s(1,7), s(1,9), s(1,7),     1 ,     1 , s(1,3),s(1,18),s(1,21),  DirProp_L },
/* 2 R           */ { s(1,1),     2 , s(1,4), s(1,5), s(1,7),s(1,15),s(1,17), s(1,7), s(1,9), s(1,7),     2 ,     2 , s(1,3),s(1,18),s(1,21),  DirProp_R },
/* 3 AL          */ { s(1,1), s(1,2), s(1,6), s(1,6), s(1,8),s(1,16),s(1,17), s(1,8), s(1,8), s(1,8),     3 ,     3 ,     3 ,s(1,18),s(1,21),  DirProp_R },
/* 4 EN          */ { s(1,1), s(1,2),     4 , s(1,5), s(1,7),s(1,15),s(1,17),s(2,10),    11 ,s(2,10),     4 ,     4 , s(1,3),    18 ,    21 , DirProp_EN },
/* 5 AN          */ { s(1,1), s(1,2), s(1,4),     5 , s(1,7),s(1,15),s(1,17), s(1,7), s(1,9),s(2,12),     5 ,     5 , s(1,3),s(1,18),s(1,21), DirProp_AN },
/* 6 AL:EN/AN    */ { s(1,1), s(1,2),     6 ,     6 , s(1,8),s(1,16),s(1,17), s(1,8), s(1,8),s(2,13),     6 ,     6 , s(1,3),    18 ,    21 , DirProp_AN },
/* 7 ON          */ { s(1,1), s(1,2), s(1,4), s(1,5),     7 ,s(1,15),s(1,17),     7 ,s(2,14),     7 ,     7 ,     7 , s(1,3),s(1,18),s(1,21), DirProp_ON },
/* 8 AL:ON       */ { s(1,1), s(1,2), s(1,6), s(1,6),     8 ,s(1,16),s(1,17),     8 ,     8 ,     8 ,     8 ,     8 , s(1,3),s(1,18),s(1,21), DirProp_ON },
/* 9 ET          */ { s(1,1), s(1,2),     4 , s(1,5),     7 ,s(1,15),s(1,17),     7 ,     9 ,     7 ,     9 ,     9 , s(1,3),    18 ,    21 , DirProp_ON },
/*10 EN+ES/CS    */ { s(3,1), s(3,2),     4 , s(3,5), s(4,7),s(3,15),s(3,17), s(4,7),s(4,14), s(4,7),    10 , s(4,7), s(3,3),    18 ,    21 , DirProp_EN },
/*11 EN+ET       */ { s(1,1), s(1,2),     4 , s(1,5), s(1,7),s(1,15),s(1,17), s(1,7),    11 , s(1,7),    11 ,    11 , s(1,3),    18 ,    21 , DirProp_EN },
/*12 AN+CS       */ { s(3,1), s(3,2), s(3,4),     5 , s(4,7),s(3,15),s(3,17), s(4,7),s(4,14), s(4,7),    12 , s(4,7), s(3,3),s(3,18),s(3,21), DirProp_AN },
/*13 AL:EN/AN+CS */ { s(3,1), s(3,2),     6 ,     6 , s(4,8),s(3,16),s(3,17), s(4,8), s(4,8), s(4,8),    13 , s(4,8), s(3,3),    18 ,    21 , DirProp_AN },
/*14 ON+ET       */ { s(1,1), s(1,2), s(4,4), s(1,5),     7 ,s(1,15),s(1,17),     7 ,    14 ,     7 ,    14 ,    14 , s(1,3),s(4,18),s(4,21), DirProp_ON },
/*15 S           */ { s(1,1), s(1,2), s(1,4), s(1,5), s(1,7),    15 ,s(1,17), s(1,7), s(1,9), s(1,7),    15 , s(1,7), s(1,3),s(1,18),s(1,21),  DirProp_S },
/*16 AL:S        */ { s(1,1), s(1,2), s(1,6), s(1,6), s(1,8),    16 ,s(1,17), s(1,8), s(1,8), s(1,8),    16 , s(1,8), s(1,3),s(1,18),s(1,21),  DirProp_S },
/*17 B           */ { s(1,1), s(1,2), s(1,4), s(1,5), s(1,7),s(1,15),    17 , s(1,7), s(1,9), s(1,7),    17 , s(1,7), s(1,3),s(1,18),s(1,21),  DirProp_B },
/*18 ENL         */ { s(1,1), s(1,2),    18 , s(1,5), s(1,7),s(1,15),s(1,17),s(2,19),    20 ,s(2,19),    18 ,    18 , s(1,3),    18 ,    21 ,  DirProp_L },
/*19 ENL+ES/CS   */ { s(3,1), s(3,2),    18 , s(3,5), s(4,7),s(3,15),s(3,17), s(4,7),s(4,14), s(4,7),    19 , s(4,7), s(3,3),    18 ,    21 ,  DirProp_L },
/*20 ENL+ET      */ { s(1,1), s(1,2),    18 , s(1,5), s(1,7),s(1,15),s(1,17), s(1,7),    20 , s(1,7),    20 ,    20 , s(1,3),    18 ,    21 ,  DirProp_L },
/*21 ENR         */ { s(1,1), s(1,2),    21 , s(1,5), s(1,7),s(1,15),s(1,17),s(2,22),    23 ,s(2,22),    21 ,    21 , s(1,3),    18 ,    21 , DirProp_AN },
/*22 ENR+ES/CS   */ { s(3,1), s(3,2),    21 , s(3,5), s(4,7),s(3,15),s(3,17), s(4,7),s(4,14), s(4,7),    22 , s(4,7), s(3,3),    18 ,    21 , DirProp_AN },
/*23 ENR+ET      */ { s(1,1), s(1,2),    21 , s(1,5), s(1,7),s(1,15),s(1,17), s(1,7),    23 , s(1,7),    23 ,    23 , s(1,3),    18 ,    21 , DirProp_AN }
};
1507 
/*  we must undef macro s because the levels tables have a different
 *  structure (4 bits for action and 4 bits for next state).
 */
1511 #undef s
1512 
1513 /******************************************************************
1514  The levels state machine tables
1515 *******************************************************************
1516 
1517  All table cells are 8 bits:
1518       bits 0..3:  next state
1519       bits 4..7:  action to perform (if > 0)
1520 
1521  Cells may be of format "n" where n represents the next state
1522  (except for the rightmost column).
1523  Cells may also be of format "s(x,y)" where x represents an action
1524  to perform and y represents the next state.
1525 
1526  This format limits each table to 16 states each and to 15 actions.
1527 
1528 *******************************************************************
1529  Definitions and type for levels state tables
1530 *******************************************************************
1531 */
/* Width of one levels-table row: the 7 reduced properties L..B plus Res. */
#define IMPTABLEVELS_COLUMNS (DirProp_B + 2)
/* Index of the Res column (level increment), the last one of each row. */
#define IMPTABLEVELS_RES (IMPTABLEVELS_COLUMNS - 1)
/* Bits 0..3 of a cell: the next state. */
#define GET_STATE(cell) ((cell)&0x0f)
/* Bits 4..7 of a cell: the table-local action number, 0 meaning no action. */
#define GET_ACTION(cell) ((cell)>>4)
/* Packs a table-local action and a next state into one 8-bit cell.
 * Both arguments are parenthesized so that an argument which is itself an
 * expression (e.g. a conditional) cannot change the meaning of the expansion.
 */
#define s(action, newState) ((uint8_t)((newState)+((action)<<4)))
1537 
/* A levels state table: any number of state rows, each IMPTABLEVELS_COLUMNS wide. */
typedef uint8_t ImpTab[][IMPTABLEVELS_COLUMNS];
/* An action map: translates the action number stored in a table cell into
 * the action number that processPropertySeq() switches on.
 */
typedef uint8_t ImpAct[];

/* FOOD FOR THOUGHT: each ImpTab should have its associated ImpAct,
 * instead of having a pair of ImpTab and a pair of ImpAct.
 */
/* Two levels tables together with their two action maps.
 * Every initializer in this file section stores the impTabL_* table at
 * index 0 and the impTabR_* table at index 1, and pImpAct[i] is the map
 * that belongs to pImpTab[i].
 * NOTE(review): the pointers are untyped here; they presumably get converted
 * to const ImpTab* / const ImpAct* where a LevState is set up - confirm there.
 */
typedef struct ImpTabPair {
    const void * pImpTab[2];
    const void * pImpAct[2];
} ImpTabPair;
1548 
1549 /******************************************************************
1550 
1551       LEVELS  STATE  TABLES
1552 
1553  In all levels state tables,
1554       - state 0 is the initial state
1555       - the Res column is the increment to add to the text level
1556         for this property sequence.
1557 
1558  The impAct arrays for each table of a pair map the local action
1559  numbers of the table to the total list of actions. For instance,
1560  action 2 in a given table corresponds to the action number which
1561  appears in entry [2] of the impAct array for that table.
1562  The first entry of all impAct arrays must be 0.
1563 
1564  Action 1: init conditional sequence
1565         2: prepend conditional sequence to current sequence
1566         3: set ON sequence to new level - 1
1567         4: init EN/AN/ON sequence
1568         5: fix EN/AN/ON sequence followed by R
1569         6: set previous level sequence to level 2
1570 
1571  Notes:
1572   1) These tables are used in processPropertySeq(). The input
1573      is property sequences as determined by resolveImplicitLevels.
1574   2) Most such property sequences are processed immediately
1575      (levels are assigned).
1576   3) However, some sequences cannot be assigned a final level till
1577      one or more following sequences are received. For instance,
1578      ON following an R sequence within an even-level paragraph.
1579      If the following sequence is R, the ON sequence will be
1580      assigned basic run level+1, and so will the R sequence.
1581   4) S is generally handled like ON, since its level will be fixed
1582      to paragraph level in adjustWSLevels().
1583 
1584 */
1585 
/* Default table pair. The cells use table-local actions 1 to 3 only, and
 * impAct0 forwards each of them to the processPropertySeq() case that has
 * the same number.
 */
static const ImpTab impTabL_DEFAULT =   /* Even paragraph level */
/*  In this table, conditional sequences receive the lower possible level
    until proven otherwise.
*/
{
/*                         L ,     R ,    EN ,    AN ,    ON ,     S ,     B , Res */
/* 0 : init       */ {     0 ,     1 ,     0 ,     2 ,     0 ,     0 ,     0 ,  0 },
/* 1 : R          */ {     0 ,     1 ,     3 ,     3 , s(1,4), s(1,4),     0 ,  1 },
/* 2 : AN         */ {     0 ,     1 ,     0 ,     2 , s(1,5), s(1,5),     0 ,  2 },
/* 3 : R+EN/AN    */ {     0 ,     1 ,     3 ,     3 , s(1,4), s(1,4),     0 ,  2 },
/* 4 : R+ON       */ {     0 , s(2,1), s(3,3), s(3,3),     4 ,     4 ,     0 ,  0 },
/* 5 : AN+ON      */ {     0 , s(2,1),     0 , s(3,2),     5 ,     5 ,     0 ,  0 }
};
static const ImpTab impTabR_DEFAULT =   /* Odd  paragraph level */
/*  In this table, conditional sequences receive the lower possible level
    until proven otherwise.
*/
{
/*                         L ,     R ,    EN ,    AN ,    ON ,     S ,     B , Res */
/* 0 : init       */ {     1 ,     0 ,     2 ,     2 ,     0 ,     0 ,     0 ,  0 },
/* 1 : L          */ {     1 ,     0 ,     1 ,     3 , s(1,4), s(1,4),     0 ,  1 },
/* 2 : EN/AN      */ {     1 ,     0 ,     2 ,     2 ,     0 ,     0 ,     0 ,  1 },
/* 3 : L+AN       */ {     1 ,     0 ,     1 ,     3 ,     5 ,     5 ,     0 ,  1 },
/* 4 : L+ON       */ { s(2,1),     0 , s(2,1),     3 ,     4 ,     4 ,     0 ,  0 },
/* 5 : L+AN+ON    */ {     1 ,     0 ,     1 ,     3 ,     5 ,     5 ,     0 ,  0 }
};
/* Identity action map: table-local action n selects case n (n = 0..4). */
static const ImpAct impAct0 = {0,1,2,3,4};
/* L table first, R table second, both read through the identity map. */
static const ImpTabPair impTab_DEFAULT = {{&impTabL_DEFAULT,
                                           &impTabR_DEFAULT},
                                          {&impAct0, &impAct0}};
1616 
/* Only the even-level table is specific to this table set; the pair defined
 * after it reuses impTabR_DEFAULT for the other slot.
 * The cell s(4,2) (state 1, column R) is the one place that reaches case 4 of
 * processPropertySeq() ("EN/AN before R for NUMBERS_SPECIAL") through impAct0.
 */
static const ImpTab impTabL_NUMBERS_SPECIAL =   /* Even paragraph level */
/*  In this table, conditional sequences receive the lower possible level
    until proven otherwise.
*/
{
/*                         L ,     R ,    EN ,    AN ,    ON ,     S ,     B , Res */
/* 0 : init       */ {     0 ,     2 , s(1,1), s(1,1),     0 ,     0 ,     0 ,  0 },
/* 1 : L+EN/AN    */ {     0 , s(4,2),     1 ,     1 ,     0 ,     0 ,     0 ,  0 },
/* 2 : R          */ {     0 ,     2 ,     4 ,     4 , s(1,3), s(1,3),     0 ,  1 },
/* 3 : R+ON       */ {     0 , s(2,2), s(3,4), s(3,4),     3 ,     3 ,     0 ,  0 },
/* 4 : R+EN/AN    */ {     0 ,     2 ,     4 ,     4 , s(1,3), s(1,3),     0 ,  2 }
};
/* Both tables are read through the identity action map impAct0. */
static const ImpTabPair impTab_NUMBERS_SPECIAL = {{&impTabL_NUMBERS_SPECIAL,
                                                   &impTabR_DEFAULT},
                                                  {&impAct0, &impAct0}};
1632 
/* Both tables of this pair use table-local actions 1 and 2 only, which
 * impAct0 forwards to cases 1 ("init ON seq") and 2 ("prepend ON seq to
 * current seq") of processPropertySeq().
 */
static const ImpTab impTabL_GROUP_NUMBERS_WITH_R =
/*  In this table, EN/AN+ON sequences receive levels as if associated with R
    until proven that there is L or sor/eor on both sides. AN is handled like EN.
*/
{
/*                         L ,     R ,    EN ,    AN ,    ON ,     S ,     B , Res */
/* 0 init         */ {     0 ,     3 , s(1,1), s(1,1),     0 ,     0 ,     0 ,  0 },
/* 1 EN/AN        */ { s(2,0),     3 ,     1 ,     1 ,     2 , s(2,0), s(2,0),  2 },
/* 2 EN/AN+ON     */ { s(2,0),     3 ,     1 ,     1 ,     2 , s(2,0), s(2,0),  1 },
/* 3 R            */ {     0 ,     3 ,     5 ,     5 , s(1,4),     0 ,     0 ,  1 },
/* 4 R+ON         */ { s(2,0),     3 ,     5 ,     5 ,     4 , s(2,0), s(2,0),  1 },
/* 5 R+EN/AN      */ {     0 ,     3 ,     5 ,     5 , s(1,4),     0 ,     0 ,  2 }
};
static const ImpTab impTabR_GROUP_NUMBERS_WITH_R =
/*  In this table, EN/AN+ON sequences receive levels as if associated with R
    until proven that there is L on both sides. AN is handled like EN.
*/
{
/*                         L ,     R ,    EN ,    AN ,    ON ,     S ,     B , Res */
/* 0 init         */ {     2 ,     0 ,     1 ,     1 ,     0 ,     0 ,     0 ,  0 },
/* 1 EN/AN        */ {     2 ,     0 ,     1 ,     1 ,     0 ,     0 ,     0 ,  1 },
/* 2 L            */ {     2 ,     0 , s(1,4), s(1,4), s(1,3),     0 ,     0 ,  1 },
/* 3 L+ON         */ { s(2,2),     0 ,     4 ,     4 ,     3 ,     0 ,     0 ,  0 },
/* 4 L+EN/AN      */ { s(2,2),     0 ,     4 ,     4 ,     3 ,     0 ,     0 ,  1 }
};
/* L table first, R table second, both read through the identity map. */
static const ImpTabPair impTab_GROUP_NUMBERS_WITH_R = {
                        {&impTabL_GROUP_NUMBERS_WITH_R,
                         &impTabR_GROUP_NUMBERS_WITH_R},
                        {&impAct0, &impAct0}};
1662 
1663 
/* NOTE(review): besides the EN and AN columns, rows 4 and 5 of the L table
 * below also differ from impTabL_DEFAULT in the L, R, B and Res columns.
 * In both tables of this pair states 2, 3 and 5 have no incoming transition
 * from the states reachable from state 0 (0, 1 and 4); the rows presumably
 * stay so that state numbers line up with the default tables - confirm
 * before removing them.
 */
static const ImpTab impTabL_INVERSE_NUMBERS_AS_L =
/*  This table is identical to the Default LTR table except that EN and AN are
    handled like L.
*/
{
/*                         L ,     R ,    EN ,    AN ,    ON ,     S ,     B , Res */
/* 0 : init       */ {     0 ,     1 ,     0 ,     0 ,     0 ,     0 ,     0 ,  0 },
/* 1 : R          */ {     0 ,     1 ,     0 ,     0 , s(1,4), s(1,4),     0 ,  1 },
/* 2 : AN         */ {     0 ,     1 ,     0 ,     0 , s(1,5), s(1,5),     0 ,  2 },
/* 3 : R+EN/AN    */ {     0 ,     1 ,     0 ,     0 , s(1,4), s(1,4),     0 ,  2 },
/* 4 : R+ON       */ { s(2,0),     1 , s(2,0), s(2,0),     4 ,     4 , s(2,0),  1 },
/* 5 : AN+ON      */ { s(2,0),     1 , s(2,0), s(2,0),     5 ,     5 , s(2,0),  1 }
};
static const ImpTab impTabR_INVERSE_NUMBERS_AS_L =
/*  This table is identical to the Default RTL table except that EN and AN are
    handled like L.
*/
{
/*                         L ,     R ,    EN ,    AN ,    ON ,     S ,     B , Res */
/* 0 : init       */ {     1 ,     0 ,     1 ,     1 ,     0 ,     0 ,     0 ,  0 },
/* 1 : L          */ {     1 ,     0 ,     1 ,     1 , s(1,4), s(1,4),     0 ,  1 },
/* 2 : EN/AN      */ {     1 ,     0 ,     1 ,     1 ,     0 ,     0 ,     0 ,  1 },
/* 3 : L+AN       */ {     1 ,     0 ,     1 ,     1 ,     5 ,     5 ,     0 ,  1 },
/* 4 : L+ON       */ { s(2,1),     0 , s(2,1), s(2,1),     4 ,     4 ,     0 ,  0 },
/* 5 : L+AN+ON    */ {     1 ,     0 ,     1 ,     1 ,     5 ,     5 ,     0 ,  0 }
};
/* L table first, R table second, both read through the identity map. */
static const ImpTabPair impTab_INVERSE_NUMBERS_AS_L = {
                        {&impTabL_INVERSE_NUMBERS_AS_L,
                         &impTabR_INVERSE_NUMBERS_AS_L},
                        {&impAct0, &impAct0}};
1694 
/* This table is read through impAct1, so its cells s(2,..) and s(3,..) do not
 * mean cases 2 and 3: they select cases 13 and 14 of processPropertySeq()
 * ("L after L+ON+EN/AN/ON" and "R after L+ON+EN/AN/ON").
 */
static const ImpTab impTabR_INVERSE_LIKE_DIRECT =   /* Odd  paragraph level */
/*  In this table, conditional sequences receive the lower possible level
    until proven otherwise.
*/
{
/*                         L ,     R ,    EN ,    AN ,    ON ,     S ,     B , Res */
/* 0 : init       */ {     1 ,     0 ,     2 ,     2 ,     0 ,     0 ,     0 ,  0 },
/* 1 : L          */ {     1 ,     0 ,     1 ,     2 , s(1,3), s(1,3),     0 ,  1 },
/* 2 : EN/AN      */ {     1 ,     0 ,     2 ,     2 ,     0 ,     0 ,     0 ,  1 },
/* 3 : L+ON       */ { s(2,1), s(3,0),     6 ,     4 ,     3 ,     3 , s(3,0),  0 },
/* 4 : L+ON+AN    */ { s(2,1), s(3,0),     6 ,     4 ,     5 ,     5 , s(3,0),  3 },
/* 5 : L+AN+ON    */ { s(2,1), s(3,0),     6 ,     4 ,     5 ,     5 , s(3,0),  2 },
/* 6 : L+ON+EN    */ { s(2,1), s(3,0),     6 ,     4 ,     3 ,     3 , s(3,0),  1 }
};
/* Action map: table-local 1 -> case 1, 2 -> case 13, 3 -> case 14. */
static const ImpAct impAct1 = {0,1,13,14};
/* FOOD FOR THOUGHT: in LTR table below, check case "JKL 123abc"
 */
/* The L slot reuses impTabL_DEFAULT with the identity map impAct0. */
static const ImpTabPair impTab_INVERSE_LIKE_DIRECT = {
                        {&impTabL_DEFAULT,
                         &impTabR_INVERSE_LIKE_DIRECT},
                        {&impAct0, &impAct1}};
1716 
/* The action numbers in the two tables below are table-local: the L table is
 * read through impAct2 and the R table through impAct3 (both defined after
 * the tables), so e.g. s(3,0) means case 5 in the L table but case 10 in the
 * R table of processPropertySeq().
 */
static const ImpTab impTabL_INVERSE_LIKE_DIRECT_WITH_MARKS =
/*  The case handled in this table is (visually):  R EN L
*/
{
/*                         L ,     R ,    EN ,    AN ,    ON ,     S ,     B , Res */
/* 0 : init       */ {     0 , s(6,3),     0 ,     1 ,     0 ,     0 ,     0 ,  0 },
/* 1 : L+AN       */ {     0 , s(6,3),     0 ,     1 , s(1,2), s(3,0),     0 ,  4 },
/* 2 : L+AN+ON    */ { s(2,0), s(6,3), s(2,0),     1 ,     2 , s(3,0), s(2,0),  3 },
/* 3 : R          */ {     0 , s(6,3), s(5,5), s(5,6), s(1,4), s(3,0),     0 ,  3 },
/* 4 : R+ON       */ { s(3,0), s(4,3), s(5,5), s(5,6),     4 , s(3,0), s(3,0),  3 },
/* 5 : R+EN       */ { s(3,0), s(4,3),     5 , s(5,6), s(1,4), s(3,0), s(3,0),  4 },
/* 6 : R+AN       */ { s(3,0), s(4,3), s(5,5),     6 , s(1,4), s(3,0), s(3,0),  4 }
};
static const ImpTab impTabR_INVERSE_LIKE_DIRECT_WITH_MARKS =
/*  The cases handled in this table are (visually):  R EN L
                                                     R L AN L
*/
{
/*                         L ,     R ,    EN ,    AN ,    ON ,     S ,     B , Res */
/* 0 : init       */ { s(1,3),     0 ,     1 ,     1 ,     0 ,     0 ,     0 ,  0 },
/* 1 : R+EN/AN    */ { s(2,3),     0 ,     1 ,     1 ,     2 , s(4,0),     0 ,  1 },
/* 2 : R+EN/AN+ON */ { s(2,3),     0 ,     1 ,     1 ,     2 , s(4,0),     0 ,  0 },
/* 3 : L          */ {     3 ,     0 ,     3 , s(3,6), s(1,4), s(4,0),     0 ,  1 },
/* 4 : L+ON       */ { s(5,3), s(4,0),     5 , s(3,6),     4 , s(4,0), s(4,0),  0 },
/* 5 : L+ON+EN    */ { s(5,3), s(4,0),     5 , s(3,6),     4 , s(4,0), s(4,0),  1 },
/* 6 : L+AN       */ { s(5,3), s(4,0),     6 ,     6 ,     4 , s(4,0), s(4,0),  3 }
};
/* Action map of the L table: local 1,2 -> cases 1,2; local 3..6 -> cases 5..8. */
static const ImpAct impAct2 = {0,1,2,5,6,7,8};
/* Action map of the R table: local 1 -> case 1; local 2..5 -> cases 9..12. */
static const ImpAct impAct3 = {0,1,9,10,11,12};
static const ImpTabPair impTab_INVERSE_LIKE_DIRECT_WITH_MARKS = {
                        {&impTabL_INVERSE_LIKE_DIRECT_WITH_MARKS,
                         &impTabR_INVERSE_LIKE_DIRECT_WITH_MARKS},
                        {&impAct2, &impAct3}};
1750 
/* No table of its own: combines impTabL_NUMBERS_SPECIAL (identity map
 * impAct0) with impTabR_INVERSE_LIKE_DIRECT (map impAct1).
 */
static const ImpTabPair impTab_INVERSE_FOR_NUMBERS_SPECIAL = {
                        {&impTabL_NUMBERS_SPECIAL,
                         &impTabR_INVERSE_LIKE_DIRECT},
                        {&impAct0, &impAct1}};

/* Read through impAct2: the local actions 3, 4, 5 and 6 used below select
 * cases 5, 6, 7 and 8 of processPropertySeq(); local action 1 is case 1.
 */
static const ImpTab impTabL_INVERSE_FOR_NUMBERS_SPECIAL_WITH_MARKS =
/*  The case handled in this table is (visually):  R EN L
*/
{
/*                         L ,     R ,    EN ,    AN ,    ON ,     S ,     B , Res */
/* 0 : init       */ {     0 , s(6,2),     1 ,     1 ,     0 ,     0 ,     0 ,  0 },
/* 1 : L+EN/AN    */ {     0 , s(6,2),     1 ,     1 ,     0 , s(3,0),     0 ,  4 },
/* 2 : R          */ {     0 , s(6,2), s(5,4), s(5,4), s(1,3), s(3,0),     0 ,  3 },
/* 3 : R+ON       */ { s(3,0), s(4,2), s(5,4), s(5,4),     3 , s(3,0), s(3,0),  3 },
/* 4 : R+EN/AN    */ { s(3,0), s(4,2),     4 ,     4 , s(1,3), s(3,0), s(3,0),  4 }
};
/* The R slot reuses impTabR_INVERSE_LIKE_DIRECT_WITH_MARKS with its map impAct3. */
static const ImpTabPair impTab_INVERSE_FOR_NUMBERS_SPECIAL_WITH_MARKS = {
                        {&impTabL_INVERSE_FOR_NUMBERS_SPECIAL_WITH_MARKS,
                         &impTabR_INVERSE_LIKE_DIRECT_WITH_MARKS},
                        {&impAct2, &impAct3}};
1771 
1772 #undef s
1773 
/* Working state that processPropertySeq() reads and updates from one
 * property sequence to the next while the levels of a run are resolved.
 */
typedef struct {
    const ImpTab * pImpTab;             /* level table pointer          */
    const ImpAct * pImpAct;             /* action map array (table-local action -> switch case) */
    int32_t startON;                    /* start of ON sequence; reset to -1 by actions 6 and 8 */
    int32_t startL2EN;                  /* start of level 2 sequence; -1 if none, -2 once action 7
                                           has turned it into an insert point */
    int32_t lastStrongRTL;              /* index of last found R or AL; -1 if none */
    int32_t state;                      /* current state (row index into *pImpTab) */
    int32_t runStart;                   /* start position of the run    */
    UBiDiLevel runLevel;                /* run level before implicit solving; the Res
                                           increments are added to it   */
} LevState;
1784 
1785 /*------------------------------------------------------------------------*/
1786 
1787 static void
addPoint(UBiDi * pBiDi,int32_t pos,int32_t flag)1788 addPoint(UBiDi *pBiDi, int32_t pos, int32_t flag)
1789   /* param pos:     position where to insert
1790      param flag:    one of LRM_BEFORE, LRM_AFTER, RLM_BEFORE, RLM_AFTER
1791   */
1792 {
1793 #define FIRSTALLOC  10
1794     Point point;
1795     InsertPoints * pInsertPoints=&(pBiDi->insertPoints);
1796 
1797     if (pInsertPoints->capacity == 0)
1798     {
1799         pInsertPoints->points=static_cast<Point *>(uprv_malloc(sizeof(Point)*FIRSTALLOC));
1800         if (pInsertPoints->points == NULL)
1801         {
1802             pInsertPoints->errorCode=U_MEMORY_ALLOCATION_ERROR;
1803             return;
1804         }
1805         pInsertPoints->capacity=FIRSTALLOC;
1806     }
1807     if (pInsertPoints->size >= pInsertPoints->capacity) /* no room for new point */
1808     {
1809         Point * savePoints=pInsertPoints->points;
1810         pInsertPoints->points=static_cast<Point *>(uprv_realloc(pInsertPoints->points,
1811                                            pInsertPoints->capacity*2*sizeof(Point)));
1812         if (pInsertPoints->points == NULL)
1813         {
1814             pInsertPoints->points=savePoints;
1815             pInsertPoints->errorCode=U_MEMORY_ALLOCATION_ERROR;
1816             return;
1817         }
1818         else  pInsertPoints->capacity*=2;
1819     }
1820     point.pos=pos;
1821     point.flag=flag;
1822     pInsertPoints->points[pInsertPoints->size]=point;
1823     pInsertPoints->size++;
1824 #undef FIRSTALLOC
1825 }
1826 
1827 static void
setLevelsOutsideIsolates(UBiDi * pBiDi,int32_t start,int32_t limit,UBiDiLevel level)1828 setLevelsOutsideIsolates(UBiDi *pBiDi, int32_t start, int32_t limit, UBiDiLevel level)
1829 {
1830     DirProp *dirProps=pBiDi->dirProps, dirProp;
1831     UBiDiLevel *levels=pBiDi->levels;
1832     int32_t isolateCount=0, k;
1833     for(k=start; k<limit; k++) {
1834         dirProp=dirProps[k];
1835         if(dirProp==PDI)
1836             isolateCount--;
1837         if(isolateCount==0)
1838             levels[k]=level;
1839         if(dirProp==LRI || dirProp==RLI)
1840             isolateCount++;
1841     }
1842 }
1843 
1844 /* perform rules (Wn), (Nn), and (In) on a run of the text ------------------ */
1845 
1846 /*
1847  * This implementation of the (Wn) rules applies all rules in one pass.
1848  * In order to do so, it needs a look-ahead of typically 1 character
1849  * (except for W5: sequences of ET) and keeps track of changes
1850  * in a rule Wp that affect a later Wq (p<q).
1851  *
1852  * The (Nn) and (In) rules are also performed in that same single loop,
1853  * but effectively one iteration behind for white space.
1854  *
1855  * Since all implicit rules are performed in one step, it is not necessary
1856  * to actually store the intermediate directional properties in dirProps[].
1857  */
1858 
1859 static void
processPropertySeq(UBiDi * pBiDi,LevState * pLevState,uint8_t _prop,int32_t start,int32_t limit)1860 processPropertySeq(UBiDi *pBiDi, LevState *pLevState, uint8_t _prop,
1861                    int32_t start, int32_t limit) {
1862     uint8_t cell, oldStateSeq, actionSeq;
1863     const ImpTab * pImpTab=pLevState->pImpTab;
1864     const ImpAct * pImpAct=pLevState->pImpAct;
1865     UBiDiLevel * levels=pBiDi->levels;
1866     UBiDiLevel level, addLevel;
1867     InsertPoints * pInsertPoints;
1868     int32_t start0, k;
1869 
1870     start0=start;                           /* save original start position */
1871     oldStateSeq=(uint8_t)pLevState->state;
1872     cell=(*pImpTab)[oldStateSeq][_prop];
1873     pLevState->state=GET_STATE(cell);       /* isolate the new state */
1874     actionSeq=(*pImpAct)[GET_ACTION(cell)]; /* isolate the action */
1875     addLevel=(*pImpTab)[pLevState->state][IMPTABLEVELS_RES];
1876 
1877     if(actionSeq) {
1878         switch(actionSeq) {
1879         case 1:                         /* init ON seq */
1880             pLevState->startON=start0;
1881             break;
1882 
1883         case 2:                         /* prepend ON seq to current seq */
1884             start=pLevState->startON;
1885             break;
1886 
1887         case 3:                         /* EN/AN after R+ON */
1888             level=pLevState->runLevel+1;
1889             setLevelsOutsideIsolates(pBiDi, pLevState->startON, start0, level);
1890             break;
1891 
1892         case 4:                         /* EN/AN before R for NUMBERS_SPECIAL */
1893             level=pLevState->runLevel+2;
1894             setLevelsOutsideIsolates(pBiDi, pLevState->startON, start0, level);
1895             break;
1896 
1897         case 5:                         /* L or S after possible relevant EN/AN */
1898             /* check if we had EN after R/AL */
1899             if (pLevState->startL2EN >= 0) {
1900                 addPoint(pBiDi, pLevState->startL2EN, LRM_BEFORE);
1901             }
1902             pLevState->startL2EN=-1;  /* not within previous if since could also be -2 */
1903             /* check if we had any relevant EN/AN after R/AL */
1904             pInsertPoints=&(pBiDi->insertPoints);
1905             if ((pInsertPoints->capacity == 0) ||
1906                 (pInsertPoints->size <= pInsertPoints->confirmed))
1907             {
1908                 /* nothing, just clean up */
1909                 pLevState->lastStrongRTL=-1;
1910                 /* check if we have a pending conditional segment */
1911                 level=(*pImpTab)[oldStateSeq][IMPTABLEVELS_RES];
1912                 if ((level & 1) && (pLevState->startON > 0)) {  /* after ON */
1913                     start=pLevState->startON;   /* reset to basic run level */
1914                 }
1915                 if (_prop == DirProp_S)                /* add LRM before S */
1916                 {
1917                     addPoint(pBiDi, start0, LRM_BEFORE);
1918                     pInsertPoints->confirmed=pInsertPoints->size;
1919                 }
1920                 break;
1921             }
1922             /* reset previous RTL cont to level for LTR text */
1923             for (k=pLevState->lastStrongRTL+1; k<start0; k++)
1924             {
1925                 /* reset odd level, leave runLevel+2 as is */
1926                 levels[k]=(levels[k] - 2) & ~1;
1927             }
1928             /* mark insert points as confirmed */
1929             pInsertPoints->confirmed=pInsertPoints->size;
1930             pLevState->lastStrongRTL=-1;
1931             if (_prop == DirProp_S)            /* add LRM before S */
1932             {
1933                 addPoint(pBiDi, start0, LRM_BEFORE);
1934                 pInsertPoints->confirmed=pInsertPoints->size;
1935             }
1936             break;
1937 
1938         case 6:                         /* R/AL after possible relevant EN/AN */
1939             /* just clean up */
1940             pInsertPoints=&(pBiDi->insertPoints);
1941             if (pInsertPoints->capacity > 0)
1942                 /* remove all non confirmed insert points */
1943                 pInsertPoints->size=pInsertPoints->confirmed;
1944             pLevState->startON=-1;
1945             pLevState->startL2EN=-1;
1946             pLevState->lastStrongRTL=limit - 1;
1947             break;
1948 
1949         case 7:                         /* EN/AN after R/AL + possible cont */
1950             /* check for real AN */
1951             if ((_prop == DirProp_AN) && (pBiDi->dirProps[start0] == AN) &&
1952                 (pBiDi->reorderingMode!=UBIDI_REORDER_INVERSE_FOR_NUMBERS_SPECIAL))
1953             {
1954                 /* real AN */
1955                 if (pLevState->startL2EN == -1) /* if no relevant EN already found */
1956                 {
1957                     /* just note the righmost digit as a strong RTL */
1958                     pLevState->lastStrongRTL=limit - 1;
1959                     break;
1960                 }
1961                 if (pLevState->startL2EN >= 0)  /* after EN, no AN */
1962                 {
1963                     addPoint(pBiDi, pLevState->startL2EN, LRM_BEFORE);
1964                     pLevState->startL2EN=-2;
1965                 }
1966                 /* note AN */
1967                 addPoint(pBiDi, start0, LRM_BEFORE);
1968                 break;
1969             }
1970             /* if first EN/AN after R/AL */
1971             if (pLevState->startL2EN == -1) {
1972                 pLevState->startL2EN=start0;
1973             }
1974             break;
1975 
1976         case 8:                         /* note location of latest R/AL */
1977             pLevState->lastStrongRTL=limit - 1;
1978             pLevState->startON=-1;
1979             break;
1980 
1981         case 9:                         /* L after R+ON/EN/AN */
1982             /* include possible adjacent number on the left */
1983             for (k=start0-1; k>=0 && !(levels[k]&1); k--);
1984             if(k>=0) {
1985                 addPoint(pBiDi, k, RLM_BEFORE);             /* add RLM before */
1986                 pInsertPoints=&(pBiDi->insertPoints);
1987                 pInsertPoints->confirmed=pInsertPoints->size;   /* confirm it */
1988             }
1989             pLevState->startON=start0;
1990             break;
1991 
1992         case 10:                        /* AN after L */
1993             /* AN numbers between L text on both sides may be trouble. */
1994             /* tentatively bracket with LRMs; will be confirmed if followed by L */
1995             addPoint(pBiDi, start0, LRM_BEFORE);    /* add LRM before */
1996             addPoint(pBiDi, start0, LRM_AFTER);     /* add LRM after  */
1997             break;
1998 
1999         case 11:                        /* R after L+ON/EN/AN */
2000             /* false alert, infirm LRMs around previous AN */
2001             pInsertPoints=&(pBiDi->insertPoints);
2002             pInsertPoints->size=pInsertPoints->confirmed;
2003             if (_prop == DirProp_S)            /* add RLM before S */
2004             {
2005                 addPoint(pBiDi, start0, RLM_BEFORE);
2006                 pInsertPoints->confirmed=pInsertPoints->size;
2007             }
2008             break;
2009 
2010         case 12:                        /* L after L+ON/AN */
2011             level=pLevState->runLevel + addLevel;
2012             for(k=pLevState->startON; k<start0; k++) {
2013                 if (levels[k]<level)
2014                     levels[k]=level;
2015             }
2016             pInsertPoints=&(pBiDi->insertPoints);
2017             pInsertPoints->confirmed=pInsertPoints->size;   /* confirm inserts */
2018             pLevState->startON=start0;
2019             break;
2020 
2021         case 13:                        /* L after L+ON+EN/AN/ON */
2022             level=pLevState->runLevel;
2023             for(k=start0-1; k>=pLevState->startON; k--) {
2024                 if(levels[k]==level+3) {
2025                     while(levels[k]==level+3) {
2026                         levels[k--]-=2;
2027                     }
2028                     while(levels[k]==level) {
2029                         k--;
2030                     }
2031                 }
2032                 if(levels[k]==level+2) {
2033                     levels[k]=level;
2034                     continue;
2035                 }
2036                 levels[k]=level+1;
2037             }
2038             break;
2039 
2040         case 14:                        /* R after L+ON+EN/AN/ON */
2041             level=pLevState->runLevel+1;
2042             for(k=start0-1; k>=pLevState->startON; k--) {
2043                 if(levels[k]>level) {
2044                     levels[k]-=2;
2045                 }
2046             }
2047             break;
2048 
2049         default:                        /* we should never get here */
2050             UPRV_UNREACHABLE;
2051         }
2052     }
2053     if((addLevel) || (start < start0)) {
2054         level=pLevState->runLevel + addLevel;
2055         if(start>=pLevState->runStart) {
2056             for(k=start; k<limit; k++) {
2057                 levels[k]=level;
2058             }
2059         } else {
2060             setLevelsOutsideIsolates(pBiDi, start, limit, level);
2061         }
2062     }
2063 }
2064 
2065 /**
2066  * Returns the directionality of the last strong character at the end of the prologue, if any.
2067  * Requires prologue!=null.
2068  */
2069 static DirProp
lastL_R_AL(UBiDi * pBiDi)2070 lastL_R_AL(UBiDi *pBiDi) {
2071     const UChar *text=pBiDi->prologue;
2072     int32_t length=pBiDi->proLength;
2073     int32_t i;
2074     UChar32 uchar;
2075     DirProp dirProp;
2076     for(i=length; i>0; ) {
2077         /* i is decremented by U16_PREV */
2078         U16_PREV(text, 0, i, uchar);
2079         dirProp=(DirProp)ubidi_getCustomizedClass(pBiDi, uchar);
2080         if(dirProp==L) {
2081             return DirProp_L;
2082         }
2083         if(dirProp==R || dirProp==AL) {
2084             return DirProp_R;
2085         }
2086         if(dirProp==B) {
2087             return DirProp_ON;
2088         }
2089     }
2090     return DirProp_ON;
2091 }
2092 
2093 /**
2094  * Returns the directionality of the first strong character, or digit, in the epilogue, if any.
2095  * Requires epilogue!=null.
2096  */
2097 static DirProp
firstL_R_AL_EN_AN(UBiDi * pBiDi)2098 firstL_R_AL_EN_AN(UBiDi *pBiDi) {
2099     const UChar *text=pBiDi->epilogue;
2100     int32_t length=pBiDi->epiLength;
2101     int32_t i;
2102     UChar32 uchar;
2103     DirProp dirProp;
2104     for(i=0; i<length; ) {
2105         /* i is incremented by U16_NEXT */
2106         U16_NEXT(text, i, length, uchar);
2107         dirProp=(DirProp)ubidi_getCustomizedClass(pBiDi, uchar);
2108         if(dirProp==L) {
2109             return DirProp_L;
2110         }
2111         if(dirProp==R || dirProp==AL) {
2112             return DirProp_R;
2113         }
2114         if(dirProp==EN) {
2115             return DirProp_EN;
2116         }
2117         if(dirProp==AN) {
2118             return DirProp_AN;
2119         }
2120     }
2121     return DirProp_ON;
2122 }
2123 
2124 static void
resolveImplicitLevels(UBiDi * pBiDi,int32_t start,int32_t limit,DirProp sor,DirProp eor)2125 resolveImplicitLevels(UBiDi *pBiDi,
2126                       int32_t start, int32_t limit,
2127                       DirProp sor, DirProp eor) {
2128     const DirProp *dirProps=pBiDi->dirProps;
2129     DirProp dirProp;
2130     LevState levState;
2131     int32_t i, start1, start2;
2132     uint16_t oldStateImp, stateImp, actionImp;
2133     uint8_t gprop, resProp, cell;
2134     UBool inverseRTL;
2135     DirProp nextStrongProp=R;
2136     int32_t nextStrongPos=-1;
2137 
2138     /* check for RTL inverse BiDi mode */
2139     /* FOOD FOR THOUGHT: in case of RTL inverse BiDi, it would make sense to
2140      * loop on the text characters from end to start.
2141      * This would need a different properties state table (at least different
2142      * actions) and different levels state tables (maybe very similar to the
2143      * LTR corresponding ones.
2144      */
2145     inverseRTL=(UBool)
2146         ((start<pBiDi->lastArabicPos) && (GET_PARALEVEL(pBiDi, start) & 1) &&
2147          (pBiDi->reorderingMode==UBIDI_REORDER_INVERSE_LIKE_DIRECT  ||
2148           pBiDi->reorderingMode==UBIDI_REORDER_INVERSE_FOR_NUMBERS_SPECIAL));
2149 
2150     /* initialize for property and levels state tables */
2151     levState.startL2EN=-1;              /* used for INVERSE_LIKE_DIRECT_WITH_MARKS */
2152     levState.lastStrongRTL=-1;          /* used for INVERSE_LIKE_DIRECT_WITH_MARKS */
2153     levState.runStart=start;
2154     levState.runLevel=pBiDi->levels[start];
2155     levState.pImpTab=(const ImpTab*)((pBiDi->pImpTabPair)->pImpTab)[levState.runLevel&1];
2156     levState.pImpAct=(const ImpAct*)((pBiDi->pImpTabPair)->pImpAct)[levState.runLevel&1];
2157     if(start==0 && pBiDi->proLength>0) {
2158         DirProp lastStrong=lastL_R_AL(pBiDi);
2159         if(lastStrong!=DirProp_ON) {
2160             sor=lastStrong;
2161         }
2162     }
2163     /* The isolates[] entries contain enough information to
2164        resume the bidi algorithm in the same state as it was
2165        when it was interrupted by an isolate sequence. */
2166     if(dirProps[start]==PDI  && pBiDi->isolateCount >= 0) {
2167         levState.startON=pBiDi->isolates[pBiDi->isolateCount].startON;
2168         start1=pBiDi->isolates[pBiDi->isolateCount].start1;
2169         stateImp=pBiDi->isolates[pBiDi->isolateCount].stateImp;
2170         levState.state=pBiDi->isolates[pBiDi->isolateCount].state;
2171         pBiDi->isolateCount--;
2172     } else {
2173         levState.startON=-1;
2174         start1=start;
2175         if(dirProps[start]==NSM)
2176             stateImp = 1 + sor;
2177         else
2178             stateImp=0;
2179         levState.state=0;
2180         processPropertySeq(pBiDi, &levState, sor, start, start);
2181     }
2182     start2=start;                       /* to make Java compiler happy */
2183 
2184     for(i=start; i<=limit; i++) {
2185         if(i>=limit) {
2186             int32_t k;
2187             for(k=limit-1; k>start&&(DIRPROP_FLAG(dirProps[k])&MASK_BN_EXPLICIT); k--);
2188             dirProp=dirProps[k];
2189             if(dirProp==LRI || dirProp==RLI)
2190                 break;      /* no forced closing for sequence ending with LRI/RLI */
2191             gprop=eor;
2192         } else {
2193             DirProp prop, prop1;
2194             prop=dirProps[i];
2195             if(prop==B) {
2196                 pBiDi->isolateCount=-1; /* current isolates stack entry == none */
2197             }
2198             if(inverseRTL) {
2199                 if(prop==AL) {
2200                     /* AL before EN does not make it AN */
2201                     prop=R;
2202                 } else if(prop==EN) {
2203                     if(nextStrongPos<=i) {
2204                         /* look for next strong char (L/R/AL) */
2205                         int32_t j;
2206                         nextStrongProp=R;   /* set default */
2207                         nextStrongPos=limit;
2208                         for(j=i+1; j<limit; j++) {
2209                             prop1=dirProps[j];
2210                             if(prop1==L || prop1==R || prop1==AL) {
2211                                 nextStrongProp=prop1;
2212                                 nextStrongPos=j;
2213                                 break;
2214                             }
2215                         }
2216                     }
2217                     if(nextStrongProp==AL) {
2218                         prop=AN;
2219                     }
2220                 }
2221             }
2222             gprop=groupProp[prop];
2223         }
2224         oldStateImp=stateImp;
2225         cell=impTabProps[oldStateImp][gprop];
2226         stateImp=GET_STATEPROPS(cell);      /* isolate the new state */
2227         actionImp=GET_ACTIONPROPS(cell);    /* isolate the action */
2228         if((i==limit) && (actionImp==0)) {
2229             /* there is an unprocessed sequence if its property == eor   */
2230             actionImp=1;                    /* process the last sequence */
2231         }
2232         if(actionImp) {
2233             resProp=impTabProps[oldStateImp][IMPTABPROPS_RES];
2234             switch(actionImp) {
2235             case 1:             /* process current seq1, init new seq1 */
2236                 processPropertySeq(pBiDi, &levState, resProp, start1, i);
2237                 start1=i;
2238                 break;
2239             case 2:             /* init new seq2 */
2240                 start2=i;
2241                 break;
2242             case 3:             /* process seq1, process seq2, init new seq1 */
2243                 processPropertySeq(pBiDi, &levState, resProp, start1, start2);
2244                 processPropertySeq(pBiDi, &levState, DirProp_ON, start2, i);
2245                 start1=i;
2246                 break;
2247             case 4:             /* process seq1, set seq1=seq2, init new seq2 */
2248                 processPropertySeq(pBiDi, &levState, resProp, start1, start2);
2249                 start1=start2;
2250                 start2=i;
2251                 break;
2252             default:            /* we should never get here */
2253                 UPRV_UNREACHABLE;
2254             }
2255         }
2256     }
2257 
2258     /* flush possible pending sequence, e.g. ON */
2259     if(limit==pBiDi->length && pBiDi->epiLength>0) {
2260         DirProp firstStrong=firstL_R_AL_EN_AN(pBiDi);
2261         if(firstStrong!=DirProp_ON) {
2262             eor=firstStrong;
2263         }
2264     }
2265 
2266     /* look for the last char not a BN or LRE/RLE/LRO/RLO/PDF */
2267     for(i=limit-1; i>start&&(DIRPROP_FLAG(dirProps[i])&MASK_BN_EXPLICIT); i--);
2268     dirProp=dirProps[i];
2269     if((dirProp==LRI || dirProp==RLI) && limit<pBiDi->length) {
2270         pBiDi->isolateCount++;
2271         pBiDi->isolates[pBiDi->isolateCount].stateImp=stateImp;
2272         pBiDi->isolates[pBiDi->isolateCount].state=levState.state;
2273         pBiDi->isolates[pBiDi->isolateCount].start1=start1;
2274         pBiDi->isolates[pBiDi->isolateCount].startON=levState.startON;
2275     }
2276     else
2277         processPropertySeq(pBiDi, &levState, eor, limit, limit);
2278 }
2279 
2280 /* perform (L1) and (X9) ---------------------------------------------------- */
2281 
2282 /*
2283  * Reset the embedding levels for some non-graphic characters (L1).
2284  * This function also sets appropriate levels for BN, and
2285  * explicit embedding types that are supposed to have been removed
2286  * from the paragraph in (X9).
2287  */
2288 static void
adjustWSLevels(UBiDi * pBiDi)2289 adjustWSLevels(UBiDi *pBiDi) {
2290     const DirProp *dirProps=pBiDi->dirProps;
2291     UBiDiLevel *levels=pBiDi->levels;
2292     int32_t i;
2293 
2294     if(pBiDi->flags&MASK_WS) {
2295         UBool orderParagraphsLTR=pBiDi->orderParagraphsLTR;
2296         Flags flag;
2297 
2298         i=pBiDi->trailingWSStart;
2299         while(i>0) {
2300             /* reset a sequence of WS/BN before eop and B/S to the paragraph paraLevel */
2301             while(i>0 && (flag=DIRPROP_FLAG(dirProps[--i]))&MASK_WS) {
2302                 if(orderParagraphsLTR&&(flag&DIRPROP_FLAG(B))) {
2303                     levels[i]=0;
2304                 } else {
2305                     levels[i]=GET_PARALEVEL(pBiDi, i);
2306                 }
2307             }
2308 
2309             /* reset BN to the next character's paraLevel until B/S, which restarts above loop */
2310             /* here, i+1 is guaranteed to be <length */
2311             while(i>0) {
2312                 flag=DIRPROP_FLAG(dirProps[--i]);
2313                 if(flag&MASK_BN_EXPLICIT) {
2314                     levels[i]=levels[i+1];
2315                 } else if(orderParagraphsLTR&&(flag&DIRPROP_FLAG(B))) {
2316                     levels[i]=0;
2317                     break;
2318                 } else if(flag&MASK_B_S) {
2319                     levels[i]=GET_PARALEVEL(pBiDi, i);
2320                     break;
2321                 }
2322             }
2323         }
2324     }
2325 }
2326 
2327 U_CAPI void U_EXPORT2
ubidi_setContext(UBiDi * pBiDi,const UChar * prologue,int32_t proLength,const UChar * epilogue,int32_t epiLength,UErrorCode * pErrorCode)2328 ubidi_setContext(UBiDi *pBiDi,
2329                  const UChar *prologue, int32_t proLength,
2330                  const UChar *epilogue, int32_t epiLength,
2331                  UErrorCode *pErrorCode) {
2332     /* check the argument values */
2333     RETURN_VOID_IF_NULL_OR_FAILING_ERRCODE(pErrorCode);
2334     if(pBiDi==NULL || proLength<-1 || epiLength<-1 ||
2335        (prologue==NULL && proLength!=0) || (epilogue==NULL && epiLength!=0)) {
2336         *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
2337         return;
2338     }
2339 
2340     if(proLength==-1) {
2341         pBiDi->proLength=u_strlen(prologue);
2342     } else {
2343         pBiDi->proLength=proLength;
2344     }
2345     if(epiLength==-1) {
2346         pBiDi->epiLength=u_strlen(epilogue);
2347     } else {
2348         pBiDi->epiLength=epiLength;
2349     }
2350     pBiDi->prologue=prologue;
2351     pBiDi->epilogue=epilogue;
2352 }
2353 
2354 static void
setParaSuccess(UBiDi * pBiDi)2355 setParaSuccess(UBiDi *pBiDi) {
2356     pBiDi->proLength=0;                 /* forget the last context */
2357     pBiDi->epiLength=0;
2358     pBiDi->pParaBiDi=pBiDi;             /* mark successful setPara */
2359 }
2360 
/*
 * Smaller of two values / absolute value of one value.
 * These are plain function-like macros: an argument can be evaluated twice,
 * so only pass expressions without side effects.
 */
#define BIDI_MIN(x, y)   ((x)<(y) ? (x) : (y))
#define BIDI_ABS(x)      ((x)>=0  ? (x) : (-(x)))
2363 
2364 static void
setParaRunsOnly(UBiDi * pBiDi,const UChar * text,int32_t length,UBiDiLevel paraLevel,UErrorCode * pErrorCode)2365 setParaRunsOnly(UBiDi *pBiDi, const UChar *text, int32_t length,
2366                 UBiDiLevel paraLevel, UErrorCode *pErrorCode) {
2367     int32_t *runsOnlyMemory = NULL;
2368     int32_t *visualMap;
2369     UChar *visualText;
2370     int32_t saveLength, saveTrailingWSStart;
2371     const UBiDiLevel *levels;
2372     UBiDiLevel *saveLevels;
2373     UBiDiDirection saveDirection;
2374     UBool saveMayAllocateText;
2375     Run *runs;
2376     int32_t visualLength, i, j, visualStart, logicalStart,
2377             runCount, runLength, addedRuns, insertRemove,
2378             start, limit, step, indexOddBit, logicalPos,
2379             index0, index1;
2380     uint32_t saveOptions;
2381 
2382     pBiDi->reorderingMode=UBIDI_REORDER_DEFAULT;
2383     if(length==0) {
2384         ubidi_setPara(pBiDi, text, length, paraLevel, NULL, pErrorCode);
2385         goto cleanup3;
2386     }
2387     /* obtain memory for mapping table and visual text */
2388     runsOnlyMemory=static_cast<int32_t *>(uprv_malloc(length*(sizeof(int32_t)+sizeof(UChar)+sizeof(UBiDiLevel))));
2389     if(runsOnlyMemory==NULL) {
2390         *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
2391         goto cleanup3;
2392     }
2393     visualMap=runsOnlyMemory;
2394     visualText=(UChar *)&visualMap[length];
2395     saveLevels=(UBiDiLevel *)&visualText[length];
2396     saveOptions=pBiDi->reorderingOptions;
2397     if(saveOptions & UBIDI_OPTION_INSERT_MARKS) {
2398         pBiDi->reorderingOptions&=~UBIDI_OPTION_INSERT_MARKS;
2399         pBiDi->reorderingOptions|=UBIDI_OPTION_REMOVE_CONTROLS;
2400     }
2401     paraLevel&=1;                       /* accept only 0 or 1 */
2402     ubidi_setPara(pBiDi, text, length, paraLevel, NULL, pErrorCode);
2403     if(U_FAILURE(*pErrorCode)) {
2404         goto cleanup3;
2405     }
2406     /* we cannot access directly pBiDi->levels since it is not yet set if
2407      * direction is not MIXED
2408      */
2409     levels=ubidi_getLevels(pBiDi, pErrorCode);
2410     uprv_memcpy(saveLevels, levels, (size_t)pBiDi->length*sizeof(UBiDiLevel));
2411     saveTrailingWSStart=pBiDi->trailingWSStart;
2412     saveLength=pBiDi->length;
2413     saveDirection=pBiDi->direction;
2414 
2415     /* FOOD FOR THOUGHT: instead of writing the visual text, we could use
2416      * the visual map and the dirProps array to drive the second call
2417      * to ubidi_setPara (but must make provision for possible removal of
2418      * BiDi controls.  Alternatively, only use the dirProps array via
2419      * customized classifier callback.
2420      */
2421     visualLength=ubidi_writeReordered(pBiDi, visualText, length,
2422                                       UBIDI_DO_MIRRORING, pErrorCode);
2423     ubidi_getVisualMap(pBiDi, visualMap, pErrorCode);
2424     if(U_FAILURE(*pErrorCode)) {
2425         goto cleanup2;
2426     }
2427     pBiDi->reorderingOptions=saveOptions;
2428 
2429     pBiDi->reorderingMode=UBIDI_REORDER_INVERSE_LIKE_DIRECT;
2430     paraLevel^=1;
2431     /* Because what we did with reorderingOptions, visualText may be shorter
2432      * than the original text. But we don't want the levels memory to be
2433      * reallocated shorter than the original length, since we need to restore
2434      * the levels as after the first call to ubidi_setpara() before returning.
2435      * We will force mayAllocateText to FALSE before the second call to
2436      * ubidi_setpara(), and will restore it afterwards.
2437      */
2438     saveMayAllocateText=pBiDi->mayAllocateText;
2439     pBiDi->mayAllocateText=FALSE;
2440     ubidi_setPara(pBiDi, visualText, visualLength, paraLevel, NULL, pErrorCode);
2441     pBiDi->mayAllocateText=saveMayAllocateText;
2442     ubidi_getRuns(pBiDi, pErrorCode);
2443     if(U_FAILURE(*pErrorCode)) {
2444         goto cleanup1;
2445     }
2446     /* check if some runs must be split, count how many splits */
2447     addedRuns=0;
2448     runCount=pBiDi->runCount;
2449     runs=pBiDi->runs;
2450     visualStart=0;
2451     for(i=0; i<runCount; i++, visualStart+=runLength) {
2452         runLength=runs[i].visualLimit-visualStart;
2453         if(runLength<2) {
2454             continue;
2455         }
2456         logicalStart=GET_INDEX(runs[i].logicalStart);
2457         for(j=logicalStart+1; j<logicalStart+runLength; j++) {
2458             index0=visualMap[j];
2459             index1=visualMap[j-1];
2460             if((BIDI_ABS(index0-index1)!=1) || (saveLevels[index0]!=saveLevels[index1])) {
2461                 addedRuns++;
2462             }
2463         }
2464     }
2465     if(addedRuns) {
2466         if(getRunsMemory(pBiDi, runCount+addedRuns)) {
2467             if(runCount==1) {
2468                 /* because we switch from UBiDi.simpleRuns to UBiDi.runs */
2469                 pBiDi->runsMemory[0]=runs[0];
2470             }
2471             runs=pBiDi->runs=pBiDi->runsMemory;
2472             pBiDi->runCount+=addedRuns;
2473         } else {
2474             goto cleanup1;
2475         }
2476     }
2477     /* split runs which are not consecutive in source text */
2478     for(i=runCount-1; i>=0; i--) {
2479         runLength= i==0 ? runs[0].visualLimit :
2480                           runs[i].visualLimit-runs[i-1].visualLimit;
2481         logicalStart=runs[i].logicalStart;
2482         indexOddBit=GET_ODD_BIT(logicalStart);
2483         logicalStart=GET_INDEX(logicalStart);
2484         if(runLength<2) {
2485             if(addedRuns) {
2486                 runs[i+addedRuns]=runs[i];
2487             }
2488             logicalPos=visualMap[logicalStart];
2489             runs[i+addedRuns].logicalStart=MAKE_INDEX_ODD_PAIR(logicalPos,
2490                                             saveLevels[logicalPos]^indexOddBit);
2491             continue;
2492         }
2493         if(indexOddBit) {
2494             start=logicalStart;
2495             limit=logicalStart+runLength-1;
2496             step=1;
2497         } else {
2498             start=logicalStart+runLength-1;
2499             limit=logicalStart;
2500             step=-1;
2501         }
2502         for(j=start; j!=limit; j+=step) {
2503             index0=visualMap[j];
2504             index1=visualMap[j+step];
2505             if((BIDI_ABS(index0-index1)!=1) || (saveLevels[index0]!=saveLevels[index1])) {
2506                 logicalPos=BIDI_MIN(visualMap[start], index0);
2507                 runs[i+addedRuns].logicalStart=MAKE_INDEX_ODD_PAIR(logicalPos,
2508                                             saveLevels[logicalPos]^indexOddBit);
2509                 runs[i+addedRuns].visualLimit=runs[i].visualLimit;
2510                 runs[i].visualLimit-=BIDI_ABS(j-start)+1;
2511                 insertRemove=runs[i].insertRemove&(LRM_AFTER|RLM_AFTER);
2512                 runs[i+addedRuns].insertRemove=insertRemove;
2513                 runs[i].insertRemove&=~insertRemove;
2514                 start=j+step;
2515                 addedRuns--;
2516             }
2517         }
2518         if(addedRuns) {
2519             runs[i+addedRuns]=runs[i];
2520         }
2521         logicalPos=BIDI_MIN(visualMap[start], visualMap[limit]);
2522         runs[i+addedRuns].logicalStart=MAKE_INDEX_ODD_PAIR(logicalPos,
2523                                             saveLevels[logicalPos]^indexOddBit);
2524     }
2525 
2526   cleanup1:
2527     /* restore initial paraLevel */
2528     pBiDi->paraLevel^=1;
2529   cleanup2:
2530     /* restore real text */
2531     pBiDi->text=text;
2532     pBiDi->length=saveLength;
2533     pBiDi->originalLength=length;
2534     pBiDi->direction=saveDirection;
2535     /* the saved levels should never excess levelsSize, but we check anyway */
2536     if(saveLength>pBiDi->levelsSize) {
2537         saveLength=pBiDi->levelsSize;
2538     }
2539     uprv_memcpy(pBiDi->levels, saveLevels, (size_t)saveLength*sizeof(UBiDiLevel));
2540     pBiDi->trailingWSStart=saveTrailingWSStart;
2541     if(pBiDi->runCount>1) {
2542         pBiDi->direction=UBIDI_MIXED;
2543     }
2544   cleanup3:
2545     /* free memory for mapping table and visual text */
2546     uprv_free(runsOnlyMemory);
2547 
2548     pBiDi->reorderingMode=UBIDI_REORDER_RUNS_ONLY;
2549 }
2550 
2551 /* ubidi_setPara ------------------------------------------------------------ */
2552 
2553 U_CAPI void U_EXPORT2
ubidi_setPara(UBiDi * pBiDi,const UChar * text,int32_t length,UBiDiLevel paraLevel,UBiDiLevel * embeddingLevels,UErrorCode * pErrorCode)2554 ubidi_setPara(UBiDi *pBiDi, const UChar *text, int32_t length,
2555               UBiDiLevel paraLevel, UBiDiLevel *embeddingLevels,
2556               UErrorCode *pErrorCode) {
2557     UBiDiDirection direction;
2558     DirProp *dirProps;
2559 
2560     /* check the argument values */
2561     RETURN_VOID_IF_NULL_OR_FAILING_ERRCODE(pErrorCode);
2562     if(pBiDi==NULL || text==NULL || length<-1 ||
2563        (paraLevel>UBIDI_MAX_EXPLICIT_LEVEL && paraLevel<UBIDI_DEFAULT_LTR)) {
2564         *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
2565         return;
2566     }
2567 
2568     if(length==-1) {
2569         length=u_strlen(text);
2570     }
2571 
2572     /* special treatment for RUNS_ONLY mode */
2573     if(pBiDi->reorderingMode==UBIDI_REORDER_RUNS_ONLY) {
2574         setParaRunsOnly(pBiDi, text, length, paraLevel, pErrorCode);
2575         return;
2576     }
2577 
2578     /* initialize the UBiDi structure */
2579     pBiDi->pParaBiDi=NULL;          /* mark unfinished setPara */
2580     pBiDi->text=text;
2581     pBiDi->length=pBiDi->originalLength=pBiDi->resultLength=length;
2582     pBiDi->paraLevel=paraLevel;
2583     pBiDi->direction=(UBiDiDirection)(paraLevel&1);
2584     pBiDi->paraCount=1;
2585 
2586     pBiDi->dirProps=NULL;
2587     pBiDi->levels=NULL;
2588     pBiDi->runs=NULL;
2589     pBiDi->insertPoints.size=0;         /* clean up from last call */
2590     pBiDi->insertPoints.confirmed=0;    /* clean up from last call */
2591 
2592     /*
2593      * Save the original paraLevel if contextual; otherwise, set to 0.
2594      */
2595     pBiDi->defaultParaLevel=IS_DEFAULT_LEVEL(paraLevel);
2596 
2597     if(length==0) {
2598         /*
2599          * For an empty paragraph, create a UBiDi object with the paraLevel and
2600          * the flags and the direction set but without allocating zero-length arrays.
2601          * There is nothing more to do.
2602          */
2603         if(IS_DEFAULT_LEVEL(paraLevel)) {
2604             pBiDi->paraLevel&=1;
2605             pBiDi->defaultParaLevel=0;
2606         }
2607         pBiDi->flags=DIRPROP_FLAG_LR(paraLevel);
2608         pBiDi->runCount=0;
2609         pBiDi->paraCount=0;
2610         setParaSuccess(pBiDi);          /* mark successful setPara */
2611         return;
2612     }
2613 
2614     pBiDi->runCount=-1;
2615 
2616     /* allocate paras memory */
2617     if(pBiDi->parasMemory)
2618         pBiDi->paras=pBiDi->parasMemory;
2619     else
2620         pBiDi->paras=pBiDi->simpleParas;
2621 
2622     /*
2623      * Get the directional properties,
2624      * the flags bit-set, and
2625      * determine the paragraph level if necessary.
2626      */
2627     if(getDirPropsMemory(pBiDi, length)) {
2628         pBiDi->dirProps=pBiDi->dirPropsMemory;
2629         if(!getDirProps(pBiDi)) {
2630             *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
2631             return;
2632         }
2633     } else {
2634         *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
2635         return;
2636     }
2637     dirProps=pBiDi->dirProps;
2638     /* the processed length may have changed if UBIDI_OPTION_STREAMING */
2639     length= pBiDi->length;
2640     pBiDi->trailingWSStart=length;  /* the levels[] will reflect the WS run */
2641 
2642     /* are explicit levels specified? */
2643     if(embeddingLevels==NULL) {
2644         /* no: determine explicit levels according to the (Xn) rules */\
2645         if(getLevelsMemory(pBiDi, length)) {
2646             pBiDi->levels=pBiDi->levelsMemory;
2647             direction=resolveExplicitLevels(pBiDi, pErrorCode);
2648             if(U_FAILURE(*pErrorCode)) {
2649                 return;
2650             }
2651         } else {
2652             *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
2653             return;
2654         }
2655     } else {
2656         /* set BN for all explicit codes, check that all levels are 0 or paraLevel..UBIDI_MAX_EXPLICIT_LEVEL */
2657         pBiDi->levels=embeddingLevels;
2658         direction=checkExplicitLevels(pBiDi, pErrorCode);
2659         if(U_FAILURE(*pErrorCode)) {
2660             return;
2661         }
2662     }
2663 
2664     /* allocate isolate memory */
2665     if(pBiDi->isolateCount<=SIMPLE_ISOLATES_COUNT)
2666         pBiDi->isolates=pBiDi->simpleIsolates;
2667     else
2668         if((int32_t)(pBiDi->isolateCount*sizeof(Isolate))<=pBiDi->isolatesSize)
2669             pBiDi->isolates=pBiDi->isolatesMemory;
2670         else {
2671             if(getInitialIsolatesMemory(pBiDi, pBiDi->isolateCount)) {
2672                 pBiDi->isolates=pBiDi->isolatesMemory;
2673             } else {
2674                 *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
2675                 return;
2676             }
2677         }
2678     pBiDi->isolateCount=-1;             /* current isolates stack entry == none */
2679 
2680     /*
2681      * The steps after (X9) in the UBiDi algorithm are performed only if
2682      * the paragraph text has mixed directionality!
2683      */
2684     pBiDi->direction=direction;
2685     switch(direction) {
2686     case UBIDI_LTR:
2687         /* all levels are implicitly at paraLevel (important for ubidi_getLevels()) */
2688         pBiDi->trailingWSStart=0;
2689         break;
2690     case UBIDI_RTL:
2691         /* all levels are implicitly at paraLevel (important for ubidi_getLevels()) */
2692         pBiDi->trailingWSStart=0;
2693         break;
2694     default:
2695         /*
2696          *  Choose the right implicit state table
2697          */
2698         switch(pBiDi->reorderingMode) {
2699         case UBIDI_REORDER_DEFAULT:
2700             pBiDi->pImpTabPair=&impTab_DEFAULT;
2701             break;
2702         case UBIDI_REORDER_NUMBERS_SPECIAL:
2703             pBiDi->pImpTabPair=&impTab_NUMBERS_SPECIAL;
2704             break;
2705         case UBIDI_REORDER_GROUP_NUMBERS_WITH_R:
2706             pBiDi->pImpTabPair=&impTab_GROUP_NUMBERS_WITH_R;
2707             break;
2708         case UBIDI_REORDER_INVERSE_NUMBERS_AS_L:
2709             pBiDi->pImpTabPair=&impTab_INVERSE_NUMBERS_AS_L;
2710             break;
2711         case UBIDI_REORDER_INVERSE_LIKE_DIRECT:
2712             if (pBiDi->reorderingOptions & UBIDI_OPTION_INSERT_MARKS) {
2713                 pBiDi->pImpTabPair=&impTab_INVERSE_LIKE_DIRECT_WITH_MARKS;
2714             } else {
2715                 pBiDi->pImpTabPair=&impTab_INVERSE_LIKE_DIRECT;
2716             }
2717             break;
2718         case UBIDI_REORDER_INVERSE_FOR_NUMBERS_SPECIAL:
2719             if (pBiDi->reorderingOptions & UBIDI_OPTION_INSERT_MARKS) {
2720                 pBiDi->pImpTabPair=&impTab_INVERSE_FOR_NUMBERS_SPECIAL_WITH_MARKS;
2721             } else {
2722                 pBiDi->pImpTabPair=&impTab_INVERSE_FOR_NUMBERS_SPECIAL;
2723             }
2724             break;
2725         default:
2726             /* we should never get here */
2727             UPRV_UNREACHABLE;
2728         }
2729         /*
2730          * If there are no external levels specified and there
2731          * are no significant explicit level codes in the text,
2732          * then we can treat the entire paragraph as one run.
2733          * Otherwise, we need to perform the following rules on runs of
2734          * the text with the same embedding levels. (X10)
2735          * "Significant" explicit level codes are ones that actually
2736          * affect non-BN characters.
2737          * Examples for "insignificant" ones are empty embeddings
2738          * LRE-PDF, LRE-RLE-PDF-PDF, etc.
2739          */
2740         if(embeddingLevels==NULL && pBiDi->paraCount<=1 &&
2741                                    !(pBiDi->flags&DIRPROP_FLAG_MULTI_RUNS)) {
2742             resolveImplicitLevels(pBiDi, 0, length,
2743                                     GET_LR_FROM_LEVEL(GET_PARALEVEL(pBiDi, 0)),
2744                                     GET_LR_FROM_LEVEL(GET_PARALEVEL(pBiDi, length-1)));
2745         } else {
2746             /* sor, eor: start and end types of same-level-run */
2747             UBiDiLevel *levels=pBiDi->levels;
2748             int32_t start, limit=0;
2749             UBiDiLevel level, nextLevel;
2750             DirProp sor, eor;
2751 
2752             /* determine the first sor and set eor to it because of the loop body (sor=eor there) */
2753             level=GET_PARALEVEL(pBiDi, 0);
2754             nextLevel=levels[0];
2755             if(level<nextLevel) {
2756                 eor=GET_LR_FROM_LEVEL(nextLevel);
2757             } else {
2758                 eor=GET_LR_FROM_LEVEL(level);
2759             }
2760 
2761             do {
2762                 /* determine start and limit of the run (end points just behind the run) */
2763 
2764                 /* the values for this run's start are the same as for the previous run's end */
2765                 start=limit;
2766                 level=nextLevel;
2767                 if((start>0) && (dirProps[start-1]==B)) {
2768                     /* except if this is a new paragraph, then set sor = para level */
2769                     sor=GET_LR_FROM_LEVEL(GET_PARALEVEL(pBiDi, start));
2770                 } else {
2771                     sor=eor;
2772                 }
2773 
2774                 /* search for the limit of this run */
2775                 while((++limit<length) &&
2776                       ((levels[limit]==level) ||
2777                        (DIRPROP_FLAG(dirProps[limit])&MASK_BN_EXPLICIT))) {}
2778 
2779                 /* get the correct level of the next run */
2780                 if(limit<length) {
2781                     nextLevel=levels[limit];
2782                 } else {
2783                     nextLevel=GET_PARALEVEL(pBiDi, length-1);
2784                 }
2785 
2786                 /* determine eor from max(level, nextLevel); sor is last run's eor */
2787                 if(NO_OVERRIDE(level)<NO_OVERRIDE(nextLevel)) {
2788                     eor=GET_LR_FROM_LEVEL(nextLevel);
2789                 } else {
2790                     eor=GET_LR_FROM_LEVEL(level);
2791                 }
2792 
2793                 /* if the run consists of overridden directional types, then there
2794                    are no implicit types to be resolved */
2795                 if(!(level&UBIDI_LEVEL_OVERRIDE)) {
2796                     resolveImplicitLevels(pBiDi, start, limit, sor, eor);
2797                 } else {
2798                     /* remove the UBIDI_LEVEL_OVERRIDE flags */
2799                     do {
2800                         levels[start++]&=~UBIDI_LEVEL_OVERRIDE;
2801                     } while(start<limit);
2802                 }
2803             } while(limit<length);
2804         }
2805         /* check if we got any memory shortage while adding insert points */
2806         if (U_FAILURE(pBiDi->insertPoints.errorCode))
2807         {
2808             *pErrorCode=pBiDi->insertPoints.errorCode;
2809             return;
2810         }
2811         /* reset the embedding levels for some non-graphic characters (L1), (X9) */
2812         adjustWSLevels(pBiDi);
2813         break;
2814     }
2815     /* add RLM for inverse Bidi with contextual orientation resolving
2816      * to RTL which would not round-trip otherwise
2817      */
2818     if((pBiDi->defaultParaLevel>0) &&
2819        (pBiDi->reorderingOptions & UBIDI_OPTION_INSERT_MARKS) &&
2820        ((pBiDi->reorderingMode==UBIDI_REORDER_INVERSE_LIKE_DIRECT) ||
2821         (pBiDi->reorderingMode==UBIDI_REORDER_INVERSE_FOR_NUMBERS_SPECIAL))) {
2822         int32_t i, j, start, last;
2823         UBiDiLevel level;
2824         DirProp dirProp;
2825         for(i=0; i<pBiDi->paraCount; i++) {
2826             last=(pBiDi->paras[i].limit)-1;
2827             level= static_cast<UBiDiLevel>(pBiDi->paras[i].level);
2828             if(level==0)
2829                 continue;           /* LTR paragraph */
2830             start= i==0 ? 0 : pBiDi->paras[i-1].limit;
2831             for(j=last; j>=start; j--) {
2832                 dirProp=dirProps[j];
2833                 if(dirProp==L) {
2834                     if(j<last) {
2835                         while(dirProps[last]==B) {
2836                             last--;
2837                         }
2838                     }
2839                     addPoint(pBiDi, last, RLM_BEFORE);
2840                     break;
2841                 }
2842                 if(DIRPROP_FLAG(dirProp) & MASK_R_AL) {
2843                     break;
2844                 }
2845             }
2846         }
2847     }
2848 
2849     if(pBiDi->reorderingOptions & UBIDI_OPTION_REMOVE_CONTROLS) {
2850         pBiDi->resultLength -= pBiDi->controlCount;
2851     } else {
2852         pBiDi->resultLength += pBiDi->insertPoints.size;
2853     }
2854     setParaSuccess(pBiDi);              /* mark successful setPara */
2855 }
2856 
2857 U_CAPI void U_EXPORT2
ubidi_orderParagraphsLTR(UBiDi * pBiDi,UBool orderParagraphsLTR)2858 ubidi_orderParagraphsLTR(UBiDi *pBiDi, UBool orderParagraphsLTR) {
2859     if(pBiDi!=NULL) {
2860         pBiDi->orderParagraphsLTR=orderParagraphsLTR;
2861     }
2862 }
2863 
2864 U_CAPI UBool U_EXPORT2
ubidi_isOrderParagraphsLTR(UBiDi * pBiDi)2865 ubidi_isOrderParagraphsLTR(UBiDi *pBiDi) {
2866     if(pBiDi!=NULL) {
2867         return pBiDi->orderParagraphsLTR;
2868     } else {
2869         return FALSE;
2870     }
2871 }
2872 
2873 U_CAPI UBiDiDirection U_EXPORT2
ubidi_getDirection(const UBiDi * pBiDi)2874 ubidi_getDirection(const UBiDi *pBiDi) {
2875     if(IS_VALID_PARA_OR_LINE(pBiDi)) {
2876         return pBiDi->direction;
2877     } else {
2878         return UBIDI_LTR;
2879     }
2880 }
2881 
2882 U_CAPI const UChar * U_EXPORT2
ubidi_getText(const UBiDi * pBiDi)2883 ubidi_getText(const UBiDi *pBiDi) {
2884     if(IS_VALID_PARA_OR_LINE(pBiDi)) {
2885         return pBiDi->text;
2886     } else {
2887         return NULL;
2888     }
2889 }
2890 
2891 U_CAPI int32_t U_EXPORT2
ubidi_getLength(const UBiDi * pBiDi)2892 ubidi_getLength(const UBiDi *pBiDi) {
2893     if(IS_VALID_PARA_OR_LINE(pBiDi)) {
2894         return pBiDi->originalLength;
2895     } else {
2896         return 0;
2897     }
2898 }
2899 
2900 U_CAPI int32_t U_EXPORT2
ubidi_getProcessedLength(const UBiDi * pBiDi)2901 ubidi_getProcessedLength(const UBiDi *pBiDi) {
2902     if(IS_VALID_PARA_OR_LINE(pBiDi)) {
2903         return pBiDi->length;
2904     } else {
2905         return 0;
2906     }
2907 }
2908 
2909 U_CAPI int32_t U_EXPORT2
ubidi_getResultLength(const UBiDi * pBiDi)2910 ubidi_getResultLength(const UBiDi *pBiDi) {
2911     if(IS_VALID_PARA_OR_LINE(pBiDi)) {
2912         return pBiDi->resultLength;
2913     } else {
2914         return 0;
2915     }
2916 }
2917 
2918 /* paragraphs API functions ------------------------------------------------- */
2919 
2920 U_CAPI UBiDiLevel U_EXPORT2
ubidi_getParaLevel(const UBiDi * pBiDi)2921 ubidi_getParaLevel(const UBiDi *pBiDi) {
2922     if(IS_VALID_PARA_OR_LINE(pBiDi)) {
2923         return pBiDi->paraLevel;
2924     } else {
2925         return 0;
2926     }
2927 }
2928 
2929 U_CAPI int32_t U_EXPORT2
ubidi_countParagraphs(UBiDi * pBiDi)2930 ubidi_countParagraphs(UBiDi *pBiDi) {
2931     if(!IS_VALID_PARA_OR_LINE(pBiDi)) {
2932         return 0;
2933     } else {
2934         return pBiDi->paraCount;
2935     }
2936 }
2937 
2938 U_CAPI void U_EXPORT2
ubidi_getParagraphByIndex(const UBiDi * pBiDi,int32_t paraIndex,int32_t * pParaStart,int32_t * pParaLimit,UBiDiLevel * pParaLevel,UErrorCode * pErrorCode)2939 ubidi_getParagraphByIndex(const UBiDi *pBiDi, int32_t paraIndex,
2940                           int32_t *pParaStart, int32_t *pParaLimit,
2941                           UBiDiLevel *pParaLevel, UErrorCode *pErrorCode) {
2942     int32_t paraStart;
2943 
2944     /* check the argument values */
2945     RETURN_VOID_IF_NULL_OR_FAILING_ERRCODE(pErrorCode);
2946     RETURN_VOID_IF_NOT_VALID_PARA_OR_LINE(pBiDi, *pErrorCode);
2947     RETURN_VOID_IF_BAD_RANGE(paraIndex, 0, pBiDi->paraCount, *pErrorCode);
2948 
2949     pBiDi=pBiDi->pParaBiDi;             /* get Para object if Line object */
2950     if(paraIndex) {
2951         paraStart=pBiDi->paras[paraIndex-1].limit;
2952     } else {
2953         paraStart=0;
2954     }
2955     if(pParaStart!=NULL) {
2956         *pParaStart=paraStart;
2957     }
2958     if(pParaLimit!=NULL) {
2959         *pParaLimit=pBiDi->paras[paraIndex].limit;
2960     }
2961     if(pParaLevel!=NULL) {
2962         *pParaLevel=GET_PARALEVEL(pBiDi, paraStart);
2963     }
2964 }
2965 
2966 U_CAPI int32_t U_EXPORT2
ubidi_getParagraph(const UBiDi * pBiDi,int32_t charIndex,int32_t * pParaStart,int32_t * pParaLimit,UBiDiLevel * pParaLevel,UErrorCode * pErrorCode)2967 ubidi_getParagraph(const UBiDi *pBiDi, int32_t charIndex,
2968                           int32_t *pParaStart, int32_t *pParaLimit,
2969                           UBiDiLevel *pParaLevel, UErrorCode *pErrorCode) {
2970     int32_t paraIndex;
2971 
2972     /* check the argument values */
2973     /* pErrorCode will be checked by the call to ubidi_getParagraphByIndex */
2974     RETURN_IF_NULL_OR_FAILING_ERRCODE(pErrorCode, -1);
2975     RETURN_IF_NOT_VALID_PARA_OR_LINE(pBiDi, *pErrorCode, -1);
2976     pBiDi=pBiDi->pParaBiDi;             /* get Para object if Line object */
2977     RETURN_IF_BAD_RANGE(charIndex, 0, pBiDi->length, *pErrorCode, -1);
2978 
2979     for(paraIndex=0; charIndex>=pBiDi->paras[paraIndex].limit; paraIndex++);
2980     ubidi_getParagraphByIndex(pBiDi, paraIndex, pParaStart, pParaLimit, pParaLevel, pErrorCode);
2981     return paraIndex;
2982 }
2983 
2984 U_CAPI void U_EXPORT2
ubidi_setClassCallback(UBiDi * pBiDi,UBiDiClassCallback * newFn,const void * newContext,UBiDiClassCallback ** oldFn,const void ** oldContext,UErrorCode * pErrorCode)2985 ubidi_setClassCallback(UBiDi *pBiDi, UBiDiClassCallback *newFn,
2986                        const void *newContext, UBiDiClassCallback **oldFn,
2987                        const void **oldContext, UErrorCode *pErrorCode)
2988 {
2989     RETURN_VOID_IF_NULL_OR_FAILING_ERRCODE(pErrorCode);
2990     if(pBiDi==NULL) {
2991         *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
2992         return;
2993     }
2994     if( oldFn )
2995     {
2996         *oldFn = pBiDi->fnClassCallback;
2997     }
2998     if( oldContext )
2999     {
3000         *oldContext = pBiDi->coClassCallback;
3001     }
3002     pBiDi->fnClassCallback = newFn;
3003     pBiDi->coClassCallback = newContext;
3004 }
3005 
3006 U_CAPI void U_EXPORT2
ubidi_getClassCallback(UBiDi * pBiDi,UBiDiClassCallback ** fn,const void ** context)3007 ubidi_getClassCallback(UBiDi *pBiDi, UBiDiClassCallback **fn, const void **context)
3008 {
3009     if(pBiDi==NULL) {
3010         return;
3011     }
3012     if( fn )
3013     {
3014         *fn = pBiDi->fnClassCallback;
3015     }
3016     if( context )
3017     {
3018         *context = pBiDi->coClassCallback;
3019     }
3020 }
3021 
3022 U_CAPI UCharDirection U_EXPORT2
ubidi_getCustomizedClass(UBiDi * pBiDi,UChar32 c)3023 ubidi_getCustomizedClass(UBiDi *pBiDi, UChar32 c)
3024 {
3025     UCharDirection dir;
3026 
3027     if( pBiDi->fnClassCallback == NULL ||
3028         (dir = (*pBiDi->fnClassCallback)(pBiDi->coClassCallback, c)) == U_BIDI_CLASS_DEFAULT )
3029     {
3030         dir = ubidi_getClass(c);
3031     }
3032     if(dir >= U_CHAR_DIRECTION_COUNT) {
3033         dir = (UCharDirection)ON;
3034     }
3035     return dir;
3036 }
3037