1 /*
2 ******************************************************************************
3 *
4 *   Copyright (C) 1999-2015, International Business Machines
5 *   Corporation and others.  All Rights Reserved.
6 *
7 ******************************************************************************
8 *   file name:  ubidi.c
9 *   encoding:   US-ASCII
10 *   tab size:   8 (not used)
11 *   indentation:4
12 *
13 *   created on: 1999jul27
14 *   created by: Markus W. Scherer, updated by Matitiahu Allouche
15 *
16 */
17 
18 #include "cmemory.h"
19 #include "unicode/utypes.h"
20 #include "unicode/ustring.h"
21 #include "unicode/uchar.h"
22 #include "unicode/ubidi.h"
23 #include "unicode/utf16.h"
24 #include "ubidi_props.h"
25 #include "ubidiimp.h"
26 #include "uassert.h"
27 
28 /*
29  * General implementation notes:
30  *
31  * Throughout the implementation, there are comments like (W2) that refer to
32  * rules of the BiDi algorithm, in this example to the second rule of the
33  * resolution of weak types.
34  *
35  * For handling surrogate pairs, where two UChar's form one "abstract" (or UTF-32)
36  * character according to UTF-16, the second UChar gets the directional property of
37  * the entire character assigned, while the first one gets a BN, a boundary
38  * neutral, type, which is ignored by most of the algorithm according to
39  * rule (X9) and the implementation suggestions of the BiDi algorithm.
40  *
41  * Later, adjustWSLevels() will set the level for each BN to that of the
42  * following character (UChar), which results in surrogate pairs getting the
43  * same level on each of their surrogates.
44  *
45  * In a UTF-8 implementation, the same thing could be done: the last byte of
46  * a multi-byte sequence would get the "real" property, while all previous
47  * bytes of that sequence would get BN.
48  *
49  * It is not possible to assign all those parts of a character the same real
50  * property because this would fail in the resolution of weak types with rules
51  * that look at immediately surrounding types.
52  *
53  * As a related topic, this implementation does not remove Boundary Neutral
54  * types from the input, but ignores them wherever this is relevant.
55  * For example, the loop for the resolution of the weak types reads
56  * types until it finds a non-BN.
57  * Also, explicit embedding codes are neither changed into BN nor removed.
58  * They are only treated the same way real BNs are.
59  * As stated before, adjustWSLevels() takes care of them at the end.
60  * For the purpose of conformance, the levels of all these codes
61  * do not matter.
62  *
63  * Note that this implementation modifies the dirProps
64  * after the initial setup, when applying X5c (replace FSI by LRI or RLI),
65  * X6, N0 (replace paired brackets by L or R).
66  *
67  * In this implementation, the resolution of weak types (W1 to W6),
68  * neutrals (N1 and N2), and the assignment of the resolved level (In)
69  * are all done in one single loop, in resolveImplicitLevels().
70  * Changes of dirProp values are done on the fly, without writing
71  * them back to the dirProps array.
72  *
73  *
74  * This implementation contains code that allows to bypass steps of the
75  * algorithm that are not needed on the specific paragraph
76  * in order to speed up the most common cases considerably,
77  * like text that is entirely LTR, or RTL text without numbers.
78  *
79  * Most of this is done by setting a bit for each directional property
80  * in a flags variable and later checking for whether there are
81  * any LTR characters or any RTL characters, or both, whether
82  * there are any explicit embedding codes, etc.
83  *
84  * If the (Xn) steps are performed, then the flags are re-evaluated,
85  * because they will then not contain the embedding codes any more
86  * and will be adjusted for override codes, so that subsequently
87  * more bypassing may be possible than what the initial flags suggested.
88  *
89  * If the text is not mixed-directional, then the
90  * algorithm steps for the weak type resolution are not performed,
91  * and all levels are set to the paragraph level.
92  *
93  * If there are no explicit embedding codes, then the (Xn) steps
94  * are not performed.
95  *
96  * If embedding levels are supplied as a parameter, then all
97  * explicit embedding codes are ignored, and the (Xn) steps
98  * are not performed.
99  *
100  * White Space types could get the level of the run they belong to,
101  * and are checked with a test of (flags&MASK_EMBEDDING) to
102  * consider if the paragraph direction should be considered in
103  * the flags variable.
104  *
105  * If there are no White Space types in the paragraph, then
106  * (L1) is not necessary in adjustWSLevels().
107  */
108 
109 /* to avoid some conditional statements, use tiny constant arrays */
110 static const Flags flagLR[2]={ DIRPROP_FLAG(L), DIRPROP_FLAG(R) };
111 static const Flags flagE[2]={ DIRPROP_FLAG(LRE), DIRPROP_FLAG(RLE) };
112 static const Flags flagO[2]={ DIRPROP_FLAG(LRO), DIRPROP_FLAG(RLO) };
113 
114 #define DIRPROP_FLAG_LR(level) flagLR[(level)&1]
115 #define DIRPROP_FLAG_E(level)  flagE[(level)&1]
116 #define DIRPROP_FLAG_O(level)  flagO[(level)&1]
117 
118 #define DIR_FROM_STRONG(strong) ((strong)==L ? L : R)
119 
120 #define NO_OVERRIDE(level)  ((level)&~UBIDI_LEVEL_OVERRIDE)
121 
122 /* UBiDi object management -------------------------------------------------- */
123 
124 U_CAPI UBiDi * U_EXPORT2
ubidi_open(void)125 ubidi_open(void)
126 {
127     UErrorCode errorCode=U_ZERO_ERROR;
128     return ubidi_openSized(0, 0, &errorCode);
129 }
130 
131 U_CAPI UBiDi * U_EXPORT2
ubidi_openSized(int32_t maxLength,int32_t maxRunCount,UErrorCode * pErrorCode)132 ubidi_openSized(int32_t maxLength, int32_t maxRunCount, UErrorCode *pErrorCode) {
133     UBiDi *pBiDi;
134 
135     /* check the argument values */
136     if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
137         return NULL;
138     } else if(maxLength<0 || maxRunCount<0) {
139         *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
140         return NULL;    /* invalid arguments */
141     }
142 
143     /* allocate memory for the object */
144     pBiDi=(UBiDi *)uprv_malloc(sizeof(UBiDi));
145     if(pBiDi==NULL) {
146         *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
147         return NULL;
148     }
149 
150     /* reset the object, all pointers NULL, all flags FALSE, all sizes 0 */
151     uprv_memset(pBiDi, 0, sizeof(UBiDi));
152 
153     /* get BiDi properties */
154     pBiDi->bdp=ubidi_getSingleton();
155 
156     /* allocate memory for arrays as requested */
157     if(maxLength>0) {
158         if( !getInitialDirPropsMemory(pBiDi, maxLength) ||
159             !getInitialLevelsMemory(pBiDi, maxLength)
160         ) {
161             *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
162         }
163     } else {
164         pBiDi->mayAllocateText=TRUE;
165     }
166 
167     if(maxRunCount>0) {
168         if(maxRunCount==1) {
169             /* use simpleRuns[] */
170             pBiDi->runsSize=sizeof(Run);
171         } else if(!getInitialRunsMemory(pBiDi, maxRunCount)) {
172             *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
173         }
174     } else {
175         pBiDi->mayAllocateRuns=TRUE;
176     }
177 
178     if(U_SUCCESS(*pErrorCode)) {
179         return pBiDi;
180     } else {
181         ubidi_close(pBiDi);
182         return NULL;
183     }
184 }
185 
186 /*
187  * We are allowed to allocate memory if memory==NULL or
188  * mayAllocate==TRUE for each array that we need.
189  * We also try to grow memory as needed if we
190  * allocate it.
191  *
192  * Assume sizeNeeded>0.
193  * If *pMemory!=NULL, then assume *pSize>0.
194  *
195  * ### this realloc() may unnecessarily copy the old data,
196  * which we know we don't need any more;
197  * is this the best way to do this??
198  */
199 U_CFUNC UBool
ubidi_getMemory(BidiMemoryForAllocation * bidiMem,int32_t * pSize,UBool mayAllocate,int32_t sizeNeeded)200 ubidi_getMemory(BidiMemoryForAllocation *bidiMem, int32_t *pSize, UBool mayAllocate, int32_t sizeNeeded) {
201     void **pMemory = (void **)bidiMem;
202     /* check for existing memory */
203     if(*pMemory==NULL) {
204         /* we need to allocate memory */
205         if(mayAllocate && (*pMemory=uprv_malloc(sizeNeeded))!=NULL) {
206             *pSize=sizeNeeded;
207             return TRUE;
208         } else {
209             return FALSE;
210         }
211     } else {
212         if(sizeNeeded<=*pSize) {
213             /* there is already enough memory */
214             return TRUE;
215         }
216         else if(!mayAllocate) {
217             /* not enough memory, and we must not allocate */
218             return FALSE;
219         } else {
220             /* we try to grow */
221             void *memory;
222             /* in most cases, we do not need the copy-old-data part of
223              * realloc, but it is needed when adding runs using getRunsMemory()
224              * in setParaRunsOnly()
225              */
226             if((memory=uprv_realloc(*pMemory, sizeNeeded))!=NULL) {
227                 *pMemory=memory;
228                 *pSize=sizeNeeded;
229                 return TRUE;
230             } else {
231                 /* we failed to grow */
232                 return FALSE;
233             }
234         }
235     }
236 }
237 
238 U_CAPI void U_EXPORT2
ubidi_close(UBiDi * pBiDi)239 ubidi_close(UBiDi *pBiDi) {
240     if(pBiDi!=NULL) {
241         pBiDi->pParaBiDi=NULL;          /* in case one tries to reuse this block */
242         if(pBiDi->dirPropsMemory!=NULL) {
243             uprv_free(pBiDi->dirPropsMemory);
244         }
245         if(pBiDi->levelsMemory!=NULL) {
246             uprv_free(pBiDi->levelsMemory);
247         }
248         if(pBiDi->openingsMemory!=NULL) {
249             uprv_free(pBiDi->openingsMemory);
250         }
251         if(pBiDi->parasMemory!=NULL) {
252             uprv_free(pBiDi->parasMemory);
253         }
254         if(pBiDi->runsMemory!=NULL) {
255             uprv_free(pBiDi->runsMemory);
256         }
257         if(pBiDi->isolatesMemory!=NULL) {
258             uprv_free(pBiDi->isolatesMemory);
259         }
260         if(pBiDi->insertPoints.points!=NULL) {
261             uprv_free(pBiDi->insertPoints.points);
262         }
263 
264         uprv_free(pBiDi);
265     }
266 }
267 
268 /* set to approximate "inverse BiDi" ---------------------------------------- */
269 
270 U_CAPI void U_EXPORT2
ubidi_setInverse(UBiDi * pBiDi,UBool isInverse)271 ubidi_setInverse(UBiDi *pBiDi, UBool isInverse) {
272     if(pBiDi!=NULL) {
273         pBiDi->isInverse=isInverse;
274         pBiDi->reorderingMode = isInverse ? UBIDI_REORDER_INVERSE_NUMBERS_AS_L
275                                           : UBIDI_REORDER_DEFAULT;
276     }
277 }
278 
279 U_CAPI UBool U_EXPORT2
ubidi_isInverse(UBiDi * pBiDi)280 ubidi_isInverse(UBiDi *pBiDi) {
281     if(pBiDi!=NULL) {
282         return pBiDi->isInverse;
283     } else {
284         return FALSE;
285     }
286 }
287 
288 /* FOOD FOR THOUGHT: currently the reordering modes are a mixture of
289  * algorithm for direct BiDi, algorithm for inverse BiDi and the bizarre
290  * concept of RUNS_ONLY which is a double operation.
291  * It could be advantageous to divide this into 3 concepts:
292  * a) Operation: direct / inverse / RUNS_ONLY
293  * b) Direct algorithm: default / NUMBERS_SPECIAL / GROUP_NUMBERS_WITH_R
294  * c) Inverse algorithm: default / INVERSE_LIKE_DIRECT / NUMBERS_SPECIAL
295  * This would allow combinations not possible today like RUNS_ONLY with
296  * NUMBERS_SPECIAL.
297  * Also allow to set INSERT_MARKS for the direct step of RUNS_ONLY and
298  * REMOVE_CONTROLS for the inverse step.
299  * Not all combinations would be supported, and probably not all do make sense.
300  * This would need to document which ones are supported and what are the
301  * fallbacks for unsupported combinations.
302  */
303 U_CAPI void U_EXPORT2
ubidi_setReorderingMode(UBiDi * pBiDi,UBiDiReorderingMode reorderingMode)304 ubidi_setReorderingMode(UBiDi *pBiDi, UBiDiReorderingMode reorderingMode) {
305     if ((pBiDi!=NULL) && (reorderingMode >= UBIDI_REORDER_DEFAULT)
306                         && (reorderingMode < UBIDI_REORDER_COUNT)) {
307         pBiDi->reorderingMode = reorderingMode;
308         pBiDi->isInverse = (UBool)(reorderingMode == UBIDI_REORDER_INVERSE_NUMBERS_AS_L);
309     }
310 }
311 
312 U_CAPI UBiDiReorderingMode U_EXPORT2
ubidi_getReorderingMode(UBiDi * pBiDi)313 ubidi_getReorderingMode(UBiDi *pBiDi) {
314     if (pBiDi!=NULL) {
315         return pBiDi->reorderingMode;
316     } else {
317         return UBIDI_REORDER_DEFAULT;
318     }
319 }
320 
321 U_CAPI void U_EXPORT2
ubidi_setReorderingOptions(UBiDi * pBiDi,uint32_t reorderingOptions)322 ubidi_setReorderingOptions(UBiDi *pBiDi, uint32_t reorderingOptions) {
323     if (reorderingOptions & UBIDI_OPTION_REMOVE_CONTROLS) {
324         reorderingOptions&=~UBIDI_OPTION_INSERT_MARKS;
325     }
326     if (pBiDi!=NULL) {
327         pBiDi->reorderingOptions=reorderingOptions;
328     }
329 }
330 
331 U_CAPI uint32_t U_EXPORT2
ubidi_getReorderingOptions(UBiDi * pBiDi)332 ubidi_getReorderingOptions(UBiDi *pBiDi) {
333     if (pBiDi!=NULL) {
334         return pBiDi->reorderingOptions;
335     } else {
336         return 0;
337     }
338 }
339 
340 U_CAPI UBiDiDirection U_EXPORT2
ubidi_getBaseDirection(const UChar * text,int32_t length)341 ubidi_getBaseDirection(const UChar *text,
342 int32_t length){
343 
344     int32_t i;
345     UChar32 uchar;
346     UCharDirection dir;
347 
348     if( text==NULL || length<-1 ){
349         return UBIDI_NEUTRAL;
350     }
351 
352     if(length==-1) {
353         length=u_strlen(text);
354     }
355 
356     for( i = 0 ; i < length; ) {
357         /* i is incremented by U16_NEXT */
358         U16_NEXT(text, i, length, uchar);
359         dir = u_charDirection(uchar);
360         if( dir == U_LEFT_TO_RIGHT )
361                 return UBIDI_LTR;
362         if( dir == U_RIGHT_TO_LEFT || dir ==U_RIGHT_TO_LEFT_ARABIC )
363                 return UBIDI_RTL;
364     }
365     return UBIDI_NEUTRAL;
366 }
367 
368 /* perform (P2)..(P3) ------------------------------------------------------- */
369 
370 /**
371  * Returns the directionality of the first strong character
372  * after the last B in prologue, if any.
373  * Requires prologue!=null.
374  */
375 static DirProp
firstL_R_AL(UBiDi * pBiDi)376 firstL_R_AL(UBiDi *pBiDi) {
377     const UChar *text=pBiDi->prologue;
378     int32_t length=pBiDi->proLength;
379     int32_t i;
380     UChar32 uchar;
381     DirProp dirProp, result=ON;
382     for(i=0; i<length; ) {
383         /* i is incremented by U16_NEXT */
384         U16_NEXT(text, i, length, uchar);
385         dirProp=(DirProp)ubidi_getCustomizedClass(pBiDi, uchar);
386         if(result==ON) {
387             if(dirProp==L || dirProp==R || dirProp==AL) {
388                 result=dirProp;
389             }
390         } else {
391             if(dirProp==B) {
392                 result=ON;
393             }
394         }
395     }
396     return result;
397 }
398 
399 /*
400  * Check that there are enough entries in the array pointed to by pBiDi->paras
401  */
402 static UBool
checkParaCount(UBiDi * pBiDi)403 checkParaCount(UBiDi *pBiDi) {
404     int32_t count=pBiDi->paraCount;
405     if(pBiDi->paras==pBiDi->simpleParas) {
406         if(count<=SIMPLE_PARAS_COUNT)
407             return TRUE;
408         if(!getInitialParasMemory(pBiDi, SIMPLE_PARAS_COUNT * 2))
409             return FALSE;
410         pBiDi->paras=pBiDi->parasMemory;
411         uprv_memcpy(pBiDi->parasMemory, pBiDi->simpleParas, SIMPLE_PARAS_COUNT * sizeof(Para));
412         return TRUE;
413     }
414     if(!getInitialParasMemory(pBiDi, count * 2))
415         return FALSE;
416     pBiDi->paras=pBiDi->parasMemory;
417     return TRUE;
418 }
419 
420 /*
421  * Get the directional properties for the text, calculate the flags bit-set, and
422  * determine the paragraph level if necessary (in pBiDi->paras[i].level).
423  * FSI initiators are also resolved and their dirProp replaced with LRI or RLI.
424  * When encountering an FSI, it is initially replaced with an LRI, which is the
425  * default. Only if a strong R or AL is found within its scope will the LRI be
426  * replaced by an RLI.
427  */
428 static UBool
getDirProps(UBiDi * pBiDi)429 getDirProps(UBiDi *pBiDi) {
430     const UChar *text=pBiDi->text;
431     DirProp *dirProps=pBiDi->dirPropsMemory;    /* pBiDi->dirProps is const */
432 
433     int32_t i=0, originalLength=pBiDi->originalLength;
434     Flags flags=0;      /* collect all directionalities in the text */
435     UChar32 uchar;
436     DirProp dirProp=0, defaultParaLevel=0;  /* initialize to avoid compiler warnings */
437     UBool isDefaultLevel=IS_DEFAULT_LEVEL(pBiDi->paraLevel);
438     /* for inverse BiDi, the default para level is set to RTL if there is a
439        strong R or AL character at either end of the text                            */
440     UBool isDefaultLevelInverse=isDefaultLevel && (UBool)
441             (pBiDi->reorderingMode==UBIDI_REORDER_INVERSE_LIKE_DIRECT ||
442              pBiDi->reorderingMode==UBIDI_REORDER_INVERSE_FOR_NUMBERS_SPECIAL);
443     int32_t lastArabicPos=-1;
444     int32_t controlCount=0;
445     UBool removeBiDiControls = (UBool)(pBiDi->reorderingOptions &
446                                        UBIDI_OPTION_REMOVE_CONTROLS);
447 
448     typedef enum {
449          NOT_SEEKING_STRONG,            /* 0: not contextual paraLevel, not after FSI */
450          SEEKING_STRONG_FOR_PARA,       /* 1: looking for first strong char in para */
451          SEEKING_STRONG_FOR_FSI,        /* 2: looking for first strong after FSI */
452          LOOKING_FOR_PDI                /* 3: found strong after FSI, looking for PDI */
453     } State;
454     State state;
455     DirProp lastStrong=ON;              /* for default level & inverse BiDi */
456     /* The following stacks are used to manage isolate sequences. Those
457        sequences may be nested, but obviously never more deeply than the
458        maximum explicit embedding level.
459        lastStack is the index of the last used entry in the stack. A value of -1
460        means that there is no open isolate sequence.
461        lastStack is reset to -1 on paragraph boundaries. */
462     /* The following stack contains the position of the initiator of
463        each open isolate sequence */
464     int32_t isolateStartStack[UBIDI_MAX_EXPLICIT_LEVEL+1];
465     /* The following stack contains the last known state before
466        encountering the initiator of an isolate sequence */
467     int8_t  previousStateStack[UBIDI_MAX_EXPLICIT_LEVEL+1];
468     int32_t stackLast=-1;
469 
470     if(pBiDi->reorderingOptions & UBIDI_OPTION_STREAMING)
471         pBiDi->length=0;
472     defaultParaLevel=pBiDi->paraLevel&1;
473     if(isDefaultLevel) {
474         pBiDi->paras[0].level=defaultParaLevel;
475         lastStrong=defaultParaLevel;
476         if(pBiDi->proLength>0 &&                    /* there is a prologue */
477            (dirProp=firstL_R_AL(pBiDi))!=ON) {  /* with a strong character */
478             if(dirProp==L)
479                 pBiDi->paras[0].level=0;    /* set the default para level */
480             else
481                 pBiDi->paras[0].level=1;    /* set the default para level */
482             state=NOT_SEEKING_STRONG;
483         } else {
484             state=SEEKING_STRONG_FOR_PARA;
485         }
486     } else {
487         pBiDi->paras[0].level=pBiDi->paraLevel;
488         state=NOT_SEEKING_STRONG;
489     }
490     /* count paragraphs and determine the paragraph level (P2..P3) */
491     /*
492      * see comment in ubidi.h:
493      * the UBIDI_DEFAULT_XXX values are designed so that
494      * their bit 0 alone yields the intended default
495      */
496     for( /* i=0 above */ ; i<originalLength; ) {
497         /* i is incremented by U16_NEXT */
498         U16_NEXT(text, i, originalLength, uchar);
499         flags|=DIRPROP_FLAG(dirProp=(DirProp)ubidi_getCustomizedClass(pBiDi, uchar));
500         dirProps[i-1]=dirProp;
501         if(uchar>0xffff) {  /* set the lead surrogate's property to BN */
502             flags|=DIRPROP_FLAG(BN);
503             dirProps[i-2]=BN;
504         }
505         if(removeBiDiControls && IS_BIDI_CONTROL_CHAR(uchar))
506             controlCount++;
507         if(dirProp==L) {
508             if(state==SEEKING_STRONG_FOR_PARA) {
509                 pBiDi->paras[pBiDi->paraCount-1].level=0;
510                 state=NOT_SEEKING_STRONG;
511             }
512             else if(state==SEEKING_STRONG_FOR_FSI) {
513                 if(stackLast<=UBIDI_MAX_EXPLICIT_LEVEL) {
514                     /* no need for next statement, already set by default */
515                     /* dirProps[isolateStartStack[stackLast]]=LRI; */
516                     flags|=DIRPROP_FLAG(LRI);
517                 }
518                 state=LOOKING_FOR_PDI;
519             }
520             lastStrong=L;
521             continue;
522         }
523         if(dirProp==R || dirProp==AL) {
524             if(state==SEEKING_STRONG_FOR_PARA) {
525                 pBiDi->paras[pBiDi->paraCount-1].level=1;
526                 state=NOT_SEEKING_STRONG;
527             }
528             else if(state==SEEKING_STRONG_FOR_FSI) {
529                 if(stackLast<=UBIDI_MAX_EXPLICIT_LEVEL) {
530                     dirProps[isolateStartStack[stackLast]]=RLI;
531                     flags|=DIRPROP_FLAG(RLI);
532                 }
533                 state=LOOKING_FOR_PDI;
534             }
535             lastStrong=R;
536             if(dirProp==AL)
537                 lastArabicPos=i-1;
538             continue;
539         }
540         if(dirProp>=FSI && dirProp<=RLI) {  /* FSI, LRI or RLI */
541             stackLast++;
542             if(stackLast<=UBIDI_MAX_EXPLICIT_LEVEL) {
543                 isolateStartStack[stackLast]=i-1;
544                 previousStateStack[stackLast]=state;
545             }
546             if(dirProp==FSI) {
547                 dirProps[i-1]=LRI;      /* default if no strong char */
548                 state=SEEKING_STRONG_FOR_FSI;
549             }
550             else
551                 state=LOOKING_FOR_PDI;
552             continue;
553         }
554         if(dirProp==PDI) {
555             if(state==SEEKING_STRONG_FOR_FSI) {
556                 if(stackLast<=UBIDI_MAX_EXPLICIT_LEVEL) {
557                     /* no need for next statement, already set by default */
558                     /* dirProps[isolateStartStack[stackLast]]=LRI; */
559                     flags|=DIRPROP_FLAG(LRI);
560                 }
561             }
562             if(stackLast>=0) {
563                 if(stackLast<=UBIDI_MAX_EXPLICIT_LEVEL)
564                     state=previousStateStack[stackLast];
565                 stackLast--;
566             }
567             continue;
568         }
569         if(dirProp==B) {
570             if(i<originalLength && uchar==CR && text[i]==LF) /* do nothing on the CR */
571                 continue;
572             pBiDi->paras[pBiDi->paraCount-1].limit=i;
573             if(isDefaultLevelInverse && lastStrong==R)
574                 pBiDi->paras[pBiDi->paraCount-1].level=1;
575             if(pBiDi->reorderingOptions & UBIDI_OPTION_STREAMING) {
576                 /* When streaming, we only process whole paragraphs
577                    thus some updates are only done on paragraph boundaries */
578                 pBiDi->length=i;        /* i is index to next character */
579                 pBiDi->controlCount=controlCount;
580             }
581             if(i<originalLength) {              /* B not last char in text */
582                 pBiDi->paraCount++;
583                 if(checkParaCount(pBiDi)==FALSE)    /* not enough memory for a new para entry */
584                     return FALSE;
585                 if(isDefaultLevel) {
586                     pBiDi->paras[pBiDi->paraCount-1].level=defaultParaLevel;
587                     state=SEEKING_STRONG_FOR_PARA;
588                     lastStrong=defaultParaLevel;
589                 } else {
590                     pBiDi->paras[pBiDi->paraCount-1].level=pBiDi->paraLevel;
591                     state=NOT_SEEKING_STRONG;
592                 }
593                 stackLast=-1;
594             }
595             continue;
596         }
597     }
598     /* Ignore still open isolate sequences with overflow */
599     if(stackLast>UBIDI_MAX_EXPLICIT_LEVEL) {
600         stackLast=UBIDI_MAX_EXPLICIT_LEVEL;
601         state=SEEKING_STRONG_FOR_FSI;   /* to be on the safe side */
602     }
603     /* Resolve direction of still unresolved open FSI sequences */
604     while(stackLast>=0) {
605         if(state==SEEKING_STRONG_FOR_FSI) {
606             /* no need for next statement, already set by default */
607             /* dirProps[isolateStartStack[stackLast]]=LRI; */
608             flags|=DIRPROP_FLAG(LRI);
609             break;
610         }
611         state=previousStateStack[stackLast];
612         stackLast--;
613     }
614     /* When streaming, ignore text after the last paragraph separator */
615     if(pBiDi->reorderingOptions & UBIDI_OPTION_STREAMING) {
616         if(pBiDi->length<originalLength)
617             pBiDi->paraCount--;
618     } else {
619         pBiDi->paras[pBiDi->paraCount-1].limit=originalLength;
620         pBiDi->controlCount=controlCount;
621     }
622     /* For inverse bidi, default para direction is RTL if there is
623        a strong R or AL at either end of the paragraph */
624     if(isDefaultLevelInverse && lastStrong==R) {
625         pBiDi->paras[pBiDi->paraCount-1].level=1;
626     }
627     if(isDefaultLevel) {
628         pBiDi->paraLevel=pBiDi->paras[0].level;
629     }
630     /* The following is needed to resolve the text direction for default level
631        paragraphs containing no strong character */
632     for(i=0; i<pBiDi->paraCount; i++)
633         flags|=DIRPROP_FLAG_LR(pBiDi->paras[i].level);
634 
635     if(pBiDi->orderParagraphsLTR && (flags&DIRPROP_FLAG(B))) {
636         flags|=DIRPROP_FLAG(L);
637     }
638     pBiDi->flags=flags;
639     pBiDi->lastArabicPos=lastArabicPos;
640     return TRUE;
641 }
642 
643 /* determine the paragraph level at position index */
644 U_CFUNC UBiDiLevel
ubidi_getParaLevelAtIndex(const UBiDi * pBiDi,int32_t pindex)645 ubidi_getParaLevelAtIndex(const UBiDi *pBiDi, int32_t pindex) {
646     int32_t i;
647     for(i=0; i<pBiDi->paraCount; i++)
648         if(pindex<pBiDi->paras[i].limit)
649             break;
650     if(i>=pBiDi->paraCount)
651         i=pBiDi->paraCount-1;
652     return (UBiDiLevel)(pBiDi->paras[i].level);
653 }
654 
655 /* Functions for handling paired brackets ----------------------------------- */
656 
657 /* In the isoRuns array, the first entry is used for text outside of any
658    isolate sequence.  Higher entries are used for each more deeply nested
659    isolate sequence. isoRunLast is the index of the last used entry.  The
660    openings array is used to note the data of opening brackets not yet
661    matched by a closing bracket, or matched but still susceptible to change
662    level.
663    Each isoRun entry contains the index of the first and
664    one-after-last openings entries for pending opening brackets it
665    contains.  The next openings entry to use is the one-after-last of the
666    most deeply nested isoRun entry.
667    isoRun entries also contain their current embedding level and the last
668    encountered strong character, since these will be needed to resolve
669    the level of paired brackets.  */
670 
671 static void
bracketInit(UBiDi * pBiDi,BracketData * bd)672 bracketInit(UBiDi *pBiDi, BracketData *bd) {
673     bd->pBiDi=pBiDi;
674     bd->isoRunLast=0;
675     bd->isoRuns[0].start=0;
676     bd->isoRuns[0].limit=0;
677     bd->isoRuns[0].level=GET_PARALEVEL(pBiDi, 0);
678     bd->isoRuns[0].lastStrong=bd->isoRuns[0].lastBase=bd->isoRuns[0].contextDir=GET_PARALEVEL(pBiDi, 0)&1;
679     bd->isoRuns[0].contextPos=0;
680     if(pBiDi->openingsMemory) {
681         bd->openings=pBiDi->openingsMemory;
682         bd->openingsCount=pBiDi->openingsSize / sizeof(Opening);
683     } else {
684         bd->openings=bd->simpleOpenings;
685         bd->openingsCount=SIMPLE_OPENINGS_COUNT;
686     }
687     bd->isNumbersSpecial=bd->pBiDi->reorderingMode==UBIDI_REORDER_NUMBERS_SPECIAL ||
688                          bd->pBiDi->reorderingMode==UBIDI_REORDER_INVERSE_FOR_NUMBERS_SPECIAL;
689 }
690 
691 /* paragraph boundary */
692 static void
bracketProcessB(BracketData * bd,UBiDiLevel level)693 bracketProcessB(BracketData *bd, UBiDiLevel level) {
694     bd->isoRunLast=0;
695     bd->isoRuns[0].limit=0;
696     bd->isoRuns[0].level=level;
697     bd->isoRuns[0].lastStrong=bd->isoRuns[0].lastBase=bd->isoRuns[0].contextDir=level&1;
698     bd->isoRuns[0].contextPos=0;
699 }
700 
701 /* LRE, LRO, RLE, RLO, PDF */
702 static void
bracketProcessBoundary(BracketData * bd,int32_t lastCcPos,UBiDiLevel contextLevel,UBiDiLevel embeddingLevel)703 bracketProcessBoundary(BracketData *bd, int32_t lastCcPos,
704                        UBiDiLevel contextLevel, UBiDiLevel embeddingLevel) {
705     IsoRun *pLastIsoRun=&bd->isoRuns[bd->isoRunLast];
706     DirProp *dirProps=bd->pBiDi->dirProps;
707     if(DIRPROP_FLAG(dirProps[lastCcPos])&MASK_ISO)  /* after an isolate */
708         return;
709     if(NO_OVERRIDE(embeddingLevel)>NO_OVERRIDE(contextLevel))   /* not a PDF */
710         contextLevel=embeddingLevel;
711     pLastIsoRun->limit=pLastIsoRun->start;
712     pLastIsoRun->level=embeddingLevel;
713     pLastIsoRun->lastStrong=pLastIsoRun->lastBase=pLastIsoRun->contextDir=contextLevel&1;
714     pLastIsoRun->contextPos=lastCcPos;
715 }
716 
717 /* LRI or RLI */
718 static void
bracketProcessLRI_RLI(BracketData * bd,UBiDiLevel level)719 bracketProcessLRI_RLI(BracketData *bd, UBiDiLevel level) {
720     IsoRun *pLastIsoRun=&bd->isoRuns[bd->isoRunLast];
721     int16_t lastLimit;
722     pLastIsoRun->lastBase=ON;
723     lastLimit=pLastIsoRun->limit;
724     bd->isoRunLast++;
725     pLastIsoRun++;
726     pLastIsoRun->start=pLastIsoRun->limit=lastLimit;
727     pLastIsoRun->level=level;
728     pLastIsoRun->lastStrong=pLastIsoRun->lastBase=pLastIsoRun->contextDir=level&1;
729     pLastIsoRun->contextPos=0;
730 }
731 
732 /* PDI */
733 static void
bracketProcessPDI(BracketData * bd)734 bracketProcessPDI(BracketData *bd) {
735     IsoRun *pLastIsoRun;
736     bd->isoRunLast--;
737     pLastIsoRun=&bd->isoRuns[bd->isoRunLast];
738     pLastIsoRun->lastBase=ON;
739 }
740 
741 /* newly found opening bracket: create an openings entry */
742 static UBool                            /* return TRUE if success */
bracketAddOpening(BracketData * bd,UChar match,int32_t position)743 bracketAddOpening(BracketData *bd, UChar match, int32_t position) {
744     IsoRun *pLastIsoRun=&bd->isoRuns[bd->isoRunLast];
745     Opening *pOpening;
746     if(pLastIsoRun->limit>=bd->openingsCount) {  /* no available new entry */
747         UBiDi *pBiDi=bd->pBiDi;
748         if(!getInitialOpeningsMemory(pBiDi, pLastIsoRun->limit * 2))
749             return FALSE;
750         if(bd->openings==bd->simpleOpenings)
751             uprv_memcpy(pBiDi->openingsMemory, bd->simpleOpenings,
752                         SIMPLE_OPENINGS_COUNT * sizeof(Opening));
753         bd->openings=pBiDi->openingsMemory;     /* may have changed */
754         bd->openingsCount=pBiDi->openingsSize / sizeof(Opening);
755     }
756     pOpening=&bd->openings[pLastIsoRun->limit];
757     pOpening->position=position;
758     pOpening->match=match;
759     pOpening->contextDir=pLastIsoRun->contextDir;
760     pOpening->contextPos=pLastIsoRun->contextPos;
761     pOpening->flags=0;
762     pLastIsoRun->limit++;
763     return TRUE;
764 }
765 
766 /* change N0c1 to N0c2 when a preceding bracket is assigned the embedding level */
767 static void
fixN0c(BracketData * bd,int32_t openingIndex,int32_t newPropPosition,DirProp newProp)768 fixN0c(BracketData *bd, int32_t openingIndex, int32_t newPropPosition, DirProp newProp) {
769     /* This function calls itself recursively */
770     IsoRun *pLastIsoRun=&bd->isoRuns[bd->isoRunLast];
771     Opening *qOpening;
772     DirProp *dirProps=bd->pBiDi->dirProps;
773     int32_t k, openingPosition, closingPosition;
774     for(k=openingIndex+1, qOpening=&bd->openings[k]; k<pLastIsoRun->limit; k++, qOpening++) {
775         if(qOpening->match>=0)      /* not an N0c match */
776             continue;
777         if(newPropPosition<qOpening->contextPos)
778             break;
779         if(newPropPosition>=qOpening->position)
780             continue;
781         if(newProp==qOpening->contextDir)
782             break;
783         openingPosition=qOpening->position;
784         dirProps[openingPosition]=newProp;
785         closingPosition=-(qOpening->match);
786         dirProps[closingPosition]=newProp;
787         qOpening->match=0;                      /* prevent further changes */
788         fixN0c(bd, k, openingPosition, newProp);
789         fixN0c(bd, k, closingPosition, newProp);
790     }
791 }
792 
793 /* process closing bracket */
794 static DirProp              /* return L or R if N0b or N0c, ON if N0d */
bracketProcessClosing(BracketData * bd,int32_t openIdx,int32_t position)795 bracketProcessClosing(BracketData *bd, int32_t openIdx, int32_t position) {
796     IsoRun *pLastIsoRun=&bd->isoRuns[bd->isoRunLast];
797     Opening *pOpening, *qOpening;
798     UBiDiDirection direction;
799     UBool stable;
800     DirProp newProp;
801     pOpening=&bd->openings[openIdx];
802     direction=pLastIsoRun->level&1;
803     stable=TRUE;            /* assume stable until proved otherwise */
804 
805     /* The stable flag is set when brackets are paired and their
806        level is resolved and cannot be changed by what will be
807        found later in the source string.
808        An unstable match can occur only when applying N0c, where
809        the resolved level depends on the preceding context, and
810        this context may be affected by text occurring later.
811        Example: RTL paragraph containing:  abc[(latin) HEBREW]
812        When the closing parenthesis is encountered, it appears
813        that N0c1 must be applied since 'abc' sets an opposite
814        direction context and both parentheses receive level 2.
815        However, when the closing square bracket is processed,
816        N0b applies because of 'HEBREW' being included within the
817        brackets, thus the square brackets are treated like R and
818        receive level 1. However, this changes the preceding
819        context of the opening parenthesis, and it now appears
820        that N0c2 must be applied to the parentheses rather than
821        N0c1. */
822 
823     if((direction==0 && pOpening->flags&FOUND_L) ||
824        (direction==1 && pOpening->flags&FOUND_R)) { /* N0b */
825         newProp=direction;
826     }
827     else if(pOpening->flags&(FOUND_L|FOUND_R)) {    /* N0c */
828         /* it is stable if there is no containing pair or in
829            conditions too complicated and not worth checking */
830         stable=(openIdx==pLastIsoRun->start);
831         if(direction!=pOpening->contextDir)
832             newProp=pOpening->contextDir;           /* N0c1 */
833         else
834             newProp=direction;                      /* N0c2 */
835     } else {
836         /* forget this and any brackets nested within this pair */
837         pLastIsoRun->limit=openIdx;
838         return ON;                                  /* N0d */
839     }
840     bd->pBiDi->dirProps[pOpening->position]=newProp;
841     bd->pBiDi->dirProps[position]=newProp;
842     /* Update nested N0c pairs that may be affected */
843     fixN0c(bd, openIdx, pOpening->position, newProp);
844     if(stable) {
845         pLastIsoRun->limit=openIdx; /* forget any brackets nested within this pair */
846         /* remove lower located synonyms if any */
847         while(pLastIsoRun->limit>pLastIsoRun->start &&
848               bd->openings[pLastIsoRun->limit-1].position==pOpening->position)
849             pLastIsoRun->limit--;
850     } else {
851         int32_t k;
852         pOpening->match=-position;
853         /* neutralize lower located synonyms if any */
854         k=openIdx-1;
855         while(k>=pLastIsoRun->start &&
856               bd->openings[k].position==pOpening->position)
857             bd->openings[k--].match=0;
858         /* neutralize any unmatched opening between the current pair;
859            this will also neutralize higher located synonyms if any */
860         for(k=openIdx+1; k<pLastIsoRun->limit; k++) {
861             qOpening=&bd->openings[k];
862             if(qOpening->position>=position)
863                 break;
864             if(qOpening->match>0)
865                 qOpening->match=0;
866         }
867     }
868     return newProp;
869 }
870 
871 /* handle strong characters, digits and candidates for closing brackets */
872 static UBool                            /* return TRUE if success */
bracketProcessChar(BracketData * bd,int32_t position)873 bracketProcessChar(BracketData *bd, int32_t position) {
874     IsoRun *pLastIsoRun=&bd->isoRuns[bd->isoRunLast];
875     DirProp *dirProps, dirProp, newProp;
876     UBiDiLevel level;
877     dirProps=bd->pBiDi->dirProps;
878     dirProp=dirProps[position];
879     if(dirProp==ON) {
880         UChar c, match;
881         int32_t idx;
882         /* First see if it is a matching closing bracket. Hopefully, this is
883            more efficient than checking if it is a closing bracket at all */
884         c=bd->pBiDi->text[position];
885         for(idx=pLastIsoRun->limit-1; idx>=pLastIsoRun->start; idx--) {
886             if(bd->openings[idx].match!=c)
887                 continue;
888             /* We have a match */
889             newProp=bracketProcessClosing(bd, idx, position);
890             if(newProp==ON) {           /* N0d */
891                 c=0;        /* prevent handling as an opening */
892                 break;
893             }
894             pLastIsoRun->lastBase=ON;
895             pLastIsoRun->contextDir=newProp;
896             pLastIsoRun->contextPos=position;
897             level=bd->pBiDi->levels[position];
898             if(level&UBIDI_LEVEL_OVERRIDE) {    /* X4, X5 */
899                 uint16_t flag;
900                 int32_t i;
901                 newProp=level&1;
902                 pLastIsoRun->lastStrong=newProp;
903                 flag=DIRPROP_FLAG(newProp);
904                 for(i=pLastIsoRun->start; i<idx; i++)
905                     bd->openings[i].flags|=flag;
906                 /* matching brackets are not overridden by LRO/RLO */
907                 bd->pBiDi->levels[position]&=~UBIDI_LEVEL_OVERRIDE;
908             }
909             /* matching brackets are not overridden by LRO/RLO */
910             bd->pBiDi->levels[bd->openings[idx].position]&=~UBIDI_LEVEL_OVERRIDE;
911             return TRUE;
912         }
913         /* We get here only if the ON character is not a matching closing
914            bracket or it is a case of N0d */
915         /* Now see if it is an opening bracket */
916         if(c)
917             match=u_getBidiPairedBracket(c);    /* get the matching char */
918         else
919             match=0;
920         if(match!=c &&                  /* has a matching char */
921            ubidi_getPairedBracketType(bd->pBiDi->bdp, c)==U_BPT_OPEN) { /* opening bracket */
922             /* special case: process synonyms
923                create an opening entry for each synonym */
924             if(match==0x232A) {     /* RIGHT-POINTING ANGLE BRACKET */
925                 if(!bracketAddOpening(bd, 0x3009, position))
926                     return FALSE;
927             }
928             else if(match==0x3009) {         /* RIGHT ANGLE BRACKET */
929                 if(!bracketAddOpening(bd, 0x232A, position))
930                     return FALSE;
931             }
932             if(!bracketAddOpening(bd, match, position))
933                 return FALSE;
934         }
935     }
936     level=bd->pBiDi->levels[position];
937     if(level&UBIDI_LEVEL_OVERRIDE) {    /* X4, X5 */
938         newProp=level&1;
939         if(dirProp!=S && dirProp!=WS && dirProp!=ON)
940             dirProps[position]=newProp;
941         pLastIsoRun->lastBase=newProp;
942         pLastIsoRun->lastStrong=newProp;
943         pLastIsoRun->contextDir=newProp;
944         pLastIsoRun->contextPos=position;
945     }
946     else if(dirProp<=R || dirProp==AL) {
947         newProp=DIR_FROM_STRONG(dirProp);
948         pLastIsoRun->lastBase=dirProp;
949         pLastIsoRun->lastStrong=dirProp;
950         pLastIsoRun->contextDir=newProp;
951         pLastIsoRun->contextPos=position;
952     }
953     else if(dirProp==EN) {
954         pLastIsoRun->lastBase=EN;
955         if(pLastIsoRun->lastStrong==L) {
956             newProp=L;                  /* W7 */
957             if(!bd->isNumbersSpecial)
958                 dirProps[position]=ENL;
959             pLastIsoRun->contextDir=L;
960             pLastIsoRun->contextPos=position;
961         }
962         else {
963             newProp=R;                  /* N0 */
964             if(pLastIsoRun->lastStrong==AL)
965                 dirProps[position]=AN;  /* W2 */
966             else
967                 dirProps[position]=ENR;
968             pLastIsoRun->contextDir=R;
969             pLastIsoRun->contextPos=position;
970         }
971     }
972     else if(dirProp==AN) {
973         newProp=R;                      /* N0 */
974         pLastIsoRun->lastBase=AN;
975         pLastIsoRun->contextDir=R;
976         pLastIsoRun->contextPos=position;
977     }
978     else if(dirProp==NSM) {
979         /* if the last real char was ON, change NSM to ON so that it
980            will stay ON even if the last real char is a bracket which
981            may be changed to L or R */
982         newProp=pLastIsoRun->lastBase;
983         if(newProp==ON)
984             dirProps[position]=newProp;
985     }
986     else {
987         newProp=dirProp;
988         pLastIsoRun->lastBase=dirProp;
989     }
990     if(newProp<=R || newProp==AL) {
991         int32_t i;
992         uint16_t flag=DIRPROP_FLAG(DIR_FROM_STRONG(newProp));
993         for(i=pLastIsoRun->start; i<pLastIsoRun->limit; i++)
994             if(position>bd->openings[i].position)
995                 bd->openings[i].flags|=flag;
996     }
997     return TRUE;
998 }
999 
1000 /* perform (X1)..(X9) ------------------------------------------------------- */
1001 
1002 /* determine if the text is mixed-directional or single-directional */
1003 static UBiDiDirection
directionFromFlags(UBiDi * pBiDi)1004 directionFromFlags(UBiDi *pBiDi) {
1005     Flags flags=pBiDi->flags;
1006     /* if the text contains AN and neutrals, then some neutrals may become RTL */
1007     if(!(flags&MASK_RTL || ((flags&DIRPROP_FLAG(AN)) && (flags&MASK_POSSIBLE_N)))) {
1008         return UBIDI_LTR;
1009     } else if(!(flags&MASK_LTR)) {
1010         return UBIDI_RTL;
1011     } else {
1012         return UBIDI_MIXED;
1013     }
1014 }
1015 
1016 /*
1017  * Resolve the explicit levels as specified by explicit embedding codes.
1018  * Recalculate the flags to have them reflect the real properties
1019  * after taking the explicit embeddings into account.
1020  *
1021  * The BiDi algorithm is designed to result in the same behavior whether embedding
1022  * levels are externally specified (from "styled text", supposedly the preferred
1023  * method) or set by explicit embedding codes (LRx, RLx, PDF, FSI, PDI) in the plain text.
1024  * That is why (X9) instructs to remove all not-isolate explicit codes (and BN).
1025  * However, in a real implementation, the removal of these codes and their index
1026  * positions in the plain text is undesirable since it would result in
1027  * reallocated, reindexed text.
1028  * Instead, this implementation leaves the codes in there and just ignores them
1029  * in the subsequent processing.
1030  * In order to get the same reordering behavior, positions with a BN or a not-isolate
1031  * explicit embedding code just get the same level assigned as the last "real"
1032  * character.
1033  *
1034  * Some implementations, not this one, then overwrite some of these
1035  * directionality properties at "real" same-level-run boundaries by
1036  * L or R codes so that the resolution of weak types can be performed on the
1037  * entire paragraph at once instead of having to parse it once more and
1038  * perform that resolution on same-level-runs.
1039  * This limits the scope of the implicit rules in effectively
1040  * the same way as the run limits.
1041  *
1042  * Instead, this implementation does not modify these codes, except for
1043  * paired brackets whose properties (ON) may be replaced by L or R.
1044  * On one hand, the paragraph has to be scanned for same-level-runs, but
1045  * on the other hand, this saves another loop to reset these codes,
1046  * or saves making and modifying a copy of dirProps[].
1047  *
1048  *
1049  * Note that (Pn) and (Xn) changed significantly from version 4 of the BiDi algorithm.
1050  *
1051  *
1052  * Handling the stack of explicit levels (Xn):
1053  *
1054  * With the BiDi stack of explicit levels, as pushed with each
1055  * LRE, RLE, LRO, RLO, LRI, RLI and FSI and popped with each PDF and PDI,
1056  * the explicit level must never exceed UBIDI_MAX_EXPLICIT_LEVEL.
1057  *
1058  * In order to have a correct push-pop semantics even in the case of overflows,
1059  * overflow counters and a valid isolate counter are used as described in UAX#9
1060  * section 3.3.2 "Explicit Levels and Directions".
1061  *
1062  * This implementation assumes that UBIDI_MAX_EXPLICIT_LEVEL is odd.
1063  *
1064  * Returns normally the direction; -1 if there was a memory shortage
1065  *
1066  */
1067 static UBiDiDirection
resolveExplicitLevels(UBiDi * pBiDi,UErrorCode * pErrorCode)1068 resolveExplicitLevels(UBiDi *pBiDi, UErrorCode *pErrorCode) {
1069     DirProp *dirProps=pBiDi->dirProps;
1070     UBiDiLevel *levels=pBiDi->levels;
1071     const UChar *text=pBiDi->text;
1072 
1073     int32_t i=0, length=pBiDi->length;
1074     Flags flags=pBiDi->flags;       /* collect all directionalities in the text */
1075     DirProp dirProp;
1076     UBiDiLevel level=GET_PARALEVEL(pBiDi, 0);
1077     UBiDiDirection direction;
1078     pBiDi->isolateCount=0;
1079 
1080     if(U_FAILURE(*pErrorCode)) { return UBIDI_LTR; }
1081 
1082     /* determine if the text is mixed-directional or single-directional */
1083     direction=directionFromFlags(pBiDi);
1084 
1085     /* we may not need to resolve any explicit levels */
1086     if((direction!=UBIDI_MIXED)) {
1087         /* not mixed directionality: levels don't matter - trailingWSStart will be 0 */
1088         return direction;
1089     }
1090     if(pBiDi->reorderingMode > UBIDI_REORDER_LAST_LOGICAL_TO_VISUAL) {
1091         /* inverse BiDi: mixed, but all characters are at the same embedding level */
1092         /* set all levels to the paragraph level */
1093         int32_t paraIndex, start, limit;
1094         for(paraIndex=0; paraIndex<pBiDi->paraCount; paraIndex++) {
1095             if(paraIndex==0)
1096                 start=0;
1097             else
1098                 start=pBiDi->paras[paraIndex-1].limit;
1099             limit=pBiDi->paras[paraIndex].limit;
1100             level=pBiDi->paras[paraIndex].level;
1101             for(i=start; i<limit; i++)
1102                 levels[i]=level;
1103         }
1104         return direction;   /* no bracket matching for inverse BiDi */
1105     }
1106     if(!(flags&(MASK_EXPLICIT|MASK_ISO))) {
1107         /* no embeddings, set all levels to the paragraph level */
1108         /* we still have to perform bracket matching */
1109         int32_t paraIndex, start, limit;
1110         BracketData bracketData;
1111         bracketInit(pBiDi, &bracketData);
1112         for(paraIndex=0; paraIndex<pBiDi->paraCount; paraIndex++) {
1113             if(paraIndex==0)
1114                 start=0;
1115             else
1116                 start=pBiDi->paras[paraIndex-1].limit;
1117             limit=pBiDi->paras[paraIndex].limit;
1118             level=pBiDi->paras[paraIndex].level;
1119             for(i=start; i<limit; i++) {
1120                 levels[i]=level;
1121                 dirProp=dirProps[i];
1122                 if(dirProp==BN)
1123                     continue;
1124                 if(dirProp==B) {
1125                     if((i+1)<length) {
1126                         if(text[i]==CR && text[i+1]==LF)
1127                             continue;   /* skip CR when followed by LF */
1128                         bracketProcessB(&bracketData, level);
1129                     }
1130                     continue;
1131                 }
1132                 if(!bracketProcessChar(&bracketData, i)) {
1133                     *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
1134                     return UBIDI_LTR;
1135                 }
1136             }
1137         }
1138         return direction;
1139     }
1140     {
1141         /* continue to perform (Xn) */
1142 
1143         /* (X1) level is set for all codes, embeddingLevel keeps track of the push/pop operations */
1144         /* both variables may carry the UBIDI_LEVEL_OVERRIDE flag to indicate the override status */
1145         UBiDiLevel embeddingLevel=level, newLevel;
1146         UBiDiLevel previousLevel=level;     /* previous level for regular (not CC) characters */
1147         int32_t lastCcPos=0;                /* index of last effective LRx,RLx, PDx */
1148 
1149         /* The following stack remembers the embedding level and the ISOLATE flag of level runs.
1150            stackLast points to its current entry. */
1151         uint16_t stack[UBIDI_MAX_EXPLICIT_LEVEL+2];   /* we never push anything >=UBIDI_MAX_EXPLICIT_LEVEL
1152                                                         but we need one more entry as base */
1153         uint32_t stackLast=0;
1154         int32_t overflowIsolateCount=0;
1155         int32_t overflowEmbeddingCount=0;
1156         int32_t validIsolateCount=0;
1157         BracketData bracketData;
1158         bracketInit(pBiDi, &bracketData);
1159         stack[0]=level;     /* initialize base entry to para level, no override, no isolate */
1160 
1161         /* recalculate the flags */
1162         flags=0;
1163 
1164         for(i=0; i<length; ++i) {
1165             dirProp=dirProps[i];
1166             switch(dirProp) {
1167             case LRE:
1168             case RLE:
1169             case LRO:
1170             case RLO:
1171                 /* (X2, X3, X4, X5) */
1172                 flags|=DIRPROP_FLAG(BN);
1173                 levels[i]=previousLevel;
1174                 if (dirProp==LRE || dirProp==LRO)
1175                     /* least greater even level */
1176                     newLevel=(UBiDiLevel)((embeddingLevel+2)&~(UBIDI_LEVEL_OVERRIDE|1));
1177                 else
1178                     /* least greater odd level */
1179                     newLevel=(UBiDiLevel)((NO_OVERRIDE(embeddingLevel)+1)|1);
1180                 if(newLevel<=UBIDI_MAX_EXPLICIT_LEVEL && overflowIsolateCount==0 &&
1181                                                          overflowEmbeddingCount==0) {
1182                     lastCcPos=i;
1183                     embeddingLevel=newLevel;
1184                     if(dirProp==LRO || dirProp==RLO)
1185                         embeddingLevel|=UBIDI_LEVEL_OVERRIDE;
1186                     stackLast++;
1187                     stack[stackLast]=embeddingLevel;
1188                     /* we don't need to set UBIDI_LEVEL_OVERRIDE off for LRE and RLE
1189                        since this has already been done for newLevel which is
1190                        the source for embeddingLevel.
1191                      */
1192                 } else {
1193                     if(overflowIsolateCount==0)
1194                         overflowEmbeddingCount++;
1195                 }
1196                 break;
1197             case PDF:
1198                 /* (X7) */
1199                 flags|=DIRPROP_FLAG(BN);
1200                 levels[i]=previousLevel;
1201                 /* handle all the overflow cases first */
1202                 if(overflowIsolateCount) {
1203                     break;
1204                 }
1205                 if(overflowEmbeddingCount) {
1206                     overflowEmbeddingCount--;
1207                     break;
1208                 }
1209                 if(stackLast>0 && stack[stackLast]<ISOLATE) {   /* not an isolate entry */
1210                     lastCcPos=i;
1211                     stackLast--;
1212                     embeddingLevel=(UBiDiLevel)stack[stackLast];
1213                 }
1214                 break;
1215             case LRI:
1216             case RLI:
1217                 flags|=(DIRPROP_FLAG(ON)|DIRPROP_FLAG_LR(embeddingLevel));
1218                 levels[i]=NO_OVERRIDE(embeddingLevel);
1219                 if(NO_OVERRIDE(embeddingLevel)!=NO_OVERRIDE(previousLevel)) {
1220                     bracketProcessBoundary(&bracketData, lastCcPos,
1221                                            previousLevel, embeddingLevel);
1222                     flags|=DIRPROP_FLAG_MULTI_RUNS;
1223                 }
1224                 previousLevel=embeddingLevel;
1225                 /* (X5a, X5b) */
1226                 if(dirProp==LRI)
1227                     /* least greater even level */
1228                     newLevel=(UBiDiLevel)((embeddingLevel+2)&~(UBIDI_LEVEL_OVERRIDE|1));
1229                 else
1230                     /* least greater odd level */
1231                     newLevel=(UBiDiLevel)((NO_OVERRIDE(embeddingLevel)+1)|1);
1232                 if(newLevel<=UBIDI_MAX_EXPLICIT_LEVEL && overflowIsolateCount==0 &&
1233                                                          overflowEmbeddingCount==0) {
1234                     flags|=DIRPROP_FLAG(dirProp);
1235                     lastCcPos=i;
1236                     validIsolateCount++;
1237                     if(validIsolateCount>pBiDi->isolateCount)
1238                         pBiDi->isolateCount=validIsolateCount;
1239                     embeddingLevel=newLevel;
1240                     /* we can increment stackLast without checking because newLevel
1241                        will exceed UBIDI_MAX_EXPLICIT_LEVEL before stackLast overflows */
1242                     stackLast++;
1243                     stack[stackLast]=embeddingLevel+ISOLATE;
1244                     bracketProcessLRI_RLI(&bracketData, embeddingLevel);
1245                 } else {
1246                     /* make it WS so that it is handled by adjustWSLevels() */
1247                     dirProps[i]=WS;
1248                     overflowIsolateCount++;
1249                 }
1250                 break;
1251             case PDI:
1252                 if(NO_OVERRIDE(embeddingLevel)!=NO_OVERRIDE(previousLevel)) {
1253                     bracketProcessBoundary(&bracketData, lastCcPos,
1254                                            previousLevel, embeddingLevel);
1255                     flags|=DIRPROP_FLAG_MULTI_RUNS;
1256                 }
1257                 /* (X6a) */
1258                 if(overflowIsolateCount) {
1259                     overflowIsolateCount--;
1260                     /* make it WS so that it is handled by adjustWSLevels() */
1261                     dirProps[i]=WS;
1262                 }
1263                 else if(validIsolateCount) {
1264                     flags|=DIRPROP_FLAG(PDI);
1265                     lastCcPos=i;
1266                     overflowEmbeddingCount=0;
1267                     while(stack[stackLast]<ISOLATE) /* pop embedding entries */
1268                         stackLast--;                /* until the last isolate entry */
1269                     stackLast--;                    /* pop also the last isolate entry */
1270                     validIsolateCount--;
1271                     bracketProcessPDI(&bracketData);
1272                 } else
1273                     /* make it WS so that it is handled by adjustWSLevels() */
1274                     dirProps[i]=WS;
1275                 embeddingLevel=(UBiDiLevel)stack[stackLast]&~ISOLATE;
1276                 flags|=(DIRPROP_FLAG(ON)|DIRPROP_FLAG_LR(embeddingLevel));
1277                 previousLevel=embeddingLevel;
1278                 levels[i]=NO_OVERRIDE(embeddingLevel);
1279                 break;
1280             case B:
1281                 flags|=DIRPROP_FLAG(B);
1282                 levels[i]=GET_PARALEVEL(pBiDi, i);
1283                 if((i+1)<length) {
1284                     if(text[i]==CR && text[i+1]==LF)
1285                         break;          /* skip CR when followed by LF */
1286                     overflowEmbeddingCount=overflowIsolateCount=0;
1287                     validIsolateCount=0;
1288                     stackLast=0;
1289                     previousLevel=embeddingLevel=GET_PARALEVEL(pBiDi, i+1);
1290                     stack[0]=embeddingLevel; /* initialize base entry to para level, no override, no isolate */
1291                     bracketProcessB(&bracketData, embeddingLevel);
1292                 }
1293                 break;
1294             case BN:
1295                 /* BN, LRE, RLE, and PDF are supposed to be removed (X9) */
1296                 /* they will get their levels set correctly in adjustWSLevels() */
1297                 levels[i]=previousLevel;
1298                 flags|=DIRPROP_FLAG(BN);
1299                 break;
1300             default:
1301                 /* all other types are normal characters and get the "real" level */
1302                 if(NO_OVERRIDE(embeddingLevel)!=NO_OVERRIDE(previousLevel)) {
1303                     bracketProcessBoundary(&bracketData, lastCcPos,
1304                                            previousLevel, embeddingLevel);
1305                     flags|=DIRPROP_FLAG_MULTI_RUNS;
1306                     if(embeddingLevel&UBIDI_LEVEL_OVERRIDE)
1307                         flags|=DIRPROP_FLAG_O(embeddingLevel);
1308                     else
1309                         flags|=DIRPROP_FLAG_E(embeddingLevel);
1310                 }
1311                 previousLevel=embeddingLevel;
1312                 levels[i]=embeddingLevel;
1313                 if(!bracketProcessChar(&bracketData, i))
1314                     return -1;
1315                 /* the dirProp may have been changed in bracketProcessChar() */
1316                 flags|=DIRPROP_FLAG(dirProps[i]);
1317                 break;
1318             }
1319         }
1320         if(flags&MASK_EMBEDDING)
1321             flags|=DIRPROP_FLAG_LR(pBiDi->paraLevel);
1322         if(pBiDi->orderParagraphsLTR && (flags&DIRPROP_FLAG(B)))
1323             flags|=DIRPROP_FLAG(L);
1324         /* again, determine if the text is mixed-directional or single-directional */
1325         pBiDi->flags=flags;
1326         direction=directionFromFlags(pBiDi);
1327     }
1328     return direction;
1329 }
1330 
1331 /*
1332  * Use a pre-specified embedding levels array:
1333  *
1334  * Adjust the directional properties for overrides (->LEVEL_OVERRIDE),
1335  * ignore all explicit codes (X9),
1336  * and check all the preset levels.
1337  *
1338  * Recalculate the flags to have them reflect the real properties
1339  * after taking the explicit embeddings into account.
1340  */
1341 static UBiDiDirection
checkExplicitLevels(UBiDi * pBiDi,UErrorCode * pErrorCode)1342 checkExplicitLevels(UBiDi *pBiDi, UErrorCode *pErrorCode) {
1343     DirProp *dirProps=pBiDi->dirProps;
1344     DirProp dirProp;
1345     UBiDiLevel *levels=pBiDi->levels;
1346     int32_t isolateCount=0;
1347 
1348     int32_t i, length=pBiDi->length;
1349     Flags flags=0;  /* collect all directionalities in the text */
1350     UBiDiLevel level;
1351     pBiDi->isolateCount=0;
1352 
1353     for(i=0; i<length; ++i) {
1354         level=levels[i];
1355         dirProp=dirProps[i];
1356         if(dirProp==LRI || dirProp==RLI) {
1357             isolateCount++;
1358             if(isolateCount>pBiDi->isolateCount)
1359                 pBiDi->isolateCount=isolateCount;
1360         }
1361         else if(dirProp==PDI)
1362             isolateCount--;
1363         else if(dirProp==B)
1364             isolateCount=0;
1365         if(level&UBIDI_LEVEL_OVERRIDE) {
1366             /* keep the override flag in levels[i] but adjust the flags */
1367             level&=~UBIDI_LEVEL_OVERRIDE;     /* make the range check below simpler */
1368             flags|=DIRPROP_FLAG_O(level);
1369         } else {
1370             /* set the flags */
1371             flags|=DIRPROP_FLAG_E(level)|DIRPROP_FLAG(dirProp);
1372         }
1373         if((level<GET_PARALEVEL(pBiDi, i) &&
1374             !((0==level)&&(dirProp==B))) ||
1375            (UBIDI_MAX_EXPLICIT_LEVEL<level)) {
1376             /* level out of bounds */
1377             *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
1378             return UBIDI_LTR;
1379         }
1380     }
1381     if(flags&MASK_EMBEDDING)
1382         flags|=DIRPROP_FLAG_LR(pBiDi->paraLevel);
1383     /* determine if the text is mixed-directional or single-directional */
1384     pBiDi->flags=flags;
1385     return directionFromFlags(pBiDi);
1386 }
1387 
1388 /******************************************************************
1389  The Properties state machine table
1390 *******************************************************************
1391 
1392  All table cells are 8 bits:
1393       bits 0..4:  next state
1394       bits 5..7:  action to perform (if > 0)
1395 
1396  Cells may be of format "n" where n represents the next state
1397  (except for the rightmost column).
1398  Cells may also be of format "s(x,y)" where x represents an action
1399  to perform and y represents the next state.
1400 
1401 *******************************************************************
1402  Definitions and type for properties state table
1403 *******************************************************************
1404 */
1405 #define IMPTABPROPS_COLUMNS 16
1406 #define IMPTABPROPS_RES (IMPTABPROPS_COLUMNS - 1)
1407 #define GET_STATEPROPS(cell) ((cell)&0x1f)
1408 #define GET_ACTIONPROPS(cell) ((cell)>>5)
1409 #define s(action, newState) ((uint8_t)(newState+(action<<5)))
1410 
1411 static const uint8_t groupProp[] =          /* dirProp regrouped */
1412 {
1413 /*  L   R   EN  ES  ET  AN  CS  B   S   WS  ON  LRE LRO AL  RLE RLO PDF NSM BN  FSI LRI RLI PDI ENL ENR */
1414     0,  1,  2,  7,  8,  3,  9,  6,  5,  4,  4,  10, 10, 12, 10, 10, 10, 11, 10, 4,  4,  4,  4,  13, 14
1415 };
1416 enum { DirProp_L=0, DirProp_R=1, DirProp_EN=2, DirProp_AN=3, DirProp_ON=4, DirProp_S=5, DirProp_B=6 }; /* reduced dirProp */
1417 
1418 /******************************************************************
1419 
1420       PROPERTIES  STATE  TABLE
1421 
1422  In table impTabProps,
1423       - the ON column regroups ON and WS, FSI, RLI, LRI and PDI
1424       - the BN column regroups BN, LRE, RLE, LRO, RLO, PDF
1425       - the Res column is the reduced property assigned to a run
1426 
1427  Action 1: process current run1, init new run1
1428         2: init new run2
1429         3: process run1, process run2, init new run1
1430         4: process run1, set run1=run2, init new run2
1431 
1432  Notes:
1433   1) This table is used in resolveImplicitLevels().
1434   2) This table triggers actions when there is a change in the Bidi
1435      property of incoming characters (action 1).
1436   3) Most such property sequences are processed immediately (in
1437      fact, passed to processPropertySeq().
1438   4) However, numbers are assembled as one sequence. This means
1439      that undefined situations (like CS following digits, until
1440      it is known if the next char will be a digit) are held until
1441      following chars define them.
1442      Example: digits followed by CS, then comes another CS or ON;
1443               the digits will be processed, then the CS assigned
1444               as the start of an ON sequence (action 3).
1445   5) There are cases where more than one sequence must be
1446      processed, for instance digits followed by CS followed by L:
1447      the digits must be processed as one sequence, and the CS
1448      must be processed as an ON sequence, all this before starting
1449      assembling chars for the opening L sequence.
1450 
1451 
1452 */
1453 static const uint8_t impTabProps[][IMPTABPROPS_COLUMNS] =
1454 {
1455 /*                        L ,     R ,    EN ,    AN ,    ON ,     S ,     B ,    ES ,    ET ,    CS ,    BN ,   NSM ,    AL ,   ENL ,   ENR , Res */
1456 /* 0 Init        */ {     1 ,     2 ,     4 ,     5 ,     7 ,    15 ,    17 ,     7 ,     9 ,     7 ,     0 ,     7 ,     3 ,    18 ,    21 , DirProp_ON },
1457 /* 1 L           */ {     1 , s(1,2), s(1,4), s(1,5), s(1,7),s(1,15),s(1,17), s(1,7), s(1,9), s(1,7),     1 ,     1 , s(1,3),s(1,18),s(1,21),  DirProp_L },
1458 /* 2 R           */ { s(1,1),     2 , s(1,4), s(1,5), s(1,7),s(1,15),s(1,17), s(1,7), s(1,9), s(1,7),     2 ,     2 , s(1,3),s(1,18),s(1,21),  DirProp_R },
1459 /* 3 AL          */ { s(1,1), s(1,2), s(1,6), s(1,6), s(1,8),s(1,16),s(1,17), s(1,8), s(1,8), s(1,8),     3 ,     3 ,     3 ,s(1,18),s(1,21),  DirProp_R },
1460 /* 4 EN          */ { s(1,1), s(1,2),     4 , s(1,5), s(1,7),s(1,15),s(1,17),s(2,10),    11 ,s(2,10),     4 ,     4 , s(1,3),    18 ,    21 , DirProp_EN },
1461 /* 5 AN          */ { s(1,1), s(1,2), s(1,4),     5 , s(1,7),s(1,15),s(1,17), s(1,7), s(1,9),s(2,12),     5 ,     5 , s(1,3),s(1,18),s(1,21), DirProp_AN },
1462 /* 6 AL:EN/AN    */ { s(1,1), s(1,2),     6 ,     6 , s(1,8),s(1,16),s(1,17), s(1,8), s(1,8),s(2,13),     6 ,     6 , s(1,3),    18 ,    21 , DirProp_AN },
1463 /* 7 ON          */ { s(1,1), s(1,2), s(1,4), s(1,5),     7 ,s(1,15),s(1,17),     7 ,s(2,14),     7 ,     7 ,     7 , s(1,3),s(1,18),s(1,21), DirProp_ON },
1464 /* 8 AL:ON       */ { s(1,1), s(1,2), s(1,6), s(1,6),     8 ,s(1,16),s(1,17),     8 ,     8 ,     8 ,     8 ,     8 , s(1,3),s(1,18),s(1,21), DirProp_ON },
1465 /* 9 ET          */ { s(1,1), s(1,2),     4 , s(1,5),     7 ,s(1,15),s(1,17),     7 ,     9 ,     7 ,     9 ,     9 , s(1,3),    18 ,    21 , DirProp_ON },
1466 /*10 EN+ES/CS    */ { s(3,1), s(3,2),     4 , s(3,5), s(4,7),s(3,15),s(3,17), s(4,7),s(4,14), s(4,7),    10 , s(4,7), s(3,3),    18 ,    21 , DirProp_EN },
1467 /*11 EN+ET       */ { s(1,1), s(1,2),     4 , s(1,5), s(1,7),s(1,15),s(1,17), s(1,7),    11 , s(1,7),    11 ,    11 , s(1,3),    18 ,    21 , DirProp_EN },
1468 /*12 AN+CS       */ { s(3,1), s(3,2), s(3,4),     5 , s(4,7),s(3,15),s(3,17), s(4,7),s(4,14), s(4,7),    12 , s(4,7), s(3,3),s(3,18),s(3,21), DirProp_AN },
1469 /*13 AL:EN/AN+CS */ { s(3,1), s(3,2),     6 ,     6 , s(4,8),s(3,16),s(3,17), s(4,8), s(4,8), s(4,8),    13 , s(4,8), s(3,3),    18 ,    21 , DirProp_AN },
1470 /*14 ON+ET       */ { s(1,1), s(1,2), s(4,4), s(1,5),     7 ,s(1,15),s(1,17),     7 ,    14 ,     7 ,    14 ,    14 , s(1,3),s(4,18),s(4,21), DirProp_ON },
1471 /*15 S           */ { s(1,1), s(1,2), s(1,4), s(1,5), s(1,7),    15 ,s(1,17), s(1,7), s(1,9), s(1,7),    15 , s(1,7), s(1,3),s(1,18),s(1,21),  DirProp_S },
1472 /*16 AL:S        */ { s(1,1), s(1,2), s(1,6), s(1,6), s(1,8),    16 ,s(1,17), s(1,8), s(1,8), s(1,8),    16 , s(1,8), s(1,3),s(1,18),s(1,21),  DirProp_S },
1473 /*17 B           */ { s(1,1), s(1,2), s(1,4), s(1,5), s(1,7),s(1,15),    17 , s(1,7), s(1,9), s(1,7),    17 , s(1,7), s(1,3),s(1,18),s(1,21),  DirProp_B },
1474 /*18 ENL         */ { s(1,1), s(1,2),    18 , s(1,5), s(1,7),s(1,15),s(1,17),s(2,19),    20 ,s(2,19),    18 ,    18 , s(1,3),    18 ,    21 ,  DirProp_L },
1475 /*19 ENL+ES/CS   */ { s(3,1), s(3,2),    18 , s(3,5), s(4,7),s(3,15),s(3,17), s(4,7),s(4,14), s(4,7),    19 , s(4,7), s(3,3),    18 ,    21 ,  DirProp_L },
1476 /*20 ENL+ET      */ { s(1,1), s(1,2),    18 , s(1,5), s(1,7),s(1,15),s(1,17), s(1,7),    20 , s(1,7),    20 ,    20 , s(1,3),    18 ,    21 ,  DirProp_L },
1477 /*21 ENR         */ { s(1,1), s(1,2),    21 , s(1,5), s(1,7),s(1,15),s(1,17),s(2,22),    23 ,s(2,22),    21 ,    21 , s(1,3),    18 ,    21 , DirProp_AN },
1478 /*22 ENR+ES/CS   */ { s(3,1), s(3,2),    21 , s(3,5), s(4,7),s(3,15),s(3,17), s(4,7),s(4,14), s(4,7),    22 , s(4,7), s(3,3),    18 ,    21 , DirProp_AN },
1479 /*23 ENR+ET      */ { s(1,1), s(1,2),    21 , s(1,5), s(1,7),s(1,15),s(1,17), s(1,7),    23 , s(1,7),    23 ,    23 , s(1,3),    18 ,    21 , DirProp_AN }
1480 };
1481 
1482 /*  we must undef macro s because the levels tables have a different
1483  *  structure (4 bits for action and 4 bits for next state.
1484  */
1485 #undef s
1486 
1487 /******************************************************************
1488  The levels state machine tables
1489 *******************************************************************
1490 
1491  All table cells are 8 bits:
1492       bits 0..3:  next state
1493       bits 4..7:  action to perform (if > 0)
1494 
1495  Cells may be of format "n" where n represents the next state
1496  (except for the rightmost column).
1497  Cells may also be of format "s(x,y)" where x represents an action
1498  to perform and y represents the next state.
1499 
1500  This format limits each table to 16 states each and to 15 actions.
1501 
1502 *******************************************************************
1503  Definitions and type for levels state tables
1504 *******************************************************************
1505 */
1506 #define IMPTABLEVELS_COLUMNS (DirProp_B + 2)
1507 #define IMPTABLEVELS_RES (IMPTABLEVELS_COLUMNS - 1)
1508 #define GET_STATE(cell) ((cell)&0x0f)
1509 #define GET_ACTION(cell) ((cell)>>4)
1510 #define s(action, newState) ((uint8_t)(newState+(action<<4)))
1511 
1512 typedef uint8_t ImpTab[][IMPTABLEVELS_COLUMNS];
1513 typedef uint8_t ImpAct[];
1514 
1515 /* FOOD FOR THOUGHT: each ImpTab should have its associated ImpAct,
1516  * instead of having a pair of ImpTab and a pair of ImpAct.
1517  */
1518 typedef struct ImpTabPair {
1519     const void * pImpTab[2];
1520     const void * pImpAct[2];
1521 } ImpTabPair;
1522 
1523 /******************************************************************
1524 
1525       LEVELS  STATE  TABLES
1526 
1527  In all levels state tables,
1528       - state 0 is the initial state
1529       - the Res column is the increment to add to the text level
1530         for this property sequence.
1531 
1532  The impAct arrays for each table of a pair map the local action
1533  numbers of the table to the total list of actions. For instance,
1534  action 2 in a given table corresponds to the action number which
1535  appears in entry [2] of the impAct array for that table.
1536  The first entry of all impAct arrays must be 0.
1537 
1538  Action 1: init conditional sequence
1539         2: prepend conditional sequence to current sequence
1540         3: set ON sequence to new level - 1
1541         4: init EN/AN/ON sequence
1542         5: fix EN/AN/ON sequence followed by R
1543         6: set previous level sequence to level 2
1544 
1545  Notes:
1546   1) These tables are used in processPropertySeq(). The input
1547      is property sequences as determined by resolveImplicitLevels.
1548   2) Most such property sequences are processed immediately
1549      (levels are assigned).
1550   3) However, some sequences cannot be assigned a final level till
1551      one or more following sequences are received. For instance,
1552      ON following an R sequence within an even-level paragraph.
1553      If the following sequence is R, the ON sequence will be
1554      assigned basic run level+1, and so will the R sequence.
1555   4) S is generally handled like ON, since its level will be fixed
1556      to paragraph level in adjustWSLevels().
1557 
1558 */
1559 
1560 static const ImpTab impTabL_DEFAULT =   /* Even paragraph level */
1561 /*  In this table, conditional sequences receive the lower possible level
1562     until proven otherwise.
1563 */
1564 {
1565 /*                         L ,     R ,    EN ,    AN ,    ON ,     S ,     B , Res */
1566 /* 0 : init       */ {     0 ,     1 ,     0 ,     2 ,     0 ,     0 ,     0 ,  0 },
1567 /* 1 : R          */ {     0 ,     1 ,     3 ,     3 , s(1,4), s(1,4),     0 ,  1 },
1568 /* 2 : AN         */ {     0 ,     1 ,     0 ,     2 , s(1,5), s(1,5),     0 ,  2 },
1569 /* 3 : R+EN/AN    */ {     0 ,     1 ,     3 ,     3 , s(1,4), s(1,4),     0 ,  2 },
1570 /* 4 : R+ON       */ {     0 , s(2,1), s(3,3), s(3,3),     4 ,     4 ,     0 ,  0 },
1571 /* 5 : AN+ON      */ {     0 , s(2,1),     0 , s(3,2),     5 ,     5 ,     0 ,  0 }
1572 };
1573 static const ImpTab impTabR_DEFAULT =   /* Odd  paragraph level */
1574 /*  In this table, conditional sequences receive the lower possible level
1575     until proven otherwise.
1576 */
1577 {
1578 /*                         L ,     R ,    EN ,    AN ,    ON ,     S ,     B , Res */
1579 /* 0 : init       */ {     1 ,     0 ,     2 ,     2 ,     0 ,     0 ,     0 ,  0 },
1580 /* 1 : L          */ {     1 ,     0 ,     1 ,     3 , s(1,4), s(1,4),     0 ,  1 },
1581 /* 2 : EN/AN      */ {     1 ,     0 ,     2 ,     2 ,     0 ,     0 ,     0 ,  1 },
1582 /* 3 : L+AN       */ {     1 ,     0 ,     1 ,     3 ,     5 ,     5 ,     0 ,  1 },
1583 /* 4 : L+ON       */ { s(2,1),     0 , s(2,1),     3 ,     4 ,     4 ,     0 ,  0 },
1584 /* 5 : L+AN+ON    */ {     1 ,     0 ,     1 ,     3 ,     5 ,     5 ,     0 ,  0 }
1585 };
1586 static const ImpAct impAct0 = {0,1,2,3,4};
1587 static const ImpTabPair impTab_DEFAULT = {{&impTabL_DEFAULT,
1588                                            &impTabR_DEFAULT},
1589                                           {&impAct0, &impAct0}};
1590 
1591 static const ImpTab impTabL_NUMBERS_SPECIAL =   /* Even paragraph level */
1592 /*  In this table, conditional sequences receive the lower possible level
1593     until proven otherwise.
1594 */
1595 {
1596 /*                         L ,     R ,    EN ,    AN ,    ON ,     S ,     B , Res */
1597 /* 0 : init       */ {     0 ,     2 , s(1,1), s(1,1),     0 ,     0 ,     0 ,  0 },
1598 /* 1 : L+EN/AN    */ {     0 , s(4,2),     1 ,     1 ,     0 ,     0 ,     0 ,  0 },
1599 /* 2 : R          */ {     0 ,     2 ,     4 ,     4 , s(1,3), s(1,3),     0 ,  1 },
1600 /* 3 : R+ON       */ {     0 , s(2,2), s(3,4), s(3,4),     3 ,     3 ,     0 ,  0 },
1601 /* 4 : R+EN/AN    */ {     0 ,     2 ,     4 ,     4 , s(1,3), s(1,3),     0 ,  2 }
1602 };
1603 static const ImpTabPair impTab_NUMBERS_SPECIAL = {{&impTabL_NUMBERS_SPECIAL,
1604                                                    &impTabR_DEFAULT},
1605                                                   {&impAct0, &impAct0}};
1606 
1607 static const ImpTab impTabL_GROUP_NUMBERS_WITH_R =
1608 /*  In this table, EN/AN+ON sequences receive levels as if associated with R
1609     until proven that there is L or sor/eor on both sides. AN is handled like EN.
1610 */
1611 {
1612 /*                         L ,     R ,    EN ,    AN ,    ON ,     S ,     B , Res */
1613 /* 0 init         */ {     0 ,     3 , s(1,1), s(1,1),     0 ,     0 ,     0 ,  0 },
1614 /* 1 EN/AN        */ { s(2,0),     3 ,     1 ,     1 ,     2 , s(2,0), s(2,0),  2 },
1615 /* 2 EN/AN+ON     */ { s(2,0),     3 ,     1 ,     1 ,     2 , s(2,0), s(2,0),  1 },
1616 /* 3 R            */ {     0 ,     3 ,     5 ,     5 , s(1,4),     0 ,     0 ,  1 },
1617 /* 4 R+ON         */ { s(2,0),     3 ,     5 ,     5 ,     4 , s(2,0), s(2,0),  1 },
1618 /* 5 R+EN/AN      */ {     0 ,     3 ,     5 ,     5 , s(1,4),     0 ,     0 ,  2 }
1619 };
1620 static const ImpTab impTabR_GROUP_NUMBERS_WITH_R =
1621 /*  In this table, EN/AN+ON sequences receive levels as if associated with R
1622     until proven that there is L on both sides. AN is handled like EN.
1623 */
1624 {
1625 /*                         L ,     R ,    EN ,    AN ,    ON ,     S ,     B , Res */
1626 /* 0 init         */ {     2 ,     0 ,     1 ,     1 ,     0 ,     0 ,     0 ,  0 },
1627 /* 1 EN/AN        */ {     2 ,     0 ,     1 ,     1 ,     0 ,     0 ,     0 ,  1 },
1628 /* 2 L            */ {     2 ,     0 , s(1,4), s(1,4), s(1,3),     0 ,     0 ,  1 },
1629 /* 3 L+ON         */ { s(2,2),     0 ,     4 ,     4 ,     3 ,     0 ,     0 ,  0 },
1630 /* 4 L+EN/AN      */ { s(2,2),     0 ,     4 ,     4 ,     3 ,     0 ,     0 ,  1 }
1631 };
1632 static const ImpTabPair impTab_GROUP_NUMBERS_WITH_R = {
1633                         {&impTabL_GROUP_NUMBERS_WITH_R,
1634                          &impTabR_GROUP_NUMBERS_WITH_R},
1635                         {&impAct0, &impAct0}};
1636 
1637 
1638 static const ImpTab impTabL_INVERSE_NUMBERS_AS_L =
1639 /*  This table is identical to the Default LTR table except that EN and AN are
1640     handled like L.
1641 */
1642 {
1643 /*                         L ,     R ,    EN ,    AN ,    ON ,     S ,     B , Res */
1644 /* 0 : init       */ {     0 ,     1 ,     0 ,     0 ,     0 ,     0 ,     0 ,  0 },
1645 /* 1 : R          */ {     0 ,     1 ,     0 ,     0 , s(1,4), s(1,4),     0 ,  1 },
1646 /* 2 : AN         */ {     0 ,     1 ,     0 ,     0 , s(1,5), s(1,5),     0 ,  2 },
1647 /* 3 : R+EN/AN    */ {     0 ,     1 ,     0 ,     0 , s(1,4), s(1,4),     0 ,  2 },
1648 /* 4 : R+ON       */ { s(2,0),     1 , s(2,0), s(2,0),     4 ,     4 , s(2,0),  1 },
1649 /* 5 : AN+ON      */ { s(2,0),     1 , s(2,0), s(2,0),     5 ,     5 , s(2,0),  1 }
1650 };
1651 static const ImpTab impTabR_INVERSE_NUMBERS_AS_L =
1652 /*  This table is identical to the Default RTL table except that EN and AN are
1653     handled like L.
1654 */
1655 {
1656 /*                         L ,     R ,    EN ,    AN ,    ON ,     S ,     B , Res */
1657 /* 0 : init       */ {     1 ,     0 ,     1 ,     1 ,     0 ,     0 ,     0 ,  0 },
1658 /* 1 : L          */ {     1 ,     0 ,     1 ,     1 , s(1,4), s(1,4),     0 ,  1 },
1659 /* 2 : EN/AN      */ {     1 ,     0 ,     1 ,     1 ,     0 ,     0 ,     0 ,  1 },
1660 /* 3 : L+AN       */ {     1 ,     0 ,     1 ,     1 ,     5 ,     5 ,     0 ,  1 },
1661 /* 4 : L+ON       */ { s(2,1),     0 , s(2,1), s(2,1),     4 ,     4 ,     0 ,  0 },
1662 /* 5 : L+AN+ON    */ {     1 ,     0 ,     1 ,     1 ,     5 ,     5 ,     0 ,  0 }
1663 };
1664 static const ImpTabPair impTab_INVERSE_NUMBERS_AS_L = {
1665                         {&impTabL_INVERSE_NUMBERS_AS_L,
1666                          &impTabR_INVERSE_NUMBERS_AS_L},
1667                         {&impAct0, &impAct0}};
1668 
1669 static const ImpTab impTabR_INVERSE_LIKE_DIRECT =   /* Odd  paragraph level */
1670 /*  In this table, conditional sequences receive the lower possible level
1671     until proven otherwise.
1672 */
1673 {
1674 /*                         L ,     R ,    EN ,    AN ,    ON ,     S ,     B , Res */
1675 /* 0 : init       */ {     1 ,     0 ,     2 ,     2 ,     0 ,     0 ,     0 ,  0 },
1676 /* 1 : L          */ {     1 ,     0 ,     1 ,     2 , s(1,3), s(1,3),     0 ,  1 },
1677 /* 2 : EN/AN      */ {     1 ,     0 ,     2 ,     2 ,     0 ,     0 ,     0 ,  1 },
1678 /* 3 : L+ON       */ { s(2,1), s(3,0),     6 ,     4 ,     3 ,     3 , s(3,0),  0 },
1679 /* 4 : L+ON+AN    */ { s(2,1), s(3,0),     6 ,     4 ,     5 ,     5 , s(3,0),  3 },
1680 /* 5 : L+AN+ON    */ { s(2,1), s(3,0),     6 ,     4 ,     5 ,     5 , s(3,0),  2 },
1681 /* 6 : L+ON+EN    */ { s(2,1), s(3,0),     6 ,     4 ,     3 ,     3 , s(3,0),  1 }
1682 };
1683 static const ImpAct impAct1 = {0,1,13,14};
1684 /* FOOD FOR THOUGHT: in LTR table below, check case "JKL 123abc"
1685  */
1686 static const ImpTabPair impTab_INVERSE_LIKE_DIRECT = {
1687                         {&impTabL_DEFAULT,
1688                          &impTabR_INVERSE_LIKE_DIRECT},
1689                         {&impAct0, &impAct1}};
1690 
1691 static const ImpTab impTabL_INVERSE_LIKE_DIRECT_WITH_MARKS =
1692 /*  The case handled in this table is (visually):  R EN L
1693 */
1694 {
1695 /*                         L ,     R ,    EN ,    AN ,    ON ,     S ,     B , Res */
1696 /* 0 : init       */ {     0 , s(6,3),     0 ,     1 ,     0 ,     0 ,     0 ,  0 },
1697 /* 1 : L+AN       */ {     0 , s(6,3),     0 ,     1 , s(1,2), s(3,0),     0 ,  4 },
1698 /* 2 : L+AN+ON    */ { s(2,0), s(6,3), s(2,0),     1 ,     2 , s(3,0), s(2,0),  3 },
1699 /* 3 : R          */ {     0 , s(6,3), s(5,5), s(5,6), s(1,4), s(3,0),     0 ,  3 },
1700 /* 4 : R+ON       */ { s(3,0), s(4,3), s(5,5), s(5,6),     4 , s(3,0), s(3,0),  3 },
1701 /* 5 : R+EN       */ { s(3,0), s(4,3),     5 , s(5,6), s(1,4), s(3,0), s(3,0),  4 },
1702 /* 6 : R+AN       */ { s(3,0), s(4,3), s(5,5),     6 , s(1,4), s(3,0), s(3,0),  4 }
1703 };
1704 static const ImpTab impTabR_INVERSE_LIKE_DIRECT_WITH_MARKS =
1705 /*  The cases handled in this table are (visually):  R EN L
1706                                                      R L AN L
1707 */
1708 {
1709 /*                         L ,     R ,    EN ,    AN ,    ON ,     S ,     B , Res */
1710 /* 0 : init       */ { s(1,3),     0 ,     1 ,     1 ,     0 ,     0 ,     0 ,  0 },
1711 /* 1 : R+EN/AN    */ { s(2,3),     0 ,     1 ,     1 ,     2 , s(4,0),     0 ,  1 },
1712 /* 2 : R+EN/AN+ON */ { s(2,3),     0 ,     1 ,     1 ,     2 , s(4,0),     0 ,  0 },
1713 /* 3 : L          */ {     3 ,     0 ,     3 , s(3,6), s(1,4), s(4,0),     0 ,  1 },
1714 /* 4 : L+ON       */ { s(5,3), s(4,0),     5 , s(3,6),     4 , s(4,0), s(4,0),  0 },
1715 /* 5 : L+ON+EN    */ { s(5,3), s(4,0),     5 , s(3,6),     4 , s(4,0), s(4,0),  1 },
1716 /* 6 : L+AN       */ { s(5,3), s(4,0),     6 ,     6 ,     4 , s(4,0), s(4,0),  3 }
1717 };
1718 static const ImpAct impAct2 = {0,1,2,5,6,7,8};
1719 static const ImpAct impAct3 = {0,1,9,10,11,12};
1720 static const ImpTabPair impTab_INVERSE_LIKE_DIRECT_WITH_MARKS = {
1721                         {&impTabL_INVERSE_LIKE_DIRECT_WITH_MARKS,
1722                          &impTabR_INVERSE_LIKE_DIRECT_WITH_MARKS},
1723                         {&impAct2, &impAct3}};
1724 
1725 static const ImpTabPair impTab_INVERSE_FOR_NUMBERS_SPECIAL = {
1726                         {&impTabL_NUMBERS_SPECIAL,
1727                          &impTabR_INVERSE_LIKE_DIRECT},
1728                         {&impAct0, &impAct1}};
1729 
1730 static const ImpTab impTabL_INVERSE_FOR_NUMBERS_SPECIAL_WITH_MARKS =
1731 /*  The case handled in this table is (visually):  R EN L
1732 */
1733 {
1734 /*                         L ,     R ,    EN ,    AN ,    ON ,     S ,     B , Res */
1735 /* 0 : init       */ {     0 , s(6,2),     1 ,     1 ,     0 ,     0 ,     0 ,  0 },
1736 /* 1 : L+EN/AN    */ {     0 , s(6,2),     1 ,     1 ,     0 , s(3,0),     0 ,  4 },
1737 /* 2 : R          */ {     0 , s(6,2), s(5,4), s(5,4), s(1,3), s(3,0),     0 ,  3 },
1738 /* 3 : R+ON       */ { s(3,0), s(4,2), s(5,4), s(5,4),     3 , s(3,0), s(3,0),  3 },
1739 /* 4 : R+EN/AN    */ { s(3,0), s(4,2),     4 ,     4 , s(1,3), s(3,0), s(3,0),  4 }
1740 };
1741 static const ImpTabPair impTab_INVERSE_FOR_NUMBERS_SPECIAL_WITH_MARKS = {
1742                         {&impTabL_INVERSE_FOR_NUMBERS_SPECIAL_WITH_MARKS,
1743                          &impTabR_INVERSE_LIKE_DIRECT_WITH_MARKS},
1744                         {&impAct2, &impAct3}};
1745 
1746 #undef s
1747 
1748 typedef struct {
1749     const ImpTab * pImpTab;             /* level table pointer          */
1750     const ImpAct * pImpAct;             /* action map array             */
1751     int32_t startON;                    /* start of ON sequence         */
1752     int32_t startL2EN;                  /* start of level 2 sequence    */
1753     int32_t lastStrongRTL;              /* index of last found R or AL  */
1754     int32_t state;                      /* current state                */
1755     int32_t runStart;                   /* start position of the run    */
1756     UBiDiLevel runLevel;                /* run level before implicit solving */
1757 } LevState;
1758 
1759 /*------------------------------------------------------------------------*/
1760 
1761 static void
addPoint(UBiDi * pBiDi,int32_t pos,int32_t flag)1762 addPoint(UBiDi *pBiDi, int32_t pos, int32_t flag)
1763   /* param pos:     position where to insert
1764      param flag:    one of LRM_BEFORE, LRM_AFTER, RLM_BEFORE, RLM_AFTER
1765   */
1766 {
1767 #define FIRSTALLOC  10
1768     Point point;
1769     InsertPoints * pInsertPoints=&(pBiDi->insertPoints);
1770 
1771     if (pInsertPoints->capacity == 0)
1772     {
1773         pInsertPoints->points=uprv_malloc(sizeof(Point)*FIRSTALLOC);
1774         if (pInsertPoints->points == NULL)
1775         {
1776             pInsertPoints->errorCode=U_MEMORY_ALLOCATION_ERROR;
1777             return;
1778         }
1779         pInsertPoints->capacity=FIRSTALLOC;
1780     }
1781     if (pInsertPoints->size >= pInsertPoints->capacity) /* no room for new point */
1782     {
1783         void * savePoints=pInsertPoints->points;
1784         pInsertPoints->points=uprv_realloc(pInsertPoints->points,
1785                                            pInsertPoints->capacity*2*sizeof(Point));
1786         if (pInsertPoints->points == NULL)
1787         {
1788             pInsertPoints->points=savePoints;
1789             pInsertPoints->errorCode=U_MEMORY_ALLOCATION_ERROR;
1790             return;
1791         }
1792         else  pInsertPoints->capacity*=2;
1793     }
1794     point.pos=pos;
1795     point.flag=flag;
1796     pInsertPoints->points[pInsertPoints->size]=point;
1797     pInsertPoints->size++;
1798 #undef FIRSTALLOC
1799 }
1800 
1801 static void
setLevelsOutsideIsolates(UBiDi * pBiDi,int32_t start,int32_t limit,UBiDiLevel level)1802 setLevelsOutsideIsolates(UBiDi *pBiDi, int32_t start, int32_t limit, UBiDiLevel level)
1803 {
1804     DirProp *dirProps=pBiDi->dirProps, dirProp;
1805     UBiDiLevel *levels=pBiDi->levels;
1806     int32_t isolateCount=0, k;
1807     for(k=start; k<limit; k++) {
1808         dirProp=dirProps[k];
1809         if(dirProp==PDI)
1810             isolateCount--;
1811         if(isolateCount==0)
1812             levels[k]=level;
1813         if(dirProp==LRI || dirProp==RLI)
1814             isolateCount++;
1815     }
1816 }
1817 
1818 /* perform rules (Wn), (Nn), and (In) on a run of the text ------------------ */
1819 
1820 /*
1821  * This implementation of the (Wn) rules applies all rules in one pass.
1822  * In order to do so, it needs a look-ahead of typically 1 character
1823  * (except for W5: sequences of ET) and keeps track of changes
1824  * in a rule Wp that affect a later Wq (p<q).
1825  *
1826  * The (Nn) and (In) rules are also performed in that same single loop,
1827  * but effectively one iteration behind for white space.
1828  *
1829  * Since all implicit rules are performed in one step, it is not necessary
1830  * to actually store the intermediate directional properties in dirProps[].
1831  */
1832 
1833 static void
processPropertySeq(UBiDi * pBiDi,LevState * pLevState,uint8_t _prop,int32_t start,int32_t limit)1834 processPropertySeq(UBiDi *pBiDi, LevState *pLevState, uint8_t _prop,
1835                    int32_t start, int32_t limit) {
1836     uint8_t cell, oldStateSeq, actionSeq;
1837     const ImpTab * pImpTab=pLevState->pImpTab;
1838     const ImpAct * pImpAct=pLevState->pImpAct;
1839     UBiDiLevel * levels=pBiDi->levels;
1840     UBiDiLevel level, addLevel;
1841     InsertPoints * pInsertPoints;
1842     int32_t start0, k;
1843 
1844     start0=start;                           /* save original start position */
1845     oldStateSeq=(uint8_t)pLevState->state;
1846     cell=(*pImpTab)[oldStateSeq][_prop];
1847     pLevState->state=GET_STATE(cell);       /* isolate the new state */
1848     actionSeq=(*pImpAct)[GET_ACTION(cell)]; /* isolate the action */
1849     addLevel=(*pImpTab)[pLevState->state][IMPTABLEVELS_RES];
1850 
1851     if(actionSeq) {
1852         switch(actionSeq) {
1853         case 1:                         /* init ON seq */
1854             pLevState->startON=start0;
1855             break;
1856 
1857         case 2:                         /* prepend ON seq to current seq */
1858             start=pLevState->startON;
1859             break;
1860 
1861         case 3:                         /* EN/AN after R+ON */
1862             level=pLevState->runLevel+1;
1863             setLevelsOutsideIsolates(pBiDi, pLevState->startON, start0, level);
1864             break;
1865 
1866         case 4:                         /* EN/AN before R for NUMBERS_SPECIAL */
1867             level=pLevState->runLevel+2;
1868             setLevelsOutsideIsolates(pBiDi, pLevState->startON, start0, level);
1869             break;
1870 
1871         case 5:                         /* L or S after possible relevant EN/AN */
1872             /* check if we had EN after R/AL */
1873             if (pLevState->startL2EN >= 0) {
1874                 addPoint(pBiDi, pLevState->startL2EN, LRM_BEFORE);
1875             }
1876             pLevState->startL2EN=-1;  /* not within previous if since could also be -2 */
1877             /* check if we had any relevant EN/AN after R/AL */
1878             pInsertPoints=&(pBiDi->insertPoints);
1879             if ((pInsertPoints->capacity == 0) ||
1880                 (pInsertPoints->size <= pInsertPoints->confirmed))
1881             {
1882                 /* nothing, just clean up */
1883                 pLevState->lastStrongRTL=-1;
1884                 /* check if we have a pending conditional segment */
1885                 level=(*pImpTab)[oldStateSeq][IMPTABLEVELS_RES];
1886                 if ((level & 1) && (pLevState->startON > 0)) {  /* after ON */
1887                     start=pLevState->startON;   /* reset to basic run level */
1888                 }
1889                 if (_prop == DirProp_S)                /* add LRM before S */
1890                 {
1891                     addPoint(pBiDi, start0, LRM_BEFORE);
1892                     pInsertPoints->confirmed=pInsertPoints->size;
1893                 }
1894                 break;
1895             }
1896             /* reset previous RTL cont to level for LTR text */
1897             for (k=pLevState->lastStrongRTL+1; k<start0; k++)
1898             {
1899                 /* reset odd level, leave runLevel+2 as is */
1900                 levels[k]=(levels[k] - 2) & ~1;
1901             }
1902             /* mark insert points as confirmed */
1903             pInsertPoints->confirmed=pInsertPoints->size;
1904             pLevState->lastStrongRTL=-1;
1905             if (_prop == DirProp_S)            /* add LRM before S */
1906             {
1907                 addPoint(pBiDi, start0, LRM_BEFORE);
1908                 pInsertPoints->confirmed=pInsertPoints->size;
1909             }
1910             break;
1911 
1912         case 6:                         /* R/AL after possible relevant EN/AN */
1913             /* just clean up */
1914             pInsertPoints=&(pBiDi->insertPoints);
1915             if (pInsertPoints->capacity > 0)
1916                 /* remove all non confirmed insert points */
1917                 pInsertPoints->size=pInsertPoints->confirmed;
1918             pLevState->startON=-1;
1919             pLevState->startL2EN=-1;
1920             pLevState->lastStrongRTL=limit - 1;
1921             break;
1922 
1923         case 7:                         /* EN/AN after R/AL + possible cont */
1924             /* check for real AN */
1925             if ((_prop == DirProp_AN) && (pBiDi->dirProps[start0] == AN) &&
1926                 (pBiDi->reorderingMode!=UBIDI_REORDER_INVERSE_FOR_NUMBERS_SPECIAL))
1927             {
1928                 /* real AN */
1929                 if (pLevState->startL2EN == -1) /* if no relevant EN already found */
1930                 {
1931                     /* just note the righmost digit as a strong RTL */
1932                     pLevState->lastStrongRTL=limit - 1;
1933                     break;
1934                 }
1935                 if (pLevState->startL2EN >= 0)  /* after EN, no AN */
1936                 {
1937                     addPoint(pBiDi, pLevState->startL2EN, LRM_BEFORE);
1938                     pLevState->startL2EN=-2;
1939                 }
1940                 /* note AN */
1941                 addPoint(pBiDi, start0, LRM_BEFORE);
1942                 break;
1943             }
1944             /* if first EN/AN after R/AL */
1945             if (pLevState->startL2EN == -1) {
1946                 pLevState->startL2EN=start0;
1947             }
1948             break;
1949 
1950         case 8:                         /* note location of latest R/AL */
1951             pLevState->lastStrongRTL=limit - 1;
1952             pLevState->startON=-1;
1953             break;
1954 
1955         case 9:                         /* L after R+ON/EN/AN */
1956             /* include possible adjacent number on the left */
1957             for (k=start0-1; k>=0 && !(levels[k]&1); k--);
1958             if(k>=0) {
1959                 addPoint(pBiDi, k, RLM_BEFORE);             /* add RLM before */
1960                 pInsertPoints=&(pBiDi->insertPoints);
1961                 pInsertPoints->confirmed=pInsertPoints->size;   /* confirm it */
1962             }
1963             pLevState->startON=start0;
1964             break;
1965 
1966         case 10:                        /* AN after L */
1967             /* AN numbers between L text on both sides may be trouble. */
1968             /* tentatively bracket with LRMs; will be confirmed if followed by L */
1969             addPoint(pBiDi, start0, LRM_BEFORE);    /* add LRM before */
1970             addPoint(pBiDi, start0, LRM_AFTER);     /* add LRM after  */
1971             break;
1972 
1973         case 11:                        /* R after L+ON/EN/AN */
1974             /* false alert, infirm LRMs around previous AN */
1975             pInsertPoints=&(pBiDi->insertPoints);
1976             pInsertPoints->size=pInsertPoints->confirmed;
1977             if (_prop == DirProp_S)            /* add RLM before S */
1978             {
1979                 addPoint(pBiDi, start0, RLM_BEFORE);
1980                 pInsertPoints->confirmed=pInsertPoints->size;
1981             }
1982             break;
1983 
1984         case 12:                        /* L after L+ON/AN */
1985             level=pLevState->runLevel + addLevel;
1986             for(k=pLevState->startON; k<start0; k++) {
1987                 if (levels[k]<level)
1988                     levels[k]=level;
1989             }
1990             pInsertPoints=&(pBiDi->insertPoints);
1991             pInsertPoints->confirmed=pInsertPoints->size;   /* confirm inserts */
1992             pLevState->startON=start0;
1993             break;
1994 
1995         case 13:                        /* L after L+ON+EN/AN/ON */
1996             level=pLevState->runLevel;
1997             for(k=start0-1; k>=pLevState->startON; k--) {
1998                 if(levels[k]==level+3) {
1999                     while(levels[k]==level+3) {
2000                         levels[k--]-=2;
2001                     }
2002                     while(levels[k]==level) {
2003                         k--;
2004                     }
2005                 }
2006                 if(levels[k]==level+2) {
2007                     levels[k]=level;
2008                     continue;
2009                 }
2010                 levels[k]=level+1;
2011             }
2012             break;
2013 
2014         case 14:                        /* R after L+ON+EN/AN/ON */
2015             level=pLevState->runLevel+1;
2016             for(k=start0-1; k>=pLevState->startON; k--) {
2017                 if(levels[k]>level) {
2018                     levels[k]-=2;
2019                 }
2020             }
2021             break;
2022 
2023         default:                        /* we should never get here */
2024             U_ASSERT(FALSE);
2025             break;
2026         }
2027     }
2028     if((addLevel) || (start < start0)) {
2029         level=pLevState->runLevel + addLevel;
2030         if(start>=pLevState->runStart) {
2031             for(k=start; k<limit; k++) {
2032                 levels[k]=level;
2033             }
2034         } else {
2035             setLevelsOutsideIsolates(pBiDi, start, limit, level);
2036         }
2037     }
2038 }
2039 
2040 /**
2041  * Returns the directionality of the last strong character at the end of the prologue, if any.
2042  * Requires prologue!=null.
2043  */
2044 static DirProp
lastL_R_AL(UBiDi * pBiDi)2045 lastL_R_AL(UBiDi *pBiDi) {
2046     const UChar *text=pBiDi->prologue;
2047     int32_t length=pBiDi->proLength;
2048     int32_t i;
2049     UChar32 uchar;
2050     DirProp dirProp;
2051     for(i=length; i>0; ) {
2052         /* i is decremented by U16_PREV */
2053         U16_PREV(text, 0, i, uchar);
2054         dirProp=(DirProp)ubidi_getCustomizedClass(pBiDi, uchar);
2055         if(dirProp==L) {
2056             return DirProp_L;
2057         }
2058         if(dirProp==R || dirProp==AL) {
2059             return DirProp_R;
2060         }
2061         if(dirProp==B) {
2062             return DirProp_ON;
2063         }
2064     }
2065     return DirProp_ON;
2066 }
2067 
2068 /**
2069  * Returns the directionality of the first strong character, or digit, in the epilogue, if any.
2070  * Requires epilogue!=null.
2071  */
2072 static DirProp
firstL_R_AL_EN_AN(UBiDi * pBiDi)2073 firstL_R_AL_EN_AN(UBiDi *pBiDi) {
2074     const UChar *text=pBiDi->epilogue;
2075     int32_t length=pBiDi->epiLength;
2076     int32_t i;
2077     UChar32 uchar;
2078     DirProp dirProp;
2079     for(i=0; i<length; ) {
2080         /* i is incremented by U16_NEXT */
2081         U16_NEXT(text, i, length, uchar);
2082         dirProp=(DirProp)ubidi_getCustomizedClass(pBiDi, uchar);
2083         if(dirProp==L) {
2084             return DirProp_L;
2085         }
2086         if(dirProp==R || dirProp==AL) {
2087             return DirProp_R;
2088         }
2089         if(dirProp==EN) {
2090             return DirProp_EN;
2091         }
2092         if(dirProp==AN) {
2093             return DirProp_AN;
2094         }
2095     }
2096     return DirProp_ON;
2097 }
2098 
2099 static void
resolveImplicitLevels(UBiDi * pBiDi,int32_t start,int32_t limit,DirProp sor,DirProp eor)2100 resolveImplicitLevels(UBiDi *pBiDi,
2101                       int32_t start, int32_t limit,
2102                       DirProp sor, DirProp eor) {
2103     const DirProp *dirProps=pBiDi->dirProps;
2104     DirProp dirProp;
2105     LevState levState;
2106     int32_t i, start1, start2;
2107     uint16_t oldStateImp, stateImp, actionImp;
2108     uint8_t gprop, resProp, cell;
2109     UBool inverseRTL;
2110     DirProp nextStrongProp=R;
2111     int32_t nextStrongPos=-1;
2112 
2113     /* check for RTL inverse BiDi mode */
2114     /* FOOD FOR THOUGHT: in case of RTL inverse BiDi, it would make sense to
2115      * loop on the text characters from end to start.
2116      * This would need a different properties state table (at least different
2117      * actions) and different levels state tables (maybe very similar to the
2118      * LTR corresponding ones.
2119      */
2120     inverseRTL=(UBool)
2121         ((start<pBiDi->lastArabicPos) && (GET_PARALEVEL(pBiDi, start) & 1) &&
2122          (pBiDi->reorderingMode==UBIDI_REORDER_INVERSE_LIKE_DIRECT  ||
2123           pBiDi->reorderingMode==UBIDI_REORDER_INVERSE_FOR_NUMBERS_SPECIAL));
2124 
2125     /* initialize for property and levels state tables */
2126     levState.startL2EN=-1;              /* used for INVERSE_LIKE_DIRECT_WITH_MARKS */
2127     levState.lastStrongRTL=-1;          /* used for INVERSE_LIKE_DIRECT_WITH_MARKS */
2128     levState.runStart=start;
2129     levState.runLevel=pBiDi->levels[start];
2130     levState.pImpTab=(const ImpTab*)((pBiDi->pImpTabPair)->pImpTab)[levState.runLevel&1];
2131     levState.pImpAct=(const ImpAct*)((pBiDi->pImpTabPair)->pImpAct)[levState.runLevel&1];
2132     if(start==0 && pBiDi->proLength>0) {
2133         DirProp lastStrong=lastL_R_AL(pBiDi);
2134         if(lastStrong!=DirProp_ON) {
2135             sor=lastStrong;
2136         }
2137     }
2138     /* The isolates[] entries contain enough information to
2139        resume the bidi algorithm in the same state as it was
2140        when it was interrupted by an isolate sequence. */
2141     if(dirProps[start]==PDI  && pBiDi->isolateCount >= 0) {
2142         levState.startON=pBiDi->isolates[pBiDi->isolateCount].startON;
2143         start1=pBiDi->isolates[pBiDi->isolateCount].start1;
2144         stateImp=pBiDi->isolates[pBiDi->isolateCount].stateImp;
2145         levState.state=pBiDi->isolates[pBiDi->isolateCount].state;
2146         pBiDi->isolateCount--;
2147     } else {
2148         levState.startON=-1;
2149         start1=start;
2150         if(dirProps[start]==NSM)
2151             stateImp = 1 + sor;
2152         else
2153             stateImp=0;
2154         levState.state=0;
2155         processPropertySeq(pBiDi, &levState, sor, start, start);
2156     }
2157     start2=start;                       /* to make Java compiler happy */
2158 
2159     for(i=start; i<=limit; i++) {
2160         if(i>=limit) {
2161             int32_t k;
2162             for(k=limit-1; k>start&&(DIRPROP_FLAG(dirProps[k])&MASK_BN_EXPLICIT); k--);
2163             dirProp=dirProps[k];
2164             if(dirProp==LRI || dirProp==RLI)
2165                 break;      /* no forced closing for sequence ending with LRI/RLI */
2166             gprop=eor;
2167         } else {
2168             DirProp prop, prop1;
2169             prop=dirProps[i];
2170             if(prop==B) {
2171                 pBiDi->isolateCount=-1; /* current isolates stack entry == none */
2172             }
2173             if(inverseRTL) {
2174                 if(prop==AL) {
2175                     /* AL before EN does not make it AN */
2176                     prop=R;
2177                 } else if(prop==EN) {
2178                     if(nextStrongPos<=i) {
2179                         /* look for next strong char (L/R/AL) */
2180                         int32_t j;
2181                         nextStrongProp=R;   /* set default */
2182                         nextStrongPos=limit;
2183                         for(j=i+1; j<limit; j++) {
2184                             prop1=dirProps[j];
2185                             if(prop1==L || prop1==R || prop1==AL) {
2186                                 nextStrongProp=prop1;
2187                                 nextStrongPos=j;
2188                                 break;
2189                             }
2190                         }
2191                     }
2192                     if(nextStrongProp==AL) {
2193                         prop=AN;
2194                     }
2195                 }
2196             }
2197             gprop=groupProp[prop];
2198         }
2199         oldStateImp=stateImp;
2200         cell=impTabProps[oldStateImp][gprop];
2201         stateImp=GET_STATEPROPS(cell);      /* isolate the new state */
2202         actionImp=GET_ACTIONPROPS(cell);    /* isolate the action */
2203         if((i==limit) && (actionImp==0)) {
2204             /* there is an unprocessed sequence if its property == eor   */
2205             actionImp=1;                    /* process the last sequence */
2206         }
2207         if(actionImp) {
2208             resProp=impTabProps[oldStateImp][IMPTABPROPS_RES];
2209             switch(actionImp) {
2210             case 1:             /* process current seq1, init new seq1 */
2211                 processPropertySeq(pBiDi, &levState, resProp, start1, i);
2212                 start1=i;
2213                 break;
2214             case 2:             /* init new seq2 */
2215                 start2=i;
2216                 break;
2217             case 3:             /* process seq1, process seq2, init new seq1 */
2218                 processPropertySeq(pBiDi, &levState, resProp, start1, start2);
2219                 processPropertySeq(pBiDi, &levState, DirProp_ON, start2, i);
2220                 start1=i;
2221                 break;
2222             case 4:             /* process seq1, set seq1=seq2, init new seq2 */
2223                 processPropertySeq(pBiDi, &levState, resProp, start1, start2);
2224                 start1=start2;
2225                 start2=i;
2226                 break;
2227             default:            /* we should never get here */
2228                 U_ASSERT(FALSE);
2229                 break;
2230             }
2231         }
2232     }
2233 
2234     /* flush possible pending sequence, e.g. ON */
2235     if(limit==pBiDi->length && pBiDi->epiLength>0) {
2236         DirProp firstStrong=firstL_R_AL_EN_AN(pBiDi);
2237         if(firstStrong!=DirProp_ON) {
2238             eor=firstStrong;
2239         }
2240     }
2241 
2242     /* look for the last char not a BN or LRE/RLE/LRO/RLO/PDF */
2243     for(i=limit-1; i>start&&(DIRPROP_FLAG(dirProps[i])&MASK_BN_EXPLICIT); i--);
2244     dirProp=dirProps[i];
2245     if((dirProp==LRI || dirProp==RLI) && limit<pBiDi->length) {
2246         pBiDi->isolateCount++;
2247         pBiDi->isolates[pBiDi->isolateCount].stateImp=stateImp;
2248         pBiDi->isolates[pBiDi->isolateCount].state=levState.state;
2249         pBiDi->isolates[pBiDi->isolateCount].start1=start1;
2250         pBiDi->isolates[pBiDi->isolateCount].startON=levState.startON;
2251     }
2252     else
2253         processPropertySeq(pBiDi, &levState, eor, limit, limit);
2254 }
2255 
2256 /* perform (L1) and (X9) ---------------------------------------------------- */
2257 
2258 /*
2259  * Reset the embedding levels for some non-graphic characters (L1).
2260  * This function also sets appropriate levels for BN, and
2261  * explicit embedding types that are supposed to have been removed
2262  * from the paragraph in (X9).
2263  */
2264 static void
adjustWSLevels(UBiDi * pBiDi)2265 adjustWSLevels(UBiDi *pBiDi) {
2266     const DirProp *dirProps=pBiDi->dirProps;
2267     UBiDiLevel *levels=pBiDi->levels;
2268     int32_t i;
2269 
2270     if(pBiDi->flags&MASK_WS) {
2271         UBool orderParagraphsLTR=pBiDi->orderParagraphsLTR;
2272         Flags flag;
2273 
2274         i=pBiDi->trailingWSStart;
2275         while(i>0) {
2276             /* reset a sequence of WS/BN before eop and B/S to the paragraph paraLevel */
2277             while(i>0 && (flag=DIRPROP_FLAG(dirProps[--i]))&MASK_WS) {
2278                 if(orderParagraphsLTR&&(flag&DIRPROP_FLAG(B))) {
2279                     levels[i]=0;
2280                 } else {
2281                     levels[i]=GET_PARALEVEL(pBiDi, i);
2282                 }
2283             }
2284 
2285             /* reset BN to the next character's paraLevel until B/S, which restarts above loop */
2286             /* here, i+1 is guaranteed to be <length */
2287             while(i>0) {
2288                 flag=DIRPROP_FLAG(dirProps[--i]);
2289                 if(flag&MASK_BN_EXPLICIT) {
2290                     levels[i]=levels[i+1];
2291                 } else if(orderParagraphsLTR&&(flag&DIRPROP_FLAG(B))) {
2292                     levels[i]=0;
2293                     break;
2294                 } else if(flag&MASK_B_S) {
2295                     levels[i]=GET_PARALEVEL(pBiDi, i);
2296                     break;
2297                 }
2298             }
2299         }
2300     }
2301 }
2302 
2303 U_CAPI void U_EXPORT2
ubidi_setContext(UBiDi * pBiDi,const UChar * prologue,int32_t proLength,const UChar * epilogue,int32_t epiLength,UErrorCode * pErrorCode)2304 ubidi_setContext(UBiDi *pBiDi,
2305                  const UChar *prologue, int32_t proLength,
2306                  const UChar *epilogue, int32_t epiLength,
2307                  UErrorCode *pErrorCode) {
2308     /* check the argument values */
2309     RETURN_VOID_IF_NULL_OR_FAILING_ERRCODE(pErrorCode);
2310     if(pBiDi==NULL || proLength<-1 || epiLength<-1 ||
2311        (prologue==NULL && proLength!=0) || (epilogue==NULL && epiLength!=0)) {
2312         *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
2313         return;
2314     }
2315 
2316     if(proLength==-1) {
2317         pBiDi->proLength=u_strlen(prologue);
2318     } else {
2319         pBiDi->proLength=proLength;
2320     }
2321     if(epiLength==-1) {
2322         pBiDi->epiLength=u_strlen(epilogue);
2323     } else {
2324         pBiDi->epiLength=epiLength;
2325     }
2326     pBiDi->prologue=prologue;
2327     pBiDi->epilogue=epilogue;
2328 }
2329 
2330 static void
setParaSuccess(UBiDi * pBiDi)2331 setParaSuccess(UBiDi *pBiDi) {
2332     pBiDi->proLength=0;                 /* forget the last context */
2333     pBiDi->epiLength=0;
2334     pBiDi->pParaBiDi=pBiDi;             /* mark successful setPara */
2335 }
2336 
2337 #define BIDI_MIN(x, y)   ((x)<(y) ? (x) : (y))
2338 #define BIDI_ABS(x)      ((x)>=0  ? (x) : (-(x)))
2339 
2340 static void
setParaRunsOnly(UBiDi * pBiDi,const UChar * text,int32_t length,UBiDiLevel paraLevel,UErrorCode * pErrorCode)2341 setParaRunsOnly(UBiDi *pBiDi, const UChar *text, int32_t length,
2342                 UBiDiLevel paraLevel, UErrorCode *pErrorCode) {
2343     void *runsOnlyMemory = NULL;
2344     int32_t *visualMap;
2345     UChar *visualText;
2346     int32_t saveLength, saveTrailingWSStart;
2347     const UBiDiLevel *levels;
2348     UBiDiLevel *saveLevels;
2349     UBiDiDirection saveDirection;
2350     UBool saveMayAllocateText;
2351     Run *runs;
2352     int32_t visualLength, i, j, visualStart, logicalStart,
2353             runCount, runLength, addedRuns, insertRemove,
2354             start, limit, step, indexOddBit, logicalPos,
2355             index0, index1;
2356     uint32_t saveOptions;
2357 
2358     pBiDi->reorderingMode=UBIDI_REORDER_DEFAULT;
2359     if(length==0) {
2360         ubidi_setPara(pBiDi, text, length, paraLevel, NULL, pErrorCode);
2361         goto cleanup3;
2362     }
2363     /* obtain memory for mapping table and visual text */
2364     runsOnlyMemory=uprv_malloc(length*(sizeof(int32_t)+sizeof(UChar)+sizeof(UBiDiLevel)));
2365     if(runsOnlyMemory==NULL) {
2366         *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
2367         goto cleanup3;
2368     }
2369     visualMap=runsOnlyMemory;
2370     visualText=(UChar *)&visualMap[length];
2371     saveLevels=(UBiDiLevel *)&visualText[length];
2372     saveOptions=pBiDi->reorderingOptions;
2373     if(saveOptions & UBIDI_OPTION_INSERT_MARKS) {
2374         pBiDi->reorderingOptions&=~UBIDI_OPTION_INSERT_MARKS;
2375         pBiDi->reorderingOptions|=UBIDI_OPTION_REMOVE_CONTROLS;
2376     }
2377     paraLevel&=1;                       /* accept only 0 or 1 */
2378     ubidi_setPara(pBiDi, text, length, paraLevel, NULL, pErrorCode);
2379     if(U_FAILURE(*pErrorCode)) {
2380         goto cleanup3;
2381     }
2382     /* we cannot access directly pBiDi->levels since it is not yet set if
2383      * direction is not MIXED
2384      */
2385     levels=ubidi_getLevels(pBiDi, pErrorCode);
2386     uprv_memcpy(saveLevels, levels, pBiDi->length*sizeof(UBiDiLevel));
2387     saveTrailingWSStart=pBiDi->trailingWSStart;
2388     saveLength=pBiDi->length;
2389     saveDirection=pBiDi->direction;
2390 
2391     /* FOOD FOR THOUGHT: instead of writing the visual text, we could use
2392      * the visual map and the dirProps array to drive the second call
2393      * to ubidi_setPara (but must make provision for possible removal of
2394      * BiDi controls.  Alternatively, only use the dirProps array via
2395      * customized classifier callback.
2396      */
2397     visualLength=ubidi_writeReordered(pBiDi, visualText, length,
2398                                       UBIDI_DO_MIRRORING, pErrorCode);
2399     ubidi_getVisualMap(pBiDi, visualMap, pErrorCode);
2400     if(U_FAILURE(*pErrorCode)) {
2401         goto cleanup2;
2402     }
2403     pBiDi->reorderingOptions=saveOptions;
2404 
2405     pBiDi->reorderingMode=UBIDI_REORDER_INVERSE_LIKE_DIRECT;
2406     paraLevel^=1;
2407     /* Because what we did with reorderingOptions, visualText may be shorter
2408      * than the original text. But we don't want the levels memory to be
2409      * reallocated shorter than the original length, since we need to restore
2410      * the levels as after the first call to ubidi_setpara() before returning.
2411      * We will force mayAllocateText to FALSE before the second call to
2412      * ubidi_setpara(), and will restore it afterwards.
2413      */
2414     saveMayAllocateText=pBiDi->mayAllocateText;
2415     pBiDi->mayAllocateText=FALSE;
2416     ubidi_setPara(pBiDi, visualText, visualLength, paraLevel, NULL, pErrorCode);
2417     pBiDi->mayAllocateText=saveMayAllocateText;
2418     ubidi_getRuns(pBiDi, pErrorCode);
2419     if(U_FAILURE(*pErrorCode)) {
2420         goto cleanup1;
2421     }
2422     /* check if some runs must be split, count how many splits */
2423     addedRuns=0;
2424     runCount=pBiDi->runCount;
2425     runs=pBiDi->runs;
2426     visualStart=0;
2427     for(i=0; i<runCount; i++, visualStart+=runLength) {
2428         runLength=runs[i].visualLimit-visualStart;
2429         if(runLength<2) {
2430             continue;
2431         }
2432         logicalStart=GET_INDEX(runs[i].logicalStart);
2433         for(j=logicalStart+1; j<logicalStart+runLength; j++) {
2434             index0=visualMap[j];
2435             index1=visualMap[j-1];
2436             if((BIDI_ABS(index0-index1)!=1) || (saveLevels[index0]!=saveLevels[index1])) {
2437                 addedRuns++;
2438             }
2439         }
2440     }
2441     if(addedRuns) {
2442         if(getRunsMemory(pBiDi, runCount+addedRuns)) {
2443             if(runCount==1) {
2444                 /* because we switch from UBiDi.simpleRuns to UBiDi.runs */
2445                 pBiDi->runsMemory[0]=runs[0];
2446             }
2447             runs=pBiDi->runs=pBiDi->runsMemory;
2448             pBiDi->runCount+=addedRuns;
2449         } else {
2450             goto cleanup1;
2451         }
2452     }
2453     /* split runs which are not consecutive in source text */
2454     for(i=runCount-1; i>=0; i--) {
2455         runLength= i==0 ? runs[0].visualLimit :
2456                           runs[i].visualLimit-runs[i-1].visualLimit;
2457         logicalStart=runs[i].logicalStart;
2458         indexOddBit=GET_ODD_BIT(logicalStart);
2459         logicalStart=GET_INDEX(logicalStart);
2460         if(runLength<2) {
2461             if(addedRuns) {
2462                 runs[i+addedRuns]=runs[i];
2463             }
2464             logicalPos=visualMap[logicalStart];
2465             runs[i+addedRuns].logicalStart=MAKE_INDEX_ODD_PAIR(logicalPos,
2466                                             saveLevels[logicalPos]^indexOddBit);
2467             continue;
2468         }
2469         if(indexOddBit) {
2470             start=logicalStart;
2471             limit=logicalStart+runLength-1;
2472             step=1;
2473         } else {
2474             start=logicalStart+runLength-1;
2475             limit=logicalStart;
2476             step=-1;
2477         }
2478         for(j=start; j!=limit; j+=step) {
2479             index0=visualMap[j];
2480             index1=visualMap[j+step];
2481             if((BIDI_ABS(index0-index1)!=1) || (saveLevels[index0]!=saveLevels[index1])) {
2482                 logicalPos=BIDI_MIN(visualMap[start], index0);
2483                 runs[i+addedRuns].logicalStart=MAKE_INDEX_ODD_PAIR(logicalPos,
2484                                             saveLevels[logicalPos]^indexOddBit);
2485                 runs[i+addedRuns].visualLimit=runs[i].visualLimit;
2486                 runs[i].visualLimit-=BIDI_ABS(j-start)+1;
2487                 insertRemove=runs[i].insertRemove&(LRM_AFTER|RLM_AFTER);
2488                 runs[i+addedRuns].insertRemove=insertRemove;
2489                 runs[i].insertRemove&=~insertRemove;
2490                 start=j+step;
2491                 addedRuns--;
2492             }
2493         }
2494         if(addedRuns) {
2495             runs[i+addedRuns]=runs[i];
2496         }
2497         logicalPos=BIDI_MIN(visualMap[start], visualMap[limit]);
2498         runs[i+addedRuns].logicalStart=MAKE_INDEX_ODD_PAIR(logicalPos,
2499                                             saveLevels[logicalPos]^indexOddBit);
2500     }
2501 
2502   cleanup1:
2503     /* restore initial paraLevel */
2504     pBiDi->paraLevel^=1;
2505   cleanup2:
2506     /* restore real text */
2507     pBiDi->text=text;
2508     pBiDi->length=saveLength;
2509     pBiDi->originalLength=length;
2510     pBiDi->direction=saveDirection;
2511     /* the saved levels should never excess levelsSize, but we check anyway */
2512     if(saveLength>pBiDi->levelsSize) {
2513         saveLength=pBiDi->levelsSize;
2514     }
2515     uprv_memcpy(pBiDi->levels, saveLevels, saveLength*sizeof(UBiDiLevel));
2516     pBiDi->trailingWSStart=saveTrailingWSStart;
2517     if(pBiDi->runCount>1) {
2518         pBiDi->direction=UBIDI_MIXED;
2519     }
2520   cleanup3:
2521     /* free memory for mapping table and visual text */
2522     uprv_free(runsOnlyMemory);
2523 
2524     pBiDi->reorderingMode=UBIDI_REORDER_RUNS_ONLY;
2525 }
2526 
2527 /* ubidi_setPara ------------------------------------------------------------ */
2528 
2529 U_CAPI void U_EXPORT2
ubidi_setPara(UBiDi * pBiDi,const UChar * text,int32_t length,UBiDiLevel paraLevel,UBiDiLevel * embeddingLevels,UErrorCode * pErrorCode)2530 ubidi_setPara(UBiDi *pBiDi, const UChar *text, int32_t length,
2531               UBiDiLevel paraLevel, UBiDiLevel *embeddingLevels,
2532               UErrorCode *pErrorCode) {
2533     UBiDiDirection direction;
2534     DirProp *dirProps;
2535 
2536     /* check the argument values */
2537     RETURN_VOID_IF_NULL_OR_FAILING_ERRCODE(pErrorCode);
2538     if(pBiDi==NULL || text==NULL || length<-1 ||
2539        (paraLevel>UBIDI_MAX_EXPLICIT_LEVEL && paraLevel<UBIDI_DEFAULT_LTR)) {
2540         *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
2541         return;
2542     }
2543 
2544     if(length==-1) {
2545         length=u_strlen(text);
2546     }
2547 
2548     /* special treatment for RUNS_ONLY mode */
2549     if(pBiDi->reorderingMode==UBIDI_REORDER_RUNS_ONLY) {
2550         setParaRunsOnly(pBiDi, text, length, paraLevel, pErrorCode);
2551         return;
2552     }
2553 
2554     /* initialize the UBiDi structure */
2555     pBiDi->pParaBiDi=NULL;          /* mark unfinished setPara */
2556     pBiDi->text=text;
2557     pBiDi->length=pBiDi->originalLength=pBiDi->resultLength=length;
2558     pBiDi->paraLevel=paraLevel;
2559     pBiDi->direction=paraLevel&1;
2560     pBiDi->paraCount=1;
2561 
2562     pBiDi->dirProps=NULL;
2563     pBiDi->levels=NULL;
2564     pBiDi->runs=NULL;
2565     pBiDi->insertPoints.size=0;         /* clean up from last call */
2566     pBiDi->insertPoints.confirmed=0;    /* clean up from last call */
2567 
2568     /*
2569      * Save the original paraLevel if contextual; otherwise, set to 0.
2570      */
2571     pBiDi->defaultParaLevel=IS_DEFAULT_LEVEL(paraLevel);
2572 
2573     if(length==0) {
2574         /*
2575          * For an empty paragraph, create a UBiDi object with the paraLevel and
2576          * the flags and the direction set but without allocating zero-length arrays.
2577          * There is nothing more to do.
2578          */
2579         if(IS_DEFAULT_LEVEL(paraLevel)) {
2580             pBiDi->paraLevel&=1;
2581             pBiDi->defaultParaLevel=0;
2582         }
2583         pBiDi->flags=DIRPROP_FLAG_LR(paraLevel);
2584         pBiDi->runCount=0;
2585         pBiDi->paraCount=0;
2586         setParaSuccess(pBiDi);          /* mark successful setPara */
2587         return;
2588     }
2589 
2590     pBiDi->runCount=-1;
2591 
2592     /* allocate paras memory */
2593     if(pBiDi->parasMemory)
2594         pBiDi->paras=pBiDi->parasMemory;
2595     else
2596         pBiDi->paras=pBiDi->simpleParas;
2597 
2598     /*
2599      * Get the directional properties,
2600      * the flags bit-set, and
2601      * determine the paragraph level if necessary.
2602      */
2603     if(getDirPropsMemory(pBiDi, length)) {
2604         pBiDi->dirProps=pBiDi->dirPropsMemory;
2605         if(!getDirProps(pBiDi)) {
2606             *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
2607             return;
2608         }
2609     } else {
2610         *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
2611         return;
2612     }
2613     dirProps=pBiDi->dirProps;
2614     /* the processed length may have changed if UBIDI_OPTION_STREAMING */
2615     length= pBiDi->length;
2616     pBiDi->trailingWSStart=length;  /* the levels[] will reflect the WS run */
2617 
2618     /* are explicit levels specified? */
2619     if(embeddingLevels==NULL) {
2620         /* no: determine explicit levels according to the (Xn) rules */\
2621         if(getLevelsMemory(pBiDi, length)) {
2622             pBiDi->levels=pBiDi->levelsMemory;
2623             direction=resolveExplicitLevels(pBiDi, pErrorCode);
2624             if(U_FAILURE(*pErrorCode)) {
2625                 return;
2626             }
2627         } else {
2628             *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
2629             return;
2630         }
2631     } else {
2632         /* set BN for all explicit codes, check that all levels are 0 or paraLevel..UBIDI_MAX_EXPLICIT_LEVEL */
2633         pBiDi->levels=embeddingLevels;
2634         direction=checkExplicitLevels(pBiDi, pErrorCode);
2635         if(U_FAILURE(*pErrorCode)) {
2636             return;
2637         }
2638     }
2639 
2640     /* allocate isolate memory */
2641     if(pBiDi->isolateCount<=SIMPLE_ISOLATES_COUNT)
2642         pBiDi->isolates=pBiDi->simpleIsolates;
2643     else
2644         if((int32_t)(pBiDi->isolateCount*sizeof(Isolate))<=pBiDi->isolatesSize)
2645             pBiDi->isolates=pBiDi->isolatesMemory;
2646         else {
2647             if(getInitialIsolatesMemory(pBiDi, pBiDi->isolateCount)) {
2648                 pBiDi->isolates=pBiDi->isolatesMemory;
2649             } else {
2650                 *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
2651                 return;
2652             }
2653         }
2654     pBiDi->isolateCount=-1;             /* current isolates stack entry == none */
2655 
2656     /*
2657      * The steps after (X9) in the UBiDi algorithm are performed only if
2658      * the paragraph text has mixed directionality!
2659      */
2660     pBiDi->direction=direction;
2661     switch(direction) {
2662     case UBIDI_LTR:
2663         /* all levels are implicitly at paraLevel (important for ubidi_getLevels()) */
2664         pBiDi->trailingWSStart=0;
2665         break;
2666     case UBIDI_RTL:
2667         /* all levels are implicitly at paraLevel (important for ubidi_getLevels()) */
2668         pBiDi->trailingWSStart=0;
2669         break;
2670     default:
2671         /*
2672          *  Choose the right implicit state table
2673          */
2674         switch(pBiDi->reorderingMode) {
2675         case UBIDI_REORDER_DEFAULT:
2676             pBiDi->pImpTabPair=&impTab_DEFAULT;
2677             break;
2678         case UBIDI_REORDER_NUMBERS_SPECIAL:
2679             pBiDi->pImpTabPair=&impTab_NUMBERS_SPECIAL;
2680             break;
2681         case UBIDI_REORDER_GROUP_NUMBERS_WITH_R:
2682             pBiDi->pImpTabPair=&impTab_GROUP_NUMBERS_WITH_R;
2683             break;
2684         case UBIDI_REORDER_INVERSE_NUMBERS_AS_L:
2685             pBiDi->pImpTabPair=&impTab_INVERSE_NUMBERS_AS_L;
2686             break;
2687         case UBIDI_REORDER_INVERSE_LIKE_DIRECT:
2688             if (pBiDi->reorderingOptions & UBIDI_OPTION_INSERT_MARKS) {
2689                 pBiDi->pImpTabPair=&impTab_INVERSE_LIKE_DIRECT_WITH_MARKS;
2690             } else {
2691                 pBiDi->pImpTabPair=&impTab_INVERSE_LIKE_DIRECT;
2692             }
2693             break;
2694         case UBIDI_REORDER_INVERSE_FOR_NUMBERS_SPECIAL:
2695             if (pBiDi->reorderingOptions & UBIDI_OPTION_INSERT_MARKS) {
2696                 pBiDi->pImpTabPair=&impTab_INVERSE_FOR_NUMBERS_SPECIAL_WITH_MARKS;
2697             } else {
2698                 pBiDi->pImpTabPair=&impTab_INVERSE_FOR_NUMBERS_SPECIAL;
2699             }
2700             break;
2701         default:
2702             /* we should never get here */
2703             U_ASSERT(FALSE);
2704             break;
2705         }
2706         /*
2707          * If there are no external levels specified and there
2708          * are no significant explicit level codes in the text,
2709          * then we can treat the entire paragraph as one run.
2710          * Otherwise, we need to perform the following rules on runs of
2711          * the text with the same embedding levels. (X10)
2712          * "Significant" explicit level codes are ones that actually
2713          * affect non-BN characters.
2714          * Examples for "insignificant" ones are empty embeddings
2715          * LRE-PDF, LRE-RLE-PDF-PDF, etc.
2716          */
2717         if(embeddingLevels==NULL && pBiDi->paraCount<=1 &&
2718                                    !(pBiDi->flags&DIRPROP_FLAG_MULTI_RUNS)) {
2719             resolveImplicitLevels(pBiDi, 0, length,
2720                                     GET_LR_FROM_LEVEL(GET_PARALEVEL(pBiDi, 0)),
2721                                     GET_LR_FROM_LEVEL(GET_PARALEVEL(pBiDi, length-1)));
2722         } else {
2723             /* sor, eor: start and end types of same-level-run */
2724             UBiDiLevel *levels=pBiDi->levels;
2725             int32_t start, limit=0;
2726             UBiDiLevel level, nextLevel;
2727             DirProp sor, eor;
2728 
2729             /* determine the first sor and set eor to it because of the loop body (sor=eor there) */
2730             level=GET_PARALEVEL(pBiDi, 0);
2731             nextLevel=levels[0];
2732             if(level<nextLevel) {
2733                 eor=GET_LR_FROM_LEVEL(nextLevel);
2734             } else {
2735                 eor=GET_LR_FROM_LEVEL(level);
2736             }
2737 
2738             do {
2739                 /* determine start and limit of the run (end points just behind the run) */
2740 
2741                 /* the values for this run's start are the same as for the previous run's end */
2742                 start=limit;
2743                 level=nextLevel;
2744                 if((start>0) && (dirProps[start-1]==B)) {
2745                     /* except if this is a new paragraph, then set sor = para level */
2746                     sor=GET_LR_FROM_LEVEL(GET_PARALEVEL(pBiDi, start));
2747                 } else {
2748                     sor=eor;
2749                 }
2750 
2751                 /* search for the limit of this run */
2752                 while((++limit<length) &&
2753                       ((levels[limit]==level) ||
2754                        (DIRPROP_FLAG(dirProps[limit])&MASK_BN_EXPLICIT))) {}
2755 
2756                 /* get the correct level of the next run */
2757                 if(limit<length) {
2758                     nextLevel=levels[limit];
2759                 } else {
2760                     nextLevel=GET_PARALEVEL(pBiDi, length-1);
2761                 }
2762 
2763                 /* determine eor from max(level, nextLevel); sor is last run's eor */
2764                 if(NO_OVERRIDE(level)<NO_OVERRIDE(nextLevel)) {
2765                     eor=GET_LR_FROM_LEVEL(nextLevel);
2766                 } else {
2767                     eor=GET_LR_FROM_LEVEL(level);
2768                 }
2769 
2770                 /* if the run consists of overridden directional types, then there
2771                    are no implicit types to be resolved */
2772                 if(!(level&UBIDI_LEVEL_OVERRIDE)) {
2773                     resolveImplicitLevels(pBiDi, start, limit, sor, eor);
2774                 } else {
2775                     /* remove the UBIDI_LEVEL_OVERRIDE flags */
2776                     do {
2777                         levels[start++]&=~UBIDI_LEVEL_OVERRIDE;
2778                     } while(start<limit);
2779                 }
2780             } while(limit<length);
2781         }
2782         /* check if we got any memory shortage while adding insert points */
2783         if (U_FAILURE(pBiDi->insertPoints.errorCode))
2784         {
2785             *pErrorCode=pBiDi->insertPoints.errorCode;
2786             return;
2787         }
2788         /* reset the embedding levels for some non-graphic characters (L1), (X9) */
2789         adjustWSLevels(pBiDi);
2790         break;
2791     }
2792     /* add RLM for inverse Bidi with contextual orientation resolving
2793      * to RTL which would not round-trip otherwise
2794      */
2795     if((pBiDi->defaultParaLevel>0) &&
2796        (pBiDi->reorderingOptions & UBIDI_OPTION_INSERT_MARKS) &&
2797        ((pBiDi->reorderingMode==UBIDI_REORDER_INVERSE_LIKE_DIRECT) ||
2798         (pBiDi->reorderingMode==UBIDI_REORDER_INVERSE_FOR_NUMBERS_SPECIAL))) {
2799         int32_t i, j, start, last;
2800         UBiDiLevel level;
2801         DirProp dirProp;
2802         for(i=0; i<pBiDi->paraCount; i++) {
2803             last=(pBiDi->paras[i].limit)-1;
2804             level=pBiDi->paras[i].level;
2805             if(level==0)
2806                 continue;           /* LTR paragraph */
2807             start= i==0 ? 0 : pBiDi->paras[i-1].limit;
2808             for(j=last; j>=start; j--) {
2809                 dirProp=dirProps[j];
2810                 if(dirProp==L) {
2811                     if(j<last) {
2812                         while(dirProps[last]==B) {
2813                             last--;
2814                         }
2815                     }
2816                     addPoint(pBiDi, last, RLM_BEFORE);
2817                     break;
2818                 }
2819                 if(DIRPROP_FLAG(dirProp) & MASK_R_AL) {
2820                     break;
2821                 }
2822             }
2823         }
2824     }
2825 
2826     if(pBiDi->reorderingOptions & UBIDI_OPTION_REMOVE_CONTROLS) {
2827         pBiDi->resultLength -= pBiDi->controlCount;
2828     } else {
2829         pBiDi->resultLength += pBiDi->insertPoints.size;
2830     }
2831     setParaSuccess(pBiDi);              /* mark successful setPara */
2832 }
2833 
2834 U_CAPI void U_EXPORT2
ubidi_orderParagraphsLTR(UBiDi * pBiDi,UBool orderParagraphsLTR)2835 ubidi_orderParagraphsLTR(UBiDi *pBiDi, UBool orderParagraphsLTR) {
2836     if(pBiDi!=NULL) {
2837         pBiDi->orderParagraphsLTR=orderParagraphsLTR;
2838     }
2839 }
2840 
2841 U_CAPI UBool U_EXPORT2
ubidi_isOrderParagraphsLTR(UBiDi * pBiDi)2842 ubidi_isOrderParagraphsLTR(UBiDi *pBiDi) {
2843     if(pBiDi!=NULL) {
2844         return pBiDi->orderParagraphsLTR;
2845     } else {
2846         return FALSE;
2847     }
2848 }
2849 
2850 U_CAPI UBiDiDirection U_EXPORT2
ubidi_getDirection(const UBiDi * pBiDi)2851 ubidi_getDirection(const UBiDi *pBiDi) {
2852     if(IS_VALID_PARA_OR_LINE(pBiDi)) {
2853         return pBiDi->direction;
2854     } else {
2855         return UBIDI_LTR;
2856     }
2857 }
2858 
2859 U_CAPI const UChar * U_EXPORT2
ubidi_getText(const UBiDi * pBiDi)2860 ubidi_getText(const UBiDi *pBiDi) {
2861     if(IS_VALID_PARA_OR_LINE(pBiDi)) {
2862         return pBiDi->text;
2863     } else {
2864         return NULL;
2865     }
2866 }
2867 
2868 U_CAPI int32_t U_EXPORT2
ubidi_getLength(const UBiDi * pBiDi)2869 ubidi_getLength(const UBiDi *pBiDi) {
2870     if(IS_VALID_PARA_OR_LINE(pBiDi)) {
2871         return pBiDi->originalLength;
2872     } else {
2873         return 0;
2874     }
2875 }
2876 
2877 U_CAPI int32_t U_EXPORT2
ubidi_getProcessedLength(const UBiDi * pBiDi)2878 ubidi_getProcessedLength(const UBiDi *pBiDi) {
2879     if(IS_VALID_PARA_OR_LINE(pBiDi)) {
2880         return pBiDi->length;
2881     } else {
2882         return 0;
2883     }
2884 }
2885 
2886 U_CAPI int32_t U_EXPORT2
ubidi_getResultLength(const UBiDi * pBiDi)2887 ubidi_getResultLength(const UBiDi *pBiDi) {
2888     if(IS_VALID_PARA_OR_LINE(pBiDi)) {
2889         return pBiDi->resultLength;
2890     } else {
2891         return 0;
2892     }
2893 }
2894 
2895 /* paragraphs API functions ------------------------------------------------- */
2896 
2897 U_CAPI UBiDiLevel U_EXPORT2
ubidi_getParaLevel(const UBiDi * pBiDi)2898 ubidi_getParaLevel(const UBiDi *pBiDi) {
2899     if(IS_VALID_PARA_OR_LINE(pBiDi)) {
2900         return pBiDi->paraLevel;
2901     } else {
2902         return 0;
2903     }
2904 }
2905 
2906 U_CAPI int32_t U_EXPORT2
ubidi_countParagraphs(UBiDi * pBiDi)2907 ubidi_countParagraphs(UBiDi *pBiDi) {
2908     if(!IS_VALID_PARA_OR_LINE(pBiDi)) {
2909         return 0;
2910     } else {
2911         return pBiDi->paraCount;
2912     }
2913 }
2914 
2915 U_CAPI void U_EXPORT2
ubidi_getParagraphByIndex(const UBiDi * pBiDi,int32_t paraIndex,int32_t * pParaStart,int32_t * pParaLimit,UBiDiLevel * pParaLevel,UErrorCode * pErrorCode)2916 ubidi_getParagraphByIndex(const UBiDi *pBiDi, int32_t paraIndex,
2917                           int32_t *pParaStart, int32_t *pParaLimit,
2918                           UBiDiLevel *pParaLevel, UErrorCode *pErrorCode) {
2919     int32_t paraStart;
2920 
2921     /* check the argument values */
2922     RETURN_VOID_IF_NULL_OR_FAILING_ERRCODE(pErrorCode);
2923     RETURN_VOID_IF_NOT_VALID_PARA_OR_LINE(pBiDi, *pErrorCode);
2924     RETURN_VOID_IF_BAD_RANGE(paraIndex, 0, pBiDi->paraCount, *pErrorCode);
2925 
2926     pBiDi=pBiDi->pParaBiDi;             /* get Para object if Line object */
2927     if(paraIndex) {
2928         paraStart=pBiDi->paras[paraIndex-1].limit;
2929     } else {
2930         paraStart=0;
2931     }
2932     if(pParaStart!=NULL) {
2933         *pParaStart=paraStart;
2934     }
2935     if(pParaLimit!=NULL) {
2936         *pParaLimit=pBiDi->paras[paraIndex].limit;
2937     }
2938     if(pParaLevel!=NULL) {
2939         *pParaLevel=GET_PARALEVEL(pBiDi, paraStart);
2940     }
2941 }
2942 
2943 U_CAPI int32_t U_EXPORT2
ubidi_getParagraph(const UBiDi * pBiDi,int32_t charIndex,int32_t * pParaStart,int32_t * pParaLimit,UBiDiLevel * pParaLevel,UErrorCode * pErrorCode)2944 ubidi_getParagraph(const UBiDi *pBiDi, int32_t charIndex,
2945                           int32_t *pParaStart, int32_t *pParaLimit,
2946                           UBiDiLevel *pParaLevel, UErrorCode *pErrorCode) {
2947     int32_t paraIndex;
2948 
2949     /* check the argument values */
2950     /* pErrorCode will be checked by the call to ubidi_getParagraphByIndex */
2951     RETURN_IF_NULL_OR_FAILING_ERRCODE(pErrorCode, -1);
2952     RETURN_IF_NOT_VALID_PARA_OR_LINE(pBiDi, *pErrorCode, -1);
2953     pBiDi=pBiDi->pParaBiDi;             /* get Para object if Line object */
2954     RETURN_IF_BAD_RANGE(charIndex, 0, pBiDi->length, *pErrorCode, -1);
2955 
2956     for(paraIndex=0; charIndex>=pBiDi->paras[paraIndex].limit; paraIndex++);
2957     ubidi_getParagraphByIndex(pBiDi, paraIndex, pParaStart, pParaLimit, pParaLevel, pErrorCode);
2958     return paraIndex;
2959 }
2960 
2961 U_CAPI void U_EXPORT2
ubidi_setClassCallback(UBiDi * pBiDi,UBiDiClassCallback * newFn,const void * newContext,UBiDiClassCallback ** oldFn,const void ** oldContext,UErrorCode * pErrorCode)2962 ubidi_setClassCallback(UBiDi *pBiDi, UBiDiClassCallback *newFn,
2963                        const void *newContext, UBiDiClassCallback **oldFn,
2964                        const void **oldContext, UErrorCode *pErrorCode)
2965 {
2966     RETURN_VOID_IF_NULL_OR_FAILING_ERRCODE(pErrorCode);
2967     if(pBiDi==NULL) {
2968         *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
2969         return;
2970     }
2971     if( oldFn )
2972     {
2973         *oldFn = pBiDi->fnClassCallback;
2974     }
2975     if( oldContext )
2976     {
2977         *oldContext = pBiDi->coClassCallback;
2978     }
2979     pBiDi->fnClassCallback = newFn;
2980     pBiDi->coClassCallback = newContext;
2981 }
2982 
2983 U_CAPI void U_EXPORT2
ubidi_getClassCallback(UBiDi * pBiDi,UBiDiClassCallback ** fn,const void ** context)2984 ubidi_getClassCallback(UBiDi *pBiDi, UBiDiClassCallback **fn, const void **context)
2985 {
2986     if(pBiDi==NULL) {
2987         return;
2988     }
2989     if( fn )
2990     {
2991         *fn = pBiDi->fnClassCallback;
2992     }
2993     if( context )
2994     {
2995         *context = pBiDi->coClassCallback;
2996     }
2997 }
2998 
2999 U_CAPI UCharDirection U_EXPORT2
ubidi_getCustomizedClass(UBiDi * pBiDi,UChar32 c)3000 ubidi_getCustomizedClass(UBiDi *pBiDi, UChar32 c)
3001 {
3002     UCharDirection dir;
3003 
3004     if( pBiDi->fnClassCallback == NULL ||
3005         (dir = (*pBiDi->fnClassCallback)(pBiDi->coClassCallback, c)) == U_BIDI_CLASS_DEFAULT )
3006     {
3007         dir = ubidi_getClass(pBiDi->bdp, c);
3008     }
3009     if(dir >= U_CHAR_DIRECTION_COUNT) {
3010         dir = ON;
3011     }
3012     return dir;
3013 }
3014