1 /* Bidi.java -- Bidirectional Algorithm implementation
2    Copyright (C) 2005, 2006, 2012  Free Software Foundation, Inc.
3 
4 This file is part of GNU Classpath.
5 
6 GNU Classpath is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 2, or (at your option)
9 any later version.
10 
11 GNU Classpath is distributed in the hope that it will be useful, but
12 WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14 General Public License for more details.
15 
16 You should have received a copy of the GNU General Public License
17 along with GNU Classpath; see the file COPYING.  If not, write to the
18 Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
19 02110-1301 USA.
20 
21 Linking this library statically or dynamically with other modules is
22 making a combined work based on this library.  Thus, the terms and
23 conditions of the GNU General Public License cover the whole
24 combination.
25 
26 As a special exception, the copyright holders of this library give you
27 permission to link this library with independent modules to produce an
28 executable, regardless of the license terms of these independent
29 modules, and to copy and distribute the resulting executable under
30 terms of your choice, provided that you also meet, for each linked
31 independent module, the terms and conditions of the license of that
32 module.  An independent module is a module which is not derived from
33 or based on this library.  If you modify this library, you may extend
34 this exception to your version of the library, but you are not
35 obligated to do so.  If you do not wish to do so, delete this
36 exception statement from your version. */
37 
38 
39 package java.text;
40 
41 import java.awt.font.NumericShaper;
42 import java.awt.font.TextAttribute;
43 import java.util.ArrayList;
44 
45 
46 /**
47  * Bidirectional Algorithm implementation.
48  *
49  * The full algorithm is
50  * <a href="http://www.unicode.org/unicode/reports/tr9/">Unicode Standard
51  * Annex #9: The Bidirectional Algorithm</a>.
52  *
53  * @since 1.4
54  */
55 public final class Bidi
56 {
57   /**
58    * This indicates that a strongly directional character in the text should
59    * set the initial direction, but if no such character is found, then the
60    * initial direction will be left-to-right.
61    */
62   public static final int DIRECTION_DEFAULT_LEFT_TO_RIGHT = -2;
63 
64   /**
65    * This indicates that a strongly directional character in the text should
66    * set the initial direction, but if no such character is found, then the
67    * initial direction will be right-to-left.
68    */
69   public static final int DIRECTION_DEFAULT_RIGHT_TO_LEFT = -1;
70 
71   /**
72    * This indicates that the initial direction should be left-to-right.
73    */
74   public static final int DIRECTION_LEFT_TO_RIGHT = 0;
75 
76   /**
77    * This indicates that the initial direction should be right-to-left.
78    */
79   public static final int DIRECTION_RIGHT_TO_LEFT = 1;
80 
81   // Flags used when computing the result.
82   private static final int LTOR = 1 << DIRECTION_LEFT_TO_RIGHT;
83   private static final int RTOL = 1 << DIRECTION_RIGHT_TO_LEFT;
84 
85   // The text we are examining, and the starting offset.
86   // If we had a better way to handle createLineBidi, we wouldn't
87   // need this at all -- which for the String case would be an
88   // efficiency win.
89   private char[] text;
90   private int textOffset;
91   // The embeddings corresponding to the text, and the starting offset.
92   private byte[] embeddings;
93   private int embeddingOffset;
94   // The length of the text (and embeddings) to use.
95   private int length;
96   // The flags.
97   private int flags;
98 
99   // All instance fields following this point are initialized
100   // during analysis.  Fields before this must be set by the constructor.
101 
102   // The initial embedding level.
103   private int baseEmbedding;
104   // The type of each character in the text.
105   private byte[] types;
106   // The levels we compute.
107   private byte[] levels;
108 
109   // A list of indices where a formatting code was found.  These
110   // are indicies into the original text -- not into the text after
111   // the codes have been removed.
112   private ArrayList<Integer> formatterIndices;
113 
114   // Indices of the starts of runs in the text.
115   private int[] runs;
116 
117   // A convenience field where we keep track of what kinds of runs
118   // we've seen.
119   private int resultFlags;
120 
121   /**
122    * Create a new Bidi object given an attributed character iterator.
123    * This constructor will examine various attributes of the text:
124    * <ul>
125    * <li> {@link TextAttribute#RUN_DIRECTION} is used to determine the
126    * paragraph's base embedding level.  This constructor will recognize
127    * either {@link TextAttribute#RUN_DIRECTION_LTR} or
128    * {@link TextAttribute#RUN_DIRECTION_RTL}.  If neither is given,
129    * {@link #DIRECTION_DEFAULT_LEFT_TO_RIGHT} is assumed.
130    * </li>
131    *
132    * <li> If {@link TextAttribute#NUMERIC_SHAPING} is seen, then numeric
133    * shaping will be done before the Bidi algorithm is run.
134    * </li>
135    *
136    * <li> If {@link TextAttribute#BIDI_EMBEDDING} is seen on a given
137    * character, then the value of this attribute will be used as an
138    * embedding level override.
139    * </li>
140    * </ul>
141    * @param iter the attributed character iterator to use
142    */
Bidi(AttributedCharacterIterator iter)143   public Bidi(AttributedCharacterIterator iter)
144   {
145     // If set, this attribute should be set on all characters.
146     // We don't check this (should we?) but we do assume that we
147     // can simply examine the first character.
148     Object val = iter.getAttribute(TextAttribute.RUN_DIRECTION);
149     if (val == TextAttribute.RUN_DIRECTION_LTR)
150       this.flags = DIRECTION_LEFT_TO_RIGHT;
151     else if (val == TextAttribute.RUN_DIRECTION_RTL)
152       this.flags = DIRECTION_RIGHT_TO_LEFT;
153     else
154       this.flags = DIRECTION_DEFAULT_LEFT_TO_RIGHT;
155 
156     // Likewise this attribute should be specified on the whole text.
157     // We read it here and then, if it is set, we apply the numeric shaper
158     // to the text before processing it.
159     NumericShaper shaper = null;
160     val = iter.getAttribute(TextAttribute.NUMERIC_SHAPING);
161     if (val instanceof NumericShaper)
162       shaper = (NumericShaper) val;
163 
164     text = new char[iter.getEndIndex() - iter.getBeginIndex()];
165     embeddings = new byte[text.length];
166     embeddingOffset = 0;
167     length = text.length;
168     for (int i = 0; i < text.length; ++i)
169       {
170         text[i] = iter.current();
171 
172         val = iter.getAttribute(TextAttribute.BIDI_EMBEDDING);
173         if (val instanceof Integer)
174           {
175             int ival = ((Integer) val).intValue();
176             byte bval;
177             if (ival < -62 || ival > 62)
178               bval = 0;
179             else
180               bval = (byte) ival;
181             embeddings[i] = bval;
182           }
183       }
184 
185     // Invoke the numeric shaper, if specified.
186     if (shaper != null)
187       shaper.shape(text, 0, length);
188 
189     runBidi();
190   }
191 
192   /**
193    * Create a new Bidi object with the indicated text and, possibly, explicit
194    * embedding settings.
195    *
196    * If the embeddings array is null, it is ignored.  Otherwise it is taken to
197    * be explicit embedding settings corresponding to the text.  Positive values
198    * from 1 to 61 are embedding levels, and negative values from -1 to -61 are
199    * embedding overrides.  (FIXME: not at all clear what this really means.)
200    *
201    * @param text the text to use
202    * @param offset the offset of the first character of the text
203    * @param embeddings the explicit embeddings, or null if there are none
204    * @param embedOffset the offset of the first embedding value to use
205    * @param length the length of both the text and the embeddings
206    * @param flags a flag indicating the base embedding direction
207    */
Bidi(char[] text, int offset, byte[] embeddings, int embedOffset, int length, int flags)208   public Bidi(char[] text, int offset, byte[] embeddings, int embedOffset,
209               int length, int flags)
210   {
211     if (flags != DIRECTION_DEFAULT_LEFT_TO_RIGHT
212         && flags != DIRECTION_DEFAULT_RIGHT_TO_LEFT
213         && flags != DIRECTION_LEFT_TO_RIGHT
214         && flags != DIRECTION_RIGHT_TO_LEFT)
215       throw new IllegalArgumentException("unrecognized 'flags' argument: "
216                                          + flags);
217     this.text = text;
218     this.textOffset = offset;
219     this.embeddings = embeddings;
220     this.embeddingOffset = embedOffset;
221     this.length = length;
222     this.flags = flags;
223 
224     runBidi();
225   }
226 
227   /**
228    * Create a new Bidi object using the contents of the given String
229    * as the text.
230    * @param text the text to use
231    * @param flags a flag indicating the base embedding direction
232    */
Bidi(String text, int flags)233   public Bidi(String text, int flags)
234   {
235     if (flags != DIRECTION_DEFAULT_LEFT_TO_RIGHT
236         && flags != DIRECTION_DEFAULT_RIGHT_TO_LEFT
237         && flags != DIRECTION_LEFT_TO_RIGHT
238         && flags != DIRECTION_RIGHT_TO_LEFT)
239       throw new IllegalArgumentException("unrecognized 'flags' argument: "
240                                          + flags);
241 
242     // This is inefficient, but it isn't clear whether it matters.
243     // If it does we can change our implementation a bit to allow either
244     // a String or a char[].
245     this.text = text.toCharArray();
246     this.textOffset = 0;
247     this.embeddings = null;
248     this.embeddingOffset = 0;
249     this.length = text.length();
250     this.flags = flags;
251 
252     runBidi();
253   }
254 
255   /**
256    * Implementation function which computes the initial type of
257    * each character in the input.
258    */
computeTypes()259   private void computeTypes()
260   {
261     types = new byte[length];
262     for (int i = 0; i < length; ++i)
263       types[i] = Character.getDirectionality(text[textOffset + i]);
264   }
265 
266   /**
267    * An internal function which implements rules P2 and P3.
268    * This computes the base embedding level.
269    * @return the paragraph's base embedding level
270    */
computeParagraphEmbeddingLevel()271   private int computeParagraphEmbeddingLevel()
272   {
273     // First check to see if the user supplied a directionality override.
274     if (flags == DIRECTION_LEFT_TO_RIGHT
275         || flags == DIRECTION_RIGHT_TO_LEFT)
276       return flags;
277 
278     // This implements rules P2 and P3.
279     // (Note that we don't need P1, as the user supplies
280     // a paragraph.)
281     for (int i = 0; i < length; ++i)
282       {
283         int dir = types[i];
284         if (dir == Character.DIRECTIONALITY_LEFT_TO_RIGHT)
285           return DIRECTION_LEFT_TO_RIGHT;
286         if (dir == Character.DIRECTIONALITY_RIGHT_TO_LEFT
287             || dir == Character.DIRECTIONALITY_RIGHT_TO_LEFT)
288           return DIRECTION_RIGHT_TO_LEFT;
289       }
290     return (flags == DIRECTION_DEFAULT_LEFT_TO_RIGHT
291             ? DIRECTION_LEFT_TO_RIGHT
292             : DIRECTION_RIGHT_TO_LEFT);
293   }
294 
295   /**
296    * An internal function which implements rules X1 through X9.
297    * This computes the initial levels for the text, handling
298    * explicit overrides and embeddings.
299    */
computeExplicitLevels()300   private void computeExplicitLevels()
301   {
302     levels = new byte[length];
303     byte currentEmbedding = (byte) baseEmbedding;
304     // The directional override is a Character directionality
305     // constant.  -1 means there is no override.
306     byte directionalOverride = -1;
307     // The stack of pushed embeddings, and the stack pointer.
308     // Note that because the direction is inherent in the depth,
309     // and because we have a bit left over in a byte, we can encode
310     // the override, if any, directly in this value on the stack.
311     final int MAX_DEPTH = 62;
312     byte[] embeddingStack = new byte[MAX_DEPTH];
313     int sp = 0;
314 
315     for (int i = 0; i < length; ++i)
316       {
317         // If we see an explicit embedding, we use that, even if
318         // the current character is itself a directional override.
319         if (embeddings != null && embeddings[embeddingOffset + i] != 0)
320           {
321             // It isn't at all clear what we're supposed to do here.
322             // What does a negative value really mean?
323             // Should we push on the embedding stack here?
324             currentEmbedding = embeddings[embeddingOffset + i];
325             if (currentEmbedding < 0)
326               {
327                 currentEmbedding = (byte) -currentEmbedding;
328                 directionalOverride
329                   = (((currentEmbedding % 2) == 0)
330                       ? Character.DIRECTIONALITY_LEFT_TO_RIGHT
331                       : Character.DIRECTIONALITY_RIGHT_TO_LEFT);
332               }
333             else
334               directionalOverride = -1;
335             continue;
336           }
337         // No explicit embedding.
338         boolean isLtoR = false;
339         boolean isSpecial = true;
340         switch (types[i])
341           {
342             case Character.DIRECTIONALITY_LEFT_TO_RIGHT_EMBEDDING:
343             case Character.DIRECTIONALITY_LEFT_TO_RIGHT_OVERRIDE:
344               isLtoR = true;
345               // Fall through.
346             case Character.DIRECTIONALITY_RIGHT_TO_LEFT_EMBEDDING:
347             case Character.DIRECTIONALITY_RIGHT_TO_LEFT_OVERRIDE:
348               {
349                 byte newEmbedding;
350                 if (isLtoR)
351                   {
352                     // Least greater even.
353                     newEmbedding = (byte) ((currentEmbedding & ~1) + 2);
354                   }
355                 else
356                   {
357                     // Least greater odd.
358                     newEmbedding = (byte) ((currentEmbedding + 1) | 1);
359                   }
360                 // FIXME: we don't properly handle invalid pushes.
361                 if (newEmbedding < MAX_DEPTH)
362                   {
363                     // The new level is valid.  Push the old value.
364                     // See above for a comment on the encoding here.
365                     if (directionalOverride != -1)
366                       currentEmbedding |= Byte.MIN_VALUE;
367                     embeddingStack[sp++] = currentEmbedding;
368                     currentEmbedding = newEmbedding;
369                     if (types[i] == Character.DIRECTIONALITY_LEFT_TO_RIGHT_OVERRIDE)
370                       directionalOverride = Character.DIRECTIONALITY_LEFT_TO_RIGHT;
371                     else if (types[i] == Character.DIRECTIONALITY_RIGHT_TO_LEFT_OVERRIDE)
372                       directionalOverride = Character.DIRECTIONALITY_RIGHT_TO_LEFT;
373                     else
374                       directionalOverride = -1;
375                   }
376                 }
377               break;
378             case Character.DIRECTIONALITY_POP_DIRECTIONAL_FORMAT:
379               {
380                 // FIXME: we don't properly handle a pop with a corresponding
381                 // invalid push.
382                 if (sp == 0)
383                   {
384                     // We saw a pop without a push.  Just ignore it.
385                     break;
386                   }
387                 byte newEmbedding = embeddingStack[--sp];
388                 currentEmbedding = (byte) (newEmbedding & 0x7f);
389                 if (newEmbedding < 0)
390                   directionalOverride
391                   = (((newEmbedding & 1) == 0)
392                       ? Character.DIRECTIONALITY_LEFT_TO_RIGHT
393                       : Character.DIRECTIONALITY_RIGHT_TO_LEFT);
394                 else
395                   directionalOverride = -1;
396               }
397               break;
398             default:
399               isSpecial = false;
400               break;
401           }
402         levels[i] = currentEmbedding;
403         if (isSpecial)
404           {
405             // Mark this character for removal.
406             if (formatterIndices == null)
407               formatterIndices = new ArrayList<Integer>();
408             formatterIndices.add(Integer.valueOf(i));
409           }
410         else if (directionalOverride != -1)
411           types[i] = directionalOverride;
412       }
413 
414     // Remove the formatting codes and update both the arrays
415     // and 'length'.  It would be more efficient not to remove
416     // these codes, but it is also more complicated.  Also, the
417     // Unicode algorithm reference does not properly describe
418     // how this is to be done -- from what I can tell, their suggestions
419     // in this area will not yield the correct results.
420     if (formatterIndices == null)
421       return;
422     int output = 0, input = 0;
423     final int size = formatterIndices.size();
424     for (int i = 0; i <= size; ++i)
425       {
426         int nextFmt;
427         if (i == size)
428           nextFmt = length;
429         else
430           nextFmt = formatterIndices.get(i).intValue();
431         // Non-formatter codes are from 'input' to 'nextFmt'.
432         int len = nextFmt - input;
433         System.arraycopy(levels, input, levels, output, len);
434         System.arraycopy(types, input, types, output, len);
435         output += len;
436         input = nextFmt + 1;
437       }
438     length -= formatterIndices.size();
439   }
440 
441   /**
442    * An internal function to compute the boundaries of runs
443    * in the text.  It isn't strictly necessary to do this, but
444    * it lets us write some following passes in a less complicated
445    * way.  Also it lets us efficiently implement some of the public
446    * methods.  A run is simply a sequence of characters at the
447    * same level.
448    */
computeRuns()449   private void computeRuns()
450   {
451     int runCount = 0;
452     int currentEmbedding = baseEmbedding;
453     for (int i = 0; i < length; ++i)
454       {
455         if (levels[i] != currentEmbedding)
456           {
457             currentEmbedding = levels[i];
458             ++runCount;
459           }
460       }
461 
462     // This may be called multiple times.  If so, and if
463     // the number of runs has not changed, then don't bother
464     // allocating a new array.
465     if (runs == null || runs.length != runCount + 1)
466       runs = new int[runCount + 1];
467     int where = 0;
468     int lastRunStart = 0;
469     currentEmbedding = baseEmbedding;
470     for (int i = 0; i < length; ++i)
471       {
472         if (levels[i] != currentEmbedding)
473           {
474             runs[where++] = lastRunStart;
475             lastRunStart = i;
476             currentEmbedding = levels[i];
477           }
478       }
479     runs[where++] = lastRunStart;
480   }
481 
482   /**
483    * An internal method to resolve weak types.  This implements
484    * rules W1 through W7.
485    */
resolveWeakTypes()486   private void resolveWeakTypes()
487   {
488     final int runCount = getRunCount();
489 
490     int previousLevel = baseEmbedding;
491     for (int run = 0; run < runCount; ++run)
492       {
493         int start = getRunStart(run);
494         int end = getRunLimit(run);
495         int level = getRunLevel(run);
496 
497         // These are the names used in the Bidi algorithm.
498         byte sor = (((Math.max(previousLevel, level) % 2) == 0)
499                       ? Character.DIRECTIONALITY_LEFT_TO_RIGHT
500                       : Character.DIRECTIONALITY_RIGHT_TO_LEFT);
501         int nextLevel;
502         if (run == runCount - 1)
503           nextLevel = baseEmbedding;
504         else
505           nextLevel = getRunLevel(run + 1);
506         byte eor = (((Math.max(level, nextLevel) % 2) == 0)
507                       ? Character.DIRECTIONALITY_LEFT_TO_RIGHT
508                       : Character.DIRECTIONALITY_RIGHT_TO_LEFT);
509 
510         byte prevType = sor;
511         byte prevStrongType = sor;
512         for (int i = start; i < end; ++i)
513           {
514             final byte nextType = (i == end - 1) ? eor : types[i + 1];
515 
516             // Rule W1: change NSM to the prevailing direction.
517             if (types[i] == Character.DIRECTIONALITY_NONSPACING_MARK)
518               types[i] = prevType;
519             else
520               prevType = types[i];
521 
522             // Rule W2: change EN to AN in some cases.
523             if (types[i] == Character.DIRECTIONALITY_EUROPEAN_NUMBER)
524               {
525                 if (prevStrongType == Character.DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC)
526                   types[i] = Character.DIRECTIONALITY_ARABIC_NUMBER;
527               }
528             else if (types[i] == Character.DIRECTIONALITY_LEFT_TO_RIGHT
529                      || types[i] == Character.DIRECTIONALITY_RIGHT_TO_LEFT
530                      || types[i] == Character.DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC)
531               prevStrongType = types[i];
532 
533             // Rule W3: change AL to R.
534             if (types[i] == Character.DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC)
535               types[i] = Character.DIRECTIONALITY_RIGHT_TO_LEFT;
536 
537             // Rule W4: handle separators between two numbers.
538             if (prevType == Character.DIRECTIONALITY_EUROPEAN_NUMBER
539                 && nextType == Character.DIRECTIONALITY_EUROPEAN_NUMBER)
540               {
541                 if (types[i] == Character.DIRECTIONALITY_EUROPEAN_NUMBER_SEPARATOR
542                     || types[i] == Character.DIRECTIONALITY_COMMON_NUMBER_SEPARATOR)
543                   types[i] = nextType;
544               }
545             else if (prevType == Character.DIRECTIONALITY_ARABIC_NUMBER
546                      && nextType == Character.DIRECTIONALITY_ARABIC_NUMBER
547                      && types[i] == Character.DIRECTIONALITY_COMMON_NUMBER_SEPARATOR)
548               types[i] = nextType;
549 
550             // Rule W5: change a sequence of european terminators to
551             // european numbers, if they are adjacent to european numbers.
552             // We also include BN characters in this.
553             if (types[i] == Character.DIRECTIONALITY_EUROPEAN_NUMBER_TERMINATOR
554                 || types[i] == Character.DIRECTIONALITY_BOUNDARY_NEUTRAL)
555               {
556                 if (prevType == Character.DIRECTIONALITY_EUROPEAN_NUMBER)
557                   types[i] = prevType;
558                 else
559                   {
560                     // Look ahead to see if there is an EN terminating this
561                     // sequence of ETs.
562                     int j = i + 1;
563                     while (j < end
564                            && (types[j] == Character.DIRECTIONALITY_EUROPEAN_NUMBER_TERMINATOR
565                                || types[j] == Character.DIRECTIONALITY_BOUNDARY_NEUTRAL))
566                       ++j;
567                     if (j < end
568                         && types[j] == Character.DIRECTIONALITY_EUROPEAN_NUMBER)
569                       {
570                         // Change them all to EN now.
571                         for (int k = i; k < j; ++k)
572                           types[k] = Character.DIRECTIONALITY_EUROPEAN_NUMBER;
573                       }
574                   }
575               }
576 
577             // Rule W6: separators and terminators change to ON.
578             // Again we include BN.
579             if (types[i] == Character.DIRECTIONALITY_EUROPEAN_NUMBER_TERMINATOR
580                 || types[i] == Character.DIRECTIONALITY_EUROPEAN_NUMBER_TERMINATOR
581                 || types[i] == Character.DIRECTIONALITY_COMMON_NUMBER_SEPARATOR
582                 || types[i] == Character.DIRECTIONALITY_BOUNDARY_NEUTRAL)
583               types[i] = Character.DIRECTIONALITY_OTHER_NEUTRALS;
584 
585             // Rule W7: change european number types.
586             if (prevStrongType == Character.DIRECTIONALITY_LEFT_TO_RIGHT
587                 && types[i] == Character.DIRECTIONALITY_EUROPEAN_NUMBER)
588               types[i] = prevStrongType;
589           }
590 
591         previousLevel = level;
592       }
593   }
594 
595   /**
596    * An internal method to resolve neutral types.  This implements
597    * rules N1 and N2.
598    */
resolveNeutralTypes()599   private void resolveNeutralTypes()
600   {
601     // This implements rules N1 and N2.
602     final int runCount = getRunCount();
603 
604     int previousLevel = baseEmbedding;
605     for (int run = 0; run < runCount; ++run)
606       {
607         int start = getRunStart(run);
608         int end = getRunLimit(run);
609         int level = getRunLevel(run);
610 
611         byte embeddingDirection
612           = (((level % 2) == 0) ? Character.DIRECTIONALITY_LEFT_TO_RIGHT
613                                 : Character.DIRECTIONALITY_RIGHT_TO_LEFT);
614         // These are the names used in the Bidi algorithm.
615         byte sor = (((Math.max(previousLevel, level) % 2) == 0)
616                       ? Character.DIRECTIONALITY_LEFT_TO_RIGHT
617                       : Character.DIRECTIONALITY_RIGHT_TO_LEFT);
618         int nextLevel;
619         if (run == runCount - 1)
620           nextLevel = baseEmbedding;
621         else
622           nextLevel = getRunLevel(run + 1);
623         byte eor = (((Math.max(level, nextLevel) % 2) == 0)
624                       ? Character.DIRECTIONALITY_LEFT_TO_RIGHT
625                       : Character.DIRECTIONALITY_RIGHT_TO_LEFT);
626 
627         byte prevStrong = sor;
628         int neutralStart = -1;
629         for (int i = start; i <= end; ++i)
630           {
631             byte newStrong = -1;
632             byte thisType = i == end ? eor : types[i];
633             switch (thisType)
634               {
635               case Character.DIRECTIONALITY_LEFT_TO_RIGHT:
636                 newStrong = Character.DIRECTIONALITY_LEFT_TO_RIGHT;
637                 break;
638               case Character.DIRECTIONALITY_RIGHT_TO_LEFT:
639               case Character.DIRECTIONALITY_ARABIC_NUMBER:
640               case Character.DIRECTIONALITY_EUROPEAN_NUMBER:
641                 newStrong = Character.DIRECTIONALITY_RIGHT_TO_LEFT;
642                 break;
643               case Character.DIRECTIONALITY_BOUNDARY_NEUTRAL:
644               case Character.DIRECTIONALITY_OTHER_NEUTRALS:
645               case Character.DIRECTIONALITY_SEGMENT_SEPARATOR:
646               case Character.DIRECTIONALITY_PARAGRAPH_SEPARATOR:
647               case Character.DIRECTIONALITY_WHITESPACE:
648                 if (neutralStart == -1)
649                   neutralStart = i;
650                 break;
651               }
652             // If we see a strong character, update all the neutrals.
653             if (newStrong != -1)
654               {
655                 if (neutralStart != -1)
656                   {
657                     byte override = (prevStrong == newStrong
658                                      ? prevStrong
659                                      : embeddingDirection);
660                     for (int j = neutralStart; j < i; ++j)
661                       types[j] = override;
662                   }
663                 prevStrong = newStrong;
664                 neutralStart = -1;
665               }
666           }
667 
668         previousLevel = level;
669       }
670   }
671 
672   /**
673    * An internal method to resolve implicit levels.
674    * This implements rules I1 and I2.
675    */
resolveImplicitLevels()676   private void resolveImplicitLevels()
677   {
678     // This implements rules I1 and I2.
679     for (int i = 0; i < length; ++i)
680       {
681         if ((levels[i] & 1) == 0)
682           {
683             if (types[i] == Character.DIRECTIONALITY_RIGHT_TO_LEFT)
684               ++levels[i];
685             else if (types[i] == Character.DIRECTIONALITY_ARABIC_NUMBER
686                      || types[i] == Character.DIRECTIONALITY_EUROPEAN_NUMBER)
687               levels[i] += 2;
688           }
689         else
690           {
691             if (types[i] == Character.DIRECTIONALITY_LEFT_TO_RIGHT
692                 || types[i] == Character.DIRECTIONALITY_ARABIC_NUMBER
693                 || types[i] == Character.DIRECTIONALITY_EUROPEAN_NUMBER)
694               ++levels[i];
695           }
696 
697         // Update the result flags.
698         resultFlags |= 1 << (levels[i] & 1);
699       }
700     // One final update of the result flags, using the base level.
701     resultFlags |= 1 << baseEmbedding;
702   }
703 
704   /**
705    * This reinserts the formatting codes that we removed early on.
706    * Actually it does not insert formatting codes per se, but rather
707    * simply inserts new levels at the appropriate locations in the
708    * 'levels' array.
709    */
reinsertFormattingCodes()710   private void reinsertFormattingCodes()
711   {
712     if (formatterIndices == null)
713       return;
714     int input = length;
715     int output = levels.length;
716     // Process from the end as we are copying the array over itself here.
717     for (int index = formatterIndices.size() - 1; index >= 0; --index)
718       {
719         int nextFmt = formatterIndices.get(index).intValue();
720 
721         // nextFmt points to a location in the original array.  So,
722         // nextFmt+1 is the target of our copying.  output is the location
723         // to which we last copied, thus we can derive the length of the
724         // copy from it.
725         int len = output - nextFmt - 1;
726         output = nextFmt;
727         input -= len;
728         // Note that we no longer need 'types' at this point, so we
729         // only edit 'levels'.
730         if (nextFmt + 1 < levels.length)
731           System.arraycopy(levels, input, levels, nextFmt + 1, len);
732 
733         // Now set the level at the reinsertion point.
734         int rightLevel;
735         if (output == levels.length - 1)
736           rightLevel = baseEmbedding;
737         else
738           rightLevel = levels[output + 1];
739         int leftLevel;
740         if (input == 0)
741           leftLevel = baseEmbedding;
742         else
743           leftLevel = levels[input];
744         levels[output] = (byte) Math.max(leftLevel, rightLevel);
745       }
746     length = levels.length;
747   }
748 
749   /**
750    * This is the main internal entry point.  After a constructor
751    * has initialized the appropriate local state, it will call
752    * this method to do all the work.
753    */
runBidi()754   private void runBidi()
755   {
756     computeTypes();
757     baseEmbedding = computeParagraphEmbeddingLevel();
758     computeExplicitLevels();
759     computeRuns();
760     resolveWeakTypes();
761     resolveNeutralTypes();
762     resolveImplicitLevels();
763     // We're done with the types.  Let the GC clean up.
764     types = null;
765     reinsertFormattingCodes();
766     // After resolving the implicit levels, the number
767     // of runs may have changed.
768     computeRuns();
769   }
770 
771   /**
772    * Return true if the paragraph base embedding is left-to-right,
773    * false otherwise.
774    */
baseIsLeftToRight()775   public boolean baseIsLeftToRight()
776   {
777     return baseEmbedding == DIRECTION_LEFT_TO_RIGHT;
778   }
779 
780   /**
781    * Create a new Bidi object for a single line of text, taken
782    * from the text used when creating the current Bidi object.
783    * @param start the index of the first character of the line
784    * @param end the index of the final character of the line
785    * @return a new Bidi object for the indicated line of text
786    */
createLineBidi(int start, int end)787   public Bidi createLineBidi(int start, int end)
788   {
789     // This isn't the most efficient implementation possible.
790     // This probably does not matter, so we choose simplicity instead.
791     int level = getLevelAt(start);
792     int flag = (((level % 2) == 0)
793                 ? DIRECTION_LEFT_TO_RIGHT
794                 : DIRECTION_RIGHT_TO_LEFT);
795     return new Bidi(text, textOffset + start,
796                     embeddings, embeddingOffset + start,
797                     end - start, flag);
798   }
799 
800   /**
801    * Return the base embedding level of the paragraph.
802    */
getBaseLevel()803   public int getBaseLevel()
804   {
805     return baseEmbedding;
806   }
807 
808   /**
809    * Return the length of the paragraph, in characters.
810    */
getLength()811   public int getLength()
812   {
813     return length;
814   }
815 
816   /**
817    * Return the level at the indicated character.  If the
818    * supplied index is less than zero or greater than the length
819    * of the text, then the paragraph's base embedding level will
820    * be returned.
821    * @param offset the character to examine
822    * @return the level of that character
823    */
getLevelAt(int offset)824   public int getLevelAt(int offset)
825   {
826     if (offset < 0 || offset >= length)
827       return getBaseLevel();
828     return levels[offset];
829   }
830 
831   /**
832    * Return the number of runs in the result.  A run is
833    * a sequence of characters at the same embedding level.
834    */
getRunCount()835   public int getRunCount()
836   {
837     return runs.length;
838   }
839 
840   /**
841    * Return the level of the indicated run.
842    * @param which the run to examine
843    * @return the level of that run
844    */
getRunLevel(int which)845   public int getRunLevel(int which)
846   {
847     return levels[runs[which]];
848   }
849 
850   /**
851    * Return the index of the character just following the end
852    * of the indicated run.
853    * @param which the run to examine
854    * @return the index of the character after the final character
855    * of the run
856    */
getRunLimit(int which)857   public int getRunLimit(int which)
858   {
859     if (which == runs.length - 1)
860       return length;
861     return runs[which + 1];
862   }
863 
864   /**
865    * Return the index of the first character in the indicated run.
866    * @param which the run to examine
867    * @return the index of the first character of the run
868    */
getRunStart(int which)869   public int getRunStart(int which)
870   {
871     return runs[which];
872   }
873 
874   /**
875    * Return true if the text is entirely left-to-right, and the
876    * base embedding is also left-to-right.
877    */
isLeftToRight()878   public boolean isLeftToRight()
879   {
880     return resultFlags == LTOR;
881   }
882 
883   /**
884    * Return true if the text consists of mixed left-to-right and
885    * right-to-left runs, or if the text consists of one kind of run
886    * which differs from the base embedding direction.
887    */
isMixed()888   public boolean isMixed()
889   {
890     return resultFlags == (LTOR | RTOL);
891   }
892 
893   /**
894    * Return true if the text is entirely right-to-left, and the
895    * base embedding is also right-to-left.
896    */
isRightToLeft()897   public boolean isRightToLeft()
898   {
899     return resultFlags == RTOL;
900   }
901 
902   /**
903    * Return a String describing the internal state of this object.
904    * This is only useful for debugging.
905    */
toString()906   public String toString()
907   {
908     return "Bidi Bidi Bidi I like you, Buck!";
909   }
910 
911   /**
912    * Reorder objects according to the levels passed in.  This implements
913    * reordering as defined by the Unicode bidirectional layout specification.
914    * The levels are integers from 0 to 62; even numbers represent left-to-right
915    * runs, and odd numbers represent right-to-left runs.
916    *
917    * @param levels the levels associated with each object
918    * @param levelOffset the index of the first level to use
919    * @param objs the objects to reorder according to the levels
920    * @param objOffset the index of the first object to use
921    * @param count the number of objects (and levels) to manipulate
922    */
reorderVisually(byte[] levels, int levelOffset, Object[] objs, int objOffset, int count)923   public static void reorderVisually(byte[] levels, int levelOffset,
924                                      Object[] objs, int objOffset, int count)
925   {
926     // We need a copy of the 'levels' array, as we are going to modify it.
927     // This is unfortunate but difficult to avoid.
928     byte[] levelCopy = new byte[count];
929     // Do this explicitly so we can also find the maximum depth at the
930     // same time.
931     int max = 0;
932     int lowestOdd = 63;
933     for (int i = 0; i < count; ++i)
934       {
935         levelCopy[i] = levels[levelOffset + i];
936         max = Math.max(levelCopy[i], max);
937         if (levelCopy[i] % 2 != 0)
938           lowestOdd = Math.min(lowestOdd, levelCopy[i]);
939       }
940 
941     // Reverse the runs starting with the deepest.
942     for (int depth = max; depth >= lowestOdd; --depth)
943       {
944         int start = 0;
945         while (start < count)
946           {
947             // Find the start of a run >= DEPTH.
948             while (start < count && levelCopy[start] < depth)
949               ++start;
950             if (start == count)
951               break;
952             // Find the end of the run.
953             int end = start + 1;
954             while (end < count && levelCopy[end] >= depth)
955               ++end;
956 
957             // Reverse this run.
958             for (int i = 0; i < (end - start) / 2; ++i)
959               {
960                 byte tmpb = levelCopy[end - i - 1];
961                 levelCopy[end - i - 1] = levelCopy[start + i];
962                 levelCopy[start + i] = tmpb;
963                 Object tmpo = objs[objOffset + end - i - 1];
964                 objs[objOffset + end - i - 1] = objs[objOffset + start + i];
965                 objs[objOffset + start + i] = tmpo;
966               }
967 
968             // Handle the next run.
969             start = end + 1;
970           }
971       }
972   }
973 
974   /**
975    * Returns false if all characters in the text between start and end
976    * are all left-to-right text. This implementation is just calls
977    * <code>Character.getDirectionality(char)</code> on all characters
978    * and makes sure all characters are either explicitly left-to-right
979    * or neutral in directionality (character types L, EN, ES, ET, AN,
980    * CS, S and WS).
981    */
requiresBidi(char[] text, int start, int end)982   public static boolean requiresBidi(char[] text, int start, int end)
983   {
984     for (int i = start; i < end; i++)
985       {
986         byte dir = Character.getDirectionality(text[i]);
987         if (dir != Character.DIRECTIONALITY_LEFT_TO_RIGHT
988             && dir != Character.DIRECTIONALITY_EUROPEAN_NUMBER
989             && dir != Character.DIRECTIONALITY_EUROPEAN_NUMBER_SEPARATOR
990             && dir != Character.DIRECTIONALITY_EUROPEAN_NUMBER_TERMINATOR
991             && dir != Character.DIRECTIONALITY_ARABIC_NUMBER
992             && dir != Character.DIRECTIONALITY_COMMON_NUMBER_SEPARATOR
993             && dir != Character.DIRECTIONALITY_SEGMENT_SEPARATOR
994             && dir != Character.DIRECTIONALITY_WHITESPACE
995             && dir != Character.DIRECTIONALITY_PARAGRAPH_SEPARATOR)
996           return true;
997       }
998 
999     return false;
1000   }
1001 }
1002