1 /******************************************************************************* 2 * Copyright (c) 2010, 2011 IBM Corporation and others. 3 * 4 * This program and the accompanying materials 5 * are made available under the terms of the Eclipse Public License 2.0 6 * which accompanies this distribution, and is available at 7 * https://www.eclipse.org/legal/epl-2.0/ 8 * 9 * SPDX-License-Identifier: EPL-2.0 10 * 11 * Contributors: 12 * IBM Corporation - initial API and implementation 13 ******************************************************************************/ 14 package org.eclipse.equinox.bidi.internal; 15 16 import org.eclipse.equinox.bidi.advanced.*; 17 import org.eclipse.equinox.bidi.custom.*; 18 19 /** 20 * Implementation for IStructuredTextExpert. 21 */ 22 public class StructuredTextImpl implements IStructuredTextExpert { 23 24 static final String EMPTY_STRING = ""; //$NON-NLS-1$ 25 26 // In the following lines, B, L, R and AL represent bidi categories 27 // as defined in the Unicode Bidirectional Algorithm 28 // ( http://www.unicode.org/reports/tr9/ ). 29 // B represents the category Block Separator. 30 // L represents the category Left to Right character. 31 // R represents the category Right to Left character. 32 // AL represents the category Arabic Letter. 33 // AN represents the category Arabic Number. 34 // EN represents the category European Number. 35 static final byte B = Character.DIRECTIONALITY_PARAGRAPH_SEPARATOR; 36 static final byte L = Character.DIRECTIONALITY_LEFT_TO_RIGHT; 37 static final byte R = Character.DIRECTIONALITY_RIGHT_TO_LEFT; 38 static final byte AL = Character.DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC; 39 static final byte AN = Character.DIRECTIONALITY_ARABIC_NUMBER; 40 static final byte EN = Character.DIRECTIONALITY_EUROPEAN_NUMBER; 41 42 static final char LRM = 0x200E; 43 static final char RLM = 0x200F; 44 static final char LRE = 0x202A; 45 static final char RLE = 0x202B; 46 static final char PDF = 0x202C; 47 static final char[] MARKS = {LRM, RLM}; 48 static final char[] EMBEDS = {LRE, RLE}; 49 static final int PREFIX_LENGTH = 2; 50 static final int SUFFIX_LENGTH = 2; 51 static final int FIXES_LENGTH = PREFIX_LENGTH + SUFFIX_LENGTH; 52 static final int[] EMPTY_INT_ARRAY = new int[0]; 53 54 /** 55 * The structured text handler utilized by this expert. 56 */ 57 protected final StructuredTextTypeHandler handler; 58 /** 59 * The environment associated with the expert. 60 */ 61 protected final StructuredTextEnvironment environment; 62 /** 63 * Flag which is true if the expert is stateful. 64 */ 65 protected final boolean sharedExpert; 66 /** 67 * Last state value set by {@link #setState} or {@link #clearState}. 68 */ 69 protected Object state; 70 71 /** 72 * Constructor used in {@link StructuredTextExpertFactory}. 73 * 74 * @param structuredTextHandler the structured text handler used by this expert. 75 * @param environment the environment associated with this expert. 76 * @param shared flag which is true if the expert is stateful. 77 */ StructuredTextImpl(StructuredTextTypeHandler structuredTextHandler, StructuredTextEnvironment environment, boolean shared)78 public StructuredTextImpl(StructuredTextTypeHandler structuredTextHandler, StructuredTextEnvironment environment, boolean shared) { 79 this.handler = structuredTextHandler; 80 this.environment = environment; 81 sharedExpert = shared; 82 } 83 84 @Override getTypeHandler()85 public StructuredTextTypeHandler getTypeHandler() { 86 return handler; 87 } 88 89 @Override getEnvironment()90 public StructuredTextEnvironment getEnvironment() { 91 return environment; 92 } 93 94 @Override getTextDirection(String text)95 public int getTextDirection(String text) { 96 return handler.getDirection(this, text); 97 } 98 99 @Override clearState()100 public void clearState() { 101 if (sharedExpert) 102 state = null; 103 } 104 105 @Override setState(Object newState)106 public void setState(Object newState) { 107 if (sharedExpert) 108 state = newState; 109 } 110 111 @Override getState()112 public Object getState() { 113 return state; 114 } 115 computeNextLocation(String text, StructuredTextCharTypes charTypes, StructuredTextOffsets offsets, int[] locations, int curPos)116 long computeNextLocation(String text, StructuredTextCharTypes charTypes, StructuredTextOffsets offsets, int[] locations, int curPos) { 117 String separators = handler.getSeparators(this); 118 int separCount = separators.length(); 119 int specialsCount = handler.getSpecialsCount(this); 120 int len = text.length(); 121 int nextLocation = len; 122 int idxLocation = 0; 123 // Start with special sequences to give them precedence over simple 124 // separators. This may apply to cases like slash+asterisk versus slash. 125 for (int i = 0; i < specialsCount; i++) { 126 int location = locations[separCount + i]; 127 if (location < curPos) { 128 location = handler.indexOfSpecial(this, text, charTypes, offsets, i + 1, curPos); 129 if (location < 0) 130 location = len; 131 locations[separCount + i] = location; 132 } 133 if (location < nextLocation) { 134 nextLocation = location; 135 idxLocation = separCount + i; 136 } 137 } 138 for (int i = 0; i < separCount; i++) { 139 int location = locations[i]; 140 if (location < curPos) { 141 location = text.indexOf(separators.charAt(i), curPos); 142 if (location < 0) 143 location = len; 144 locations[i] = location; 145 } 146 if (location < nextLocation) { 147 nextLocation = location; 148 idxLocation = i; 149 } 150 } 151 return nextLocation + (((long) idxLocation) << 32); 152 } 153 154 /** 155 * @see StructuredTextTypeHandler#processSeparator StructuredTextTypeHandler.processSeparator 156 */ processSeparator(String text, StructuredTextCharTypes charTypes, StructuredTextOffsets offsets, int separLocation)157 static public void processSeparator(String text, StructuredTextCharTypes charTypes, StructuredTextOffsets offsets, int separLocation) { 158 int len = text.length(); 159 int direction = charTypes.getDirection(); 160 if (direction == DIR_RTL) { 161 // the structured text base direction is RTL 162 for (int i = separLocation - 1; i >= 0; i--) { 163 byte charType = charTypes.getBidiTypeAt(i); 164 if (charType == R || charType == AL) 165 return; 166 if (charType == L) { 167 for (int j = separLocation; j < len; j++) { 168 charType = charTypes.getBidiTypeAt(j); 169 if (charType == R || charType == AL) 170 return; 171 if (charType == L || charType == EN) { 172 offsets.insertOffset(charTypes, separLocation); 173 return; 174 } 175 } 176 return; 177 } 178 } 179 return; 180 } 181 182 // the structured text base direction is LTR 183 boolean doneAN = false; 184 for (int i = separLocation - 1; i >= 0; i--) { 185 byte charType = charTypes.getBidiTypeAt(i); 186 if (charType == L) 187 return; 188 if (charType == R || charType == AL) { 189 for (int j = separLocation; j < len; j++) { 190 charType = charTypes.getBidiTypeAt(j); 191 if (charType == L) 192 return; 193 if (charType == R || charType == EN || charType == AL || charType == AN) { 194 offsets.insertOffset(charTypes, separLocation); 195 return; 196 } 197 } 198 return; 199 } 200 if (charType == AN && !doneAN) { 201 for (int j = separLocation; j < len; j++) { 202 charType = charTypes.getBidiTypeAt(j); 203 if (charType == L) 204 return; 205 if (charType == AL || charType == AN || charType == R) { 206 offsets.insertOffset(charTypes, separLocation); 207 return; 208 } 209 } 210 doneAN = true; 211 } 212 } 213 } 214 215 /** 216 * When the orientation is <code>ORIENT_LTR</code> and the 217 * structured text has a RTL base direction, 218 * {@link IStructuredTextExpert#leanToFullText leanToFullText} 219 * adds RLE+RLM at the head of the <i>full</i> text and RLM+PDF at its 220 * end. 221 * <p> 222 * When the orientation is <code>ORIENT_RTL</code> and the 223 * structured text has a LTR base direction, 224 * {@link IStructuredTextExpert#leanToFullText leanToFullText} 225 * adds LRE+LRM at the head of the <i>full</i> text and LRM+PDF at its 226 * end. 227 * <p> 228 * When the orientation is <code>ORIENT_CONTEXTUAL_LTR</code> or 229 * <code>ORIENT_CONTEXTUAL_RTL</code> and the data content would resolve 230 * to a RTL orientation while the structured text has a LTR base 231 * direction, {@link IStructuredTextExpert#leanToFullText leanToFullText} 232 * adds LRM at the head of the <i>full</i> text. 233 * <p> 234 * When the orientation is <code>ORIENT_CONTEXTUAL_LTR</code> or 235 * <code>ORIENT_CONTEXTUAL_RTL</code> and the data content would resolve 236 * to a LTR orientation while the structured text has a RTL base 237 * direction, {@link IStructuredTextExpert#leanToFullText leanToFullText} 238 * adds RLM at the head of the <i>full</i> text. 239 * <p> 240 * When the orientation is <code>ORIENT_UNKNOWN</code> and the 241 * structured text has a LTR base direction, 242 * {@link IStructuredTextExpert#leanToFullText leanToFullText} 243 * adds LRE+LRM at the head of the <i>full</i> text and LRM+PDF at its 244 * end. 245 * <p> 246 * When the orientation is <code>ORIENT_UNKNOWN</code> and the 247 * structured text has a RTL base direction, 248 * {@link IStructuredTextExpert#leanToFullText leanToFullText} 249 * adds RLE+RLM at the head of the <i>full</i> text and RLM+PDF at its 250 * end. 251 * <p> 252 * When the orientation is <code>ORIENT_IGNORE</code>, 253 * {@link IStructuredTextExpert#leanToFullText leanToFullText} does not add any directional 254 * formatting characters as either prefix or suffix of the <i>full</i> text. 255 * <p> 256 */ 257 @Override leanToFullText(String text)258 public String leanToFullText(String text) { 259 int len = text.length(); 260 if (len == 0) 261 return text; 262 StructuredTextCharTypes charTypes = new StructuredTextCharTypes(this, text); 263 StructuredTextOffsets offsets = leanToFullCommon(text, charTypes); 264 int prefixLength = offsets.getPrefixLength(); 265 int direction = charTypes.getDirection(); 266 return insertMarks(text, offsets.getOffsets(), direction, prefixLength); 267 } 268 269 @Override leanToFullMap(String text)270 public int[] leanToFullMap(String text) { 271 int len = text.length(); 272 if (len == 0) 273 return EMPTY_INT_ARRAY; 274 StructuredTextCharTypes charTypes = new StructuredTextCharTypes(this, text); 275 StructuredTextOffsets offsets = leanToFullCommon(text, charTypes); 276 int prefixLength = offsets.getPrefixLength(); 277 int[] map = new int[len]; 278 int count = offsets.getCount(); // number of used entries 279 int added = prefixLength; 280 for (int pos = 0, i = 0; pos < len; pos++) { 281 if (i < count && pos == offsets.getOffset(i)) { 282 added++; 283 i++; 284 } 285 map[pos] = pos + added; 286 } 287 return map; 288 } 289 290 @Override leanBidiCharOffsets(String text)291 public int[] leanBidiCharOffsets(String text) { 292 int len = text.length(); 293 if (len == 0) 294 return EMPTY_INT_ARRAY; 295 StructuredTextCharTypes charTypes = new StructuredTextCharTypes(this, text); 296 StructuredTextOffsets offsets = leanToFullCommon(text, charTypes); 297 return offsets.getOffsets(); 298 } 299 leanToFullCommon(String text, StructuredTextCharTypes charTypes)300 private StructuredTextOffsets leanToFullCommon(String text, StructuredTextCharTypes charTypes) { 301 int len = text.length(); 302 int direction = handler.getDirection(this, text, charTypes); 303 StructuredTextOffsets offsets = new StructuredTextOffsets(); 304 if (!handler.skipProcessing(this, text, charTypes)) { 305 // initialize locations 306 int separCount = handler.getSeparators(this).length(); 307 int[] locations = new int[separCount + handler.getSpecialsCount(this)]; 308 for (int i = 0, k = locations.length; i < k; i++) { 309 locations[i] = -1; 310 } 311 // current position 312 int curPos = 0; 313 if (state != null) { 314 curPos = handler.processSpecial(this, text, charTypes, offsets, 0, -1); 315 } 316 while (true) { 317 // location of next token to handle 318 int nextLocation; 319 // index of next token to handle (if < separCount, this is a separator; otherwise a special case 320 int idxLocation; 321 long res = computeNextLocation(text, charTypes, offsets, locations, curPos); 322 nextLocation = (int) (res & 0x00000000FFFFFFFF); /* low word */ 323 if (nextLocation >= len) 324 break; 325 idxLocation = (int) (res >> 32); /* high word */ 326 if (idxLocation < separCount) { 327 processSeparator(text, charTypes, offsets, nextLocation); 328 curPos = nextLocation + 1; 329 } else { 330 idxLocation -= (separCount - 1); // because caseNumber starts from 1 331 curPos = handler.processSpecial(this, text, charTypes, offsets, idxLocation, nextLocation); 332 } 333 if (curPos >= len) 334 break; 335 } // end while 336 } // end if (!handler.skipProcessing()) 337 int prefixLength; 338 int orientation = environment.getOrientation(); 339 if (orientation == StructuredTextEnvironment.ORIENT_IGNORE) 340 prefixLength = 0; 341 else { 342 int resolvedOrientation = charTypes.resolveOrientation(); 343 if (orientation != StructuredTextEnvironment.ORIENT_UNKNOWN && resolvedOrientation == direction) 344 prefixLength = 0; 345 else if ((orientation & StructuredTextEnvironment.ORIENT_CONTEXTUAL) != 0) 346 prefixLength = 1; 347 else 348 prefixLength = 2; 349 } 350 offsets.setPrefixLength(prefixLength); 351 return offsets; 352 } 353 354 @Override fullToLeanText(String full)355 public String fullToLeanText(String full) { 356 if (full.length() == 0) 357 return full; 358 int dir = handler.getDirection(this, full); 359 char curMark = MARKS[dir]; 360 char curEmbed = EMBEDS[dir]; 361 int i; // used as loop index 362 // remove any prefix and leading mark 363 int lenFull = full.length(); 364 for (i = 0; i < lenFull; i++) { 365 char c = full.charAt(i); 366 if (c != curEmbed && c != curMark) 367 break; 368 } 369 if (i > 0) { // found at least one prefix or leading mark 370 full = full.substring(i); 371 lenFull = full.length(); 372 } 373 // remove any suffix and trailing mark 374 for (i = lenFull - 1; i >= 0; i--) { 375 char c = full.charAt(i); 376 if (c != PDF && c != curMark) 377 break; 378 } 379 if (i < 0) // only suffix and trailing marks, no real data 380 return EMPTY_STRING; 381 if (i < (lenFull - 1)) { // found at least one suffix or trailing mark 382 full = full.substring(0, i + 1); 383 lenFull = full.length(); 384 } 385 char[] chars = full.toCharArray(); 386 // remove marks from chars 387 int cnt = 0; 388 for (i = 0; i < lenFull; i++) { 389 char c = chars[i]; 390 if (c == curMark) 391 cnt++; 392 else if (cnt > 0) 393 chars[i - cnt] = c; 394 } 395 String lean = new String(chars, 0, lenFull - cnt); 396 String full2 = leanToFullText(lean); 397 // strip prefix and suffix 398 int beginIndex = 0, endIndex = full2.length(); 399 if (full2.charAt(0) == curMark) 400 beginIndex = 1; 401 else { 402 if (full2.charAt(0) == curEmbed) { 403 beginIndex = 1; 404 if (full2.charAt(0) == curMark) 405 beginIndex = 2; 406 } 407 if (full2.charAt(endIndex - 1) == PDF) { 408 endIndex--; 409 if (full2.charAt(endIndex - 1) == curMark) 410 endIndex--; 411 } 412 } 413 if (beginIndex > 0 || endIndex < full2.length()) 414 full2 = full2.substring(beginIndex, endIndex); 415 if (full2.equals(full)) 416 return lean; 417 418 // There are some marks in full which are not in full2 and/or vice versa. 419 // We need to add to lean any mark appearing in full and not in full2. 420 // The completed lean can never be longer than full itself. 421 char[] newChars = new char[lenFull]; 422 char cFull, cFull2; 423 int idxFull, idxFull2, idxLean, newCharsPos; 424 int lenFull2 = full2.length(); 425 idxFull = idxFull2 = idxLean = newCharsPos = 0; 426 while (idxFull < lenFull && idxFull2 < lenFull2) { 427 cFull2 = full2.charAt(idxFull2); 428 cFull = full.charAt(idxFull); 429 if (cFull2 == cFull) { /* chars are equal, proceed */ 430 if (cFull2 != curMark) 431 newChars[newCharsPos++] = chars[idxLean++]; 432 idxFull++; 433 idxFull2++; 434 continue; 435 } 436 if (cFull2 == curMark) { /* extra Mark in full2 text */ 437 idxFull2++; 438 continue; 439 } 440 if (cFull == curMark) { /* extra Mark in source full text */ 441 idxFull++; 442 // idxFull-2 always >= 0 since leading Marks were removed from full 443 if (full.charAt(idxFull - 2) == curMark) 444 continue; // ignore successive Marks in full after the first one 445 newChars[newCharsPos++] = curMark; 446 continue; 447 } 448 // we should never get here (extra char which is not a Mark) 449 throw new IllegalStateException("Internal error: extra character not a Mark."); //$NON-NLS-1$ 450 } 451 if (idxFull < lenFull) /* full2 ended before full - this should never happen since 452 we removed all marks and PDFs at the end of full */ 453 throw new IllegalStateException("Internal error: unexpected EOL."); //$NON-NLS-1$ 454 455 lean = new String(newChars, 0, newCharsPos); 456 return lean; 457 } 458 459 @Override fullToLeanMap(String full)460 public int[] fullToLeanMap(String full) { 461 int lenFull = full.length(); 462 if (lenFull == 0) 463 return EMPTY_INT_ARRAY; 464 String lean = fullToLeanText(full); 465 int lenLean = lean.length(); 466 int dir = handler.getDirection(this, lean); 467 char curMark = MARKS[dir]; 468 char curEmbed = EMBEDS[dir]; 469 int[] map = new int[lenFull]; 470 int idxFull, idxLean; 471 // skip any prefix and leading mark 472 for (idxFull = 0; idxFull < lenFull; idxFull++) { 473 char c = full.charAt(idxFull); 474 if (c != curEmbed && c != curMark) 475 break; 476 map[idxFull] = -1; 477 } 478 // lean must be a subset of Full, so we only check on iLean < leanLen 479 for (idxLean = 0; idxLean < lenLean; idxFull++) { 480 if (full.charAt(idxFull) == lean.charAt(idxLean)) { 481 map[idxFull] = idxLean; 482 idxLean++; 483 } else 484 map[idxFull] = -1; 485 } 486 for (; idxFull < lenFull; idxFull++) 487 map[idxFull] = -1; 488 return map; 489 } 490 491 @Override fullBidiCharOffsets(String full)492 public int[] fullBidiCharOffsets(String full) { 493 int lenFull = full.length(); 494 if (lenFull == 0) 495 return EMPTY_INT_ARRAY; 496 String lean = fullToLeanText(full); 497 StructuredTextOffsets offsets = new StructuredTextOffsets(); 498 int lenLean = lean.length(); 499 int idxLean, idxFull; 500 // lean must be a subset of Full, so we only check on iLean < leanLen 501 for (idxLean = idxFull = 0; idxLean < lenLean; idxFull++) { 502 if (full.charAt(idxFull) == lean.charAt(idxLean)) 503 idxLean++; 504 else 505 offsets.insertOffset(null, idxFull); 506 } 507 for (; idxFull < lenFull; idxFull++) 508 offsets.insertOffset(null, idxFull); 509 return offsets.getOffsets(); 510 } 511 512 @Override insertMarks(String text, int[] offsets, int direction, int affixLength)513 public String insertMarks(String text, int[] offsets, int direction, int affixLength) { 514 if (direction != DIR_LTR && direction != DIR_RTL) 515 throw new IllegalArgumentException("Invalid direction"); //$NON-NLS-1$ 516 if (affixLength < 0 || affixLength > 2) 517 throw new IllegalArgumentException("Invalid affix length"); //$NON-NLS-1$ 518 int count = offsets == null ? 0 : offsets.length; 519 if (count == 0 && affixLength == 0) 520 return text; 521 int textLength = text.length(); 522 if (textLength == 0) 523 return text; 524 int newLen = textLength + count; 525 if (affixLength == 1) 526 newLen++; /* +1 for a mark char */ 527 else if (affixLength == 2) 528 newLen += FIXES_LENGTH; 529 char[] fullChars = new char[newLen]; 530 int added = affixLength; 531 // add marks at offsets 532 char curMark = MARKS[direction]; 533 for (int i = 0, j = 0; i < textLength; i++) { 534 char c = text.charAt(i); 535 if (j < count && i == offsets[j]) { 536 fullChars[i + added] = curMark; 537 added++; 538 j++; 539 } 540 fullChars[i + added] = c; 541 } 542 if (affixLength > 0) { /* add prefix/suffix ? */ 543 if (affixLength == 1) { /* contextual orientation */ 544 fullChars[0] = curMark; 545 } else { 546 // When the orientation is RTL, we need to add EMBED at the 547 // start of the text and PDF at its end. 548 // However, because of a bug in Windows' handling of LRE/RLE/PDF, 549 // we add LRM or RLM (according to the direction) after the 550 // LRE/RLE and again before the PDF. 551 char curEmbed = EMBEDS[direction]; 552 fullChars[0] = curEmbed; 553 fullChars[1] = curMark; 554 fullChars[newLen - 1] = PDF; 555 fullChars[newLen - 2] = curMark; 556 } 557 } 558 return new String(fullChars); 559 } 560 561 @Override toString()562 public String toString() { 563 return super.toString() + " [handler=" + handler.toString() + "]"; //$NON-NLS-1$ //$NON-NLS-2$ 564 } 565 } 566