1 /* 2 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 3 * 4 * This code is free software; you can redistribute it and/or modify it 5 * under the terms of the GNU General Public License version 2 only, as 6 * published by the Free Software Foundation. Oracle designates this 7 * particular file as subject to the "Classpath" exception as provided 8 * by Oracle in the LICENSE file that accompanied this code. 9 * 10 * This code is distributed in the hope that it will be useful, but WITHOUT 11 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 12 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 13 * version 2 for more details (a copy is included in the LICENSE file that 14 * accompanied this code). 15 * 16 * You should have received a copy of the GNU General Public License version 17 * 2 along with this work; if not, write to the Free Software Foundation, 18 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 19 * 20 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA 21 * or visit www.oracle.com if you need additional information or have any 22 * questions. 23 * 24 */ 25 26 /* 27 ******************************************************************************* 28 * 29 * Copyright (C) 1999-2003, International Business Machines 30 * Corporation and others. All Rights Reserved. 31 * 32 ******************************************************************************* 33 */ 34 35 package sun.font; 36 37 /** 38 * {@code ScriptRun} is used to find runs of characters in 39 * the same script, as defined in the {@code Script} class. 40 * It implements a simple iterator over an array of characters. 41 * The iterator will assign {@code COMMON} and {@code INHERITED} 42 * characters to the same script as the preceding characters. If the 43 * COMMON and INHERITED characters are first, they will be assigned to 44 * the same script as the following characters. 45 * 46 * The iterator will try to match paired punctuation. If it sees an 47 * opening punctuation character, it will remember the script that 48 * was assigned to that character, and assign the same script to the 49 * matching closing punctuation. 50 * 51 * No attempt is made to combine related scripts into a single run. In 52 * particular, Hiragana, Katakana, and Han characters will appear in seperate 53 * runs. 54 55 * Here is an example of how to iterate over script runs: 56 * <pre> 57 * void printScriptRuns(char[] text) 58 * { 59 * ScriptRun scriptRun = new ScriptRun(text, 0, text.length); 60 * 61 * while (scriptRun.next()) { 62 * int start = scriptRun.getScriptStart(); 63 * int limit = scriptRun.getScriptLimit(); 64 * int script = scriptRun.getScriptCode(); 65 * 66 * System.out.println("Script \"" + Script.getName(script) + "\" from " + 67 * start + " to " + limit + "."); 68 * } 69 * } 70 * </pre> 71 * 72 */ 73 public final class ScriptRun 74 { 75 private char[] text; // fixed once set by constructor 76 private int textStart; 77 private int textLimit; 78 79 private int scriptStart; // change during iteration 80 private int scriptLimit; 81 private int scriptCode; 82 83 private int[] stack; // stack used to handle paired punctuation if encountered 84 private int parenSP; 85 ScriptRun()86 public ScriptRun() { 87 // must call init later or we die. 88 } 89 90 /** 91 * Construct a {@code ScriptRun} object which iterates over a subrange 92 * of the given characetrs. 93 * 94 * @param chars the array of characters over which to iterate. 95 * @param start the index of the first character over which to iterate 96 * @param count the number of characters over which to iterate 97 */ ScriptRun(char[] chars, int start, int count)98 public ScriptRun(char[] chars, int start, int count) 99 { 100 init(chars, start, count); 101 } 102 init(char[] chars, int start, int count)103 public void init(char[] chars, int start, int count) 104 { 105 if (chars == null || start < 0 || count < 0 || count > chars.length - start) { 106 throw new IllegalArgumentException(); 107 } 108 109 text = chars; 110 textStart = start; 111 textLimit = start + count; 112 113 scriptStart = textStart; 114 scriptLimit = textStart; 115 scriptCode = Script.INVALID_CODE; 116 parenSP = 0; 117 } 118 119 /** 120 * Get the starting index of the current script run. 121 * 122 * @return the index of the first character in the current script run. 123 */ getScriptStart()124 public int getScriptStart() { 125 return scriptStart; 126 } 127 128 /** 129 * Get the index of the first character after the current script run. 130 * 131 * @return the index of the first character after the current script run. 132 */ getScriptLimit()133 public int getScriptLimit() { 134 return scriptLimit; 135 } 136 137 /** 138 * Get the script code for the script of the current script run. 139 * 140 * @return the script code for the script of the current script run. 141 * @see Script 142 */ getScriptCode()143 public int getScriptCode() { 144 return scriptCode; 145 } 146 147 /** 148 * Find the next script run. Returns {@code false} if there 149 * isn't another run, returns {@code true} if there is. 150 * 151 * @return {@code false} if there isn't another run, {@code true} if there is. 152 */ next()153 public boolean next() { 154 int startSP = parenSP; // used to find the first new open character 155 156 // if we've fallen off the end of the text, we're done 157 if (scriptLimit >= textLimit) { 158 return false; 159 } 160 161 scriptCode = Script.COMMON; 162 scriptStart = scriptLimit; 163 164 int ch; 165 166 while ((ch = nextCodePoint()) != DONE) { 167 int sc = ScriptRunData.getScript(ch); 168 int pairIndex = sc == Script.COMMON ? getPairIndex(ch) : -1; 169 170 // Paired character handling: 171 // 172 // if it's an open character, push it onto the stack. 173 // if it's a close character, find the matching open on the 174 // stack, and use that script code. Any non-matching open 175 // characters above it on the stack will be popped. 176 if (pairIndex >= 0) { 177 if ((pairIndex & 1) == 0) { 178 if (stack == null) { 179 stack = new int[32]; 180 } else if (parenSP == stack.length) { 181 int[] newstack = new int[stack.length + 32]; 182 System.arraycopy(stack, 0, newstack, 0, stack.length); 183 stack = newstack; 184 } 185 186 stack[parenSP++] = pairIndex; 187 stack[parenSP++] = scriptCode; 188 } else if (parenSP > 0) { 189 int pi = pairIndex & ~1; 190 191 while ((parenSP -= 2) >= 0 && stack[parenSP] != pi); 192 193 if (parenSP >= 0) { 194 sc = stack[parenSP+1]; 195 } else { 196 parenSP = 0; 197 } 198 if (parenSP < startSP) { 199 startSP = parenSP; 200 } 201 } 202 } 203 204 if (sameScript(scriptCode, sc)) { 205 if (scriptCode <= Script.INHERITED && sc > Script.INHERITED) { 206 scriptCode = sc; 207 208 // now that we have a final script code, fix any open 209 // characters we pushed before we knew the script code. 210 while (startSP < parenSP) { 211 stack[startSP+1] = scriptCode; 212 startSP += 2; 213 } 214 } 215 216 // if this character is a close paired character, 217 // pop it from the stack 218 if (pairIndex > 0 && (pairIndex & 1) != 0 && parenSP > 0) { 219 parenSP -= 2; 220 } 221 } else { 222 // We've just seen the first character of 223 // the next run. Back over it so we'll see 224 // it again the next time. 225 pushback(ch); 226 227 // we're outta here 228 break; 229 } 230 } 231 232 return true; 233 } 234 235 static final int SURROGATE_START = 0x10000; 236 static final int LEAD_START = 0xd800; 237 static final int LEAD_LIMIT = 0xdc00; 238 static final int TAIL_START = 0xdc00; 239 static final int TAIL_LIMIT = 0xe000; 240 static final int LEAD_SURROGATE_SHIFT = 10; 241 static final int SURROGATE_OFFSET = SURROGATE_START - (LEAD_START << LEAD_SURROGATE_SHIFT) - TAIL_START; 242 243 static final int DONE = -1; 244 nextCodePoint()245 private int nextCodePoint() { 246 if (scriptLimit >= textLimit) { 247 return DONE; 248 } 249 int ch = text[scriptLimit++]; 250 if (ch >= LEAD_START && ch < LEAD_LIMIT && scriptLimit < textLimit) { 251 int nch = text[scriptLimit]; 252 if (nch >= TAIL_START && nch < TAIL_LIMIT) { 253 ++scriptLimit; 254 ch = (ch << LEAD_SURROGATE_SHIFT) + nch + SURROGATE_OFFSET; 255 } 256 } 257 return ch; 258 } 259 pushback(int ch)260 private void pushback(int ch) { 261 if (ch >= 0) { 262 if (ch >= 0x10000) { 263 scriptLimit -= 2; 264 } else { 265 scriptLimit -= 1; 266 } 267 } 268 } 269 270 /** 271 * Compare two script codes to see if they are in the same script. If one script is 272 * a strong script, and the other is INHERITED or COMMON, it will compare equal. 273 * 274 * @param scriptOne one of the script codes. 275 * @param scriptTwo the other script code. 276 * @return {@code true} if the two scripts are the same. 277 * @see Script 278 */ sameScript(int scriptOne, int scriptTwo)279 private static boolean sameScript(int scriptOne, int scriptTwo) { 280 return scriptOne == scriptTwo || scriptOne <= Script.INHERITED || scriptTwo <= Script.INHERITED; 281 } 282 283 /** 284 * Find the highest bit that's set in a word. Uses a binary search through 285 * the bits. 286 * 287 * @param n the word in which to find the highest bit that's set. 288 * @return the bit number (counting from the low order bit) of the highest bit. 289 */ highBit(int n)290 private static byte highBit(int n) 291 { 292 if (n <= 0) { 293 return -32; 294 } 295 296 byte bit = 0; 297 298 if (n >= 1 << 16) { 299 n >>= 16; 300 bit += 16; 301 } 302 303 if (n >= 1 << 8) { 304 n >>= 8; 305 bit += 8; 306 } 307 308 if (n >= 1 << 4) { 309 n >>= 4; 310 bit += 4; 311 } 312 313 if (n >= 1 << 2) { 314 n >>= 2; 315 bit += 2; 316 } 317 318 if (n >= 1 << 1) { 319 n >>= 1; 320 bit += 1; 321 } 322 323 return bit; 324 } 325 326 /** 327 * Search the pairedChars array for the given character. 328 * 329 * @param ch the character for which to search. 330 * @return the index of the character in the table, or -1 if it's not there. 331 */ getPairIndex(int ch)332 private static int getPairIndex(int ch) 333 { 334 int probe = pairedCharPower; 335 int index = 0; 336 337 if (ch >= pairedChars[pairedCharExtra]) { 338 index = pairedCharExtra; 339 } 340 341 while (probe > (1 << 0)) { 342 probe >>= 1; 343 344 if (ch >= pairedChars[index + probe]) { 345 index += probe; 346 } 347 } 348 349 if (pairedChars[index] != ch) { 350 index = -1; 351 } 352 353 return index; 354 } 355 356 // all common 357 private static int[] pairedChars = { 358 0x0028, 0x0029, // ascii paired punctuation // common 359 0x003c, 0x003e, // common 360 0x005b, 0x005d, // common 361 0x007b, 0x007d, // common 362 0x00ab, 0x00bb, // guillemets // common 363 0x2018, 0x2019, // general punctuation // common 364 0x201c, 0x201d, // common 365 0x2039, 0x203a, // common 366 0x3008, 0x3009, // chinese paired punctuation // common 367 0x300a, 0x300b, 368 0x300c, 0x300d, 369 0x300e, 0x300f, 370 0x3010, 0x3011, 371 0x3014, 0x3015, 372 0x3016, 0x3017, 373 0x3018, 0x3019, 374 0x301a, 0x301b 375 }; 376 377 private static final int pairedCharPower = 1 << highBit(pairedChars.length); 378 private static final int pairedCharExtra = pairedChars.length - pairedCharPower; 379 380 } 381