/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

/* $Id$ */

package org.apache.fop.complexscripts.bidi;

import java.io.BufferedReader;
import java.io.FileWriter;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.PrintWriter;
import java.net.URL;
import java.util.Arrays;
import java.util.Iterator;
import java.util.SortedSet;
import java.util.TreeSet;

import org.apache.fop.util.License;

// CSOFF: LineLength

/**
 * <p>Utility for generating a Java class representing bidirectional
 * class properties from the Unicode property files.</p>
 *
 * <p>The generator keeps three lookup structures, which are also emitted into
 * the generated class: a direct table for 0x0000 - 0x00FF, a direct table for
 * 0x0590 - 0x06FF, and a sorted list of intervals (searched by binary search)
 * for every other code point. Code points found in none of them are reported
 * as {@code BidiConstants.L} by the interval search.</p>
 *
 * <p>This code is derived in part from GenerateLineBreakUtils.java.</p>
 *
 * <p>This work was originally authored by Glenn Adams (gadams@apache.org).</p>
 */
public final class GenerateBidiClass {

    /** Last code point served by the {@link #bcL1} table. */
    private static final int LATIN_END = 0x00FF;
    /** First code point served by the {@link #bcR1} table. */
    private static final int RTL_START = 0x0590;
    /** Last code point served by the {@link #bcR1} table. */
    private static final int RTL_END = 0x06FF;
    /** Largest valid Unicode scalar value. */
    private static final int MAX_CODE_POINT = 0x10FFFF;
    /** Once a generated data line grows beyond this many characters it is flushed. */
    private static final int MAX_DATA_LINE_LENGTH = 120;

    private static byte[] bcL1 = new byte[LATIN_END + 1];               // ascii and basic latin blocks ( 0x0000 - 0x00FF )
    private static byte[] bcR1 = new byte[RTL_END - RTL_START + 1];     // hebrew and arabic blocks ( 0x0590 - 0x06FF )
    private static int[] bcS1;                                          // interval start indices
    private static int[] bcE1;                                          // interval end indices
    private static byte[] bcC1;                                         // interval bidi classes

    private GenerateBidiClass() {
    }

    /**
     * Generate a class managing bidi class properties for Unicode characters.
     *
     * @param bidiFileName name (as URL) of file containing bidi type data
     * @param outFileName name of the output file
     * @throws Exception if the data cannot be read or parsed, fails the built-in
     *         self test, or the output file cannot be written completely
     */
    private static void convertBidiClassProperties(String bidiFileName, String outFileName) throws Exception {

        // read (and self-test) first, so a bad input never truncates an existing output file
        readBidiClassProperties(bidiFileName);

        // generate class
        PrintWriter out = new PrintWriter(new FileWriter(outFileName));
        try {
            License.writeJavaLicenseId(out);
            out.println();
            out.println("package org.apache.fop.complexscripts.bidi;");
            out.println();
            out.println("import java.util.Arrays;");
            out.println("import org.apache.fop.complexscripts.bidi.BidiConstants;");
            out.println();
            out.println("// CSOFF: WhitespaceAfterCheck");
            out.println("// CSOFF: LineLengthCheck");
            out.println();
            out.println("/*");
            out.println(" * !!! THIS IS A GENERATED FILE !!!");
            out.println(" * If updates to the source are needed, then:");
            out.println(" * - apply the necessary modifications to");
            out.println(" * 'src/codegen/unicode/java/org/apache/fop/complexscripts/bidi/GenerateBidiClass.java'");
            out.println(" * - run 'ant codegen-unicode', which will generate a new BidiClass.java");
            out.println(" * in 'src/java/org/apache/fop/complexscripts/bidi'");
            out.println(" * - commit BOTH changed files");
            out.println(" */");
            out.println();
            out.println("/** Bidirectional class utilities. */");
            out.println("public final class BidiClass {");
            out.println();
            out.println("private BidiClass() {");
            out.println("}");
            out.println();
            dumpData(out);
            out.println("/**");
            out.println(" * Lookup bidi class for character expressed as unicode scalar value.");
            out.println(" * @param ch a unicode scalar value");
            out.println(" * @return bidi class");
            out.println(" */");
            out.println("public static int getBidiClass ( int ch ) {");
            out.println(" if ( ch <= 0x00FF ) {");
            out.println(" return bcL1 [ ch - 0x0000 ];");
            out.println(" } else if ( ( ch >= 0x0590 ) && ( ch <= 0x06FF ) ) {");
            out.println(" return bcR1 [ ch - 0x0590 ];");
            out.println(" } else {");
            out.println(" return getBidiClass ( ch, bcS1, bcE1, bcC1 );");
            out.println(" }");
            out.println("}");
            out.println();
            out.println("private static int getBidiClass ( int ch, int[] sa, int[] ea, byte[] ca ) {");
            out.println(" int k = Arrays.binarySearch ( sa, ch );");
            out.println(" if ( k >= 0 ) {");
            out.println(" return ca [ k ];");
            out.println(" } else {");
            out.println(" k = - ( k + 1 );");
            out.println(" if ( k == 0 ) {");
            out.println(" return BidiConstants.L;");
            out.println(" } else if ( ch <= ea [ k - 1 ] ) {");
            out.println(" return ca [ k - 1 ];");
            out.println(" } else {");
            out.println(" return BidiConstants.L;");
            out.println(" }");
            out.println(" }");
            out.println("}");
            out.println();
            out.println("}");
            // PrintWriter never throws on I/O failure; checkError() flushes and reports it
            if (out.checkError()) {
                throw new IOException("failed to write output file: " + outFileName);
            }
        } finally {
            out.close();
        }
    }

    /**
     * Read bidi class property data, populate the lookup tables and run the
     * built-in self test against them.
     *
     * <p>Each data line has the form {@code start[..end] ; class [# comment]}.
     * Blank lines and lines holding only a comment are ignored.</p>
     *
     * @param bidiFileName name (as URL) of bidi type data
     * @throws Exception if the data cannot be read, is malformed, assigns a
     *         code point twice, or fails the self test
     */
    private static void readBidiClassProperties(String bidiFileName) throws Exception {
        // start from clean tables: a value of 0 marks "not yet assigned"
        Arrays.fill(bcL1, (byte) 0);
        Arrays.fill(bcR1, (byte) 0);
        SortedSet<Interval> intervals = new TreeSet<Interval>();

        // read property data
        BufferedReader b = new BufferedReader(
                new InputStreamReader(new URL(bidiFileName).openStream(), "UTF-8"));
        try {
            String line;
            int lineNumber = 0;
            while ((line = b.readLine()) != null) {
                lineNumber++;
                int hash = line.indexOf('#');
                if (hash >= 0) {
                    line = line.substring(0, hash);     // drop trailing (or whole-line) comment
                }
                if (line.trim().length() == 0) {
                    continue;
                }
                String[] fa = line.split(";");
                if (fa.length != 2) {
                    throw new IllegalArgumentException("bad syntax, line(" + lineNumber + "): " + line);
                }
                int[] interval = parseInterval(fa[0].trim());
                byte bidiClass = (byte) parseBidiClass(fa[1].trim());
                recordRange(intervals, interval[0], interval[1], bidiClass);
            }
        } finally {
            b.close();
        }

        // compile interval search data
        int ivIndex = 0;
        int niv = intervals.size();
        bcS1 = new int [ niv ];
        bcE1 = new int [ niv ];
        bcC1 = new byte [ niv ];
        for (Iterator<Interval> it = intervals.iterator(); it.hasNext(); ivIndex++) {
            Interval iv = it.next();
            bcS1[ivIndex] = iv.start;
            bcE1[ivIndex] = iv.end;
            bcC1[ivIndex] = (byte) iv.bidiClass;
        }

        // test data
        test();
    }

    /**
     * Record the bidi class of an inclusive code point range, splitting the
     * range wherever it crosses from one lookup structure into another so that
     * every code point ends up in the structure the lookup consults for it.
     *
     * @param intervals interval set receiving the parts outside both tables
     * @param start first code point of the range
     * @param end last code point of the range (inclusive)
     * @param bidiClass bidi class to assign
     */
    private static void recordRange(SortedSet<Interval> intervals, int start, int end, byte bidiClass) {
        int segStart = start;
        int segEnd;
        do {
            if (segStart <= LATIN_END) {
                segEnd = Math.min(end, LATIN_END);
                fillTable(bcL1, 0, segStart, segEnd, bidiClass);
            } else if (segStart < RTL_START) {
                segEnd = Math.min(end, RTL_START - 1);
                addInterval(intervals, segStart, segEnd, bidiClass);
            } else if (segStart <= RTL_END) {
                segEnd = Math.min(end, RTL_END);
                fillTable(bcR1, RTL_START, segStart, segEnd, bidiClass);
            } else {
                segEnd = end;
                addInterval(intervals, segStart, segEnd, bidiClass);
            }
            segStart = segEnd + 1;
        } while (segEnd < end);
    }

    /**
     * Assign a bidi class to a run of entries in a direct lookup table.
     *
     * @param table table to update
     * @param base code point stored at index 0 of the table
     * @param first first code point to assign
     * @param last last code point to assign (inclusive)
     * @param bidiClass bidi class to assign
     * @throws IllegalStateException if one of the entries is already assigned
     */
    private static void fillTable(byte[] table, int base, int first, int last, byte bidiClass) {
        for (int c = first; c <= last; c++) {
            if (table [ c - base ] != 0) {
                throw new IllegalStateException("duplicate singleton entry: " + c);
            }
            table [ c - base ] = bidiClass;
        }
    }

    /**
     * Parse a code point or code point range written in hexadecimal, i.e.
     * {@code XXXX} or {@code XXXX..YYYY}.
     *
     * @param interval text of the first data field, already trimmed
     * @return a two element array holding the start and the (inclusive) end
     * @throws Exception if the text is not a valid interval
     */
    private static int[] parseInterval(String interval) throws Exception {
        int s;
        int e;
        String[] fa = interval.split("\\.\\.");
        if (fa.length == 1) {
            s = Integer.parseInt(fa[0], 16);
            e = s;
        } else if (fa.length == 2) {
            s = Integer.parseInt(fa[0], 16);
            e = Integer.parseInt(fa[1], 16);
        } else {
            throw new IllegalArgumentException("bad interval syntax: " + interval);
        }
        if (e < s) {
            throw new IllegalArgumentException("bad interval, start must be less than or equal to end: " + interval);
        }
        // guards the table indexing done by the caller against negative or absurd values
        if ((s < 0) || (e > MAX_CODE_POINT)) {
            throw new IllegalArgumentException("bad interval, code points must be within 0..10FFFF: " + interval);
        }
        return new int[] {s, e};
    }

    /**
     * Map a bidi class name, as used in the Unicode data file, to the
     * corresponding {@code BidiConstants} value.
     *
     * @param bidiClass short bidi class name, e.g. {@code "AL"}
     * @return the matching {@code BidiConstants} value
     * @throws IllegalArgumentException if the name is not one of the known classes
     */
    private static int parseBidiClass(String bidiClass) {
        if ("L".equals(bidiClass)) {
            return BidiConstants.L;
        } else if ("LRE".equals(bidiClass)) {
            return BidiConstants.LRE;
        } else if ("LRO".equals(bidiClass)) {
            return BidiConstants.LRO;
        } else if ("R".equals(bidiClass)) {
            return BidiConstants.R;
        } else if ("AL".equals(bidiClass)) {
            return BidiConstants.AL;
        } else if ("RLE".equals(bidiClass)) {
            return BidiConstants.RLE;
        } else if ("RLO".equals(bidiClass)) {
            return BidiConstants.RLO;
        } else if ("PDF".equals(bidiClass)) {
            return BidiConstants.PDF;
        } else if ("EN".equals(bidiClass)) {
            return BidiConstants.EN;
        } else if ("ES".equals(bidiClass)) {
            return BidiConstants.ES;
        } else if ("ET".equals(bidiClass)) {
            return BidiConstants.ET;
        } else if ("AN".equals(bidiClass)) {
            return BidiConstants.AN;
        } else if ("CS".equals(bidiClass)) {
            return BidiConstants.CS;
        } else if ("NSM".equals(bidiClass)) {
            return BidiConstants.NSM;
        } else if ("BN".equals(bidiClass)) {
            return BidiConstants.BN;
        } else if ("B".equals(bidiClass)) {
            return BidiConstants.B;
        } else if ("S".equals(bidiClass)) {
            return BidiConstants.S;
        } else if ("WS".equals(bidiClass)) {
            return BidiConstants.WS;
        } else if ("ON".equals(bidiClass)) {
            return BidiConstants.ON;
        } else {
            throw new IllegalArgumentException("unknown bidi class: " + bidiClass);
        }
    }

    /**
     * Add an interval to the interval set.
     *
     * @param intervals set to add to
     * @param start first code point of the interval
     * @param end last code point of the interval (inclusive)
     * @param bidiClass bidi class of the interval
     * @throws IllegalStateException if an interval with the same bounds is
     *         already present (the set would otherwise drop it silently)
     */
    private static void addInterval(SortedSet<Interval> intervals, int start, int end, int bidiClass) {
        if (!intervals.add(new Interval(start, end, bidiClass))) {
            throw new IllegalStateException("duplicate interval entry: 0x" + Integer.toHexString(start)
                                            + "..0x" + Integer.toHexString(end));
        }
    }

    /**
     * Write the five lookup arrays as Java array initializers.
     *
     * @param out writer receiving the generated source
     */
    private static void dumpData(PrintWriter out) {
        dumpArray(out, "private static byte[] bcL1 = {", bcL1);
        dumpArray(out, "private static byte[] bcR1 = {", bcR1);
        dumpArray(out, "private static int[] bcS1 = {", bcS1);
        dumpArray(out, "private static int[] bcE1 = {", bcE1);
        dumpArray(out, "private static byte[] bcC1 = {", bcC1);
    }

    /**
     * Write a byte array initializer; values are printed as signed decimals.
     *
     * @param out writer receiving the generated source
     * @param declaration opening line of the initializer, up to and including the brace
     * @param values values to write
     */
    private static void dumpArray(PrintWriter out, String declaration, byte[] values) {
        int[] widened = new int [ values.length ];
        for (int i = 0; i < values.length; i++) {
            widened[i] = values[i];
        }
        dumpArray(out, declaration, widened);
    }

    /**
     * Write an int array initializer followed by a blank line. A data line is
     * flushed, with a trailing comma, as soon as it exceeds
     * {@link #MAX_DATA_LINE_LENGTH} characters; if that happens on the last
     * value the initializer ends with a trailing comma, which Java permits.
     *
     * @param out writer receiving the generated source
     * @param declaration opening line of the initializer, up to and including the brace
     * @param values values to write
     */
    private static void dumpArray(PrintWriter out, String declaration, int[] values) {
        StringBuilder sb = new StringBuilder();
        out.println(declaration);
        for (int i = 0; i < values.length; i++) {
            if (sb.length() > 0) {
                sb.append(",");
            }
            sb.append(values[i]);
            if (sb.length() > MAX_DATA_LINE_LENGTH) {
                sb.append(',');
                out.println(sb);
                sb.setLength(0);
            }
        }
        if (sb.length() > 0) {
            out.println(sb);
        }
        out.println("};");
        out.println();
    }

    /**
     * Lookup the bidi class of a character using the tables built by
     * {@link #readBidiClassProperties(String)}. This mirrors the lookup code
     * written into the generated class and is used by the self test.
     *
     * @param ch a unicode scalar value
     * @return bidi class
     */
    private static int getBidiClass(int ch) {
        if (ch <= LATIN_END) {
            return bcL1 [ ch - 0x0000 ];
        } else if ((ch >= RTL_START) && (ch <= RTL_END)) {
            return bcR1 [ ch - RTL_START ];
        } else {
            return getBidiClass(ch, bcS1, bcE1, bcC1);
        }
    }

    /**
     * Lookup the bidi class of a character in sorted interval data.
     *
     * @param ch a unicode scalar value
     * @param sa interval starts, in ascending order
     * @param ea interval ends (inclusive), parallel to {@code sa}
     * @param ca interval bidi classes, parallel to {@code sa}
     * @return bidi class of the interval containing {@code ch}, or
     *         {@code BidiConstants.L} if no interval contains it
     */
    private static int getBidiClass(int ch, int[] sa, int[] ea, byte[] ca) {
        int k = Arrays.binarySearch(sa, ch);
        if (k >= 0) {
            // ch is exactly the start of interval k
            return ca [ k ];
        }
        // otherwise k encodes the insertion point; the candidate is the interval just before it
        k = -(k + 1);
        if (k == 0) {
            return BidiConstants.L;
        } else if (ch <= ea [ k - 1 ]) {
            return ca [ k - 1 ];
        } else {
            return BidiConstants.L;
        }
    }

    /** Self test data: pairs of (code point, expected bidi class). */
    private static final int[] TEST_DATA =
    {
        0x000000, BidiConstants.BN,
        0x000009, BidiConstants.S,
        0x00000A, BidiConstants.B,
        0x00000C, BidiConstants.WS,
        0x000020, BidiConstants.WS,
        0x000023, BidiConstants.ET,
        0x000028, BidiConstants.ON,
        0x00002B, BidiConstants.ES,
        0x00002C, BidiConstants.CS,
        0x000031, BidiConstants.EN,
        0x00003A, BidiConstants.CS,
        0x000041, BidiConstants.L,
        0x000300, BidiConstants.NSM,
        0x000374, BidiConstants.ON,
        0x0005BE, BidiConstants.R,
        0x000601, BidiConstants.AN,
        0x000608, BidiConstants.AL,
        0x000670, BidiConstants.NSM,
        0x000710, BidiConstants.AL,
        0x0007FA, BidiConstants.R,
        0x000970, BidiConstants.L,
        0x001392, BidiConstants.ON,
        0x002000, BidiConstants.WS,
        0x00200E, BidiConstants.L,
        0x00200F, BidiConstants.R,
        0x00202A, BidiConstants.LRE,
        0x00202B, BidiConstants.RLE,
        0x00202C, BidiConstants.PDF,
        0x00202D, BidiConstants.LRO,
        0x00202E, BidiConstants.RLO,
        0x0020E1, BidiConstants.NSM,
        0x002212, BidiConstants.ES,
        0x002070, BidiConstants.EN,
        0x003000, BidiConstants.WS,
        0x003009, BidiConstants.ON,
        0x00FBD4, BidiConstants.AL,
        0x00FE69, BidiConstants.ET,
        0x00FF0C, BidiConstants.CS,
        0x00FEFF, BidiConstants.BN,
        0x01034A, BidiConstants.L,
        0x010E60, BidiConstants.AN,
        0x01F100, BidiConstants.EN,
        0x0E0001, BidiConstants.BN,
        0x0E0100, BidiConstants.NSM,
        0x10FFFF, BidiConstants.BN
    };

    /**
     * Check the freshly built lookup tables against {@link #TEST_DATA}.
     *
     * @throws Exception if any test character maps to an unexpected bidi class
     */
    private static void test() throws Exception {
        for (int i = 0, n = TEST_DATA.length / 2; i < n; i++) {
            int ch = TEST_DATA [ i * 2 + 0 ];
            int tc = TEST_DATA [ i * 2 + 1 ];
            int bc = getBidiClass(ch);
            if (bc != tc) {
                throw new IllegalStateException("test mapping failed for character (0x" + Integer.toHexString(ch) + "): expected " + tc + ", got " + bc);
            }
        }
    }

    /**
     * Main entry point for generator.
     *
     * <p>Options are given as {@code -b <bidiFile>} and {@code -o <outputFile>}
     * pairs; any other option, or an option without a value, prints the usage
     * text instead of generating anything.</p>
     *
     * @param args array of command line arguments
     */
    public static void main(String[] args) {
        String bidiFileName = "http://www.unicode.org/Public/UNIDATA/extracted/DerivedBidiClass.txt";
        String outFileName = "BidiClass.java";
        boolean ok = true;
        for (int i = 0; i < args.length; i = i + 2) {
            if (i + 1 == args.length) {
                // option without a value
                ok = false;
            } else {
                String opt = args[i];
                if ("-b".equals(opt)) {
                    bidiFileName = args [i + 1];
                } else if ("-o".equals(opt)) {
                    outFileName = args [i + 1];
                } else {
                    ok = false;
                }
            }
        }
        if (!ok) {
            System.out.println("Usage: GenerateBidiClass [-b <bidiFile>] [-o <outputFile>]");
            System.out.println(" defaults:");
            System.out.println(" <bidiFile>: " + bidiFileName);
            System.out.println(" <outputFile>: " + outFileName);
        } else {
            try {
                convertBidiClassProperties(bidiFileName, outFileName);
                System.out.println("Generated " + outFileName + " from");
                System.out.println(" <bidiFile>: " + bidiFileName);
            } catch (Exception e) {
                System.out.println("An unexpected error occured");
                e.printStackTrace();
            }
        }
    }

    /**
     * An inclusive code point range with its bidi class. Intervals are ordered
     * by start, then by end; the bidi class takes no part in the ordering, so
     * a sorted set holds at most one interval per (start, end) pair.
     */
    private static class Interval implements Comparable<Interval> {
        final int start;
        final int end;
        final int bidiClass;

        Interval(int start, int end, int bidiClass) {
            this.start = start;
            this.end = end;
            this.bidiClass = bidiClass;
        }

        /**
         * Compare by start, then by end.
         *
         * @param iv the interval to compare with
         * @return a negative, zero or positive value as this interval sorts
         *         before, together with, or after {@code iv}
         */
        public int compareTo(Interval iv) {
            if (start < iv.start) {
                return -1;
            } else if (start > iv.start) {
                return 1;
            } else if (end < iv.end) {
                return -1;
            } else if (end > iv.end) {
                return 1;
            } else {
                return 0;
            }
        }
    }
}