1 /*
2  * Licensed to the Apache Software Foundation (ASF) under one or more
3  * contributor license agreements.  See the NOTICE file distributed with
4  * this work for additional information regarding copyright ownership.
5  * The ASF licenses this file to You under the Apache License, Version 2.0
6  * (the "License"); you may not use this file except in compliance with
7  * the License.  You may obtain a copy of the License at
8  *
9  *      http://www.apache.org/licenses/LICENSE-2.0
10  *
11  * Unless required by applicable law or agreed to in writing, software
12  * distributed under the License is distributed on an "AS IS" BASIS,
13  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14  * See the License for the specific language governing permissions and
15  * limitations under the License.
16  */
17 
18 /* $Id$ */
19 
20 package org.apache.fop.complexscripts.bidi;
21 
22 import java.io.BufferedReader;
23 import java.io.FileWriter;
24 import java.io.InputStreamReader;
25 import java.io.PrintWriter;
26 import java.net.URL;
27 import java.util.Arrays;
28 import java.util.Iterator;
29 import java.util.SortedSet;
30 import java.util.TreeSet;
31 
32 import org.apache.fop.util.License;
33 
34 // CSOFF: LineLength
35 
36 /**
37  * <p>Utility for generating a Java class representing bidirectional
38  * class properties from the Unicode property files.</p>
39  *
40  * <p>This code is derived in part from GenerateLineBreakUtils.java.</p>
41  *
42  * <p>This work was originally authored by Glenn Adams (gadams@apache.org).</p>
43  */
44 public final class GenerateBidiClass {
45 
    /** Prevents instantiation; every member of this class is static. */
    private GenerateBidiClass() {
    }
48 
49     private static byte[] bcL1 = new byte[256]; // ascii and basic latin blocks ( 0x0000 - 0x00FF )
50     private static byte[] bcR1 = new byte[368]; // hebrew and arabic blocks     ( 0x0590 - 0x06FF )
51     private static int[]  bcS1;                 // interval start indices
52     private static int[]  bcE1;                 // interval end indices
53     private static byte[] bcC1;                 // interval bid classes
54 
55     /**
56      * Generate a class managing bidi class properties for Unicode characters.
57      *
58      * @param bidiFileName name (as URL) of file containing bidi type data
59      * @param outFileName name of the output file
60      * @throws Exception
61      */
convertBidiClassProperties(String bidiFileName, String outFileName)62     private static void convertBidiClassProperties(String bidiFileName, String outFileName) throws Exception {
63 
64         readBidiClassProperties(bidiFileName);
65 
66         // generate class
67         PrintWriter out = new PrintWriter(new FileWriter(outFileName));
68         License.writeJavaLicenseId(out);
69         out.println();
70         out.println("package org.apache.fop.complexscripts.bidi;");
71         out.println();
72         out.println("import java.util.Arrays;");
73         out.println("import org.apache.fop.complexscripts.bidi.BidiConstants;");
74         out.println();
75         out.println("// CSOFF: WhitespaceAfterCheck");
76         out.println("// CSOFF: LineLengthCheck");
77         out.println();
78         out.println("/*");
79         out.println(" * !!! THIS IS A GENERATED FILE !!!");
80         out.println(" * If updates to the source are needed, then:");
81         out.println(" * - apply the necessary modifications to");
82         out.println(" *   'src/codegen/unicode/java/org/apache/fop/complexscripts/bidi/GenerateBidiClass.java'");
83         out.println(" * - run 'ant codegen-unicode', which will generate a new BidiClass.java");
84         out.println(" *   in 'src/java/org/apache/fop/complexscripts/bidi'");
85         out.println(" * - commit BOTH changed files");
86         out.println(" */");
87         out.println();
88         out.println("/** Bidirectional class utilities. */");
89         out.println("public final class BidiClass {");
90         out.println();
91         out.println("private BidiClass() {");
92         out.println("}");
93         out.println();
94         dumpData(out);
95         out.println("/**");
96         out.println(" * Lookup bidi class for character expressed as unicode scalar value.");
97         out.println(" * @param ch a unicode scalar value");
98         out.println(" * @return bidi class");
99         out.println(" */");
100         out.println("public static int getBidiClass ( int ch ) {");
101         out.println("  if ( ch <= 0x00FF ) {");
102         out.println("    return bcL1 [ ch - 0x0000 ];");
103         out.println("  } else if ( ( ch >= 0x0590 ) && ( ch <= 0x06FF ) ) {");
104         out.println("    return bcR1 [ ch - 0x0590 ];");
105         out.println("  } else {");
106         out.println("    return getBidiClass ( ch, bcS1, bcE1, bcC1 );");
107         out.println("  }");
108         out.println("}");
109         out.println();
110         out.println("private static int getBidiClass ( int ch, int[] sa, int[] ea, byte[] ca ) {");
111         out.println("  int k = Arrays.binarySearch ( sa, ch );");
112         out.println("  if ( k >= 0 ) {");
113         out.println("    return ca [ k ];");
114         out.println("  } else {");
115         out.println("    k = - ( k + 1 );");
116         out.println("    if ( k == 0 ) {");
117         out.println("      return BidiConstants.L;");
118         out.println("    } else if ( ch <= ea [ k - 1 ] ) {");
119         out.println("      return ca [ k - 1 ];");
120         out.println("    } else {");
121         out.println("      return BidiConstants.L;");
122         out.println("    }");
123         out.println("  }");
124         out.println("}");
125         out.println();
126         out.println("}");
127         out.flush();
128         out.close();
129     }
130 
131     /**
132      * Read bidi class property data.
133      *
134      * @param bidiFileName name (as URL) of bidi type data
135      */
readBidiClassProperties(String bidiFileName)136     private static void readBidiClassProperties(String bidiFileName) throws Exception {
137         // read property names
138         BufferedReader b = new BufferedReader(new InputStreamReader(new URL(bidiFileName).openStream()));
139         String line;
140         int lineNumber = 0;
141         TreeSet intervals = new TreeSet();
142         while ((line = b.readLine()) != null) {
143             lineNumber++;
144             if (line.startsWith("#")) {
145                 continue;
146             } else if (line.length() == 0) {
147                 continue;
148             } else {
149                 if (line.indexOf("#") != -1) {
150                     line = (line.split("#")) [ 0 ];
151                 }
152                 String[] fa = line.split(";");
153                 if (fa.length == 2) {
154                     int[] interval = parseInterval(fa[0].trim());
155                     byte bidiClass = (byte) parseBidiClass(fa[1].trim());
156                     if (interval[1] == interval[0]) { // singleton
157                         int c = interval[0];
158                         if (c <= 0x00FF) {
159                             if (bcL1 [ c - 0x0000 ] == 0) {
160                                 bcL1 [ c - 0x0000 ] = bidiClass;
161                             } else {
162                                 throw new Exception("duplicate singleton entry: " + c);
163                             }
164                         } else if ((c >= 0x0590) && (c <= 0x06FF)) {
165                             if (bcR1 [ c - 0x0590 ] == 0) {
166                                 bcR1 [ c - 0x0590 ] = bidiClass;
167                             } else {
168                                 throw new Exception("duplicate singleton entry: " + c);
169                             }
170                         } else {
171                             addInterval(intervals, c, c, bidiClass);
172                         }
173                     } else {                            // non-singleton
174                         int s = interval[0];
175                         int e = interval[1];            // inclusive
176                         if (s <= 0x00FF) {
177                             for (int i = s; i <= e; i++) {
178                                 if (i <= 0x00FF) {
179                                     if (bcL1 [ i - 0x0000 ] == 0) {
180                                         bcL1 [ i - 0x0000 ] = bidiClass;
181                                     } else {
182                                         throw new Exception("duplicate singleton entry: " + i);
183                                     }
184                                 } else {
185                                     addInterval(intervals, i, e, bidiClass);
186                                     break;
187                                 }
188                             }
189                         } else if ((s >= 0x0590) && (s <= 0x06FF)) {
190                             for (int i = s; i <= e; i++) {
191                                 if (i <= 0x06FF) {
192                                     if (bcR1 [ i - 0x0590 ] == 0) {
193                                         bcR1 [ i - 0x0590 ] = bidiClass;
194                                     } else {
195                                         throw new Exception("duplicate singleton entry: " + i);
196                                     }
197                                 } else {
198                                     addInterval(intervals, i, e, bidiClass);
199                                     break;
200                                 }
201                             }
202                         } else {
203                             addInterval(intervals, s, e, bidiClass);
204                         }
205                     }
206                 } else {
207                     throw new Exception("bad syntax, line(" + lineNumber + "): " + line);
208                 }
209             }
210         }
211         // compile interval search data
212         int ivIndex = 0;
213         int niv = intervals.size();
214         bcS1 = new int [ niv ];
215         bcE1 = new int [ niv ];
216         bcC1 = new byte [ niv ];
217         for (Iterator it = intervals.iterator(); it.hasNext(); ivIndex++) {
218             Interval iv = (Interval) it.next();
219             bcS1[ivIndex] = iv.start;
220             bcE1[ivIndex] = iv.end;
221             bcC1[ivIndex] = (byte) iv.bidiClass;
222         }
223         // test data
224         test();
225     }
226 
parseInterval(String interval)227     private static int[] parseInterval(String interval) throws Exception {
228         int s;
229         int e;
230         String[] fa = interval.split("\\.\\.");
231         if (fa.length == 1) {
232             s = Integer.parseInt(fa[0], 16);
233             e = s;
234         } else if (fa.length == 2) {
235             s = Integer.parseInt(fa[0], 16);
236             e = Integer.parseInt(fa[1], 16);
237         } else {
238             throw new Exception("bad interval syntax: " + interval);
239         }
240         if (e < s) {
241             throw new Exception("bad interval, start must be less than or equal to end: " + interval);
242         }
243         return new int[] {s, e};
244     }
245 
parseBidiClass(String bidiClass)246     private static int parseBidiClass(String bidiClass) {
247         int bc = 0;
248         if ("L".equals(bidiClass)) {
249             bc = BidiConstants.L;
250         } else if ("LRE".equals(bidiClass)) {
251             bc = BidiConstants.LRE;
252         } else if ("LRO".equals(bidiClass)) {
253             bc = BidiConstants.LRO;
254         } else if ("R".equals(bidiClass)) {
255             bc = BidiConstants.R;
256         } else if ("AL".equals(bidiClass)) {
257             bc = BidiConstants.AL;
258         } else if ("RLE".equals(bidiClass)) {
259             bc = BidiConstants.RLE;
260         } else if ("RLO".equals(bidiClass)) {
261             bc = BidiConstants.RLO;
262         } else if ("PDF".equals(bidiClass)) {
263             bc = BidiConstants.PDF;
264         } else if ("EN".equals(bidiClass)) {
265             bc = BidiConstants.EN;
266         } else if ("ES".equals(bidiClass)) {
267             bc = BidiConstants.ES;
268         } else if ("ET".equals(bidiClass)) {
269             bc = BidiConstants.ET;
270         } else if ("AN".equals(bidiClass)) {
271             bc = BidiConstants.AN;
272         } else if ("CS".equals(bidiClass)) {
273             bc = BidiConstants.CS;
274         } else if ("NSM".equals(bidiClass)) {
275             bc = BidiConstants.NSM;
276         } else if ("BN".equals(bidiClass)) {
277             bc = BidiConstants.BN;
278         } else if ("B".equals(bidiClass)) {
279             bc = BidiConstants.B;
280         } else if ("S".equals(bidiClass)) {
281             bc = BidiConstants.S;
282         } else if ("WS".equals(bidiClass)) {
283             bc = BidiConstants.WS;
284         } else if ("ON".equals(bidiClass)) {
285             bc = BidiConstants.ON;
286         } else {
287             throw new IllegalArgumentException("unknown bidi class: " + bidiClass);
288         }
289         return bc;
290     }
291 
addInterval(SortedSet intervals, int start, int end, int bidiClass)292     private static void addInterval(SortedSet intervals, int start, int end, int bidiClass) {
293         intervals.add(new Interval(start, end, bidiClass));
294     }
295 
dumpData(PrintWriter out)296     private static void dumpData(PrintWriter out) {
297         boolean first;
298         StringBuffer sb = new StringBuffer();
299 
300         // bcL1
301         first = true;
302         sb.setLength(0);
303         out.println("private static byte[] bcL1 = {");
304         for (int i = 0; i < bcL1.length; i++) {
305             if (!first) {
306                 sb.append(",");
307             } else {
308                 first = false;
309             }
310             sb.append(bcL1[i]);
311             if (sb.length() > 120) {
312                 sb.append(',');
313                 out.println(sb);
314                 first = true;
315                 sb.setLength(0);
316             }
317         }
318         if (sb.length() > 0) {
319             out.println(sb);
320         }
321         out.println("};");
322         out.println();
323 
324         // bcR1
325         first = true;
326         sb.setLength(0);
327         out.println("private static byte[] bcR1 = {");
328         for (int i = 0; i < bcR1.length; i++) {
329             if (!first) {
330                 sb.append(",");
331             } else {
332                 first = false;
333             }
334             sb.append(bcR1[i]);
335             if (sb.length() > 120) {
336                 sb.append(',');
337                 out.println(sb);
338                 first = true;
339                 sb.setLength(0);
340             }
341         }
342         if (sb.length() > 0) {
343             out.println(sb);
344         }
345         out.println("};");
346         out.println();
347 
348         // bcS1
349         first = true;
350         sb.setLength(0);
351         out.println("private static int[] bcS1 = {");
352         for (int i = 0; i < bcS1.length; i++) {
353             if (!first) {
354                 sb.append(",");
355             } else {
356                 first = false;
357             }
358             sb.append(bcS1[i]);
359             if (sb.length() > 120) {
360                 sb.append(',');
361                 out.println(sb);
362                 first = true;
363                 sb.setLength(0);
364             }
365         }
366         if (sb.length() > 0) {
367             out.println(sb);
368         }
369         out.println("};");
370         out.println();
371 
372         // bcE1
373         first = true;
374         sb.setLength(0);
375         out.println("private static int[] bcE1 = {");
376         for (int i = 0; i < bcE1.length; i++) {
377             if (!first) {
378                 sb.append(",");
379             } else {
380                 first = false;
381             }
382             sb.append(bcE1[i]);
383             if (sb.length() > 120) {
384                 sb.append(',');
385                 out.println(sb);
386                 first = true;
387                 sb.setLength(0);
388             }
389         }
390         if (sb.length() > 0) {
391             out.println(sb);
392         }
393         out.println("};");
394         out.println();
395 
396         // bcC1
397         first = true;
398         sb.setLength(0);
399         out.println("private static byte[] bcC1 = {");
400         for (int i = 0; i < bcC1.length; i++) {
401             if (!first) {
402                 sb.append(",");
403             } else {
404                 first = false;
405             }
406             sb.append(bcC1[i]);
407             if (sb.length() > 120) {
408                 sb.append(',');
409                 out.println(sb);
410                 first = true;
411                 sb.setLength(0);
412             }
413         }
414         if (sb.length() > 0) {
415             out.println(sb);
416         }
417         out.println("};");
418         out.println();
419     }
420 
getBidiClass(int ch)421     private static int getBidiClass(int ch) {
422         if (ch <= 0x00FF) {
423             return bcL1 [ ch - 0x0000 ];
424         } else if ((ch >= 0x0590) && (ch <= 0x06FF)) {
425             return bcR1 [ ch - 0x0590 ];
426         } else {
427             return getBidiClass(ch, bcS1, bcE1, bcC1);
428         }
429     }
430 
getBidiClass(int ch, int[] sa, int[] ea, byte[] ca)431     private static int getBidiClass(int ch, int[] sa, int[] ea, byte[] ca) {
432         int k = Arrays.binarySearch(sa, ch);
433         if (k >= 0) {
434             return ca [ k ];
435         } else {
436             k = -(k + 1);
437             if (k == 0) {
438                 return BidiConstants.L;
439             } else if (ch <= ea [ k - 1 ]) {
440                 return ca [ k - 1 ];
441             } else {
442                 return BidiConstants.L;
443             }
444         }
445     }
446 
    /**
     * Self test data, stored as consecutive pairs: a code point followed by the bidi
     * class that the lookup is expected to return for it.
     */
    private static final int[] TEST_DATA =
    {
        0x000000, BidiConstants.BN,
        0x000009, BidiConstants.S,
        0x00000A, BidiConstants.B,
        0x00000C, BidiConstants.WS,
        0x000020, BidiConstants.WS,
        0x000023, BidiConstants.ET,
        0x000028, BidiConstants.ON,
        0x00002B, BidiConstants.ES,
        0x00002C, BidiConstants.CS,
        0x000031, BidiConstants.EN,
        0x00003A, BidiConstants.CS,
        0x000041, BidiConstants.L,
        0x000300, BidiConstants.NSM,
        0x000374, BidiConstants.ON,
        0x0005BE, BidiConstants.R,
        0x000601, BidiConstants.AN,
        0x000608, BidiConstants.AL,
        0x000670, BidiConstants.NSM,
        0x000710, BidiConstants.AL,
        0x0007FA, BidiConstants.R,
        0x000970, BidiConstants.L,
        0x001392, BidiConstants.ON,
        0x002000, BidiConstants.WS,
        0x00200E, BidiConstants.L,
        0x00200F, BidiConstants.R,
        0x00202A, BidiConstants.LRE,
        0x00202B, BidiConstants.RLE,
        0x00202C, BidiConstants.PDF,
        0x00202D, BidiConstants.LRO,
        0x00202E, BidiConstants.RLO,
        0x0020E1, BidiConstants.NSM,
        0x002212, BidiConstants.ES,
        0x002070, BidiConstants.EN,
        0x003000, BidiConstants.WS,
        0x003009, BidiConstants.ON,
        0x00FBD4, BidiConstants.AL,
        0x00FE69, BidiConstants.ET,
        0x00FF0C, BidiConstants.CS,
        0x00FEFF, BidiConstants.BN,
        0x01034A, BidiConstants.L,
        0x010E60, BidiConstants.AN,
        0x01F100, BidiConstants.EN,
        0x0E0001, BidiConstants.BN,
        0x0E0100, BidiConstants.NSM,
        0x10FFFF, BidiConstants.BN
    };
495 
test()496     private static void test() throws Exception {
497         for (int i = 0, n = TEST_DATA.length / 2; i < n; i++) {
498             int ch = TEST_DATA [ i * 2 + 0 ];
499             int tc = TEST_DATA [ i * 2 + 1 ];
500             int bc = getBidiClass(ch);
501             if (bc != tc) {
502                 throw new Exception("test mapping failed for character (0x" + Integer.toHexString(ch) + "): expected " + tc + ", got " + bc);
503             }
504         }
505     }
506 
507     /**
508      * Main entry point for generator.
509      * @param args array of command line arguments
510      */
main(String[] args)511     public static void main(String[] args) {
512         String bidiFileName = "http://www.unicode.org/Public/UNIDATA/extracted/DerivedBidiClass.txt";
513         String outFileName = "BidiClass.java";
514         boolean ok = true;
515         for (int i = 0; i < args.length; i = i + 2) {
516             if (i + 1 == args.length) {
517                 ok = false;
518             } else {
519                 String opt = args[i];
520                 if ("-b".equals(opt)) {
521                     bidiFileName = args [i + 1];
522                 } else if ("-o".equals(opt)) {
523                     outFileName = args [i + 1];
524                 } else {
525                     ok = false;
526                 }
527             }
528         }
529         if (!ok) {
530             System.out.println("Usage: GenerateBidiClass [-b <bidiFile>] [-o <outputFile>]");
531             System.out.println("  defaults:");
532             System.out.println("    <bidiFile>:     " + bidiFileName);
533             System.out.println("    <outputFile>:        " + outFileName);
534         } else {
535             try {
536                 convertBidiClassProperties(bidiFileName, outFileName);
537                 System.out.println("Generated " + outFileName + " from");
538                 System.out.println("  <bidiFile>:     " + bidiFileName);
539             } catch (Exception e) {
540                 System.out.println("An unexpected error occured");
541                 e.printStackTrace();
542             }
543         }
544     }
545 
546     private static class Interval implements Comparable {
547         int start;
548         int end;
549         int bidiClass;
Interval(int start, int end, int bidiClass)550         Interval(int start, int end, int bidiClass) {
551             this.start = start;
552             this.end = end;
553             this.bidiClass = bidiClass;
554         }
compareTo(Object o)555         public int compareTo(Object o) {
556             Interval iv = (Interval) o;
557             if (start < iv.start) {
558                 return -1;
559             } else if (start > iv.start) {
560                 return 1;
561             } else if (end < iv.end) {
562                 return -1;
563             } else if (end > iv.end) {
564                 return 1;
565             } else {
566                 return 0;
567             }
568         }
569     }
570 }
571