1 /*
2  * Licensed to the Apache Software Foundation (ASF) under one or more
3  * contributor license agreements.  See the NOTICE file distributed with
4  * this work for additional information regarding copyright ownership.
5  * The ASF licenses this file to You under the Apache License, Version 2.0
6  * (the "License"); you may not use this file except in compliance with
7  * the License.  You may obtain a copy of the License at
8  *
9  *      http://www.apache.org/licenses/LICENSE-2.0
10  *
11  * Unless required by applicable law or agreed to in writing, software
12  * distributed under the License is distributed on an "AS IS" BASIS,
13  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14  * See the License for the specific language governing permissions and
15  * limitations under the License.
16  */
17 
18 /* $Id$ */
19 package org.apache.fop.complexscripts.scripts;
20 
21 /**
22  * Integrating existing rendering of Android for Khmer Unicode to iText
23  *    The class from the rendering of Mobile Project, Android from Nokor Group (AKA: Nokor-IT)
24  *    The understanding also taking from the Khmum Browser that would lead to build this helper
25  *    (Comment above by Pongsametrey S. <metrey@osify.com>)
26  *    Thanks for Nokor Group & Mr. Pengleng HUOT
27  *
28  * author sok.pongsametrey
29  * @version 1.0
30  */
31 
32 /**
33  * UnicodeRender Class.
34  * author huot.pengleng
35  *
 * Simple character classes. They are used in the state table (in this file) to control the length of a
 * syllable. They are also used to know where a character should be placed (its location in reference to
 * the base character), and to know whether a character, when displayed independently, should be displayed
 * with a dotted circle to indicate an error in syllable construction.
 *
 * Character class tables:
 * xx character does not combine into syllable, such as numbers, punctuation marks, non-Khmer signs...
42  * sa Sign placed above the base
43  * sp Sign placed after the base
44  * c1 Consonant of type 1 or independent vowel (independent vowels behave as type 1 consonants)
45  * c2 Consonant of type 2 (only RO)
46  * c3 Consonant of type 3
47  * rb Khmer sign robat u17CC. combining mark for subscript consonants
48  * cd Consonant-shifter
49  * dl Dependent vowel placed before the base (left of the base)
50  * db Dependent vowel placed below the base
51  * da Dependent vowel placed above the base
52  * dr Dependent vowel placed behind the base (right of the base)
53  * co Khmer combining mark COENG u17D2, combines with the consonant or independent vowel following
54  *     it to create a subscript consonant or independent vowel
 * va Khmer split vowel in which the first part is before the base and the second one above the base
 * vr Khmer split vowel in which the first part is before the base and the second one behind (right of) the base
57  *
58  */
public class KhmerRenderer {

    // Character class ids. The id is stored in the low 16 bits of a class value (see CF_CLASS_MASK)
    // and is used as the column index into KHMER_STATE_TABLE.
    private static final int XX = 0; // character that does not combine into a syllable
    private static final int CC_CONSONANT = 1; // Consonant of type 1 or independent vowel
    private static final int CC_CONSONANT2 = 2; // Consonant of type 2
    private static final int CC_CONSONANT3 = 3; // Consonant of type 3
    private static final int CC_CONSONANT_SHIFTER = 5;
    private static final int CC_ROBAT = 6; // Khmer special diacritic accent - treated differently in state table
    private static final int CC_COENG = 7; // Subscript consonant combining character
    private static final int CC_DEPENDENT_VOWEL = 8;
    private static final int CC_SIGN_ABOVE = 9;
    private static final int CC_SIGN_AFTER = 10;

    // Flag bits stored above the class id. render() only compares complete class values
    // (id plus flags); it never tests an individual flag bit.
    private static final int CF_CLASS_MASK = 0x0000FFFF;
    private static final int CF_POS_AFTER = 0x00010000;
    private static final int CF_POS_ABOVE = 0x00020000;
    private static final int CF_POS_BELOW = 0x00040000;
    private static final int CF_POS_BEFORE = 0x00080000;
    private static final int CF_CONSONANT = 0x01000000; // flag to speed up comparing
    // flag for a split vowel -> the first part is added in front of the syllable
    private static final int CF_SPLIT_VOWEL = 0x02000000;
    // add a dotted circle if a character with this flag is the first in a syllable
    private static final int CF_DOTTED_CIRCLE = 0x04000000;
    private static final int CF_COENG = 0x08000000; // flag to speed up comparing
    private static final int CF_SHIFTER = 0x10000000; // flag to speed up comparing
    private static final int CF_ABOVE_VOWEL = 0x20000000; // flag to speed up comparing

    // Complete class values (id | flags) as stored in KHMER_CHAR_CLASSES. The flag bits never
    // overlap each other or the id, so every value below is distinct.
    private static final int C1 = CC_CONSONANT | CF_CONSONANT;
    private static final int C2 = CC_CONSONANT2 | CF_CONSONANT;
    private static final int C3 = CC_CONSONANT3 | CF_CONSONANT;
    private static final int CO = CC_COENG | CF_COENG | CF_DOTTED_CIRCLE;
    private static final int CS = CC_CONSONANT_SHIFTER | CF_DOTTED_CIRCLE | CF_SHIFTER;
    private static final int DA = CC_DEPENDENT_VOWEL | CF_POS_ABOVE | CF_DOTTED_CIRCLE | CF_ABOVE_VOWEL;
    private static final int DB = CC_DEPENDENT_VOWEL | CF_POS_BELOW | CF_DOTTED_CIRCLE;
    private static final int DL = CC_DEPENDENT_VOWEL | CF_POS_BEFORE | CF_DOTTED_CIRCLE;
    private static final int DR = CC_DEPENDENT_VOWEL | CF_POS_AFTER | CF_DOTTED_CIRCLE;
    private static final int RB = CC_ROBAT | CF_POS_ABOVE | CF_DOTTED_CIRCLE;
    private static final int SA = CC_SIGN_ABOVE | CF_DOTTED_CIRCLE | CF_POS_ABOVE;
    private static final int SP = CC_SIGN_AFTER | CF_DOTTED_CIRCLE | CF_POS_AFTER;
    private static final int VA = DA | CF_SPLIT_VOWEL;
    private static final int VR = DR | CF_SPLIT_VOWEL;

    // First code point covered by KHMER_CHAR_CLASSES.
    private static final char KHMER_BLOCK_START = '\u1780';

    private static final char BA = '\u1794';
    private static final char YO = '\u1799';
    private static final char SA_C = '\u179F';
    private static final char SRAAA = '\u17B6';
    private static final char SRAII = '\u17B8';
    private static final char SRAU = '\u17BB';
    private static final char SRAOE = '\u17BE';
    private static final char SRAYA = '\u17BF';
    private static final char SRAIE = '\u17C0';
    private static final char SRAE = '\u17C1';
    private static final char SRAOO = '\u17C4';
    private static final char SRAAU = '\u17C5';
    private static final char COENG = '\u17D2';
    private static final char MARK = '\u17EA';

    // COENG followed by RO: the subscript that render() moves in front of the base.
    private static final String CORO = String.valueOf(new char[] {COENG, '\u179A'});

    // Class value of each character, indexed by (code point - KHMER_BLOCK_START).
    private static final int[] KHMER_CHAR_CLASSES = {
        // U+1780 .. U+1794
        C1, C1, C1, C3, C1, C1, C1, C1, C3, C1, C1, C1, C1, C3, C1, C1, C1, C1, C1, C1, C3,
        // U+1795 .. U+17A9
        C1, C1, C1, C1, C3, C2, C1, C1, C1, C3, C3, C1, C3, C1, C1, C1, C1, C1, C1, C1, C1,
        // U+17AA .. U+17BE
        C1, C1, C1, C1, C1, C1, C1, C1, C1, C1, DR, DR, DR, DA, DA, DA, DA, DB, DB, DB, VA,
        // U+17BF .. U+17D3
        VR, VR, DL, DL, DL, VR, VR, SA, SP, SP, CS, CS, SA, RB, SA, SA, SA, SA, SA, CO, SA,
        // U+17D4 .. U+17DF
        XX, XX, XX, XX, XX, XX, XX, XX, XX, SA, XX, XX
    };

    // Syllable state machine: KHMER_STATE_TABLE[state][class id] is the next state, -1 ends the
    // syllable before the current character. Row 0 (the start state) contains no -1, so every
    // syllable consumes at least one character and render() always makes progress.
    private static final short[][] KHMER_STATE_TABLE = {
        {1, 2, 2, 2, 1, 1, 1, 6, 1, 1, 1, 2},                  // 0
        {-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1},      // 1
        {-1, -1, -1, -1, 3, 4, 5, 6, 16, 17, 1, -1},           // 2
        {-1, -1, -1, -1, -1, 4, -1, -1, 16, -1, -1, -1},       // 3
        {-1, -1, -1, -1, 15, -1, -1, 6, 16, 17, 1, 14},        // 4
        {-1, -1, -1, -1, -1, -1, -1, -1, 20, -1, 1, -1},       // 5
        {-1, 7, 8, 9, -1, -1, -1, -1, -1, -1, -1, -1},         // 6
        {-1, -1, -1, -1, 12, 13, -1, 10, 16, 17, 1, 14},       // 7
        {-1, -1, -1, -1, 12, 13, -1, -1, 16, 17, 1, 14},       // 8
        {-1, -1, -1, -1, 12, 13, -1, 10, 16, 17, 1, 14},       // 9
        {-1, 11, 11, 11, -1, -1, -1, -1, -1, -1, -1, -1},      // 10
        {-1, -1, -1, -1, 15, -1, -1, -1, 16, 17, 1, 14},       // 11
        {-1, -1, -1, -1, -1, 13, -1, -1, 16, -1, -1, -1},      // 12
        {-1, -1, -1, -1, 15, -1, -1, -1, 16, 17, 1, 14},       // 13
        {-1, -1, -1, -1, -1, -1, -1, -1, 16, -1, -1, -1},      // 14
        {-1, -1, -1, -1, -1, -1, -1, -1, 16, -1, -1, -1},      // 15
        {-1, -1, -1, -1, -1, -1, -1, -1, -1, 17, 1, 18},       // 16
        {-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 1, 18},       // 17
        {-1, -1, -1, -1, -1, -1, -1, 19, -1, -1, -1, -1},      // 18
        {-1, 1, -1, 1, -1, -1, -1, -1, -1, -1, -1, -1},        // 19
        {-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 1, -1}        // 20
    };

    /**
     * Maps a split vowel to the character render() emits for its second part (render() itself
     * emits SRAE in front of the base for the first part).
     * @param chrInput the split vowel as it appears in the input
     * @return the second-part character, or a space if chrInput is not one of the handled vowels
     */
    private static char strEcombining(final char chrInput) {
        switch (chrInput) {
        case SRAOE:
            return SRAII;
        case SRAOO:
            return SRAAA;
        case SRAYA:
        case SRAIE:
        case SRAAU:
            // these vowels are kept as they are
            return chrInput;
        default:
            return ' ';
        }
    }

    /**
     * Gets the character class.
     * @param uniChar the character to classify
     * @return the class value (id and flags) from the class table, or XX (0) for any character
     *         outside the range the table covers
     */
    private static int getCharClass(final char uniChar) {
        final int offset = uniChar - KHMER_BLOCK_START;
        if (offset >= 0 && offset < KHMER_CHAR_CLASSES.length) {
            return KHMER_CHAR_CLASSES[offset];
        }
        return XX;
    }

    /**
     * Looks up the state transition with explicit bounds checks.
     * @param state the current state
     * @param charClass the class id of the current character
     * @return the next state, or -1 if the syllable ends or the indices are outside the table
     */
    private static int nextState(final int state, final int charClass) {
        if (state < 0 || state >= KHMER_STATE_TABLE.length) {
            return -1;
        }
        final short[] row = KHMER_STATE_TABLE[state];
        if (charClass < 0 || charClass >= row.length) {
            return -1;
        }
        return row[charClass];
    }

    /**
     * Tells whether the dependent vowel must be written directly after the base BA.
     * This is the case when the base is BA and the vowel behind it is SRAAA or SRAAU, unless the
     * first subscript consonant is BA, YO or SA.
     * @param base the base of the cluster (empty or one character)
     * @param vowelAfter the vowel placed behind the base (empty or one character)
     * @param coeng1 the first subscript: empty, a bare COENG, or COENG plus consonant
     * @return true if the vowel goes directly after the base
     */
    private static boolean isSpecialCaseBA(final String base, final String vowelAfter, final String coeng1) {
        if (base.length() != 1 || base.charAt(0) != BA || vowelAfter.length() != 1) {
            return false;
        }
        final char vowel = vowelAfter.charAt(0);
        if (vowel != SRAAA && vowel != SRAAU) {
            return false;
        }
        if (coeng1.length() > 1) {
            // The subscript consonant is the LAST character of coeng1 (the first one is COENG).
            // The previous code looked at coeng1 without its last character, i.e. at COENG
            // itself, so this exclusion could never apply.
            final char subscript = coeng1.charAt(coeng1.length() - 1);
            if (subscript == BA || subscript == YO || subscript == SA_C) {
                return false;
            }
        }
        return true;
    }

    /**
     * Re-order Khmer unicode for display with Khmer.ttf file on Android.
     * The input is split into clusters with the state table; the parts of each cluster are then
     * written in visual (legacy style) order. Characters that do not belong to a cluster are
     * copied unchanged.
     * @param strInput Khmer unicode string.
     * @return String after render.
     * @throws NullPointerException if strInput is null
     */
    public String render(final String strInput) {
        final int charCount = strInput.length();
        final StringBuilder result = new StringBuilder(charCount);
        int cursor = 0;

        while (cursor < charCount) {
            String reserved = "";
            String signAbove = "";
            String signAfter = "";
            String base = "";
            String robat = "";
            String shifter = "";
            String vowelBefore = "";
            String vowelBelow = "";
            String vowelAbove = "";
            String vowelAfter = "";
            String coeng1 = "";
            String coeng2 = "";
            boolean coeng = false; // a COENG has been read and still waits for its consonant
            boolean shifterAfterCoeng = false;
            int state = 0;

            // collect the parts of one cluster
            while (cursor < charCount) {
                final char curChar = strInput.charAt(cursor);
                final int kChar = getCharClass(curChar);
                state = nextState(state, kChar & CF_CLASS_MASK);
                if (state < 0) {
                    // curChar starts the next cluster, leave it unread
                    break;
                }

                final String cur = String.valueOf(curChar);
                switch (kChar) {
                case XX:
                    reserved = cur;
                    break;
                case SA: // Sign placed above the base
                    signAbove = cur;
                    break;
                case SP: // Sign placed after the base
                    signAfter = cur;
                    break;
                case C1:
                case C2:
                case C3: // Consonant
                    if (!coeng) {
                        base = cur;
                    } else {
                        final String subscript = String.valueOf(COENG).concat(cur);
                        if (coeng1.isEmpty()) {
                            coeng1 = subscript;
                        } else {
                            coeng2 = subscript;
                        }
                        coeng = false;
                    }
                    break;
                case RB: // Khmer sign robat U+17CC
                    robat = cur;
                    break;
                case CS: // Consonant-shifter
                    if (!coeng1.isEmpty()) {
                        shifterAfterCoeng = true;
                    }
                    shifter = cur;
                    break;
                case DL: // Dependent vowel placed before the base
                    vowelBefore = cur;
                    break;
                case DB: // Dependent vowel placed below the base
                    vowelBelow = cur;
                    break;
                case DA: // Dependent vowel placed above the base
                    vowelAbove = cur;
                    break;
                case DR: // Dependent vowel placed behind the base
                    vowelAfter = cur;
                    break;
                case CO: // Khmer combining mark COENG
                    coeng = true;
                    break;
                case VA: // Khmer split vowel, second part above the base
                    vowelBefore = String.valueOf(SRAE);
                    vowelAbove = String.valueOf(strEcombining(curChar));
                    break;
                case VR: // Khmer split vowel, second part behind the base
                    vowelBefore = String.valueOf(SRAE);
                    vowelAfter = String.valueOf(strEcombining(curChar));
                    break;
                default:
                    // no other class value occurs in the class table
                    break;
                }

                cursor++;
            }

            // a subscript RO is written on the left side, in front of the base
            String coengBefore = "";
            if (CORO.equals(coeng1)) {
                coengBefore = coeng1;
                coeng1 = "";
            } else if (CORO.equals(coeng2)) {
                coengBefore = coeng2;
                coeng2 = "";
            }

            // shifter on a base that also carries a vowel above: the shifter is dropped and
            // SRAU is written below the base instead
            if (!base.isEmpty() && !shifter.isEmpty() && !vowelAbove.isEmpty()) {
                shifter = "";
                vowelBelow = String.valueOf(SRAU);
            }

            // incomplete coeng: a COENG that was not followed by a consonant
            if (coeng) {
                if (coeng1.isEmpty()) {
                    coeng1 = String.valueOf(COENG);
                } else if (coeng2.isEmpty()) {
                    coeng2 = String.valueOf(MARK).concat(String.valueOf(COENG));
                }
            }

            // place of shifter: behind the subscripts if it was read after the first subscript
            final String shifter1 = shifterAfterCoeng ? "" : shifter;
            final String shifter2 = shifterAfterCoeng ? shifter : "";

            final boolean specialCaseBA = isSpecialCaseBA(base, vowelAfter, coeng1);

            // cluster formation; the two layouts differ only in the position of vowelAfter
            result.append(vowelBefore).append(coengBefore).append(base);
            if (specialCaseBA) {
                result.append(vowelAfter);
            }
            result.append(robat).append(shifter1).append(coeng1).append(coeng2).append(shifter2)
                    .append(vowelBelow).append(vowelAbove);
            if (!specialCaseBA) {
                result.append(vowelAfter);
            }
            result.append(signAbove).append(signAfter).append(reserved);
        }

        return result.toString();
    }
}
373