/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

/* $Id$ */
package org.apache.fop.complexscripts.scripts;

/**
 * Integrating existing rendering of Android for Khmer Unicode to iText
 * The class from the rendering of Mobile Project, Android from Nokor Group (AKA: Nokor-IT)
 * The understanding also taking from the Khmum Browser that would lead to build this helper
 * (Comment above by Pongsametrey S. <metrey@osify.com>)
 * Thanks for Nokor Group & Mr. Pengleng HUOT
 *
 * author sok.pongsametrey
 * @version 1.0
 */

/**
 * UnicodeRender Class.
 * author huot.pengleng
 *
 * simple classes, they are used in the state table (in this file) to control the length of a syllable
 * they are also used to know where a character should be placed (location in reference to the base character)
 * and also to know if a character, when independently displayed, should be displayed with a dotted-circle to
 * indicate error in syllable construction
 * Character class tables
 * xx character does not combine into syllable, such as numbers, punctuation marks, non-Khmer signs...
/**
 * Reorders Khmer Unicode text from logical order into the visual ("legacy style") order of
 * each syllable cluster.
 *
 * <p>Every character is mapped to a class word. The low 16 bits of that word select a column
 * of the syllable state table (which controls how long a syllable may grow); the high bits are
 * flags describing where the character sits relative to the base character. The class codes
 * used in the character class table are:</p>
 * <pre>
 * xx  character does not combine into a syllable (numbers, punctuation marks, non-Khmer signs...)
 * sa  Sign placed above the base
 * sp  Sign placed after the base
 * c1  Consonant of type 1 or independent vowel (independent vowels behave as type 1 consonants)
 * c2  Consonant of type 2 (only RO)
 * c3  Consonant of type 3
 * rb  Khmer sign robat U+17CC, combining mark for subscript consonants
 * cs  Consonant-shifter
 * dl  Dependent vowel placed before the base (left of the base)
 * db  Dependent vowel placed below the base
 * da  Dependent vowel placed above the base
 * dr  Dependent vowel placed behind the base (right of the base)
 * co  Khmer combining mark COENG U+17D2, combines with the consonant or independent vowel
 *     following it to create a subscript consonant or independent vowel
 * va  Khmer split vowel in which the first part is before the base and the second one above the base
 * vr  Khmer split vowel in which the first part is before the base and the second one behind
 *     (right of) the base
 * </pre>
 */
public class KhmerRenderer {

    // Character classes: the low 16 bits of a class word, used as the state table column.
    private static final int XX = 0;
    private static final int CC_CONSONANT = 1; // Consonant of type 1 or independent vowel
    private static final int CC_CONSONANT2 = 2; // Consonant of type 2
    private static final int CC_CONSONANT3 = 3; // Consonant of type 3
    private static final int CC_CONSONANT_SHIFTER = 5;
    private static final int CC_ROBAT = 6; // Khmer special diacritic accent - treated differently in state table
    private static final int CC_COENG = 7; // Subscript consonant combining character
    private static final int CC_DEPENDENT_VOWEL = 8;
    private static final int CC_SIGN_ABOVE = 9;
    private static final int CC_SIGN_AFTER = 10;

    // Flags: the high bits of a class word.
    private static final int CF_CLASS_MASK = 0x0000FFFF;
    private static final int CF_POS_AFTER = 0x00010000;
    private static final int CF_POS_ABOVE = 0x00020000;
    private static final int CF_POS_BELOW = 0x00040000;
    private static final int CF_POS_BEFORE = 0x00080000;
    private static final int CF_CONSONANT = 0x01000000; // flag to speed up comparing
    // flag for a split vowel -> the first part is added in front of the syllable
    private static final int CF_SPLIT_VOWEL = 0x02000000;
    // add a dotted circle if a character with this flag is the first in a syllable
    private static final int CF_DOTTED_CIRCLE = 0x04000000;
    private static final int CF_COENG = 0x08000000; // flag to speed up comparing
    private static final int CF_SHIFTER = 0x10000000; // flag to speed up comparing
    private static final int CF_ABOVE_VOWEL = 0x20000000; // flag to speed up comparing

    // Complete class words (class + flags) as stored in the character class table.
    private static final int C1 = CC_CONSONANT + CF_CONSONANT;
    private static final int C2 = CC_CONSONANT2 + CF_CONSONANT;
    private static final int C3 = CC_CONSONANT3 + CF_CONSONANT;
    private static final int CO = CC_COENG + CF_COENG + CF_DOTTED_CIRCLE;
    private static final int CS = CC_CONSONANT_SHIFTER + CF_DOTTED_CIRCLE + CF_SHIFTER;
    private static final int DA = CC_DEPENDENT_VOWEL + CF_POS_ABOVE + CF_DOTTED_CIRCLE + CF_ABOVE_VOWEL;
    private static final int DB = CC_DEPENDENT_VOWEL + CF_POS_BELOW + CF_DOTTED_CIRCLE;
    private static final int DL = CC_DEPENDENT_VOWEL + CF_POS_BEFORE + CF_DOTTED_CIRCLE;
    private static final int DR = CC_DEPENDENT_VOWEL + CF_POS_AFTER + CF_DOTTED_CIRCLE;
    private static final int RB = CC_ROBAT + CF_POS_ABOVE + CF_DOTTED_CIRCLE;
    private static final int SA = CC_SIGN_ABOVE + CF_DOTTED_CIRCLE + CF_POS_ABOVE;
    private static final int SP = CC_SIGN_AFTER + CF_DOTTED_CIRCLE + CF_POS_AFTER;
    private static final int VA = DA + CF_SPLIT_VOWEL;
    private static final int VR = DR + CF_SPLIT_VOWEL;

    // Individual code points referenced by the reordering rules.
    private static final char KHMER_FIRST = '\u1780'; // code point of entry 0 of the class table
    private static final char BA = '\u1794';
    private static final char COENG = '\u17D2';
    private static final char MARK = '\u17EA';
    private static final char NYO = '\u1789';
    private static final char SA_C = '\u179F';
    private static final char SRAAA = '\u17B6';
    private static final char SRAAU = '\u17C5';
    private static final char SRAE = '\u17C1';
    private static final char SRAIE = '\u17C0';
    private static final char SRAII = '\u17B8';
    private static final char SRAOE = '\u17BE';
    private static final char SRAOO = '\u17C4';
    private static final char SRAU = '\u17BB';
    private static final char SRAYA = '\u17BF';
    private static final char TRIISAP = '\u17CA';
    private static final char YO = '\u1799';

    // Short sequences built from the code points above.
    private static final String CONYO = new String(new char[] {COENG, NYO});
    private static final String CORO = new String(new char[] {COENG, '\u179A'});
    private static final String MARK_COENG = new String(new char[] {MARK, COENG});
    private static final String MARK_SRAAA = new String(new char[] {MARK, SRAAA});
    private static final String MARK_SRAAU = new String(new char[] {MARK, SRAAU});

    /** Class word of every code point starting at U+1780; the index is (code point - U+1780). */
    private static final int[] KHMER_CHAR_CLASSES = {
        C1, C1, C1, C3, C1, C1, C1, C1, C3, C1, C1, C1, C1, C3, C1, C1, C1, C1, C1, C1, C3,
        C1, C1, C1, C1, C3, C2, C1, C1, C1, C3, C3, C1, C3, C1, C1, C1, C1, C1, C1, C1, C1,
        C1, C1, C1, C1, C1, C1, C1, C1, C1, C1, DR, DR, DR, DA, DA, DA, DA, DB, DB, DB, VA,
        VR, VR, DL, DL, DL, VR, VR, SA, SP, SP, CS, CS, SA, RB, SA, SA, SA, SA, SA, CO, SA,
        XX, XX, XX, XX, XX, XX, XX, XX, XX, SA, XX, XX
    };

    /**
     * Syllable state machine: row = current state, column = character class (low 16 bits of
     * the class word). A negative entry means the character does not belong to the current
     * syllable, i.e. the cluster ends before it.
     */
    private static final short[][] KHMER_STATE_TABLE = {
        {1, 2, 2, 2, 1, 1, 1, 6, 1, 1, 1, 2},
        {-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1},
        {-1, -1, -1, -1, 3, 4, 5, 6, 16, 17, 1, -1},
        {-1, -1, -1, -1, -1, 4, -1, -1, 16, -1, -1, -1},
        {-1, -1, -1, -1, 15, -1, -1, 6, 16, 17, 1, 14},
        {-1, -1, -1, -1, -1, -1, -1, -1, 20, -1, 1, -1},
        {-1, 7, 8, 9, -1, -1, -1, -1, -1, -1, -1, -1},
        {-1, -1, -1, -1, 12, 13, -1, 10, 16, 17, 1, 14},
        {-1, -1, -1, -1, 12, 13, -1, -1, 16, 17, 1, 14},
        {-1, -1, -1, -1, 12, 13, -1, 10, 16, 17, 1, 14},
        {-1, 11, 11, 11, -1, -1, -1, -1, -1, -1, -1, -1},
        {-1, -1, -1, -1, 15, -1, -1, -1, 16, 17, 1, 14},
        {-1, -1, -1, -1, -1, 13, -1, -1, 16, -1, -1, -1},
        {-1, -1, -1, -1, 15, -1, -1, -1, 16, 17, 1, 14},
        {-1, -1, -1, -1, -1, -1, -1, -1, 16, -1, -1, -1},
        {-1, -1, -1, -1, -1, -1, -1, -1, 16, -1, -1, -1},
        {-1, -1, -1, -1, -1, -1, -1, -1, -1, 17, 1, 18},
        {-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 1, 18},
        {-1, -1, -1, -1, -1, -1, -1, 19, -1, -1, -1, -1},
        {-1, 1, -1, 1, -1, -1, -1, -1, -1, -1, -1, -1},
        {-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 1, -1}
    };

    /**
     * Maps a split vowel to the part that remains once its leading part (SRAE, emitted in
     * front of the base by {@link #render(String)}) has been split off.
     *
     * @param chrInput the split vowel
     * @return the remaining part, or a space if {@code chrInput} is not one of the handled vowels
     */
    private static char strEcombining(final char chrInput) {
        switch (chrInput) {
        case SRAOE:
            return SRAII;
        case SRAYA:
            return SRAYA;
        case SRAIE:
            return SRAIE;
        case SRAOO:
            return SRAAA;
        case SRAAU:
            return SRAAU;
        default:
            return ' ';
        }
    }

    /**
     * Gets the character class word (class code plus flags) of a character.
     *
     * @param uniChar the character to classify
     * @return the entry of the class table for {@code uniChar}, or {@code XX} (0) when the
     *         character lies outside the range covered by the table
     */
    private static int getCharClass(final char uniChar) {
        final int index = uniChar - KHMER_FIRST;
        if (index >= 0 && index < KHMER_CHAR_CLASSES.length) {
            return KHMER_CHAR_CLASSES[index];
        }
        return XX;
    }

    /**
     * Looks up the state reached from {@code state} on a character of class {@code charClass}.
     *
     * @return the next state, or -1 if the table has no such entry
     */
    private static int nextState(final int state, final int charClass) {
        if (state < 0 || state >= KHMER_STATE_TABLE.length) {
            return -1;
        }
        final short[] row = KHMER_STATE_TABLE[state];
        if (charClass < 0 || charClass >= row.length) {
            return -1;
        }
        return row[charClass];
    }

    /** Tells whether {@code vowelAfter} is one of the vowels that trigger the BA special case. */
    private static boolean isBaSpecialVowel(final String vowelAfter) {
        return String.valueOf(SRAAA).equals(vowelAfter)
                || String.valueOf(SRAAU).equals(vowelAfter)
                || MARK_SRAAA.equals(vowelAfter)
                || MARK_SRAAU.equals(vowelAfter);
    }

    /**
     * Re-orders Khmer Unicode text for display: the input is cut into syllable clusters with
     * the state table and each cluster is written out in visual (legacy style) order.
     * Characters that do not combine into a syllable are copied through unchanged.
     *
     * @param strInput Khmer unicode string; must not be {@code null}
     * @return the string after reordering
     * @throws NullPointerException if {@code strInput} is {@code null}
     */
    public String render(final String strInput) {
        final int charCount = strInput.length();
        final StringBuilder result = new StringBuilder(charCount);
        int cursor = 0;

        while (cursor < charCount) {
            final int clusterStart = cursor;

            String reserved = "";
            String signAbove = "";
            String signAfter = "";
            String base = "";
            String robat = "";
            String shifter = "";
            String vowelBefore = "";
            String vowelBelow = "";
            String vowelAbove = "";
            String vowelAfter = "";
            String coeng1 = "";
            String coeng2 = "";
            boolean coeng = false; // a COENG was seen and still waits for its consonant
            boolean shifterAfterCoeng = false;
            int state = 0;

            // Consume characters until the state table says the syllable is over.
            while (cursor < charCount) {
                final char curChar = strInput.charAt(cursor);
                final int kChar = getCharClass(curChar);
                state = nextState(state, kChar & CF_CLASS_MASK);
                if (state < 0) {
                    break;
                }

                final String cur = String.valueOf(curChar);
                switch (kChar) {
                case XX:
                    reserved = cur;
                    break;
                case SA: // Sign placed above the base
                    signAbove = cur;
                    break;
                case SP: // Sign placed after the base
                    signAfter = cur;
                    break;
                case C1:
                case C2:
                case C3: // Consonant: either the base or the target of a pending COENG
                    if (coeng) {
                        if (coeng1.isEmpty()) {
                            coeng1 = COENG + cur;
                        } else {
                            coeng2 = COENG + cur;
                        }
                        coeng = false;
                    } else {
                        base = cur;
                    }
                    break;
                case RB: // Khmer sign robat U+17CC
                    robat = cur;
                    break;
                case CS: // Consonant-shifter
                    if (!coeng1.isEmpty()) {
                        shifterAfterCoeng = true;
                    }
                    shifter = cur;
                    break;
                case DL: // Dependent vowel placed before the base
                    vowelBefore = cur;
                    break;
                case DB: // Dependent vowel placed below the base
                    vowelBelow = cur;
                    break;
                case DA: // Dependent vowel placed above the base
                    vowelAbove = cur;
                    break;
                case DR: // Dependent vowel placed behind the base
                    vowelAfter = cur;
                    break;
                case CO: // Khmer combining mark COENG
                    coeng = true;
                    break;
                case VA: // Khmer split vowel, see da
                    vowelBefore = String.valueOf(SRAE);
                    vowelAbove = String.valueOf(strEcombining(curChar));
                    break;
                case VR: // Khmer split vowel, see dr
                    vowelBefore = String.valueOf(SRAE);
                    vowelAfter = String.valueOf(strEcombining(curChar));
                    break;
                default:
                    break;
                }
                cursor++;
            }

            if (cursor == clusterStart) {
                // Safety net: no start-state entry accepted this character. Copy it through
                // so that the outer loop is guaranteed to make progress.
                result.append(strInput.charAt(cursor));
                cursor++;
                continue;
            }

            // A subscript RO is drawn on the left side of the base.
            String coengBefore = "";
            if (CORO.equals(coeng1)) {
                coengBefore = coeng1;
                coeng1 = "";
            } else if (CORO.equals(coeng2)) {
                coengBefore = coeng2;
                coeng2 = "";
            }

            // A shifter on a base that also carries a vowel above is replaced by SRAU below.
            if (!base.isEmpty() && !shifter.isEmpty() && !vowelAbove.isEmpty()) {
                shifter = "";
                vowelBelow = String.valueOf(SRAU);
            }

            // Incomplete coeng: the cluster ended right after a COENG.
            if (coeng && coeng1.isEmpty()) {
                coeng1 = String.valueOf(COENG);
            } else if (coeng && coeng2.isEmpty()) {
                coeng2 = MARK_COENG;
            }

            // The shifter goes after the subscripts if it was typed after the first one.
            final String shifter1 = shifterAfterCoeng ? "" : shifter;
            final String shifter2 = shifterAfterCoeng ? shifter : "";

            // Special case: BA followed by one of the vowels above keeps the vowel directly
            // after the base, unless the first subscript consonant is BA, YO or SA.
            boolean specialCaseBA = false;
            if (String.valueOf(BA).equals(base) && isBaSpecialVowel(vowelAfter)) {
                specialCaseBA = true;
                if (!coeng1.isEmpty()) {
                    // The consonant is the LAST character of coeng1 (COENG + consonant).
                    // The previous code took the prefix instead, so this test never matched.
                    final char subscript = coeng1.charAt(coeng1.length() - 1);
                    if (subscript == BA || subscript == YO || subscript == SA_C) {
                        specialCaseBA = false;
                    }
                }
            }

            // Cluster formation.
            result.append(vowelBefore).append(coengBefore).append(base);
            if (specialCaseBA) {
                result.append(vowelAfter);
            }
            result.append(robat).append(shifter1).append(coeng1).append(coeng2).append(shifter2)
                    .append(vowelBelow).append(vowelAbove);
            if (!specialCaseBA) {
                result.append(vowelAfter);
            }
            result.append(signAbove).append(signAfter).append(reserved);
        }

        return result.toString();
    }
}