1 /* 2 * Licensed to the Apache Software Foundation (ASF) under one or more 3 * contributor license agreements. See the NOTICE file distributed with 4 * this work for additional information regarding copyright ownership. 5 * The ASF licenses this file to You under the Apache License, Version 2.0 6 * (the "License"); you may not use this file except in compliance with 7 * the License. You may obtain a copy of the License at 8 * 9 * http://www.apache.org/licenses/LICENSE-2.0 10 * 11 * Unless required by applicable law or agreed to in writing, software 12 * distributed under the License is distributed on an "AS IS" BASIS, 13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 * See the License for the specific language governing permissions and 15 * limitations under the License. 16 */ 17 18 /* $Id$ */ 19 20 package org.apache.fop.complexscripts.scripts; 21 22 import org.apache.commons.logging.Log; 23 import org.apache.commons.logging.LogFactory; 24 25 import org.apache.fop.complexscripts.util.CharAssociation; 26 import org.apache.fop.complexscripts.util.GlyphSequence; 27 28 // CSOFF: LineLengthCheck 29 30 /** 31 * <p>The <code>TamilScriptProcessor</code> class implements a script processor for 32 * performing glyph substitution and positioning operations on content associated with the Tamil script.</p> 33 * 34 * <p>This work was originally authored by Glenn Adams (gadams@apache.org).</p> 35 */ 36 public class TamilScriptProcessor extends IndicScriptProcessor { 37 38 /** logging instance */ 39 private static final Log log = LogFactory.getLog(TamilScriptProcessor.class); 40 TamilScriptProcessor(String script)41 TamilScriptProcessor(String script) { 42 super(script); 43 } 44 45 @Override getSyllabizerClass()46 protected Class<? extends TamilSyllabizer> getSyllabizerClass() { 47 return TamilSyllabizer.class; 48 } 49 50 @Override 51 // find rightmost pre-base matra findPreBaseMatra(GlyphSequence gs)52 protected int findPreBaseMatra(GlyphSequence gs) { 53 int ng = gs.getGlyphCount(); 54 int lk = -1; 55 for (int i = ng; i > 0; i--) { 56 int k = i - 1; 57 if (containsPreBaseMatra(gs, k)) { 58 lk = k; 59 break; 60 } 61 } 62 return lk; 63 } 64 65 @Override 66 // find leftmost pre-base matra target, starting from source findPreBaseMatraTarget(GlyphSequence gs, int source)67 protected int findPreBaseMatraTarget(GlyphSequence gs, int source) { 68 int ng = gs.getGlyphCount(); 69 int lk = -1; 70 for (int i = (source < ng) ? source : ng; i > 0; i--) { 71 int k = i - 1; 72 if (containsConsonant(gs, k)) { 73 if (containsHalfConsonant(gs, k)) { 74 lk = k; 75 } else if (lk == -1) { 76 lk = k; 77 } else { 78 break; 79 } 80 } 81 } 82 return lk; 83 } 84 containsPreBaseMatra(GlyphSequence gs, int k)85 private static boolean containsPreBaseMatra(GlyphSequence gs, int k) { 86 CharAssociation a = gs.getAssociation(k); 87 int[] ca = gs.getCharacterArray(false); 88 for (int i = a.getStart(), e = a.getEnd(); i < e; i++) { 89 if (isPreM(ca [ i ])) { 90 return true; 91 } 92 } 93 return false; 94 } 95 containsConsonant(GlyphSequence gs, int k)96 private static boolean containsConsonant(GlyphSequence gs, int k) { 97 CharAssociation a = gs.getAssociation(k); 98 int[] ca = gs.getCharacterArray(false); 99 for (int i = a.getStart(), e = a.getEnd(); i < e; i++) { 100 if (isC(ca [ i ])) { 101 return true; 102 } 103 } 104 return false; 105 } 106 containsHalfConsonant(GlyphSequence gs, int k)107 private static boolean containsHalfConsonant(GlyphSequence gs, int k) { 108 Boolean half = (Boolean) gs.getAssociation(k).getPredication("half"); 109 return (half != null) ? half : false; 110 } 111 112 @Override findReph(GlyphSequence gs)113 protected int findReph(GlyphSequence gs) { 114 int ng = gs.getGlyphCount(); 115 int li = -1; 116 for (int i = 0; i < ng; i++) { 117 if (containsReph(gs, i)) { 118 li = i; 119 break; 120 } 121 } 122 return li; 123 } 124 125 @Override findRephTarget(GlyphSequence gs, int source)126 protected int findRephTarget(GlyphSequence gs, int source) { 127 int ng = gs.getGlyphCount(); 128 int c1 = -1; 129 int c2 = -1; 130 // first candidate target is after first non-half consonant 131 for (int i = 0; i < ng; i++) { 132 if ((i != source) && containsConsonant(gs, i)) { 133 if (!containsHalfConsonant(gs, i)) { 134 c1 = i + 1; 135 break; 136 } 137 } 138 } 139 // second candidate target is after last non-prebase matra after first candidate or before first syllable or vedic mark 140 for (int i = (c1 >= 0) ? c1 : 0; i < ng; i++) { 141 if (containsMatra(gs, i) && !containsPreBaseMatra(gs, i)) { 142 c2 = i + 1; 143 } else if (containsOtherMark(gs, i)) { 144 c2 = i; 145 break; 146 } 147 } 148 if (c2 >= 0) { 149 return c2; 150 } else if (c1 >= 0) { 151 return c1; 152 } else { 153 return source; 154 } 155 } 156 containsReph(GlyphSequence gs, int k)157 private static boolean containsReph(GlyphSequence gs, int k) { 158 Boolean rphf = (Boolean) gs.getAssociation(k).getPredication("rphf"); 159 return (rphf != null) ? rphf : false; 160 } 161 containsMatra(GlyphSequence gs, int k)162 private static boolean containsMatra(GlyphSequence gs, int k) { 163 CharAssociation a = gs.getAssociation(k); 164 int[] ca = gs.getCharacterArray(false); 165 for (int i = a.getStart(), e = a.getEnd(); i < e; i++) { 166 if (isM(ca [ i ])) { 167 return true; 168 } 169 } 170 return false; 171 } 172 containsOtherMark(GlyphSequence gs, int k)173 private static boolean containsOtherMark(GlyphSequence gs, int k) { 174 CharAssociation a = gs.getAssociation(k); 175 int[] ca = gs.getCharacterArray(false); 176 for (int i = a.getStart(), e = a.getEnd(); i < e; i++) { 177 switch (typeOf(ca [ i ])) { 178 case C_T: // tone (e.g., udatta, anudatta) 179 case C_A: // accent (e.g., acute, grave) 180 case C_O: // other (e.g., candrabindu, anusvara, visarga, etc) 181 return true; 182 default: 183 break; 184 } 185 } 186 return false; 187 } 188 189 private static class TamilSyllabizer extends DefaultSyllabizer { TamilSyllabizer(String script, String language)190 TamilSyllabizer(String script, String language) { 191 super(script, language); 192 } 193 @Override 194 // | C ... findStartOfSyllable(int[] ca, int s, int e)195 protected int findStartOfSyllable(int[] ca, int s, int e) { 196 if ((s < 0) || (s >= e)) { 197 return -1; 198 } else { 199 while (s < e) { 200 int c = ca [ s ]; 201 if (isC(c)) { 202 break; 203 } else { 204 s++; 205 } 206 } 207 return s; 208 } 209 } 210 @Override 211 // D* L? | ... findEndOfSyllable(int[] ca, int s, int e)212 protected int findEndOfSyllable(int[] ca, int s, int e) { 213 if ((s < 0) || (s >= e)) { 214 return -1; 215 } else { 216 int nd = 0; 217 int nl = 0; 218 int i; 219 // consume dead consonants 220 while ((i = isDeadConsonant(ca, s, e)) > s) { 221 s = i; 222 nd++; 223 } 224 // consume zero or one live consonant 225 if ((i = isLiveConsonant(ca, s, e)) > s) { 226 s = i; 227 nl++; 228 } 229 return ((nd > 0) || (nl > 0)) ? s : -1; 230 } 231 } 232 // D := ( C N? H )? isDeadConsonant(int[] ca, int s, int e)233 private int isDeadConsonant(int[] ca, int s, int e) { 234 if (s < 0) { 235 return -1; 236 } else { 237 int c; 238 int i = 0; 239 int nc = 0; 240 int nh = 0; 241 do { 242 // C 243 if ((s + i) < e) { 244 c = ca [ s + i ]; 245 if (isC(c)) { 246 i++; 247 nc++; 248 } else { 249 break; 250 } 251 } 252 // N? 253 if ((s + i) < e) { 254 c = ca [ s + 1 ]; 255 if (isN(c)) { 256 i++; 257 } 258 } 259 // H 260 if ((s + i) < e) { 261 c = ca [ s + i ]; 262 if (isH(c)) { 263 i++; 264 nh++; 265 } else { 266 break; 267 } 268 } 269 } while (false); 270 return (nc > 0) && (nh > 0) ? s + i : -1; 271 } 272 } 273 // L := ( (C|V) N? X* )?; where X = ( MATRA | ACCENT MARK | TONE MARK | OTHER MARK ) isLiveConsonant(int[] ca, int s, int e)274 private int isLiveConsonant(int[] ca, int s, int e) { 275 if (s < 0) { 276 return -1; 277 } else { 278 int c; 279 int i = 0; 280 int nc = 0; 281 int nv = 0; 282 int nx = 0; 283 do { 284 // C 285 if ((s + i) < e) { 286 c = ca [ s + i ]; 287 if (isC(c)) { 288 i++; 289 nc++; 290 } else if (isV(c)) { 291 i++; 292 nv++; 293 } else { 294 break; 295 } 296 } 297 // N? 298 if ((s + i) < e) { 299 c = ca [ s + i ]; 300 if (isN(c)) { 301 i++; 302 } 303 } 304 // X* 305 while ((s + i) < e) { 306 c = ca [ s + i ]; 307 if (isX(c)) { 308 i++; 309 nx++; 310 } else { 311 break; 312 } 313 } 314 } while (false); 315 // if no X but has H, then ignore C|I 316 if (nx == 0) { 317 if ((s + i) < e) { 318 c = ca [ s + i ]; 319 if (isH(c)) { 320 if (nc > 0) { 321 nc--; 322 } else if (nv > 0) { 323 nv--; 324 } 325 } 326 } 327 } 328 return ((nc > 0) || (nv > 0)) ? s + i : -1; 329 } 330 } 331 } 332 333 // tamil character types 334 static final short C_U = 0; // unassigned 335 static final short C_C = 1; // consonant 336 static final short C_V = 2; // vowel 337 static final short C_M = 3; // vowel sign (matra) 338 static final short C_S = 4; // symbol or sign 339 static final short C_T = 5; // tone mark 340 static final short C_A = 6; // accent mark 341 static final short C_P = 7; // punctuation 342 static final short C_D = 8; // digit 343 static final short C_H = 9; // halant (virama) 344 static final short C_O = 10; // other signs 345 static final short C_N = 0x0100; // nukta(ized) 346 static final short C_R = 0x0200; // reph(ized) 347 static final short C_PRE = 0x0400; // pre-base 348 static final short C_POST = 0x1000; // post-base 349 static final short C_WRAP = C_PRE | C_POST; // wrap (two part) vowel 350 static final short C_M_TYPE = 0x00FF; // type mask 351 static final short C_M_FLAGS = 0x7F00; // flag mask 352 // tamil block range 353 static final int CCA_START = 0x0B80; // first code point mapped by cca 354 static final int CCA_END = 0x0C00; // last code point + 1 mapped by cca 355 // tamil character type lookups 356 static final short[] CCA = { 357 C_U, // 0x0B80 // 358 C_U, // 0x0B81 // 359 C_O, // 0x0B82 // ANUSVARA 360 C_O, // 0x0B83 // VISARGA 361 C_U, // 0x0B84 // 362 C_V, // 0x0B85 // A 363 C_V, // 0x0B86 // AA 364 C_V, // 0x0B87 // I 365 C_V, // 0x0B88 // II 366 C_V, // 0x0B89 // U 367 C_V, // 0x0B8A // UU 368 C_U, // 0x0B8B // 369 C_U, // 0x0B8C // 370 C_U, // 0x0B8D // 371 C_V, // 0x0B8E // E 372 C_V, // 0x0B8F // EE 373 C_V, // 0x0B90 // AI 374 C_U, // 0x0B91 // 375 C_V, // 0x0B92 // O 376 C_V, // 0x0B93 // OO 377 C_V, // 0x0B94 // AU 378 C_C, // 0x0B95 // KA 379 C_U, // 0x0B96 // 380 C_U, // 0x0B97 // 381 C_U, // 0x0B98 // 382 C_C, // 0x0B99 // NGA 383 C_C, // 0x0B9A // CA 384 C_U, // 0x0B9B // 385 C_C, // 0x0B9C // JA 386 C_U, // 0x0B9D // 387 C_C, // 0x0B9E // NYA 388 C_C, // 0x0B9F // TTA 389 C_U, // 0x0BA0 // 390 C_U, // 0x0BA1 // 391 C_U, // 0x0BA2 // 392 C_C, // 0x0BA3 // NNA 393 C_C, // 0x0BA4 // TA 394 C_U, // 0x0BA5 // 395 C_U, // 0x0BA6 // 396 C_U, // 0x0BA7 // 397 C_C, // 0x0BA8 // NA 398 C_C, // 0x0BA9 // NNNA 399 C_C, // 0x0BAA // PA 400 C_U, // 0x0BAB // 401 C_U, // 0x0BAC // 402 C_U, // 0x0BAD // 403 C_C, // 0x0BAE // MA 404 C_C, // 0x0BAF // YA 405 C_C | C_R, // 0x0BB0 // RA 406 C_C | C_R, // 0x0BB1 // RRA 407 C_C, // 0x0BB2 // LA 408 C_C, // 0x0BB3 // LLA 409 C_C, // 0x0BB4 // LLLA 410 C_C, // 0x0BB5 // VA 411 C_C, // 0x0BB6 // SHA 412 C_C, // 0x0BB7 // SSA 413 C_C, // 0x0BB8 // SA 414 C_C, // 0x0BB9 // HA 415 C_U, // 0x0BBA // 416 C_U, // 0x0BBB // 417 C_U, // 0x0BBC // 418 C_U, // 0x0BBD // 419 C_M, // 0x0BBE // AA 420 C_M, // 0x0BBF // I 421 C_M, // 0x0BC0 // II 422 C_M, // 0x0BC1 // U 423 C_M, // 0x0BC2 // UU 424 C_U, // 0x0BC3 // 425 C_U, // 0x0BC4 // 426 C_U, // 0x0BC5 // 427 C_M | C_PRE, // 0x0BC6 // E 428 C_M | C_PRE, // 0x0BC7 // EE 429 C_M | C_PRE, // 0x0BC8 // AI 430 C_U, // 0x0BC9 // 431 C_M | C_WRAP, // 0x0BCA // O 432 C_M | C_WRAP, // 0x0BCB // OO 433 C_M | C_WRAP, // 0x0BCC // AU 434 C_H, // 0x0BCD // VIRAMA (HALANT) 435 C_U, // 0x0BCE // 436 C_U, // 0x0BCF // 437 C_S, // 0x0BD0 // OM 438 C_U, // 0x0BD1 // 439 C_U, // 0x0BD2 // 440 C_U, // 0x0BD3 // 441 C_U, // 0x0BD4 // 442 C_U, // 0x0BD5 // 443 C_U, // 0x0BD6 // 444 C_M, // 0x0BD7 // AU LENGTH MARK 445 C_U, // 0x0BD8 // 446 C_U, // 0x0BD9 // 447 C_U, // 0x0BDA // 448 C_U, // 0x0BDB // 449 C_U, // 0x0BDC // 450 C_U, // 0x0BDD // 451 C_U, // 0x0BDE // 452 C_U, // 0x0BDF // 453 C_U, // 0x0BE0 // 454 C_U, // 0x0BE1 // 455 C_U, // 0x0BE2 // 456 C_U, // 0x0BE3 // 457 C_U, // 0x0BE4 // 458 C_U, // 0x0BE5 // 459 C_D, // 0x0BE6 // ZERO 460 C_D, // 0x0BE7 // ONE 461 C_D, // 0x0BE8 // TWO 462 C_D, // 0x0BE9 // THREE 463 C_D, // 0x0BEA // FOUR 464 C_D, // 0x0BEB // FIVE 465 C_D, // 0x0BEC // SIX 466 C_D, // 0x0BED // SEVEN 467 C_D, // 0x0BEE // EIGHT 468 C_D, // 0x0BEF // NINE 469 C_S, // 0x0BF0 // TEN 470 C_S, // 0x0BF1 // ONE HUNDRED 471 C_S, // 0x0BF2 // ONE THOUSAND 472 C_S, // 0x0BF3 // DAY SIGN (naal) 473 C_S, // 0x0BF4 // MONTH SIGN (maatham) 474 C_S, // 0x0BF5 // YEAR SIGN (varudam) 475 C_S, // 0x0BF6 // DEBIT SIGN (patru) 476 C_S, // 0x0BF7 // CREDIT SIGN (varavu) 477 C_S, // 0x0BF8 // AS ABOVE SIGN (merpadi) 478 C_S, // 0x0BF9 // RUPEE SIGN (rupai) 479 C_S, // 0x0BFA // NUMBER SIGN (enn) 480 C_U, // 0x0BFB // 481 C_U, // 0x0BFC // 482 C_U, // 0x0BFD // 483 C_U, // 0x0BFE // 484 C_U // 0x0BFF // 485 }; typeOf(int c)486 static int typeOf(int c) { 487 if ((c >= CCA_START) && (c < CCA_END)) { 488 return CCA [ c - CCA_START ] & C_M_TYPE; 489 } else { 490 return C_U; 491 } 492 } isType(int c, int t)493 static boolean isType(int c, int t) { 494 return typeOf(c) == t; 495 } hasFlag(int c, int f)496 static boolean hasFlag(int c, int f) { 497 if ((c >= CCA_START) && (c < CCA_END)) { 498 return (CCA [ c - CCA_START ] & f) == f; 499 } else { 500 return false; 501 } 502 } isC(int c)503 static boolean isC(int c) { 504 return isType(c, C_C); 505 } isR(int c)506 static boolean isR(int c) { 507 return isType(c, C_C) && hasR(c); 508 } isV(int c)509 static boolean isV(int c) { 510 return isType(c, C_V); 511 } isN(int c)512 static boolean isN(int c) { 513 return c == 0x093C; 514 } isH(int c)515 static boolean isH(int c) { 516 return c == 0x094D; 517 } isM(int c)518 static boolean isM(int c) { 519 return isType(c, C_M); 520 } isPreM(int c)521 static boolean isPreM(int c) { 522 return isType(c, C_M) && hasFlag(c, C_PRE); 523 } isX(int c)524 static boolean isX(int c) { 525 switch (typeOf(c)) { 526 case C_M: // matra (combining vowel) 527 case C_A: // accent mark 528 case C_T: // tone mark 529 case C_O: // other (modifying) mark 530 return true; 531 default: 532 return false; 533 } 534 } hasR(int c)535 static boolean hasR(int c) { 536 return hasFlag(c, C_R); 537 } hasN(int c)538 static boolean hasN(int c) { 539 return hasFlag(c, C_N); 540 } 541 542 } 543