1 /* 2 * Licensed to the Apache Software Foundation (ASF) under one or more 3 * contributor license agreements. See the NOTICE file distributed with 4 * this work for additional information regarding copyright ownership. 5 * The ASF licenses this file to You under the Apache License, Version 2.0 6 * (the "License"); you may not use this file except in compliance with 7 * the License. You may obtain a copy of the License at 8 * 9 * http://www.apache.org/licenses/LICENSE-2.0 10 * 11 * Unless required by applicable law or agreed to in writing, software 12 * distributed under the License is distributed on an "AS IS" BASIS, 13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 * See the License for the specific language governing permissions and 15 * limitations under the License. 16 */ 17 18 /* $Id$ */ 19 20 package org.apache.fop.complexscripts.scripts; 21 22 import java.lang.reflect.Constructor; 23 import java.lang.reflect.InvocationTargetException; 24 import java.util.Collections; 25 import java.util.HashMap; 26 import java.util.HashSet; 27 import java.util.Map; 28 import java.util.Set; 29 import java.util.Vector; 30 31 import org.apache.commons.logging.Log; 32 import org.apache.commons.logging.LogFactory; 33 34 import org.apache.fop.complexscripts.fonts.GlyphTable; 35 import org.apache.fop.complexscripts.util.CharAssociation; 36 import org.apache.fop.complexscripts.util.CharScript; 37 import org.apache.fop.complexscripts.util.GlyphContextTester; 38 import org.apache.fop.complexscripts.util.GlyphSequence; 39 import org.apache.fop.complexscripts.util.ScriptContextTester; 40 41 // CSOFF: LineLengthCheck 42 43 /** 44 * <p>The <code>IndicScriptProcessor</code> class implements a script processor for 45 * performing glyph substitution and positioning operations on content associated with the Indic script.</p> 46 * 47 * <p>This work was originally authored by Glenn Adams (gadams@apache.org).</p> 48 */ 49 public class IndicScriptProcessor extends DefaultScriptProcessor { 50 51 /** logging instance */ 52 private static final Log log = LogFactory.getLog(IndicScriptProcessor.class); 53 54 /** required features to use for substitutions */ 55 private static final String[] GSUB_REQ_FEATURES = 56 { 57 "abvf", // above base forms 58 "abvs", // above base substitutions 59 "akhn", // akhand 60 "blwf", // below base forms 61 "blws", // below base substitutions 62 "ccmp", // glyph composition/decomposition 63 "cjct", // conjunct forms 64 "clig", // contextual ligatures 65 "half", // half forms 66 "haln", // halant forms 67 "locl", // localized forms 68 "nukt", // nukta forms 69 "pref", // pre-base forms 70 "pres", // pre-base substitutions 71 "pstf", // post-base forms 72 "psts", // post-base substitutions 73 "rkrf", // rakar forms 74 "rphf", // reph form 75 "vatu" // vattu variants 76 }; 77 78 /** optional features to use for substitutions */ 79 private static final String[] GSUB_OPT_FEATURES = 80 { 81 "afrc", // alternative fractions 82 "calt", // contextual alternatives 83 "dlig" // discretionary ligatures 84 }; 85 86 /** required features to use for positioning */ 87 private static final String[] GPOS_REQ_FEATURES = 88 { 89 "abvm", // above base marks 90 "blwm", // below base marks 91 "dist", // distance (adjustment) 92 "kern" // kerning 93 }; 94 95 /** required features to use for positioning */ 96 private static final String[] GPOS_OPT_FEATURES = 97 { 98 }; 99 100 private static class SubstitutionScriptContextTester implements ScriptContextTester { 101 private static Map<String, GlyphContextTester> testerMap = new HashMap<String, GlyphContextTester>(); getTester(String feature)102 public GlyphContextTester getTester(String feature) { 103 return testerMap.get(feature); 104 } 105 } 106 107 private static class PositioningScriptContextTester implements ScriptContextTester { 108 private static Map<String, GlyphContextTester> testerMap = new HashMap<String, GlyphContextTester>(); getTester(String feature)109 public GlyphContextTester getTester(String feature) { 110 return testerMap.get(feature); 111 } 112 } 113 114 /** 115 * Make script specific flavor of Indic script processor. 116 * @param script tag 117 * @return script processor instance 118 */ makeProcessor(String script)119 public static ScriptProcessor makeProcessor(String script) { 120 switch (CharScript.scriptCodeFromTag(script)) { 121 case CharScript.SCRIPT_DEVANAGARI: 122 case CharScript.SCRIPT_DEVANAGARI_2: 123 return new DevanagariScriptProcessor(script); 124 case CharScript.SCRIPT_GUJARATI: 125 case CharScript.SCRIPT_GUJARATI_2: 126 return new GujaratiScriptProcessor(script); 127 case CharScript.SCRIPT_GURMUKHI: 128 case CharScript.SCRIPT_GURMUKHI_2: 129 return new GurmukhiScriptProcessor(script); 130 case CharScript.SCRIPT_TAMIL: 131 case CharScript.SCRIPT_TAMIL_2: 132 return new TamilScriptProcessor(script); 133 case CharScript.SCRIPT_KHMER: 134 return new KhmerScriptProcessor(script); 135 // [TBD] implement other script processors 136 default: 137 return new IndicScriptProcessor(script); 138 } 139 } 140 141 private final ScriptContextTester subContextTester; 142 private final ScriptContextTester posContextTester; 143 IndicScriptProcessor(String script)144 IndicScriptProcessor(String script) { 145 super(script); 146 this.subContextTester = new SubstitutionScriptContextTester(); 147 this.posContextTester = new PositioningScriptContextTester(); 148 } 149 150 /** {@inheritDoc} */ getSubstitutionFeatures()151 public String[] getSubstitutionFeatures() { 152 return GSUB_REQ_FEATURES; 153 } 154 155 /** {@inheritDoc} */ getOptionalSubstitutionFeatures()156 public String[] getOptionalSubstitutionFeatures() { 157 return GSUB_OPT_FEATURES; 158 } 159 160 /** {@inheritDoc} */ getSubstitutionContextTester()161 public ScriptContextTester getSubstitutionContextTester() { 162 return subContextTester; 163 } 164 165 /** {@inheritDoc} */ getPositioningFeatures()166 public String[] getPositioningFeatures() { 167 return GPOS_REQ_FEATURES; 168 } 169 170 /** {@inheritDoc} */ getOptionalPositioningFeatures()171 public String[] getOptionalPositioningFeatures() { 172 return GPOS_OPT_FEATURES; 173 } 174 175 /** {@inheritDoc} */ getPositioningContextTester()176 public ScriptContextTester getPositioningContextTester() { 177 return posContextTester; 178 } 179 180 /** {@inheritDoc} */ 181 @Override substitute(GlyphSequence gs, String script, String language, GlyphTable.UseSpec[] usa, ScriptContextTester sct)182 public GlyphSequence substitute(GlyphSequence gs, String script, String language, GlyphTable.UseSpec[] usa, ScriptContextTester sct) { 183 assert usa != null; 184 // 1. syllabize 185 GlyphSequence[] sa = syllabize(gs, script, language); 186 // 2. process each syllable 187 for (int i = 0, n = sa.length; i < n; i++) { 188 GlyphSequence s = sa [ i ]; 189 // apply basic shaping subs 190 for (GlyphTable.UseSpec us : usa) { 191 if (isBasicShapingUse(us)) { 192 s.setPredications(true); 193 s = us.substitute(s, script, language, sct); 194 } 195 } 196 // reorder pre-base matra 197 s = reorderPreBaseMatra(s); 198 // reorder reph 199 s = reorderReph(s); 200 // apply presentation subs 201 for (GlyphTable.UseSpec us : usa) { 202 if (isPresentationUse(us)) { 203 s.setPredications(true); 204 s = us.substitute(s, script, language, sct); 205 } 206 } 207 // record result 208 sa [ i ] = s; 209 } 210 // 3. return reassembled substituted syllables 211 return unsyllabize(gs, sa); 212 } 213 214 /** 215 * Get script specific syllabizer class. 216 * @return a syllabizer class object or null 217 */ getSyllabizerClass()218 protected Class<? extends Syllabizer> getSyllabizerClass() { 219 return null; 220 } 221 syllabize(GlyphSequence gs, String script, String language)222 private GlyphSequence[] syllabize(GlyphSequence gs, String script, String language) { 223 return Syllabizer.getSyllabizer(script, language, getSyllabizerClass()).syllabize(gs); 224 } 225 unsyllabize(GlyphSequence gs, GlyphSequence[] sa)226 private GlyphSequence unsyllabize(GlyphSequence gs, GlyphSequence[] sa) { 227 return GlyphSequence.join(gs, sa); 228 } 229 230 private static Set<String> basicShapingFeatures; 231 private static final String[] BASIC_SHAPING_FEATURE_STRINGS = { 232 "abvf", 233 "akhn", 234 "blwf", 235 "cjct", 236 "half", 237 "locl", 238 "nukt", 239 "pref", 240 "pstf", 241 "rkrf", 242 "rphf", 243 "vatu", 244 "ccmp" 245 }; 246 static { 247 basicShapingFeatures = new HashSet<String>(); Collections.addAll(basicShapingFeatures, BASIC_SHAPING_FEATURE_STRINGS)248 Collections.addAll(basicShapingFeatures, BASIC_SHAPING_FEATURE_STRINGS); 249 } isBasicShapingUse(GlyphTable.UseSpec us)250 private boolean isBasicShapingUse(GlyphTable.UseSpec us) { 251 assert us != null; 252 if (basicShapingFeatures != null) { 253 return basicShapingFeatures.contains(us.getFeature()); 254 } else { 255 return false; 256 } 257 } 258 259 private static Set<String> presentationFeatures; 260 private static final String[] PRESENTATION_FEATURE_STRINGS = { 261 "abvs", 262 "blws", 263 "calt", 264 "haln", 265 "pres", 266 "psts", 267 "clig" 268 }; 269 static { 270 presentationFeatures = new HashSet<String>(); Collections.addAll(presentationFeatures, PRESENTATION_FEATURE_STRINGS)271 Collections.addAll(presentationFeatures, PRESENTATION_FEATURE_STRINGS); 272 } isPresentationUse(GlyphTable.UseSpec us)273 private boolean isPresentationUse(GlyphTable.UseSpec us) { 274 assert us != null; 275 if (presentationFeatures != null) { 276 return presentationFeatures.contains(us.getFeature()); 277 } else { 278 return false; 279 } 280 } 281 reorderPreBaseMatra(GlyphSequence gs)282 private GlyphSequence reorderPreBaseMatra(GlyphSequence gs) { 283 int source; 284 if ((source = findPreBaseMatra(gs)) >= 0) { 285 int target; 286 if ((target = findPreBaseMatraTarget(gs, source)) >= 0) { 287 if (target != source) { 288 gs = reorder(gs, source, target); 289 } 290 } 291 } 292 return gs; 293 } 294 295 /** 296 * Find pre-base matra in sequence. 297 * @param gs input sequence 298 * @return index of pre-base matra or -1 if not found 299 */ findPreBaseMatra(GlyphSequence gs)300 protected int findPreBaseMatra(GlyphSequence gs) { 301 return -1; 302 } 303 304 /** 305 * Find pre-base matra target in sequence. 306 * @param gs input sequence 307 * @param source index of pre-base matra 308 * @return index of pre-base matra target or -1 309 */ findPreBaseMatraTarget(GlyphSequence gs, int source)310 protected int findPreBaseMatraTarget(GlyphSequence gs, int source) { 311 return -1; 312 } 313 reorderReph(GlyphSequence gs)314 private GlyphSequence reorderReph(GlyphSequence gs) { 315 int source; 316 if ((source = findReph(gs)) >= 0) { 317 int target; 318 if ((target = findRephTarget(gs, source)) >= 0) { 319 if (target != source) { 320 gs = reorder(gs, source, target); 321 } 322 } 323 } 324 return gs; 325 } 326 327 /** 328 * Find reph in sequence. 329 * @param gs input sequence 330 * @return index of reph or -1 if not found 331 */ findReph(GlyphSequence gs)332 protected int findReph(GlyphSequence gs) { 333 return -1; 334 } 335 336 /** 337 * Find reph target in sequence. 338 * @param gs input sequence 339 * @param source index of reph 340 * @return index of reph target or -1 341 */ findRephTarget(GlyphSequence gs, int source)342 protected int findRephTarget(GlyphSequence gs, int source) { 343 return -1; 344 } 345 reorder(GlyphSequence gs, int source, int target)346 private GlyphSequence reorder(GlyphSequence gs, int source, int target) { 347 return GlyphSequence.reorder(gs, source, 1, target); 348 } 349 350 /** {@inheritDoc} */ 351 @Override position(GlyphSequence gs, String script, String language, int fontSize, GlyphTable.UseSpec[] usa, int[] widths, int[][] adjustments, ScriptContextTester sct)352 public boolean position(GlyphSequence gs, String script, String language, int fontSize, GlyphTable.UseSpec[] usa, int[] widths, int[][] adjustments, ScriptContextTester sct) { 353 boolean adjusted = super.position(gs, script, language, fontSize, usa, widths, adjustments, sct); 354 return adjusted; 355 } 356 357 /** Abstract syllabizer. */ 358 protected abstract static class Syllabizer implements Comparable { 359 private String script; 360 private String language; Syllabizer(String script, String language)361 Syllabizer(String script, String language) { 362 this.script = script; 363 this.language = language; 364 } 365 /** 366 * Subdivide glyph sequence GS into syllabic segments each represented by a distinct 367 * output glyph sequence. 368 * @param gs input glyph sequence 369 * @return segmented syllabic glyph sequences 370 */ syllabize(GlyphSequence gs)371 abstract GlyphSequence[] syllabize(GlyphSequence gs); 372 /** {@inheritDoc} */ hashCode()373 public int hashCode() { 374 int hc = 0; 375 hc = 7 * hc + (hc ^ script.hashCode()); 376 hc = 11 * hc + (hc ^ language.hashCode()); 377 return hc; 378 } 379 /** {@inheritDoc} */ equals(Object o)380 public boolean equals(Object o) { 381 if (o instanceof Syllabizer) { 382 Syllabizer s = (Syllabizer) o; 383 if (!s.script.equals(script)) { 384 return false; 385 } else { 386 return s.language.equals(language); 387 } 388 } else { 389 return false; 390 } 391 } 392 /** {@inheritDoc} */ compareTo(Object o)393 public int compareTo(Object o) { 394 int d; 395 if (o instanceof Syllabizer) { 396 Syllabizer s = (Syllabizer) o; 397 if ((d = script.compareTo(s.script)) == 0) { 398 d = language.compareTo(s.language); 399 } 400 } else { 401 d = -1; 402 } 403 return d; 404 } 405 private static Map<String, Syllabizer> syllabizers = new HashMap<String, Syllabizer>(); getSyllabizer(String script, String language, Class<? extends Syllabizer> syllabizerClass)406 static Syllabizer getSyllabizer(String script, String language, Class<? extends Syllabizer> syllabizerClass) { 407 String sid = makeSyllabizerId(script, language); 408 Syllabizer s = syllabizers.get(sid); 409 if (s == null) { 410 if ((syllabizerClass == null) || ((s = makeSyllabizer(script, language, syllabizerClass)) == null)) { 411 log.warn("No syllabizer available for script '" + script + "', language '" + language + "', using default Indic syllabizer."); 412 s = new DefaultSyllabizer(script, language); 413 } 414 syllabizers.put(sid, s); 415 } 416 return s; 417 } makeSyllabizerId(String script, String language)418 static String makeSyllabizerId(String script, String language) { 419 return script + ":" + language; 420 } makeSyllabizer(String script, String language, Class<? extends Syllabizer> syllabizerClass)421 static Syllabizer makeSyllabizer(String script, String language, Class<? extends Syllabizer> syllabizerClass) { 422 Syllabizer s; 423 try { 424 Constructor<? extends Syllabizer> cf = syllabizerClass.getDeclaredConstructor(new Class[] { String.class, String.class }); 425 s = (Syllabizer) cf.newInstance(script, language); 426 } catch (NoSuchMethodException e) { 427 s = null; 428 } catch (InstantiationException e) { 429 s = null; 430 } catch (IllegalAccessException e) { 431 s = null; 432 } catch (InvocationTargetException e) { 433 s = null; 434 } 435 return s; 436 } 437 } 438 439 /** Default syllabizer. */ 440 protected static class DefaultSyllabizer extends Syllabizer { DefaultSyllabizer(String script, String language)441 DefaultSyllabizer(String script, String language) { 442 super(script, language); 443 } 444 /** {@inheritDoc} */ 445 @Override syllabize(GlyphSequence gs)446 GlyphSequence[] syllabize(GlyphSequence gs) { 447 int[] ca = gs.getCharacterArray(false); 448 int nc = gs.getCharacterCount(); 449 if (nc == 0) { 450 return new GlyphSequence[] { gs }; 451 } else { 452 return segmentize(gs, segmentize(ca, nc)); 453 } 454 } 455 /** 456 * Construct array of segements from original character array (associated with original glyph sequence) 457 * @param ca input character sequence 458 * @param nc number of characters in sequence 459 * @return array of syllable segments 460 */ segmentize(int[] ca, int nc)461 protected Segment[] segmentize(int[] ca, int nc) { 462 Vector<Segment> sv = new Vector<Segment>(nc); 463 for (int s = 0, e = nc; s < e; ) { 464 int i; 465 if ((i = findStartOfSyllable(ca, s, e)) < e) { 466 if (s < i) { 467 // from s to i is non-syllable segment 468 sv.add(new Segment(s, i, Segment.OTHER)); 469 } 470 s = i; // move s to start of syllable 471 } else { 472 if (s < e) { 473 // from s to e is non-syllable segment 474 sv.add(new Segment(s, e, Segment.OTHER)); 475 } 476 s = e; // move s to end of input sequence 477 } 478 if ((i = findEndOfSyllable(ca, s, e)) > s) { 479 if (s < i) { 480 // from s to i is syllable segment 481 sv.add(new Segment(s, i, Segment.SYLLABLE)); 482 } 483 s = i; // move s to end of syllable 484 } else { 485 if (s < e) { 486 // from s to e is non-syllable segment 487 sv.add(new Segment(s, e, Segment.OTHER)); 488 } 489 s = e; // move s to end of input sequence 490 } 491 } 492 return sv.toArray(new Segment [ sv.size() ]); 493 } 494 /** 495 * Construct array of glyph sequences from original glyph sequence and segment array. 496 * @param gs original input glyph sequence 497 * @param sa segment array 498 * @return array of glyph sequences each belonging to an (ordered) segment in SA 499 */ segmentize(GlyphSequence gs, Segment[] sa)500 protected GlyphSequence[] segmentize(GlyphSequence gs, Segment[] sa) { 501 int ng = gs.getGlyphCount(); 502 int[] ga = gs.getGlyphArray(false); 503 CharAssociation[] aa = gs.getAssociations(0, -1); 504 Vector<GlyphSequence> nsv = new Vector<GlyphSequence>(); 505 for (Segment s : sa) { 506 Vector<Integer> ngv = new Vector<Integer>(ng); 507 Vector<CharAssociation> nav = new Vector<CharAssociation>(ng); 508 for (int j = 0; j < ng; j++) { 509 CharAssociation ca = aa[j]; 510 if (ca.contained(s.getOffset(), s.getCount())) { 511 ngv.add(ga[j]); 512 nav.add(ca); 513 } 514 } 515 if (ngv.size() > 0) { 516 nsv.add(new GlyphSequence(gs, null, toIntArray(ngv), null, null, nav.toArray(new CharAssociation[nav.size()]), null)); 517 } 518 } 519 if (nsv.size() > 0) { 520 return nsv.toArray(new GlyphSequence [ nsv.size() ]); 521 } else { 522 return new GlyphSequence[] { gs }; 523 } 524 } 525 /** 526 * Find start of syllable in character array, starting at S, ending at E. 527 * @param ca character array 528 * @param s start index 529 * @param e end index 530 * @return index of start or E if no start found 531 */ findStartOfSyllable(int[] ca, int s, int e)532 protected int findStartOfSyllable(int[] ca, int s, int e) { 533 return e; 534 } 535 /** 536 * Find end of syllable in character array, starting at S, ending at E. 537 * @param ca character array 538 * @param s start index 539 * @param e end index 540 * @return index of start or S if no end found 541 */ findEndOfSyllable(int[] ca, int s, int e)542 protected int findEndOfSyllable(int[] ca, int s, int e) { 543 return s; 544 } toIntArray(Vector<Integer> iv)545 private static int[] toIntArray(Vector<Integer> iv) { 546 int ni = iv.size(); 547 int[] ia = new int [ iv.size() ]; 548 for (int i = 0, n = ni; i < n; i++) { 549 ia [ i ] = (int) iv.get(i); 550 } 551 return ia; 552 } 553 } 554 555 /** Syllabic segment. */ 556 protected static class Segment { 557 558 static final int OTHER = 0; // other (non-syllable) characters 559 static final int SYLLABLE = 1; // (orthographic) syllable 560 561 private int start; 562 private int end; 563 private int type; 564 Segment(int start, int end, int type)565 Segment(int start, int end, int type) { 566 this.start = start; 567 this.end = end; 568 this.type = type; 569 } 570 getStart()571 int getStart() { 572 return start; 573 } 574 getEnd()575 int getEnd() { 576 return end; 577 } 578 getOffset()579 int getOffset() { 580 return start; 581 } 582 getCount()583 int getCount() { 584 return end - start; 585 } 586 getType()587 int getType() { 588 return type; 589 } 590 } 591 } 592