1 /*
2  * Licensed to the Apache Software Foundation (ASF) under one or more
3  * contributor license agreements.  See the NOTICE file distributed with
4  * this work for additional information regarding copyright ownership.
5  * The ASF licenses this file to You under the Apache License, Version 2.0
6  * (the "License"); you may not use this file except in compliance with
7  * the License.  You may obtain a copy of the License at
8  *
9  *      http://www.apache.org/licenses/LICENSE-2.0
10  *
11  * Unless required by applicable law or agreed to in writing, software
12  * distributed under the License is distributed on an "AS IS" BASIS,
13  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14  * See the License for the specific language governing permissions and
15  * limitations under the License.
16  */
17 
18 /* $Id$ */
19 
20 package org.apache.fop.complexscripts.scripts;
21 
22 import java.lang.reflect.Constructor;
23 import java.lang.reflect.InvocationTargetException;
24 import java.util.Collections;
25 import java.util.HashMap;
26 import java.util.HashSet;
27 import java.util.Map;
28 import java.util.Set;
29 import java.util.Vector;
30 
31 import org.apache.commons.logging.Log;
32 import org.apache.commons.logging.LogFactory;
33 
34 import org.apache.fop.complexscripts.fonts.GlyphTable;
35 import org.apache.fop.complexscripts.util.CharAssociation;
36 import org.apache.fop.complexscripts.util.CharScript;
37 import org.apache.fop.complexscripts.util.GlyphContextTester;
38 import org.apache.fop.complexscripts.util.GlyphSequence;
39 import org.apache.fop.complexscripts.util.ScriptContextTester;
40 
41 // CSOFF: LineLengthCheck
42 
43 /**
44  * <p>The <code>IndicScriptProcessor</code> class implements a script processor for
45  * performing glyph substitution and positioning operations on content associated with the Indic script.</p>
46  *
47  * <p>This work was originally authored by Glenn Adams (gadams@apache.org).</p>
48  */
49 public class IndicScriptProcessor extends DefaultScriptProcessor {
50 
51     /** logging instance */
52     private static final Log log = LogFactory.getLog(IndicScriptProcessor.class);
53 
54     /** required features to use for substitutions */
55     private static final String[] GSUB_REQ_FEATURES =
56     {
57         "abvf",                                                 // above base forms
58         "abvs",                                                 // above base substitutions
59         "akhn",                                                 // akhand
60         "blwf",                                                 // below base forms
61         "blws",                                                 // below base substitutions
62         "ccmp",                                                 // glyph composition/decomposition
63         "cjct",                                                 // conjunct forms
64         "clig",                                                 // contextual ligatures
65         "half",                                                 // half forms
66         "haln",                                                 // halant forms
67         "locl",                                                 // localized forms
68         "nukt",                                                 // nukta forms
69         "pref",                                                 // pre-base forms
70         "pres",                                                 // pre-base substitutions
71         "pstf",                                                 // post-base forms
72         "psts",                                                 // post-base substitutions
73         "rkrf",                                                 // rakar forms
74         "rphf",                                                 // reph form
75         "vatu"                                                  // vattu variants
76     };
77 
78     /** optional features to use for substitutions */
79     private static final String[] GSUB_OPT_FEATURES =
80     {
81         "afrc",                                                 // alternative fractions
82         "calt",                                                 // contextual alternatives
83         "dlig"                                                  // discretionary ligatures
84     };
85 
86     /** required features to use for positioning */
87     private static final String[] GPOS_REQ_FEATURES =
88     {
89         "abvm",                                                 // above base marks
90         "blwm",                                                 // below base marks
91         "dist",                                                 // distance (adjustment)
92         "kern"                                                  // kerning
93     };
94 
95     /** required features to use for positioning */
96     private static final String[] GPOS_OPT_FEATURES =
97     {
98     };
99 
100     private static class SubstitutionScriptContextTester implements ScriptContextTester {
101         private static Map<String, GlyphContextTester> testerMap = new HashMap<String, GlyphContextTester>();
getTester(String feature)102         public GlyphContextTester getTester(String feature) {
103             return testerMap.get(feature);
104         }
105     }
106 
107     private static class PositioningScriptContextTester implements ScriptContextTester {
108         private static Map<String, GlyphContextTester> testerMap = new HashMap<String, GlyphContextTester>();
getTester(String feature)109         public GlyphContextTester getTester(String feature) {
110             return testerMap.get(feature);
111         }
112     }
113 
114     /**
115      * Make script specific flavor of Indic script processor.
116      * @param script tag
117      * @return script processor instance
118      */
makeProcessor(String script)119     public static ScriptProcessor makeProcessor(String script) {
120         switch (CharScript.scriptCodeFromTag(script)) {
121         case CharScript.SCRIPT_DEVANAGARI:
122         case CharScript.SCRIPT_DEVANAGARI_2:
123             return new DevanagariScriptProcessor(script);
124         case CharScript.SCRIPT_GUJARATI:
125         case CharScript.SCRIPT_GUJARATI_2:
126             return new GujaratiScriptProcessor(script);
127         case CharScript.SCRIPT_GURMUKHI:
128         case CharScript.SCRIPT_GURMUKHI_2:
129             return new GurmukhiScriptProcessor(script);
130         case CharScript.SCRIPT_TAMIL:
131         case CharScript.SCRIPT_TAMIL_2:
132             return new TamilScriptProcessor(script);
133         case CharScript.SCRIPT_KHMER:
134             return new KhmerScriptProcessor(script);
135         // [TBD] implement other script processors
136         default:
137             return new IndicScriptProcessor(script);
138         }
139     }
140 
141     private final ScriptContextTester subContextTester;
142     private final ScriptContextTester posContextTester;
143 
IndicScriptProcessor(String script)144     IndicScriptProcessor(String script) {
145         super(script);
146         this.subContextTester = new SubstitutionScriptContextTester();
147         this.posContextTester = new PositioningScriptContextTester();
148     }
149 
150     /** {@inheritDoc} */
getSubstitutionFeatures()151     public String[] getSubstitutionFeatures() {
152         return GSUB_REQ_FEATURES;
153     }
154 
155     /** {@inheritDoc} */
getOptionalSubstitutionFeatures()156     public String[] getOptionalSubstitutionFeatures() {
157         return GSUB_OPT_FEATURES;
158     }
159 
160     /** {@inheritDoc} */
getSubstitutionContextTester()161     public ScriptContextTester getSubstitutionContextTester() {
162         return subContextTester;
163     }
164 
165     /** {@inheritDoc} */
getPositioningFeatures()166     public String[] getPositioningFeatures() {
167         return GPOS_REQ_FEATURES;
168     }
169 
170     /** {@inheritDoc} */
getOptionalPositioningFeatures()171     public String[] getOptionalPositioningFeatures() {
172         return GPOS_OPT_FEATURES;
173     }
174 
175     /** {@inheritDoc} */
getPositioningContextTester()176     public ScriptContextTester getPositioningContextTester() {
177         return posContextTester;
178     }
179 
180     /** {@inheritDoc} */
181     @Override
substitute(GlyphSequence gs, String script, String language, GlyphTable.UseSpec[] usa, ScriptContextTester sct)182     public GlyphSequence substitute(GlyphSequence gs, String script, String language, GlyphTable.UseSpec[] usa, ScriptContextTester sct) {
183         assert usa != null;
184         // 1. syllabize
185         GlyphSequence[] sa = syllabize(gs, script, language);
186         // 2. process each syllable
187         for (int i = 0, n = sa.length; i < n; i++) {
188             GlyphSequence s = sa [ i ];
189             // apply basic shaping subs
190             for (GlyphTable.UseSpec us : usa) {
191                 if (isBasicShapingUse(us)) {
192                     s.setPredications(true);
193                     s = us.substitute(s, script, language, sct);
194                 }
195             }
196             // reorder pre-base matra
197             s = reorderPreBaseMatra(s);
198             // reorder reph
199             s = reorderReph(s);
200             // apply presentation subs
201             for (GlyphTable.UseSpec us : usa) {
202                 if (isPresentationUse(us)) {
203                     s.setPredications(true);
204                     s = us.substitute(s, script, language, sct);
205                 }
206             }
207             // record result
208             sa [ i ] = s;
209         }
210         // 3. return reassembled substituted syllables
211         return unsyllabize(gs, sa);
212     }
213 
214     /**
215      * Get script specific syllabizer class.
216      * @return a syllabizer class object or null
217      */
getSyllabizerClass()218     protected Class<? extends Syllabizer> getSyllabizerClass() {
219         return null;
220     }
221 
syllabize(GlyphSequence gs, String script, String language)222     private GlyphSequence[] syllabize(GlyphSequence gs, String script, String language) {
223         return Syllabizer.getSyllabizer(script, language, getSyllabizerClass()).syllabize(gs);
224     }
225 
unsyllabize(GlyphSequence gs, GlyphSequence[] sa)226     private GlyphSequence unsyllabize(GlyphSequence gs, GlyphSequence[] sa) {
227         return GlyphSequence.join(gs, sa);
228     }
229 
230     private static Set<String> basicShapingFeatures;
231     private static final String[] BASIC_SHAPING_FEATURE_STRINGS = {
232         "abvf",
233         "akhn",
234         "blwf",
235         "cjct",
236         "half",
237         "locl",
238         "nukt",
239         "pref",
240         "pstf",
241         "rkrf",
242         "rphf",
243         "vatu",
244         "ccmp"
245     };
246     static {
247         basicShapingFeatures = new HashSet<String>();
Collections.addAll(basicShapingFeatures, BASIC_SHAPING_FEATURE_STRINGS)248         Collections.addAll(basicShapingFeatures, BASIC_SHAPING_FEATURE_STRINGS);
249     }
isBasicShapingUse(GlyphTable.UseSpec us)250     private boolean isBasicShapingUse(GlyphTable.UseSpec us) {
251         assert us != null;
252         if (basicShapingFeatures != null) {
253             return basicShapingFeatures.contains(us.getFeature());
254         } else {
255             return false;
256         }
257     }
258 
259     private static  Set<String> presentationFeatures;
260     private static final String[] PRESENTATION_FEATURE_STRINGS = {
261         "abvs",
262         "blws",
263         "calt",
264         "haln",
265         "pres",
266         "psts",
267         "clig"
268     };
269     static {
270         presentationFeatures = new HashSet<String>();
Collections.addAll(presentationFeatures, PRESENTATION_FEATURE_STRINGS)271         Collections.addAll(presentationFeatures, PRESENTATION_FEATURE_STRINGS);
272     }
isPresentationUse(GlyphTable.UseSpec us)273     private boolean isPresentationUse(GlyphTable.UseSpec us) {
274         assert us != null;
275         if (presentationFeatures != null) {
276             return presentationFeatures.contains(us.getFeature());
277         } else {
278             return false;
279         }
280     }
281 
reorderPreBaseMatra(GlyphSequence gs)282     private GlyphSequence reorderPreBaseMatra(GlyphSequence gs) {
283         int source;
284         if ((source = findPreBaseMatra(gs)) >= 0) {
285             int target;
286             if ((target = findPreBaseMatraTarget(gs, source)) >= 0) {
287                 if (target != source) {
288                     gs = reorder(gs, source, target);
289                 }
290             }
291         }
292         return gs;
293     }
294 
295     /**
296      * Find pre-base matra in sequence.
297      * @param gs input sequence
298      * @return index of pre-base matra or -1 if not found
299      */
findPreBaseMatra(GlyphSequence gs)300     protected int findPreBaseMatra(GlyphSequence gs) {
301         return -1;
302     }
303 
304     /**
305      * Find pre-base matra target in sequence.
306      * @param gs input sequence
307      * @param source index of pre-base matra
308      * @return index of pre-base matra target or -1
309      */
findPreBaseMatraTarget(GlyphSequence gs, int source)310     protected int findPreBaseMatraTarget(GlyphSequence gs, int source) {
311         return -1;
312     }
313 
reorderReph(GlyphSequence gs)314     private GlyphSequence reorderReph(GlyphSequence gs) {
315         int source;
316         if ((source = findReph(gs)) >= 0) {
317             int target;
318             if ((target = findRephTarget(gs, source)) >= 0) {
319                 if (target != source) {
320                     gs = reorder(gs, source, target);
321                 }
322             }
323         }
324         return gs;
325     }
326 
327     /**
328      * Find reph in sequence.
329      * @param gs input sequence
330      * @return index of reph or -1 if not found
331      */
findReph(GlyphSequence gs)332     protected int findReph(GlyphSequence gs) {
333         return -1;
334     }
335 
336     /**
337      * Find reph target in sequence.
338      * @param gs input sequence
339      * @param source index of reph
340      * @return index of reph target or -1
341      */
findRephTarget(GlyphSequence gs, int source)342     protected int findRephTarget(GlyphSequence gs, int source) {
343         return -1;
344     }
345 
reorder(GlyphSequence gs, int source, int target)346     private GlyphSequence reorder(GlyphSequence gs, int source, int target) {
347         return GlyphSequence.reorder(gs, source, 1, target);
348     }
349 
350     /** {@inheritDoc} */
351     @Override
position(GlyphSequence gs, String script, String language, int fontSize, GlyphTable.UseSpec[] usa, int[] widths, int[][] adjustments, ScriptContextTester sct)352     public boolean position(GlyphSequence gs, String script, String language, int fontSize, GlyphTable.UseSpec[] usa, int[] widths, int[][] adjustments, ScriptContextTester sct) {
353         boolean adjusted = super.position(gs, script, language, fontSize, usa, widths, adjustments, sct);
354         return adjusted;
355     }
356 
357     /** Abstract syllabizer. */
358     protected abstract static class Syllabizer implements Comparable {
359         private String script;
360         private String language;
Syllabizer(String script, String language)361         Syllabizer(String script, String language) {
362             this.script = script;
363             this.language = language;
364         }
365         /**
366          * Subdivide glyph sequence GS into syllabic segments each represented by a distinct
367          * output glyph sequence.
368          * @param gs input glyph sequence
369          * @return segmented syllabic glyph sequences
370          */
syllabize(GlyphSequence gs)371         abstract GlyphSequence[] syllabize(GlyphSequence gs);
372         /** {@inheritDoc} */
hashCode()373         public int hashCode() {
374             int hc = 0;
375             hc =  7 * hc + (hc ^ script.hashCode());
376             hc = 11 * hc + (hc ^ language.hashCode());
377             return hc;
378         }
379         /** {@inheritDoc} */
equals(Object o)380         public boolean equals(Object o) {
381             if (o instanceof Syllabizer) {
382                 Syllabizer s = (Syllabizer) o;
383                 if (!s.script.equals(script)) {
384                     return false;
385                 } else {
386                     return s.language.equals(language);
387                 }
388             } else {
389                 return false;
390             }
391         }
392         /** {@inheritDoc} */
compareTo(Object o)393         public int compareTo(Object o) {
394             int d;
395             if (o instanceof Syllabizer) {
396                 Syllabizer s = (Syllabizer) o;
397                 if ((d = script.compareTo(s.script)) == 0) {
398                     d = language.compareTo(s.language);
399                 }
400             } else {
401                 d = -1;
402             }
403             return d;
404         }
405         private static Map<String, Syllabizer> syllabizers = new HashMap<String, Syllabizer>();
getSyllabizer(String script, String language, Class<? extends Syllabizer> syllabizerClass)406         static Syllabizer getSyllabizer(String script, String language, Class<? extends Syllabizer> syllabizerClass) {
407             String sid = makeSyllabizerId(script, language);
408             Syllabizer s = syllabizers.get(sid);
409             if (s == null) {
410                 if ((syllabizerClass == null) || ((s = makeSyllabizer(script, language, syllabizerClass)) == null)) {
411                     log.warn("No syllabizer available for script '" + script + "', language '" + language + "', using default Indic syllabizer.");
412                     s = new DefaultSyllabizer(script, language);
413                 }
414                 syllabizers.put(sid, s);
415             }
416             return s;
417         }
makeSyllabizerId(String script, String language)418         static String makeSyllabizerId(String script, String language) {
419             return script + ":" + language;
420         }
makeSyllabizer(String script, String language, Class<? extends Syllabizer> syllabizerClass)421         static Syllabizer makeSyllabizer(String script, String language, Class<? extends Syllabizer> syllabizerClass) {
422             Syllabizer s;
423             try {
424                 Constructor<? extends Syllabizer> cf = syllabizerClass.getDeclaredConstructor(new Class[] { String.class, String.class });
425                 s = (Syllabizer) cf.newInstance(script, language);
426             } catch (NoSuchMethodException e) {
427                 s = null;
428             } catch (InstantiationException e) {
429                 s = null;
430             } catch (IllegalAccessException e) {
431                 s = null;
432             } catch (InvocationTargetException e) {
433                 s = null;
434             }
435             return s;
436         }
437     }
438 
439     /** Default syllabizer. */
440     protected static class DefaultSyllabizer extends Syllabizer {
DefaultSyllabizer(String script, String language)441         DefaultSyllabizer(String script, String language) {
442             super(script, language);
443         }
444         /** {@inheritDoc} */
445         @Override
syllabize(GlyphSequence gs)446         GlyphSequence[] syllabize(GlyphSequence gs) {
447             int[] ca = gs.getCharacterArray(false);
448             int   nc = gs.getCharacterCount();
449             if (nc == 0) {
450                 return new GlyphSequence[] { gs };
451             } else {
452                 return segmentize(gs, segmentize(ca, nc));
453             }
454         }
455         /**
456          * Construct array of segements from original character array (associated with original glyph sequence)
457          * @param ca input character sequence
458          * @param nc number of characters in sequence
459          * @return array of syllable segments
460          */
segmentize(int[] ca, int nc)461         protected Segment[] segmentize(int[] ca, int nc) {
462             Vector<Segment> sv = new Vector<Segment>(nc);
463             for (int s = 0, e = nc; s < e; ) {
464                 int i;
465                 if ((i = findStartOfSyllable(ca, s, e)) < e) {
466                     if (s < i) {
467                         // from s to i is non-syllable segment
468                         sv.add(new Segment(s, i, Segment.OTHER));
469                     }
470                     s = i; // move s to start of syllable
471                 } else {
472                     if (s < e) {
473                         // from s to e is non-syllable segment
474                         sv.add(new Segment(s, e, Segment.OTHER));
475                     }
476                     s = e; // move s to end of input sequence
477                 }
478                 if ((i = findEndOfSyllable(ca, s, e)) > s) {
479                     if (s < i) {
480                         // from s to i is syllable segment
481                         sv.add(new Segment(s, i, Segment.SYLLABLE));
482                     }
483                     s = i; // move s to end of syllable
484                 } else {
485                     if (s < e) {
486                         // from s to e is non-syllable segment
487                         sv.add(new Segment(s, e, Segment.OTHER));
488                     }
489                     s = e; // move s to end of input sequence
490                 }
491             }
492             return sv.toArray(new Segment [ sv.size() ]);
493         }
494         /**
495          * Construct array of glyph sequences from original glyph sequence and segment array.
496          * @param gs original input glyph sequence
497          * @param sa segment array
498          * @return array of glyph sequences each belonging to an (ordered) segment in SA
499          */
segmentize(GlyphSequence gs, Segment[] sa)500         protected GlyphSequence[] segmentize(GlyphSequence gs, Segment[] sa) {
501             int   ng = gs.getGlyphCount();
502             int[] ga = gs.getGlyphArray(false);
503             CharAssociation[] aa = gs.getAssociations(0, -1);
504             Vector<GlyphSequence> nsv = new Vector<GlyphSequence>();
505             for (Segment s : sa) {
506                 Vector<Integer> ngv = new Vector<Integer>(ng);
507                 Vector<CharAssociation> nav = new Vector<CharAssociation>(ng);
508                 for (int j = 0; j < ng; j++) {
509                     CharAssociation ca = aa[j];
510                     if (ca.contained(s.getOffset(), s.getCount())) {
511                         ngv.add(ga[j]);
512                         nav.add(ca);
513                     }
514                 }
515                 if (ngv.size() > 0) {
516                     nsv.add(new GlyphSequence(gs, null, toIntArray(ngv), null, null, nav.toArray(new CharAssociation[nav.size()]), null));
517                 }
518             }
519             if (nsv.size() > 0) {
520                 return nsv.toArray(new GlyphSequence [ nsv.size() ]);
521             } else {
522                 return new GlyphSequence[] { gs };
523             }
524         }
525         /**
526          * Find start of syllable in character array, starting at S, ending at E.
527          * @param ca character array
528          * @param s start index
529          * @param e end index
530          * @return index of start or E if no start found
531          */
findStartOfSyllable(int[] ca, int s, int e)532         protected int findStartOfSyllable(int[] ca, int s, int e) {
533             return e;
534         }
535         /**
536          * Find end of syllable in character array, starting at S, ending at E.
537          * @param ca character array
538          * @param s start index
539          * @param e end index
540          * @return index of start or S if no end found
541          */
findEndOfSyllable(int[] ca, int s, int e)542         protected int findEndOfSyllable(int[] ca, int s, int e) {
543             return s;
544         }
toIntArray(Vector<Integer> iv)545         private static int[] toIntArray(Vector<Integer> iv) {
546             int ni = iv.size();
547             int[] ia = new int [ iv.size() ];
548             for (int i = 0, n = ni; i < n; i++) {
549                 ia [ i ] = (int) iv.get(i);
550             }
551             return ia;
552         }
553     }
554 
555     /** Syllabic segment. */
556     protected static class Segment {
557 
558         static final int OTHER = 0;            // other (non-syllable) characters
559         static final int SYLLABLE = 1;         // (orthographic) syllable
560 
561         private int start;
562         private int end;
563         private int type;
564 
Segment(int start, int end, int type)565         Segment(int start, int end, int type) {
566             this.start = start;
567             this.end = end;
568             this.type = type;
569         }
570 
getStart()571         int getStart() {
572             return start;
573         }
574 
getEnd()575         int getEnd() {
576             return end;
577         }
578 
getOffset()579         int getOffset() {
580             return start;
581         }
582 
getCount()583         int getCount() {
584             return end - start;
585         }
586 
getType()587         int getType() {
588             return type;
589         }
590     }
591 }
592