1 /*
2  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
3  *
4  * This code is free software; you can redistribute it and/or modify it
5  * under the terms of the GNU General Public License version 2 only, as
6  * published by the Free Software Foundation.  Oracle designates this
7  * particular file as subject to the "Classpath" exception as provided
8  * by Oracle in the LICENSE file that accompanied this code.
9  *
10  * This code is distributed in the hope that it will be useful, but WITHOUT
11  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
13  * version 2 for more details (a copy is included in the LICENSE file that
14  * accompanied this code).
15  *
16  * You should have received a copy of the GNU General Public License version
17  * 2 along with this work; if not, write to the Free Software Foundation,
18  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
19  *
20  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
21  * or visit www.oracle.com if you need additional information or have any
22  * questions.
23  *
24  */
25 
26 /*
 * HangulLayoutEngine.cpp: OpenType processing for Hangul fonts.
28  *
29  * (C) Copyright IBM Corp. 1998-2010 - All Rights Reserved.
30  */
31 
32 #include "LETypes.h"
33 #include "LEScripts.h"
34 #include "LELanguages.h"
35 
36 #include "LayoutEngine.h"
37 #include "OpenTypeLayoutEngine.h"
38 #include "HangulLayoutEngine.h"
39 #include "ScriptAndLanguageTags.h"
40 #include "LEGlyphStorage.h"
41 #include "OpenTypeTables.h"
42 
43 U_NAMESPACE_BEGIN
44 
45 UOBJECT_DEFINE_RTTI_IMPLEMENTATION(HangulOpenTypeLayoutEngine)
46 
47 
// Builds a {tag, mask} initializer from a feature name by pasting the
// "FeatureTag" and "FeatureMask" suffixes onto it.
#define FEATURE_MAP(name) {name ## FeatureTag, name ## FeatureMask}

// Leading consonant (L) jamo. getCharClass() classifies FIRST..LAST as CC_L
// and uses FILL as the default lead. COUNT is the number of leads, starting
// at FIRST, that compose() accepts when building a precomposed syllable.
#define LJMO_FIRST 0x1100
#define LJMO_LAST  0x1159
#define LJMO_FILL  0x115F
#define LJMO_COUNT 19

// Vowel (V) jamo. Same layout as above: classification range, default
// filler, and the number of vowels usable in a precomposed syllable.
#define VJMO_FIRST 0x1161
#define VJMO_LAST  0x11A2
#define VJMO_FILL  0x1160
#define VJMO_COUNT 21

// Trailing consonant (T) jamo. TJMO_FIRST itself stands for "no trail"
// (index 0): getCharClass() only classifies values strictly greater than it
// as CC_T, and decompose() reports an LV syllable when the trail equals it.
#define TJMO_FIRST 0x11A7
#define TJMO_LAST  0x11F9
#define TJMO_COUNT 28

// Precomposed syllable block: first code point, number of syllables
// (19 * 21 * 28 = 11172), and the number of syllables sharing one lead.
#define HSYL_FIRST 0xAC00
#define HSYL_COUNT 11172
#define HSYL_LVCNT (VJMO_COUNT * TJMO_COUNT)
67 
// Character classes returned by getCharClass(). The values are used as the
// column index into stateTable, so their order must match the table columns.
enum
{
    CC_L = 0,   // leading consonant jamo
    CC_V,       // vowel jamo
    CC_T,       // trailing consonant jamo
    CC_LV,      // precomposed syllable that decomposes to lead + vowel
    CC_LVT,     // precomposed syllable that decomposes to lead + vowel + trail
    CC_X,       // anything else
    CC_COUNT    // number of classes (second dimension of stateTable)
};
79 
// Action flags: one bit per decomposed component that a state transition
// asks characterProcessing() to write to the output.
#define AF_L 1
#define AF_V 2
#define AF_T 4

// Actions: the flag combinations used in stateTable. a_N writes nothing.
#define a_N   0
#define a_L   (AF_L)
#define a_V   (AF_V)
#define a_T   (AF_T)
#define a_VT  (AF_V | AF_T)
#define a_LV  (AF_L | AF_V)
#define a_LVT (AF_L | AF_V | AF_T)
93 
// One cell of the syllable state machine.
typedef struct
{
    le_int32 newState;     // next state; a negative value ends the current syllable
    le_int32 actionFlags;  // combination of AF_L / AF_V / AF_T to emit
} StateTransition;
99 
/*
 * Syllable state machine, indexed as stateTable[state][characterClass].
 *
 * characterProcessing() applies the action of the selected cell and then
 * moves to newState. A newState of -1 ends the current syllable without
 * consuming the current character, which is then looked at again from
 * state 0.
 *
 * In state 0 a lone V or T is given the components it is missing: the
 * a_LV / a_LVT actions emit the lead and vowel values that getCharClass()
 * defaults to LJMO_FILL / VJMO_FILL.
 *
 * For class X, characterProcessing() only tests AF_T, so the a_V stored in
 * row 1, column X emits nothing.
 */
static const StateTransition stateTable[][CC_COUNT] =
{
//       L          V          T          LV         LVT           X
    { {1, a_L},  {2, a_LV}, {3, a_LVT}, {2, a_LV}, {3, a_LVT},  {4, a_T}}, // 0 - start
    { {1, a_L},  {2, a_V},  {3, a_VT},  {2, a_LV}, {3, a_LVT}, {-1, a_V}}, // 1 - L+
    {{-1, a_N},  {2, a_V},  {3, a_T},  {-1, a_N}, {-1, a_N},   {-1, a_N}}, // 2 - L+V+
    {{-1, a_N}, {-1, a_N},  {3, a_T},  {-1, a_N}, {-1, a_N},   {-1, a_N}}, // 3 - L+V+T*
    {{-1, a_N}, {-1, a_N}, {-1, a_N},  {-1, a_N}, {-1, a_N},    {4, a_T}}  // 4 - X+
};
109 
110 
// Local aliases for the feature tags used by this engine. The names follow
// the <name>FeatureTag pattern expected by the FEATURE_MAP macro.
#define ccmpFeatureTag LE_CCMP_FEATURE_TAG
#define ljmoFeatureTag LE_LJMO_FEATURE_TAG
#define vjmoFeatureTag LE_VJMO_FEATURE_TAG
#define tjmoFeatureTag LE_TJMO_FEATURE_TAG

// One distinct bit per feature, assigned from the top bit downwards. These
// bits are what gets stored as per-character aux data in the glyph storage.
#define ccmpFeatureMask 0x80000000UL
#define ljmoFeatureMask 0x40000000UL
#define vjmoFeatureMask 0x20000000UL
#define tjmoFeatureMask 0x10000000UL
120 
121 static const FeatureMap featureMap[] =
122 {
123     {ccmpFeatureTag, ccmpFeatureMask},
124     {ljmoFeatureTag, ljmoFeatureMask},
125     {vjmoFeatureTag, vjmoFeatureMask},
126     {tjmoFeatureTag, tjmoFeatureMask}
127 };
128 
129 static const le_int32 featureMapCount = LE_ARRAY_SIZE(featureMap);
130 
// Feature sets attached as aux data to each output character, chosen by the
// component (lead, vowel, trail) the character was emitted as. nullFeatures
// is used for class X characters and for composed syllables.
// Note: vjmoFeatures and tjmoFeatures both OR together all four masks, so
// they evaluate to the same value.
#define nullFeatures 0
#define ljmoFeatures (ccmpFeatureMask | ljmoFeatureMask)
#define vjmoFeatures (ccmpFeatureMask | vjmoFeatureMask | ljmoFeatureMask | tjmoFeatureMask)
#define tjmoFeatures (ccmpFeatureMask | tjmoFeatureMask | ljmoFeatureMask | vjmoFeatureMask)
135 
compose(LEUnicode lead,LEUnicode vowel,LEUnicode trail,LEUnicode & syllable)136 static le_int32 compose(LEUnicode lead, LEUnicode vowel, LEUnicode trail, LEUnicode &syllable)
137 {
138     le_int32 lIndex = lead  - LJMO_FIRST;
139     le_int32 vIndex = vowel - VJMO_FIRST;
140     le_int32 tIndex = trail - TJMO_FIRST;
141     le_int32 result = 3;
142 
143     if ((lIndex < 0 || lIndex >= LJMO_COUNT ) || (vIndex < 0 || vIndex >= VJMO_COUNT)) {
144         return 0;
145     }
146 
147     if (tIndex <= 0 || tIndex >= TJMO_COUNT) {
148         tIndex = 0;
149         result = 2;
150     }
151 
152     syllable = (LEUnicode) ((lIndex * VJMO_COUNT + vIndex) * TJMO_COUNT + tIndex + HSYL_FIRST);
153 
154     return result;
155 }
156 
decompose(LEUnicode syllable,LEUnicode & lead,LEUnicode & vowel,LEUnicode & trail)157 static le_int32 decompose(LEUnicode syllable, LEUnicode &lead, LEUnicode &vowel, LEUnicode &trail)
158 {
159     le_int32 sIndex = syllable - HSYL_FIRST;
160 
161     if (sIndex < 0 || sIndex >= HSYL_COUNT) {
162         return 0;
163     }
164 
165     lead  = (LEUnicode)(LJMO_FIRST + (sIndex / HSYL_LVCNT));
166     vowel = VJMO_FIRST + (sIndex % HSYL_LVCNT) / TJMO_COUNT;
167     trail = TJMO_FIRST + (sIndex % TJMO_COUNT);
168 
169     if (trail == TJMO_FIRST) {
170         return 2;
171     }
172 
173     return 3;
174 }
175 
getCharClass(LEUnicode ch,LEUnicode & lead,LEUnicode & vowel,LEUnicode & trail)176 static le_int32 getCharClass(LEUnicode ch, LEUnicode &lead, LEUnicode &vowel, LEUnicode &trail)
177 {
178     lead  = LJMO_FILL;
179     vowel = VJMO_FILL;
180     trail = TJMO_FIRST;
181 
182     if (ch >= LJMO_FIRST && ch <= LJMO_LAST) {
183         lead  = ch;
184         return CC_L;
185     }
186 
187     if (ch >= VJMO_FIRST && ch <= VJMO_LAST) {
188         vowel = ch;
189         return CC_V;
190     }
191 
192     if (ch > TJMO_FIRST && ch <= TJMO_LAST) {
193         trail = ch;
194         return CC_T;
195     }
196 
197     le_int32 c = decompose(ch, lead, vowel, trail);
198 
199     if (c == 2) {
200         return CC_LV;
201     }
202 
203     if (c == 3) {
204         return CC_LVT;
205     }
206 
207     trail = ch;
208     return CC_X;
209 }
210 
HangulOpenTypeLayoutEngine(const LEFontInstance * fontInstance,le_int32 scriptCode,le_int32,le_int32 typoFlags,const LEReferenceTo<GlyphSubstitutionTableHeader> & gsubTable,LEErrorCode & success)211 HangulOpenTypeLayoutEngine::HangulOpenTypeLayoutEngine(const LEFontInstance *fontInstance, le_int32 scriptCode, le_int32 /*languageCode*/,
212                                                        le_int32 typoFlags, const LEReferenceTo<GlyphSubstitutionTableHeader> &gsubTable, LEErrorCode &success)
213     : OpenTypeLayoutEngine(fontInstance, scriptCode, korLanguageCode, typoFlags, gsubTable, success)
214 {
215     fFeatureMap = featureMap;
216     fFeatureMapCount = featureMapCount;
217     fFeatureOrder = TRUE;
218 }
219 
HangulOpenTypeLayoutEngine(const LEFontInstance * fontInstance,le_int32 scriptCode,le_int32,le_int32 typoFlags,LEErrorCode & success)220 HangulOpenTypeLayoutEngine::HangulOpenTypeLayoutEngine(const LEFontInstance *fontInstance, le_int32 scriptCode, le_int32 /*languageCode*/,
221                                                            le_int32 typoFlags, LEErrorCode &success)
222     : OpenTypeLayoutEngine(fontInstance, scriptCode, korLanguageCode, typoFlags, success)
223 {
224     fFeatureMap = featureMap;
225     fFeatureMapCount = featureMapCount;
226     fFeatureOrder = TRUE;
227 }
228 
~HangulOpenTypeLayoutEngine()229 HangulOpenTypeLayoutEngine::~HangulOpenTypeLayoutEngine()
230 {
231     // nothing to do
232 }
233 
characterProcessing(const LEUnicode chars[],le_int32 offset,le_int32 count,le_int32 max,le_bool rightToLeft,LEUnicode * & outChars,LEGlyphStorage & glyphStorage,LEErrorCode & success)234 le_int32 HangulOpenTypeLayoutEngine::characterProcessing(const LEUnicode chars[], le_int32 offset, le_int32 count, le_int32 max, le_bool rightToLeft,
235         LEUnicode *&outChars, LEGlyphStorage &glyphStorage, LEErrorCode &success)
236 {
237     if (LE_FAILURE(success)) {
238         return 0;
239     }
240 
241     if (chars == NULL || offset < 0 || count < 0 || max < 0 || offset >= max || offset + count > max) {
242         success = LE_ILLEGAL_ARGUMENT_ERROR;
243         return 0;
244     }
245 
246     le_int32 worstCase = count * 3;
247 
248     outChars = LE_NEW_ARRAY(LEUnicode, worstCase);
249 
250     if (outChars == NULL) {
251         success = LE_MEMORY_ALLOCATION_ERROR;
252         return 0;
253     }
254 
255     glyphStorage.allocateGlyphArray(worstCase, rightToLeft, success);
256     glyphStorage.allocateAuxData(success);
257 
258     if (LE_FAILURE(success)) {
259         LE_DELETE_ARRAY(outChars);
260         return 0;
261     }
262 
263     le_int32 outCharCount = 0;
264     le_int32 limit = offset + count;
265     le_int32 i = offset;
266 
267     while (i < limit) {
268         le_int32 state    = 0;
269         le_int32 inStart  = i;
270         le_int32 outStart = outCharCount;
271 
272         while( i < limit) {
273             LEUnicode lead  = 0;
274             LEUnicode vowel = 0;
275             LEUnicode trail = 0;
276             le_int32 chClass = getCharClass(chars[i], lead, vowel, trail);
277             const StateTransition transition = stateTable[state][chClass];
278 
279             if (chClass == CC_X) {
280                 /* Any character of type X will be stored as a trail jamo */
281                 if ((transition.actionFlags & AF_T) != 0) {
282                     outChars[outCharCount] = trail;
283                     glyphStorage.setCharIndex(outCharCount, i-offset, success);
284                     glyphStorage.setAuxData(outCharCount++, nullFeatures, success);
285                 }
286             } else {
287                 /* Any Hangul will be fully decomposed. Output the decomposed characters. */
288                 if ((transition.actionFlags & AF_L) != 0) {
289                     outChars[outCharCount] = lead;
290                     glyphStorage.setCharIndex(outCharCount, i-offset, success);
291                     glyphStorage.setAuxData(outCharCount++, ljmoFeatures, success);
292                 }
293 
294                 if ((transition.actionFlags & AF_V) != 0) {
295                     outChars[outCharCount] = vowel;
296                     glyphStorage.setCharIndex(outCharCount, i-offset, success);
297                     glyphStorage.setAuxData(outCharCount++, vjmoFeatures, success);
298                 }
299 
300                 if ((transition.actionFlags & AF_T) != 0) {
301                     outChars[outCharCount] = trail;
302                     glyphStorage.setCharIndex(outCharCount, i-offset, success);
303                     glyphStorage.setAuxData(outCharCount++, tjmoFeatures, success);
304                 }
305             }
306 
307             state = transition.newState;
308 
309             /* Negative next state means stop. */
310             if (state < 0) {
311                 break;
312             }
313 
314             i += 1;
315         }
316 
317         le_int32 inLength  = i - inStart;
318         le_int32 outLength = outCharCount - outStart;
319 
320         /*
321          * See if the syllable can be composed into a single character. There are 5
322          * possible cases:
323          *
324          *   Input     Decomposed to    Compose to
325          *   LV        L, V             LV
326          *   LVT       L, V, T          LVT
327          *   L, V      L, V             LV, DEL
328          *   LV, T     L, V, T          LVT, DEL
329          *   L, V, T   L, V, T          LVT, DEL, DEL
330          */
331         if ((inLength >= 1 && inLength <= 3) && (outLength == 2 || outLength == 3)) {
332             LEUnicode syllable = 0x0000;
333             LEUnicode lead  = outChars[outStart];
334             LEUnicode vowel = outChars[outStart + 1];
335             LEUnicode trail = outLength == 3? outChars[outStart + 2] : TJMO_FIRST;
336 
337             /*
338              * If the composition consumes the whole decomposed syllable,
339              * we can use it.
340              */
341             if (compose(lead, vowel, trail, syllable) == outLength) {
342                 outCharCount = outStart;
343                 outChars[outCharCount] = syllable;
344                 glyphStorage.setCharIndex(outCharCount, inStart-offset, success);
345                 glyphStorage.setAuxData(outCharCount++, nullFeatures, success);
346 
347                 /*
348                  * Replace the rest of the input characters with DEL.
349                  */
350                 for(le_int32 d = inStart + 1; d < i; d += 1) {
351                     outChars[outCharCount] = 0xFFFF;
352                     glyphStorage.setCharIndex(outCharCount, d - offset, success);
353                     glyphStorage.setAuxData(outCharCount++, nullFeatures, success);
354                 }
355             }
356         }
357     }
358 
359     glyphStorage.adoptGlyphCount(outCharCount);
360     return outCharCount;
361 }
362 
363 U_NAMESPACE_END
364