1 /*
2  * Copyright (C) 2015 The Qt Company Ltd
3  *
4  * This is part of HarfBuzz, an OpenType Layout engine library.
5  *
6  * Permission is hereby granted, without written agreement and without
7  * license or royalty fees, to use, copy, modify, and distribute this
8  * software and its documentation for any purpose, provided that the
9  * above copyright notice and the following two paragraphs appear in
10  * all copies of this software.
11  *
12  * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR
13  * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
14  * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN
15  * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
16  * DAMAGE.
17  *
18  * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,
19  * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
20  * FITNESS FOR A PARTICULAR PURPOSE.  THE SOFTWARE PROVIDED HEREUNDER IS
21  * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO
22  * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
23  */
24 
25 #include "harfbuzz-shaper.h"
26 #include "harfbuzz-shaper-private.h"
27 #include "harfbuzz-external.h"
28 
29 #include <assert.h>
30 #include <stdio.h>
31 
32 #define LIBTHAI_MAJOR   0
33 
/*
 * If libthai's definitions change, update the declarations below to match.
 */
/* Local copy of libthai's display-cell structure.  It is filled in through
 * the pointer argument of th_next_cell and passed by value to the
 * th_render_cell_* functions. */
struct thcell_t {
    unsigned char base;      /**< base character */
    unsigned char hilo;      /**< upper/lower vowel/diacritic */
    unsigned char top;       /**< top-level mark */
};
/* Function-pointer types for the libthai entry points that are looked up by
 * name in init_libthai(). */
typedef int (*th_brk_def) (const unsigned char*, int*, size_t);
typedef int (*th_render_cell_tis_def) (struct thcell_t cell, unsigned char res[], size_t res_sz, int is_decomp_am);
typedef int (*th_render_cell_win_def) (struct thcell_t cell, unsigned char res[], size_t res_sz, int is_decomp_am);
typedef int (*th_render_cell_mac_def) (struct thcell_t cell, unsigned char res[], size_t res_sz, int is_decomp_am);
typedef size_t (*th_next_cell_def) (const unsigned char *, size_t, struct thcell_t *, int);

/* libthai related function handles; all start out null and are assigned by
 * init_libthai(). */
static th_brk_def th_brk = 0;
static th_next_cell_def th_next_cell = 0;
static th_render_cell_tis_def th_render_cell_tis = 0;
static th_render_cell_win_def th_render_cell_win = 0;
static th_render_cell_mac_def th_render_cell_mac = 0;
54 
init_libthai()55 static int init_libthai() {
56     static HB_Bool initialized = false;
57     if (!initialized && (!th_brk || !th_next_cell || !th_render_cell_tis || !th_render_cell_win || !th_render_cell_mac)) {
58         th_brk = (th_brk_def) HB_Library_Resolve("thai", (int)LIBTHAI_MAJOR, "th_brk");
59         th_next_cell = (th_next_cell_def)HB_Library_Resolve("thai", LIBTHAI_MAJOR, "th_next_cell");
60         th_render_cell_tis = (th_render_cell_tis_def) HB_Library_Resolve("thai", (int)LIBTHAI_MAJOR, "th_render_cell_tis");
61         th_render_cell_win = (th_render_cell_win_def) HB_Library_Resolve("thai", (int)LIBTHAI_MAJOR, "th_render_cell_win");
62         th_render_cell_mac = (th_render_cell_mac_def) HB_Library_Resolve("thai", (int)LIBTHAI_MAJOR, "th_render_cell_mac");
63         initialized = true;
64     }
65     if (th_brk && th_next_cell && th_render_cell_tis && th_render_cell_win && th_render_cell_mac)
66         return 1;
67     else
68         return 0;
69 }
70 
/*
 * Transcode `len` UTF-16 code units from `string` into single bytes in
 * `cstr`, then append a terminating 0.
 *
 *   - units <= 0xa0 are copied unchanged;
 *   - units in 0x0e01..0x0e5b are shifted to 0xa1..0xfb;
 *   - everything else becomes 0xff.
 *
 * `cstr` must have room for len + 1 bytes.
 */
static void to_tis620(const HB_UChar16 *string, hb_uint32 len, char *cstr)
{
    unsigned char *out = (unsigned char *)cstr;
    hb_uint32 idx = 0;

    while (idx < len) {
        const HB_UChar16 unit = string[idx];
        unsigned char tis;

        if (unit >= 0xe01 && unit <= 0xe5b)
            tis = (unsigned char)(unit - 0xe00 + 0xa0);
        else if (unit <= 0xa0)
            tis = (unsigned char)unit;
        else
            tis = (unsigned char)~0; // Same encoding as libthai uses for invalid chars

        out[idx++] = tis;
    }

    out[len] = 0;
}
87 
88 /*
89  * ---------------------------------------------------------------------------
90  * Thai Shaper / Attributes
91  * ---------------------------------------------------------------------------
92  */
93 
/*
 * OpenType features passed to HB_SelectScript() for Thai.  The list is
 * terminated by a {0, 0} entry.  All entries currently use CcmpProperty;
 * the table is kept separate so further features can be added later.
 */
#ifndef NO_OPENTYPE
static const HB_OpenTypeFeature thai_features[] = {
    { HB_MAKE_TAG('c', 'c', 'm', 'p'), CcmpProperty },
    { HB_MAKE_TAG('l', 'i', 'g', 'a'), CcmpProperty },
    { HB_MAKE_TAG('c', 'l', 'i', 'g'), CcmpProperty },
    {0, 0}
};
#endif
105 
/* TIS-to-Unicode glyph maps for characters 0x80-0xff */

/* Map used for the TIS font type.  Indexed by (byte & 0x7f) for bytes with
 * the high bit set; a 0 entry means no mapping.  The first 32 slots
 * (bytes 0x80-0x9f) are all unmapped. */
static int tis620_0[128] = {
    /**/ 0,      0,      0,      0,      0,      0,      0,      0,
    /**/ 0,      0,      0,      0,      0,      0,      0,      0,
    /**/ 0,      0,      0,      0,      0,      0,      0,      0,
    /**/ 0,      0,      0,      0,      0,      0,      0,      0,
    0x0020, 0x0e01, 0x0e02, 0x0e03, 0x0e04, 0x0e05, 0x0e06, 0x0e07,
    0x0e08, 0x0e09, 0x0e0a, 0x0e0b, 0x0e0c, 0x0e0d, 0x0e0e, 0x0e0f,
    0x0e10, 0x0e11, 0x0e12, 0x0e13, 0x0e14, 0x0e15, 0x0e16, 0x0e17,
    0x0e18, 0x0e19, 0x0e1a, 0x0e1b, 0x0e1c, 0x0e1d, 0x0e1e, 0x0e1f,
    0x0e20, 0x0e21, 0x0e22, 0x0e23, 0x0e24, 0x0e25, 0x0e26, 0x0e27,
    0x0e28, 0x0e29, 0x0e2a, 0x0e2b, 0x0e2c, 0x0e2d, 0x0e2e, 0x0e2f,
    0x0e30, 0x0e31, 0x0e32, 0x0e33, 0x0e34, 0x0e35, 0x0e36, 0x0e37,
    0x0e38, 0x0e39, 0x0e3a,      0,      0,      0,      0, 0x0e3f,
    0x0e40, 0x0e41, 0x0e42, 0x0e43, 0x0e44, 0x0e45, 0x0e46, 0x0e47,
    0x0e48, 0x0e49, 0x0e4a, 0x0e4b, 0x0e4c, 0x0e4d, 0x0e4e, 0x0e4f,
    0x0e50, 0x0e51, 0x0e52, 0x0e53, 0x0e54, 0x0e55, 0x0e56, 0x0e57,
    0x0e58, 0x0e59, 0x0e5a, 0x0e5b,      0,      0,      0,      0
};
125 
/* Map used for the WIN font type.  Indexed by (byte & 0x7f) for bytes with
 * the high bit set; a 0 entry means no mapping.  Bytes 0x80-0x9f and
 * 0xfc-0xfe map to code points in the 0xf884-0xf89e range. */
static int tis620_1[128] = {
    0xf89e,      0,      0, 0xf88c, 0xf88f, 0xf892, 0xf895, 0xf898,
    0xf88b, 0xf88e, 0xf891, 0xf894, 0xf897,      0,      0, 0xf899,
    0xf89a,      0, 0xf884, 0xf889, 0xf885, 0xf886, 0xf887, 0xf888,
    0xf88a, 0xf88d, 0xf890, 0xf893, 0xf896,      0,      0,      0,
    /**/ 0, 0x0e01, 0x0e02, 0x0e03, 0x0e04, 0x0e05, 0x0e06, 0x0e07,
    0x0e08, 0x0e09, 0x0e0a, 0x0e0b, 0x0e0c, 0x0e0d, 0x0e0e, 0x0e0f,
    0x0e10, 0x0e11, 0x0e12, 0x0e13, 0x0e14, 0x0e15, 0x0e16, 0x0e17,
    0x0e18, 0x0e19, 0x0e1a, 0x0e1b, 0x0e1c, 0x0e1d, 0x0e1e, 0x0e1f,
    0x0e20, 0x0e21, 0x0e22, 0x0e23, 0x0e24, 0x0e25, 0x0e26, 0x0e27,
    0x0e28, 0x0e29, 0x0e2a, 0x0e2b, 0x0e2c, 0x0e2d, 0x0e2e, 0x0e2f,
    0x0e30, 0x0e31, 0x0e32, 0x0e33, 0x0e34, 0x0e35, 0x0e36, 0x0e37,
    0x0e38, 0x0e39, 0x0e3a,      0,      0,      0,      0, 0x0e3f,
    0x0e40, 0x0e41, 0x0e42, 0x0e43, 0x0e44, 0x0e45, 0x0e46, 0x0e47,
    0x0e48, 0x0e49, 0x0e4a, 0x0e4b, 0x0e4c, 0x0e4d,      0, 0x0e4f,
    0x0e50, 0x0e51, 0x0e52, 0x0e53, 0x0e54, 0x0e55, 0x0e56, 0x0e57,
    0x0e58, 0x0e59,      0,      0, 0xf89b, 0xf89c, 0xf89d,      0
};
144 
/* Map used for the MAC font type.  Indexed by (byte & 0x7f) for bytes with
 * the high bit set; a 0 entry means no mapping.  Bytes 0x80-0x9f and
 * 0xfc-0xfe map mostly to code points in the 0xf700-0xf71a range, plus a
 * few general punctuation characters. */
static int tis620_2[128] = {
    0xf700, 0xf701, 0xf702, 0xf703, 0xf704, 0x2026, 0xf705, 0xf706,
    0xf707, 0xf708, 0xf709, 0xf70a, 0xf70b, 0xf70c, 0xf70d, 0xf70e,
    0xf70f, 0x2018, 0x2019, 0x201c, 0x201d, 0x2022, 0x2013, 0x2014,
    0xf710, 0xf711, 0xf712, 0xf713, 0xf714, 0xf715, 0xf716, 0xf717,
    0x00a0, 0x0e01, 0x0e02, 0x0e03, 0x0e04, 0x0e05, 0x0e06, 0x0e07,
    0x0e08, 0x0e09, 0x0e0a, 0x0e0b, 0x0e0c, 0x0e0d, 0x0e0e, 0x0e0f,
    0x0e10, 0x0e11, 0x0e12, 0x0e13, 0x0e14, 0x0e15, 0x0e16, 0x0e17,
    0x0e18, 0x0e19, 0x0e1a, 0x0e1b, 0x0e1c, 0x0e1d, 0x0e1e, 0x0e1f,
    0x0e20, 0x0e21, 0x0e22, 0x0e23, 0x0e24, 0x0e25, 0x0e26, 0x0e27,
    0x0e28, 0x0e29, 0x0e2a, 0x0e2b, 0x0e2c, 0x0e2d, 0x0e2e, 0x0e2f,
    0x0e30, 0x0e31, 0x0e32, 0x0e33, 0x0e34, 0x0e35, 0x0e36, 0x0e37,
    0x0e38, 0x0e39, 0x0e3a,      0,      0,      0,      0, 0x0e3f,
    0x0e40, 0x0e41, 0x0e42, 0x0e43, 0x0e44, 0x0e45, 0x0e46, 0x0e47,
    0x0e48, 0x0e49, 0x0e4a, 0x0e4b, 0x0e4c, 0x0e4d, 0x0e4e, 0x0e4f,
    0x0e50, 0x0e51, 0x0e52, 0x0e53, 0x0e54, 0x0e55, 0x0e56, 0x0e57,
    0x0e58, 0x0e59, 0x0e5a, 0x0e5b, 0xf718, 0xf719, 0xf71a,      0
};
163 
/* Font flavour used to pick a glyph map in thai_get_glyph_index() and a
 * libthai render function during shaping. */
enum ThaiFontType {
    TIS,    /* glyph map tis620_0 */
    WIN,    /* glyph map tis620_1 */
    MAC     /* glyph map tis620_2 */
};
169 
/*
 * Translate one byte produced by libthai into a code point for the given
 * font type.  Bytes below 0x80 map to themselves; bytes with the high bit
 * set are looked up in the font type's 128-entry table.  An unknown font
 * type yields 0.
 */
static int thai_get_glyph_index (ThaiFontType font_type, unsigned char c)
{
    const int *table;

    if (font_type == TIS)
        table = tis620_0;
    else if (font_type == WIN)
        table = tis620_1;
    else if (font_type == MAC)
        table = tis620_2;
    else
        return 0;

    if (!(c & 0x80))
        return c;

    return table[c & 0x7f];
}
179 
/*
 * Check whether the item's font can render every code point listed in
 * `glyph_map` (zero entries are skipped).
 *
 * Returns 1 when all non-zero entries are renderable, 0 as soon as one is
 * not.
 */
static int thai_contain_glyphs (HB_ShaperItem *shaper_item, const int glyph_map[128])
{
    unsigned char c;

    for (c = 0; c < 0x80; c++) {
        if ( glyph_map[c] ) {
            /* Hand canRender a genuine HB_UChar16.  Reinterpreting the int
             * table entry through a pointer cast reads the wrong half of the
             * integer on big-endian targets and violates aliasing rules. */
            const HB_UChar16 uc = (HB_UChar16) glyph_map[c];

            if ( !shaper_item->font->klass->canRender (shaper_item->font, &uc, 1) )
                return 0;
        }
    }
    return 1;
}
192 
getThaiFontType(HB_ShaperItem * shaper_item)193 static ThaiFontType getThaiFontType(HB_ShaperItem *shaper_item)
194 {
195     if ( thai_contain_glyphs (shaper_item, tis620_2) )
196         return MAC;
197     else if ( thai_contain_glyphs (shaper_item, tis620_1) )
198         return WIN;
199     else
200         return TIS;
201 }
202 
203 /*
204  * convert to the correct display level of THAI vowels and marks.
205  */
HB_ThaiConvertStringToGlyphIndices(HB_ShaperItem * item)206 static HB_Bool HB_ThaiConvertStringToGlyphIndices (HB_ShaperItem *item)
207 {
208     char s[128];
209     char *cstr = s;
210     const HB_UChar16 *string = item->string + item->item.pos;
211     const hb_uint32 len = item->item.length;
212     unsigned short *logClusters = item->log_clusters;
213     hb_uint32 i = 0, slen = 0;
214 
215     if (!init_libthai())
216         return HB_BasicShape (item);
217 
218     if (len >= 128)
219         cstr = (char *)malloc(len*sizeof(char) + 1);
220 
221     if (!cstr)
222         return HB_BasicShape (item);
223 
224     to_tis620(string, len, cstr);
225 
226     /* Get font type */
227     static ThaiFontType font_type;
228     static HB_Font itemFont;
229     if (itemFont != item->font) {
230         font_type = getThaiFontType (item);
231         itemFont = item->font;
232     }
233 
234     /* allocate temporary glyphs buffers */
235     HB_STACKARRAY (HB_UChar16, glyphString, (item->item.length * 2));
236 
237     while (i < item->item.length) {
238         struct thcell_t tis_cell;
239         unsigned char rglyphs[4];
240         int cell_length;
241         int lgn = 0;
242         HB_Bool haveSaraAm = false;
243 
244         cell_length = (int)(th_next_cell ((const unsigned char *)cstr + i, len - i, &tis_cell, true)); /* !item->fixedPitch); */
245         haveSaraAm  = (cstr[i + cell_length - 1] == (char)0xd3);
246 
247         /* set shaper item's log_clusters */
248         logClusters[i] = slen;
249         for (int j = 1; j < cell_length; j++) {
250             logClusters[i + j] = logClusters[i];
251         }
252 
253         /* Find Logical Glyphs by font type */
254         switch (font_type) {
255             case TIS: lgn = th_render_cell_tis (tis_cell, rglyphs, sizeof(rglyphs) / sizeof(rglyphs[0]), true); break;
256             case WIN: lgn = th_render_cell_mac (tis_cell, rglyphs, sizeof(rglyphs) / sizeof(rglyphs[0]), true); break;
257             case MAC: lgn = th_render_cell_win (tis_cell, rglyphs, sizeof(rglyphs) / sizeof(rglyphs[0]), true); break;
258         }
259 
260         /* Add glyphs to glyphs string and setting some attributes */
261         for (int lgi = 0; lgi < lgn; lgi++) {
262             if ( rglyphs[lgi] == 0xdd/*TH_BLANK_BASE_GLYPH*/ ) {
263                 glyphString[slen++] = C_DOTTED_CIRCLE;
264             } else if ((unsigned char)cstr[i] == (unsigned char)~0) {
265                 // The only glyphs that should be passed to this function that cannot be mapped to
266                 // tis620 are the ones of type Inherited class.  Pass these glyphs untouched.
267                 glyphString[slen++] = string[i];
268                 if (string[i] == 0x200D || string[i] == 0x200C) {
269                     // Check that we do not run out of bounds when setting item->attributes.  If we do
270                     // run out of bounds then this function will return false, the necessary amount of
271                     // memory is reallocated, and this function will then be called again.
272                     if (slen <= item->num_glyphs)
273                         item->attributes[slen-1].dontPrint = true; // Hide ZWJ and ZWNJ characters
274                 }
275             } else {
276                 glyphString[slen++] = (HB_UChar16) thai_get_glyph_index (font_type, rglyphs[lgi]);
277             }
278         }
279 
280         /* Special case to handle U+0E33 (SARA AM): SARA AM is normally written at the end of a
281          * word with a base character and an optional top character before it. For example, U+0E0B
282          * (base), U+0E49 (top), U+0E33 (SARA AM). The sequence should be converted to 4 glyphs:
283          * base, hilo (the little circle in the top left part of SARA AM, NIKHAHIT), top, then the
284          * right part of SARA AM (SARA AA).
285          *
286          * The painting process finds out the starting glyph and ending glyph of a character
287          * sequence by checking the logClusters array. In this case, logClusters array should
288          * ideally be [ 0, 1, 3 ] so that glyphsStart = 0 and glyphsEnd = 3 (slen - 1) to paint out
289          * all the glyphs generated.
290          *
291          * A special case in this special case is when we have no base character. When an isolated
292          * SARA AM is processed (cell_length = 1), libthai will produce 3 glyphs: dotted circle
293          * (indicates that the base is empty), NIKHAHIT then SARA AA. If logClusters[0] = 1, it will
294          * paint from the second glyph in the glyphs array. So in this case logClusters[0] should
295          * point to the first glyph it produces, aka. the dotted circle. */
296         if (haveSaraAm) {
297             logClusters[i + cell_length - 1] = cell_length == 1 ? slen - 3 : slen - 1;
298             if (tis_cell.top != 0) {
299                 if (cell_length > 1) {
300                     /* set the logClusters[top character] to slen - 2 as it points to the second to
301                      * lastglyph (slen - 2) */
302                     logClusters[i + cell_length - 2] = slen - 2;
303                 }
304             }
305             /* check for overflow */
306             if (logClusters[i + cell_length - 1] > slen)
307                 logClusters[i + cell_length - 1] = 0;
308         }
309 
310         i += cell_length;
311     }
312     glyphString[slen] = (HB_UChar16) '\0';
313 
314     /* for check, should reallocate space or not */
315     HB_Bool spaceOK = (item->num_glyphs >= slen);
316 
317     /* Convert to Glyph indices */
318     HB_Bool haveGlyphs = item->font->klass->convertStringToGlyphIndices (
319                                           item->font,
320                                           glyphString, slen,
321                                           item->glyphs, &item->num_glyphs,
322                                           item->shaperFlags);
323 
324     HB_FREE_STACKARRAY (glyphString);
325 
326     if (len >= 128)
327         free(cstr);
328 
329     return (haveGlyphs && spaceOK);
330 }
331 
332 /*
333  * set the glyph attributes heuristically.
334  */
HB_ThaiHeuristicSetGlyphAttributes(HB_ShaperItem * item)335 static void HB_ThaiHeuristicSetGlyphAttributes (HB_ShaperItem *item)
336 {
337     /* Set Glyph Attributes */
338     hb_uint32 iCluster = 0;
339     hb_uint32 length = item->item.length;
340     while (iCluster < length) {
341         int cluster_start = item->log_clusters[iCluster];
342         ++iCluster;
343         while (iCluster < length && item->log_clusters[iCluster] == cluster_start) {
344             ++iCluster;
345         }
346         int cluster_end = (iCluster < length) ? item->log_clusters[iCluster] : item->num_glyphs;
347         item->attributes[cluster_start].clusterStart = true;
348         for (int i = cluster_start + 1; i < cluster_end; i++) {
349             item->attributes[i].clusterStart = false;
350         }
351     }
352 }
353 
354 /*
355  * THAI Shaping.
356  */
HB_ThaiShape(HB_ShaperItem * shaper_item)357 HB_Bool HB_ThaiShape (HB_ShaperItem *shaper_item)
358 {
359     if ( !HB_ThaiConvertStringToGlyphIndices (shaper_item) )
360         return false;
361 
362     HB_ThaiHeuristicSetGlyphAttributes (shaper_item);
363 
364 #ifndef NO_OPENTYPE
365     const int availableGlyphs = shaper_item->num_glyphs;
366     if ( HB_SelectScript (shaper_item, thai_features) ) {
367         HB_OpenTypeShape (shaper_item, /*properties*/0);
368         return HB_OpenTypePosition (shaper_item, availableGlyphs, /*doLogClusters*/true);
369     }
370 #endif
371 
372     HB_HeuristicPosition (shaper_item);
373     return true;
374 }
375 
376 /*
377  * Thai Attributes: computes Word Break, Word Boundary and Char stop for THAI.
378  */
HB_ThaiAssignAttributes(const HB_UChar16 * string,hb_uint32 len,HB_CharAttributes * attributes)379 static void HB_ThaiAssignAttributes(const HB_UChar16 *string, hb_uint32 len, HB_CharAttributes *attributes)
380 {
381     char s[128];
382     char *cstr = s;
383     int *break_positions = 0;
384     int brp[128];
385     int brp_size = 0;
386     hb_uint32 numbreaks, i, j, cell_length;
387     struct thcell_t tis_cell;
388 
389     if (!init_libthai())
390         return ;
391 
392     if (len >= 128)
393         cstr = (char *)malloc(len*sizeof(char) + 1);
394 
395     to_tis620(string, len, cstr);
396 
397     for (i = 0; i < len; ++i) {
398         attributes[i].wordBreak = FALSE;
399         attributes[i].wordStart = FALSE;
400         attributes[i].wordEnd = FALSE;
401         attributes[i].lineBreak = FALSE;
402     }
403 
404     if (len > 128) {
405         break_positions = (int*) malloc (sizeof(int) * len);
406         memset (break_positions, 0, sizeof(int) * len);
407         brp_size = len;
408     }
409     else {
410         break_positions = brp;
411         brp_size = 128;
412     }
413 
414     if (break_positions) {
415         attributes[0].wordBreak = TRUE;
416         attributes[0].wordStart = TRUE;
417         attributes[0].wordEnd = FALSE;
418         numbreaks = th_brk((const unsigned char *)cstr, break_positions, brp_size);
419         for (i = 0; i < numbreaks; ++i) {
420             attributes[break_positions[i]].wordBreak = TRUE;
421             attributes[break_positions[i]].wordStart = TRUE;
422             attributes[break_positions[i]].wordEnd = TRUE;
423             attributes[break_positions[i]].lineBreak = TRUE;
424         }
425         if (numbreaks > 0)
426             attributes[break_positions[numbreaks - 1]].wordStart = FALSE;
427 
428         if (break_positions != brp)
429             free(break_positions);
430     }
431 
432     /* manage grapheme boundaries */
433     i = 0;
434     while (i < len) {
435         cell_length = (hb_uint32)(th_next_cell((const unsigned char *)cstr + i, len - i, &tis_cell, true));
436 
437         attributes[i].graphemeBoundary = true;
438         for (j = 1; j < cell_length; j++)
439             attributes[i + j].graphemeBoundary = false;
440 
441         /* Set graphemeBoundary for SARA AM */
442         if (cstr[i + cell_length - 1] == (char)0xd3)
443             attributes[i + cell_length - 1].graphemeBoundary = true;
444 
445         i += cell_length;
446     }
447 
448     if (len >= 128)
449         free(cstr);
450 }
451 
/*
 * Public entry point: compute character attributes for the `len` code units
 * of `text` starting at offset `from`.  `attributes` is indexed like `text`,
 * so both are advanced by `from` before the work is delegated.  `script` is
 * only checked by the assertion.
 */
void HB_ThaiAttributes(HB_Script script, const HB_UChar16 *text, hb_uint32 from, hb_uint32 len, HB_CharAttributes *attributes)
{
    assert(script == HB_Script_Thai);
    HB_UNUSED(script);

    HB_ThaiAssignAttributes(text + from, len, attributes + from);
}
460 
461