1 /******************************************************************************
2  *
3  * Project:  MapServer
4  * Purpose:  Text Layout functions, eventually using Harfbuzz and Fribidi/ICU
5  * Author:   Thomas Bonfort and the MapServer team.
6  *
7  ******************************************************************************
8  * Copyright (c) 1996-2013 Regents of the University of Minnesota.
9  *
10  * Permission is hereby granted, free of charge, to any person obtaining a
11  * copy of this software and associated documentation files (the "Software"),
12  * to deal in the Software without restriction, including without limitation
13  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
14  * and/or sell copies of the Software, and to permit persons to whom the
15  * Software is furnished to do so, subject to the following conditions:
16  *
17  * The above copyright notice and this permission notice shall be included in
18  * all copies of this Software or works derived from this Software.
19  *
20  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
21  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
22  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
23  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
24  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
25  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
26  * DEALINGS IN THE SOFTWARE.
27  *****************************************************************************/
28 
29 
30 #include <float.h>
31 #include "mapserver.h"
32 
33 #ifdef USE_ICONV
34 #include <iconv.h>
35 #include <wchar.h>
36 #endif
37 #include "fontcache.h"
38 #include FT_ADVANCES_H
39 #include FT_TYPES_H
40 
/*
 * Parallel per-character arrays describing the text being laid out. Runs
 * (see text_run) reference slices of these arrays through offset/length.
 */
typedef struct{
  unsigned int *unicodes;   /* unicode value of each character of the text */
  unsigned int *codepoints; /* glyph index of each character, as returned by msGetGlyphIndex() for the face of its run */
#ifdef USE_FRIBIDI
  FriBidiCharType *ctypes;   /* bidi character types, filled by fribidi_get_bidi_types() */
  FriBidiLevel *bidi_levels; /* bidi embedding levels, filled by fribidi_get_par_embedding_levels() */
#endif
#ifdef USE_HARFBUZZ
  hb_script_t *scripts; /* script of each character, filled by get_scripts() */
#endif
} TextInfo;
52 
/*
 * A contiguous span of characters located on a single line. Runs are first
 * created one per line and may then be split further by bidi level and script.
 * NOTE(review): the hb_script_t member is guarded by USE_FRIBIDI rather than
 * USE_HARFBUZZ -- presumably both are always enabled together; confirm.
 */
typedef struct {
  int offset; /* offset in TextInfo entries where the current run is starting */
  int length; /* number of unicode characters in this run */
#ifdef USE_FRIBIDI
  FriBidiLevel rtl; /* bidi embedding level of run: -1 to skip shaping, otherwise even: ltr, odd: rtl */
  hb_script_t script; /* script: latin, arabic, thai, etc... */
#endif
  int line_number; /* zero-based index of the line this run belongs to */
  face_element *face; /* font face to use for this run */
} text_run;
63 
64 #ifdef USE_HARFBUZZ
/* Context received by the custom HarfBuzz font callbacks through their font_data argument. */
struct _ms_hb_user_data {
  text_run *run;  /* run currently being shaped */
  TextInfo *info; /* per-character arrays the run indexes into */
  int glyph_size; /* glyph size forwarded to msGetGlyphByIndex() */
};
70 
/*
 * Font-list prefixes, one per supported script. A font entry that starts with
 * one of these prefixes (e.g. "ar:arialuni") is tried first for runs of the
 * matching script. Every prefix ends with ':' so that it can only match a
 * complete "<prefix>:<fontname>" entry and never a font whose name merely
 * starts with the same letters.
 */
const char *_ms_script_prefix_en = "en:";
const char *_ms_script_prefix_ar = "ar:";
const char *_ms_script_prefix_cn = "cn:";
const char *_ms_script_prefix_hy = "hy:";
const char *_ms_script_prefix_bn = "bn:";
const char *_ms_script_prefix_iu = "iu:";
const char *_ms_script_prefix_chr= "chr:";
const char *_ms_script_prefix_cop= "cop:";
const char *_ms_script_prefix_ru = "ru:";
const char *_ms_script_prefix_hi = "hi:";
const char *_ms_script_prefix_ka = "ka:";
const char *_ms_script_prefix_el = "el:";
const char *_ms_script_prefix_gu = "gu:";
const char *_ms_script_prefix_pa = "pa:";
const char *_ms_script_prefix_ko = "ko:";
const char *_ms_script_prefix_he = "he:";
const char *_ms_script_prefix_ja = "ja:";
const char *_ms_script_prefix_kn = "kn:";
const char *_ms_script_prefix_lo = "lo:";
const char *_ms_script_prefix_ml = "ml:";
const char *_ms_script_prefix_mn = "mn:";
const char *_ms_script_prefix_or = "or:";
const char *_ms_script_prefix_syr= "syr:";
const char *_ms_script_prefix_ta = "ta:";
const char *_ms_script_prefix_te = "te:";
const char *_ms_script_prefix_th = "th:";
const char *_ms_script_prefix_bo = "bo:";
const char *_ms_script_prefix_am = "am:";
const char *_ms_script_prefix_km = "km:";
const char *_ms_script_prefix_my = "my:";
const char *_ms_script_prefix_si = "si:";
const char *_ms_script_prefix_dv = "dv:";
const char *_ms_script_prefix_bku= "bku:";
const char *_ms_script_prefix_hnn= "hnn:";
const char *_ms_script_prefix_tl = "tl:";
const char *_ms_script_prefix_tbw= "tbw:";
const char *_ms_script_prefix_uga= "uga:";
const char *_ms_script_prefix_bug= "bug:";
const char *_ms_script_prefix_peo= "peo:";
const char *_ms_script_prefix_syl= "syl:";
const char *_ms_script_prefix_nko= "nko:";
112 
/*
 * Return the font-list prefix associated with a HarfBuzz script, or NULL
 * when the script has no dedicated prefix. Hiragana and Katakana share the
 * same prefix.
 */
const char* prefix_from_script(hb_script_t script) {
  const char *prefix = NULL;

  switch(script) {
  case HB_SCRIPT_LATIN:               prefix = _ms_script_prefix_en;  break;
  case HB_SCRIPT_ARABIC:              prefix = _ms_script_prefix_ar;  break;
  case HB_SCRIPT_HAN:                 prefix = _ms_script_prefix_cn;  break;
  case HB_SCRIPT_ARMENIAN:            prefix = _ms_script_prefix_hy;  break;
  case HB_SCRIPT_BENGALI:             prefix = _ms_script_prefix_bn;  break;
  case HB_SCRIPT_CANADIAN_ABORIGINAL: prefix = _ms_script_prefix_iu;  break;
  case HB_SCRIPT_CHEROKEE:            prefix = _ms_script_prefix_chr; break;
  case HB_SCRIPT_COPTIC:              prefix = _ms_script_prefix_cop; break;
  case HB_SCRIPT_CYRILLIC:            prefix = _ms_script_prefix_ru;  break;
  case HB_SCRIPT_DEVANAGARI:          prefix = _ms_script_prefix_hi;  break;
  case HB_SCRIPT_GEORGIAN:            prefix = _ms_script_prefix_ka;  break;
  case HB_SCRIPT_GREEK:               prefix = _ms_script_prefix_el;  break;
  case HB_SCRIPT_GUJARATI:            prefix = _ms_script_prefix_gu;  break;
  case HB_SCRIPT_GURMUKHI:            prefix = _ms_script_prefix_pa;  break;
  case HB_SCRIPT_HANGUL:              prefix = _ms_script_prefix_ko;  break;
  case HB_SCRIPT_HEBREW:              prefix = _ms_script_prefix_he;  break;
  case HB_SCRIPT_HIRAGANA:            /* same prefix as katakana */
  case HB_SCRIPT_KATAKANA:            prefix = _ms_script_prefix_ja;  break;
  case HB_SCRIPT_KANNADA:             prefix = _ms_script_prefix_kn;  break;
  case HB_SCRIPT_LAO:                 prefix = _ms_script_prefix_lo;  break;
  case HB_SCRIPT_MALAYALAM:           prefix = _ms_script_prefix_ml;  break;
  case HB_SCRIPT_MONGOLIAN:           prefix = _ms_script_prefix_mn;  break;
  case HB_SCRIPT_ORIYA:               prefix = _ms_script_prefix_or;  break;
  case HB_SCRIPT_SYRIAC:              prefix = _ms_script_prefix_syr; break;
  case HB_SCRIPT_TAMIL:               prefix = _ms_script_prefix_ta;  break;
  case HB_SCRIPT_TELUGU:              prefix = _ms_script_prefix_te;  break;
  case HB_SCRIPT_THAI:                prefix = _ms_script_prefix_th;  break;
  case HB_SCRIPT_TIBETAN:             prefix = _ms_script_prefix_bo;  break;
  case HB_SCRIPT_ETHIOPIC:            prefix = _ms_script_prefix_am;  break;
  case HB_SCRIPT_KHMER:               prefix = _ms_script_prefix_km;  break;
  case HB_SCRIPT_MYANMAR:             prefix = _ms_script_prefix_my;  break;
  case HB_SCRIPT_SINHALA:             prefix = _ms_script_prefix_si;  break;
  case HB_SCRIPT_THAANA:              prefix = _ms_script_prefix_dv;  break;
  case HB_SCRIPT_BUHID:               prefix = _ms_script_prefix_bku; break;
  case HB_SCRIPT_HANUNOO:             prefix = _ms_script_prefix_hnn; break;
  case HB_SCRIPT_TAGALOG:             prefix = _ms_script_prefix_tl;  break;
  case HB_SCRIPT_TAGBANWA:            prefix = _ms_script_prefix_tbw; break;
  case HB_SCRIPT_UGARITIC:            prefix = _ms_script_prefix_uga; break;
  case HB_SCRIPT_BUGINESE:            prefix = _ms_script_prefix_bug; break;
  case HB_SCRIPT_OLD_PERSIAN:         prefix = _ms_script_prefix_peo; break;
  case HB_SCRIPT_SYLOTI_NAGRI:        prefix = _ms_script_prefix_syl; break;
  case HB_SCRIPT_NKO:                 prefix = _ms_script_prefix_nko; break;
  default:
    break; /* no dedicated prefix for this script */
  }

  return prefix;
}
162 
/*
 * Feature list handed to hb_shape() for every shaped run: one entry for the
 * 'vert' tag and one for the 'kern' tag, each initialised with 0, 0, INT_MAX.
 * NOTE(review): presumably the fields are (tag, value, start, end), i.e. both
 * features are given the value 0 over the whole buffer -- confirm against the
 * hb_feature_t documentation.
 */
hb_feature_t hbfeatures[2] = {
    {HB_TAG('v','e','r','t'),0,0,INT_MAX},
    {HB_TAG('k','e','r','n'),0,0,INT_MAX}
};
167 
_ms_get_glyph_func(hb_font_t * font,void * font_data,hb_codepoint_t unicode,hb_codepoint_t variation_selector,hb_codepoint_t * glyph,void * user_data)168 static hb_bool_t _ms_get_glyph_func (hb_font_t *font, void *font_data,
169     hb_codepoint_t unicode, hb_codepoint_t variation_selector, hb_codepoint_t *glyph,
170 		void *user_data)
171 
172 {
173   /* first check our run, as we have probably already computed this */
174   int i;
175   struct _ms_hb_user_data *ud = font_data;
176   unsigned int *unicodes = ud->info->unicodes + ud->run->offset;
177 
178   for(i=0; i<ud->run->length; i++) {
179     if(unicodes[i] == unicode) {
180       *glyph = *(ud->info->codepoints + ud->run->offset + i);
181       return *glyph != 0;
182     }
183   }
184 
185   {
186     FT_Face ft_face = ud->run->face->face;
187 
188 #ifdef HAVE_FT_FACE_GETCHARVARIANTINDEX
189     if ((variation_selector)) {
190       *glyph = FT_Face_GetCharVariantIndex (ft_face, unicode, variation_selector);
191       return *glyph != 0;
192     }
193 #endif
194 
195     *glyph = FT_Get_Char_Index (ft_face, unicode);
196     return *glyph != 0;
197   }
198 }
199 
_ms_get_glyph_h_advance_func(hb_font_t * font,void * font_data,hb_codepoint_t glyph,void * user_data)200 static hb_position_t _ms_get_glyph_h_advance_func (hb_font_t *font, void *font_data,
201 			   hb_codepoint_t glyph, void *user_data)
202 {
203   struct _ms_hb_user_data *ud = font_data;
204   glyph_element *glyphc = msGetGlyphByIndex(ud->run->face,ud->glyph_size,glyph);
205   if(!glyphc)
206     return 0;
207   return glyphc->metrics.advance * 64;
208 }
209 
_ms_get_glyph_v_advance_func(hb_font_t * font,void * font_data,hb_codepoint_t glyph,void * user_data)210 static hb_position_t _ms_get_glyph_v_advance_func (hb_font_t *font, void *font_data,
211     hb_codepoint_t glyph, void *user_data)
212 {
213   return 0; /* we don't support vertical layouts */
214 }
215 #endif
216 
/*
 * Try to use a single font for a run.
 *
 * The face is looked up with msGetFontFace(): when both a fontset and a key
 * are given and the key contains a ':', the part following the first ':' is
 * tried first (skipping a script prefix); the full key is used otherwise or
 * as a fallback. The face found (possibly NULL) is stored in run->face.
 *
 * For each character of the run the glyph index in that face is written to
 * glyphs->codepoints. A character without glyph makes the function stop and
 * return MS_FAILURE, unless ignore_missing is non-zero in which case the
 * glyph index of '?' is stored instead.
 *
 * Returns MS_SUCCESS when every character of the run received a glyph index,
 * MS_FAILURE when no face could be obtained or a glyph is missing.
 */
static int check_single_font(fontSetObj *fontset, char *fontkey, text_run *run, TextInfo *glyphs, int ignore_missing) {
  face_element *face = NULL;
  int n;

  if(fontset && fontkey) {
    char *colon = strchr(fontkey, ':');
    if(colon)
      face = msGetFontFace(colon + 1, fontset); /* try skipping prefix */
  }
  if(!face)
    face = msGetFontFace(fontkey, fontset);

  run->face = face;
  if(UNLIKELY(!face)) return MS_FAILURE;

  for(n = 0; n < run->length; n++) {
    const unsigned int unicode = glyphs->unicodes[run->offset + n];
    unsigned int glyph_index = msGetGlyphIndex(face, unicode);

    if(glyph_index == 0) {
      if(!ignore_missing)
        return MS_FAILURE;
      msDebug("Unable to find glyph for codepoint %u. Using ? as fallback.\n", unicode);
      glyph_index = msGetGlyphIndex(face, '?');
    }
    glyphs->codepoints[run->offset + n] = glyph_index;
  }
  return MS_SUCCESS;
}
246 
/*
 * Select the font face used to render a run and compute the glyph index of
 * each of its characters (see check_single_font()).
 *
 * fontlist is a comma separated list of font keys, each optionally prefixed
 * by a script prefix ("arial,ar:arialuni,cn:cjk"). The list is temporarily
 * modified in place (commas replaced by NUL then restored) while it is being
 * scanned, so it must point to writable memory.
 *
 * Search order:
 *  1. when shaping support is compiled in, the fonts whose prefix matches the
 *     script of the run, requiring all glyphs to be present;
 *  2. every font of the list in order, requiring all glyphs to be present
 *     except for the last font, for which missing glyphs are replaced.
 *
 * Without a fontset or without a font list there is a single candidate, and
 * the function always returns MS_SUCCESS: run->face may then be NULL.
 * Otherwise returns MS_SUCCESS as soon as a font is accepted, MS_FAILURE if
 * none is.
 */
static int get_face_for_run(fontSetObj *fontset, char *fontlist, text_run *run, TextInfo *glyphs) {
  char *startfont, *endfont;
  int ok;
#if defined(USE_HARFBUZZ) && defined(USE_FRIBIDI)
  const char *prefix = NULL;
#endif

  if(!fontset || !fontlist) {
    /* There is no other font to fall back to, so treat this candidate like
     * the last font of a list and replace missing glyphs: stopping at the
     * first missing glyph while still reporting success would leave the
     * remaining glyph indexes of the run unset. The result is deliberately
     * not propagated, this path has always been best-effort. */
    (void)check_single_font(fontset,fontlist,run,glyphs,1);
    return MS_SUCCESS;
  }

#if defined(USE_HARFBUZZ) && defined(USE_FRIBIDI)
  if(run->rtl >= 0) {
    prefix = prefix_from_script(run->script);
  } else {
    prefix = _ms_script_prefix_en;
  }

  if(prefix) {
    /* we'll first look for a font that is prefixed by the current script prefix, e.g, given the
     * fontlist "arial,ar:arialuni,cn:cjk" check the "cjk" font first for HAN scripts
     */
    const size_t prefixlen = strlen(prefix);
    for(startfont = fontlist; *startfont; startfont = endfont+1) {
      endfont = strchr(startfont,',');
      if(!strncmp(startfont,prefix,prefixlen)) {
        if(endfont) *endfont = 0; /* isolate the current entry */
        ok = check_single_font(fontset,startfont+prefixlen,run,glyphs,0);
        if(endfont) *endfont = ','; /* restore the list */
        if(ok == MS_SUCCESS) return MS_SUCCESS;
      }
      if(!endfont) break; /* that was the last entry */
    }
  }
#endif

  /* no prefix, or prefix search didn't return satisfying result */
  for(startfont = fontlist; *startfont; startfont = endfont+1) {
    endfont = strchr(startfont,',');
    if(endfont) *endfont = 0; /* isolate the current entry */
    /* ignore failing glyphs if we're using the last font in the list */
    ok = check_single_font(fontset,startfont,run,glyphs,!endfont);
    if(endfont) *endfont = ','; /* restore the list */
    if(ok == MS_SUCCESS) return MS_SUCCESS;
    if(!endfont) break; /* that was the last entry */
  }

  return MS_FAILURE;
}
314 
315 #ifdef USE_HARFBUZZ
get_hb_font(struct _ms_hb_user_data * font_data)316 hb_font_t* get_hb_font(struct _ms_hb_user_data *font_data) {
317   face_element *fcache = font_data->run->face;
318   hb_font_element *hbf = fcache->hbfont;
319   FT_Face face = fcache->face;
320   int reqsize = MS_NINT(font_data->glyph_size *96.0 / 72.0);
321 
322   if(reqsize != fcache->face->size->metrics.x_ppem) {
323     FT_Set_Pixel_Sizes(face,0,reqsize);
324   }
325 
326   if(!hbf) {
327     hbf = msSmallMalloc(sizeof(hb_font_element));
328     hbf->hbparentfont = hb_ft_font_create(face,NULL);
329     hbf->hbfont = hb_font_create_sub_font(hbf->hbparentfont);
330     hbf->funcs = hb_font_funcs_create();
331     hb_font_funcs_set_glyph_h_advance_func(hbf->funcs, _ms_get_glyph_h_advance_func, NULL, NULL);
332     hb_font_funcs_set_glyph_func(hbf->funcs, _ms_get_glyph_func, NULL, NULL);
333     hb_font_funcs_set_glyph_v_advance_func(hbf->funcs, _ms_get_glyph_v_advance_func, NULL, NULL);
334     hbf->cursize = reqsize;
335     fcache->hbfont = hbf;
336     hb_font_set_funcs(hbf->hbfont, hbf->funcs, font_data, NULL);
337   } else {
338     if(hbf->cursize != reqsize) {
339       hb_font_set_scale (hbf->hbparentfont,
340           ((uint64_t) face->size->metrics.x_scale * (uint64_t) face->units_per_EM) >> 16,
341           ((uint64_t) face->size->metrics.y_scale * (uint64_t) face->units_per_EM) >> 16);
342       hb_font_set_ppem (hbf->hbparentfont, face->size->metrics.x_ppem, face->size->metrics.y_ppem);
343       hbf->cursize = reqsize;
344     }
345   }
346   hb_font_set_funcs_data(hbf->hbfont,font_data,NULL);
347   return hbf->hbfont;
348 }
349 
/*
 *  Return non-zero (true) if every value of the given unicode array is
 *  below 0x2B0, otherwise return zero. An empty array (nglyphs <= 0)
 *  yields true.
 */
int unicode_is_latin1(const unsigned int *unicode, long nglyphs)
{
  long idx = 0;

  while (idx < nglyphs) {
    if (unicode[idx] >= 0x2B0)
      return 0;
    idx++;
  }
  return 1;
}
364 
/*
 * Determine the script of each of the len unicode values in cp and store it
 * in scripts.
 *
 * Values whose own script is COMMON or INHERITED take the script of the
 * closest preceding value that has one. When such values appear before any
 * usable context, a second pass running from the end of the text assigns them
 * the script of the closest following value. Values for which no context
 * exists at all keep their COMMON/INHERITED script.
 */
void get_scripts(unsigned int *cp, int len, hb_script_t *scripts) {
  hb_unicode_funcs_t *ufuncs = hb_unicode_funcs_get_default();
  hb_script_t context = HB_SCRIPT_UNKNOWN;
  int needs_backfill = 0;
  int pos;

  /* forward pass: propagate the script of preceding characters */
  for (pos = 0; pos < len; pos++) {
    const hb_script_t detected = hb_unicode_script(ufuncs, cp[pos]);

    scripts[pos] = detected;
    if (detected != HB_SCRIPT_COMMON && detected != HB_SCRIPT_INHERITED) {
      context = detected;
    } else if (context != HB_SCRIPT_UNKNOWN) {
      scripts[pos] = context;
    } else {
      /* unknown is not a valid context: look at following characters later */
      needs_backfill = 1;
    }
  }

  if (!needs_backfill)
    return;

  /* backward pass: propagate the script of following characters */
  context = HB_SCRIPT_UNKNOWN;
  for (pos = len; pos-- > 0; ) {
    if (scripts[pos] != HB_SCRIPT_COMMON && scripts[pos] != HB_SCRIPT_INHERITED) {
      context = scripts[pos];
    } else if (context != HB_SCRIPT_UNKNOWN) {
      scripts[pos] = context;
    }
  }
}
404 #endif
405 
/* tells whether a unicode value must be left out of the layout: returns 1
 * for carriage return (the only value skipped for now), 0 otherwise */
static int skip_unicode(unsigned int unicode) {
  if(unicode == '\r')
    return 1;
  return 0;
}
416 
/* directionality of a whole line, stored in line_desc.rtl */
#define MS_RTL_LTR 0
#define MS_RTL_RTL 1
#define MS_RTL_MIXED 2

/* per-line summary used when aligning the lines of a multi-line text */
struct line_desc{
  int length; /* pen x position reached at the end of the line */
  int rtl; /* one of MS_RTL_LTR, MS_RTL_RTL or MS_RTL_MIXED */
} ;
425 
/*
 * Lay out the text of a text symbol: convert ts->annotext into positioned
 * glyphs appended to tgret.
 *
 * Steps:
 *  - convert the text to UTF-8 with iconv when the label declares another
 *    encoding (ts->annotext is then replaced by the converted string);
 *  - decode the text into unicode values, dropping the ones rejected by
 *    skip_unicode();
 *  - apply the label wrap character / maxlength to insert line breaks;
 *  - split the text into runs, one per line, then (when bidi/shaping support
 *    is compiled in) by bidi embedding level and by script;
 *  - pick a font face for each run with get_face_for_run();
 *  - position the glyphs of each run, either directly from the glyph
 *    advances or through HarfBuzz shaping, updating tgret->bounds.bbox;
 *  - for multi-line texts, shift each line horizontally according to the
 *    label alignment.
 *
 * map   may be NULL, in which case no fontset is used.
 * ts    text symbol; annotext must be a non-empty string.
 * tgret receives the result. line_height and glyph_size are read from it;
 *       glyphs, numglyphs, numlines and bounds.bbox are written.
 *       NOTE(review): the code assumes tgret->numglyphs is 0 and tgret->glyphs
 *       can be handed to msSmallRealloc() on entry -- confirm with callers.
 *
 * Returns MS_SUCCESS, or MS_FAILURE when the encoding is not supported, no
 * font could be selected for a run, or a glyph could not be obtained.
 */
int msLayoutTextSymbol(mapObj *map, textSymbolObj *ts, textPathObj *tgret) {
#define STATIC_GLYPHS 100
#define STATIC_LINES 10
  /* one spare entry: the line splitting loop prepares runs[nruns+1] each
   * time it meets a newline, which reaches index num_glyphs for a text made
   * of newlines only */
  text_run static_runs[STATIC_GLYPHS+1];
  int i,nruns, start, ret=MS_SUCCESS;
  size_t text_num_bytes;
  char *inp;
  unsigned int static_unicodes[STATIC_GLYPHS];
  unsigned int static_codepoints[STATIC_GLYPHS];
#ifdef USE_FRIBIDI
  FriBidiCharType static_ctypes[STATIC_GLYPHS];
  FriBidiLevel static_bidi_levels[STATIC_GLYPHS];
#endif
#ifdef USE_HARFBUZZ
  hb_script_t static_scripts[STATIC_GLYPHS];
  hb_buffer_t *buf = NULL;
#endif
  struct line_desc static_line_descs[STATIC_LINES];
  int alloc_glyphs = 0;
  struct line_desc *line_descs = NULL;
  text_run *runs;
  double oldpeny=3455,peny,penx=0; /*oldpeny is set to an unreasonable default initial value */
  fontSetObj *fontset = NULL;

  TextInfo glyphs;
  int num_glyphs = 0;

  assert(ts->annotext && *ts->annotext); /* ensure we have at least one character/glyph to treat */

  if(map) fontset = &map->fontset;
  /* go through iconv beforehand, so we know we're handling utf8 */
#ifdef USE_ICONV
  if(ts->label->encoding && strcasecmp(ts->label->encoding,"UTF-8")) {
    iconv_t cd;
    size_t len, iconv_status,bufleft;
    char *encoded_text,*outp;

    /* open the converter before allocating anything, so that the error
     * path has nothing to release */
    cd = iconv_open("UTF-8", ts->label->encoding);
    if(cd == (iconv_t)-1) {
      msSetError(MS_IDENTERR, "Encoding not supported by libiconv (%s).",
               "msGetEncodedString()", ts->label->encoding);
      return MS_FAILURE;
    }

    len = strlen(ts->annotext);
    bufleft = len*6;
    encoded_text = msSmallMalloc(bufleft+1);

    inp = ts->annotext;
    outp = encoded_text;

    while(len>0) {
      iconv_status = iconv(cd, &inp, &len, &outp, &bufleft);
      if(iconv_status == (size_t)-1) {
        break; /* keep whatever could be converted so far */
      }
    }

    text_num_bytes = outp - encoded_text;
    encoded_text[text_num_bytes] = 0;
    free(ts->annotext);
    ts->annotext = encoded_text;
    iconv_close(cd);
  } else
#endif
  {
    text_num_bytes = strlen(ts->annotext);
  }

  if( text_num_bytes == 0 )
      return 0;

  /* the number of bytes is an upper bound of the number of characters */
  if(text_num_bytes > STATIC_GLYPHS) {
#ifdef USE_FRIBIDI
    glyphs.bidi_levels = msSmallMalloc(text_num_bytes * sizeof(FriBidiLevel));
    glyphs.ctypes = msSmallMalloc(text_num_bytes * sizeof(FriBidiCharType));
#endif
    glyphs.unicodes = msSmallMalloc(text_num_bytes * sizeof(unsigned int));
    glyphs.codepoints = msSmallMalloc(text_num_bytes * sizeof(unsigned int));
#ifdef USE_HARFBUZZ
    glyphs.scripts = msSmallMalloc(text_num_bytes * sizeof(hb_script_t));
#endif
    /* one spare entry, see static_runs */
    runs = msSmallMalloc((text_num_bytes + 1) * sizeof(text_run));
  } else {
#ifdef USE_FRIBIDI
    glyphs.bidi_levels = static_bidi_levels;
    glyphs.ctypes = static_ctypes;
#endif
    glyphs.unicodes = static_unicodes;
    glyphs.codepoints = static_codepoints;
#ifdef USE_HARFBUZZ
    glyphs.scripts = static_scripts;
#endif
    runs = static_runs;
  }

  /* populate the unicode entries once and for all */
  inp = ts->annotext;
  while(*inp) {
    unsigned int unicode;
    inp += msUTF8ToUniChar(inp, &unicode);
    if(!skip_unicode(unicode)){
      glyphs.unicodes[num_glyphs++] = unicode;
    }
  }

  if(num_glyphs == 0) {
    /* every character was skipped (e.g. the text only holds '\r'): there is
     * nothing to lay out, and the code below expects at least one glyph */
    goto cleanup;
  }

  if(ts->label->wrap || ts->label->maxlength > 0) {
    if(ts->label->wrap && ts->label->maxlength == 0) {
      for(i=0;i<num_glyphs;i++) {
        /* replace all occurrences of the wrap character with a newline */
        if(glyphs.unicodes[i]== ts->label->wrap)
          glyphs.unicodes[i]= '\n';
      }
    } else {
      assert(ts->label->maxlength > 0);
      if(num_glyphs > ts->label->maxlength) {
        int num_cur_glyph_on_line = 0; /*count for the number of glyphs on the current line*/
        for(i=0; i<num_glyphs; i++) {
          /* wrap at wrap character or at ZERO WIDTH SPACE (unicode 0x200b), if
           * current line is too long */
          if((glyphs.unicodes[i] == ts->label->wrap || glyphs.unicodes[i] == 0x200b)
              && num_cur_glyph_on_line >= ts->label->maxlength) {
            glyphs.unicodes[i]= '\n';
            num_cur_glyph_on_line = 0;
          } else {
            num_cur_glyph_on_line++;
          }
        }
      }
    }
    /*
     * TODO RFC98: RFC40 negative label->wrap. This is left out for the moment as it requires
     * handling a realloc and imho is never used and is an overly-complex use-case.
     */
  }

  /* split our text into runs (one for each line) */
  nruns = 0;
  start = 0;
  runs[0].offset = 0;
  runs[0].line_number = 0;
  for(i=0; i<num_glyphs; i++) {
    if(glyphs.unicodes[i]!= '\n') continue;
    runs[nruns].length = i - start; /* length of current line (without \n) */
    start = i+1; /* start of next line */
    runs[nruns+1].line_number = runs[nruns].line_number+1;
    runs[nruns+1].offset = start;
    nruns++;
  }
  /* unless the last glyph was a \n, we need to "close" the last run */
  if(glyphs.unicodes[num_glyphs-1]!= '\n') {
    runs[nruns].length = num_glyphs - start;
    nruns++;
  }

  if(runs[nruns-1].line_number+1 > STATIC_LINES) {
    line_descs = msSmallMalloc((runs[nruns-1].line_number + 1)*sizeof(struct line_desc));
  } else {
    line_descs = static_line_descs;
  }

#ifdef USE_FRIBIDI
  for(i=0;i<nruns;i++) {
    /* check the run (at this stage, one run per line), decide if we need to go through bidi and/or shaping */
    if(unicode_is_latin1(glyphs.unicodes + runs[i].offset, runs[i].length)) {
      runs[i].rtl = -1;
      line_descs[i].rtl = MS_RTL_LTR;
    } else {
      runs[i].rtl = 0;
    }
  }

  for(i=0; i<nruns; i++) {
    /* split the text into bidi runs */
    if(runs[i].rtl >= 0) {
      int j, original_num_glyphs, original_offset;
      FriBidiLevel prevlevel;
      FriBidiParType dir = FRIBIDI_PAR_LTR;
      original_offset = runs[i].offset;
      original_num_glyphs = runs[i].length;
      fribidi_get_bidi_types(glyphs.unicodes + original_offset, runs[i].length, glyphs.ctypes + original_offset);
      fribidi_get_par_embedding_levels(glyphs.ctypes + original_offset, runs[i].length, &dir, glyphs.bidi_levels + runs[i].offset);
      /* if we have different embedding levels, create a run for each one */
      runs[i].rtl = prevlevel = glyphs.bidi_levels[original_offset];
      line_descs[runs[i].line_number].rtl = (prevlevel%2) ? MS_RTL_RTL:MS_RTL_LTR;
      for(j=1; j<original_num_glyphs; j++) {
        if(glyphs.bidi_levels[original_offset+j] != prevlevel) {
          line_descs[runs[i].line_number].rtl = MS_RTL_MIXED;
          /* create a new run for the different embedding level */
          nruns++;

          /* first move remaining runs */
          memmove(runs+i+2,runs+i+1,(nruns-i-2)*sizeof(text_run));

          i++;
          /* new run inherits line number */
          runs[i].line_number = runs[i-1].line_number;
          runs[i].offset = original_offset + j;
          runs[i].length = (runs[i-1].offset + runs[i-1].length) - runs[i].offset;
          runs[i-1].length = runs[i].offset - runs[i-1].offset;


          /* new run starts at current position */
          runs[i].rtl = prevlevel = glyphs.bidi_levels[original_offset+j];
        }
      }
    }
  }
#else
  for(i=0;i<nruns;i++) {
    line_descs[i].rtl = MS_RTL_LTR;
  }
#endif

#ifdef USE_FRIBIDI
  /* determine the scripts of each run, and split again into runs with identical script */
  for(i=0; i<nruns; i++) {
    if(runs[i].rtl == -1) {
      runs[i].script = HB_SCRIPT_LATIN;
      continue; /* skip runs we have determined we are latin (no shaping needed) */
    } else {
      int j, original_num_glyphs, original_offset;
      hb_script_t prevscript;
      original_offset = runs[i].offset;
      original_num_glyphs = runs[i].length;
      get_scripts(glyphs.unicodes + original_offset, runs[i].length, glyphs.scripts + original_offset);
      /* if we have different scripts, create a run for each one */
      runs[i].script = prevscript = glyphs.scripts[original_offset];
      for(j=1; j<original_num_glyphs; j++) {
        if(glyphs.scripts[original_offset+j] != prevscript) {
          /* create a new run for the different script */
          nruns++;

          /* first move remaining runs */
          memmove(runs+i+2,runs+i+1,(nruns-i-2)*sizeof(text_run));

          i++;
          /* new run inherits line number and rtl*/
          runs[i].line_number = runs[i-1].line_number;
          runs[i].rtl = runs[i-1].rtl;
          runs[i].offset = original_offset + j;
          runs[i].length = (runs[i-1].offset + runs[i-1].length) - runs[i].offset;
          runs[i-1].length = runs[i].offset - runs[i-1].offset;


          runs[i].script = prevscript = glyphs.scripts[original_offset+j];
        }
      }
    }
  }
#endif

  /*
   * determine the font face to use for a given run. No splitting needed here for
   * now, as we suppose that the decomposition of each run into individual bidi
   * direction and script level is sufficient to ensure that a given run can be
   * represented by a single font (i.e. there's no need to look into multiple fonts
   * to find the glyphs of the run)
   */
  for(i=0; i<nruns; i++) {
    ret = get_face_for_run(fontset, ts->label->font, runs+i, &glyphs);
    if(UNLIKELY(ret == MS_FAILURE))
      goto cleanup;
  }


  tgret->numlines = runs[nruns-1].line_number + 1;
  tgret->bounds.bbox.minx = 0;
  tgret->bounds.bbox.miny = FLT_MAX;
  tgret->bounds.bbox.maxx = tgret->bounds.bbox.maxy = -FLT_MAX;


  for(i=0;i<nruns;i++) {
    unsigned int glyph_count,j;
    if(!runs[i].face) continue;
    /* the last line sits on y=0, previous lines are stacked above it */
    peny = (1 - tgret->numlines + runs[i].line_number) * tgret->line_height;
    if(peny != oldpeny) {
      /* starting a new line: record the length of the one just finished */
      if(i>0) line_descs[runs[i-1].line_number].length = penx;
      if(penx > tgret->bounds.bbox.maxx)
        tgret->bounds.bbox.maxx = penx;
      oldpeny = peny;
      penx = 0;
    }
#if defined(USE_HARFBUZZ) && defined(USE_FRIBIDI)
    if(runs[i].rtl == -1 || runs[i].script == HB_SCRIPT_LATIN || runs[i].script == HB_SCRIPT_COMMON)
#endif
    {
      /* use our basic shaper */
      unsigned int *codepoint = glyphs.codepoints + runs[i].offset;
      alloc_glyphs += runs[i].length;
      tgret->glyphs = msSmallRealloc(tgret->glyphs, alloc_glyphs * sizeof(glyphObj));
      for(j=0;j<runs[i].length;j++) {
        glyphObj *g = &tgret->glyphs[tgret->numglyphs + j];
        g->glyph = msGetGlyphByIndex(runs[i].face,tgret->glyph_size, *codepoint);
        if(UNLIKELY(!g->glyph)) {
          /* the glyph could not be obtained: its metrics cannot be read */
          ret = MS_FAILURE;
          goto cleanup;
        }
        g->face = runs[i].face;
        codepoint++;
        g->pnt.x = penx;
        g->pnt.y = peny;
        g->rot = 0.0;
        penx += g->glyph->metrics.advance;
        if(runs[i].line_number == 0 && peny - g->glyph->metrics.maxy < tgret->bounds.bbox.miny) /*compute minimal y, only for the first line */
          tgret->bounds.bbox.miny = peny - g->glyph->metrics.maxy;
        if(peny - g->glyph->metrics.miny > tgret->bounds.bbox.maxy)
          tgret->bounds.bbox.maxy = peny - g->glyph->metrics.miny;
      }
#if defined(USE_HARFBUZZ) && defined(USE_FRIBIDI)
    } else {
      struct _ms_hb_user_data user_data;
      hb_font_t *font;
      hb_glyph_info_t *glyph_info;
      hb_glyph_position_t *glyph_pos;
      if(!buf) {
        buf = hb_buffer_create();
      }
      user_data.info = &glyphs;
      user_data.run = runs + i;
      user_data.glyph_size = tgret->glyph_size;
      hb_buffer_clear_contents(buf);
      hb_buffer_set_script (buf, runs[i].script);
      font = get_hb_font(&user_data);
      hb_buffer_set_direction(buf, (runs[i].rtl%2) ? HB_DIRECTION_RTL :HB_DIRECTION_LTR);
      hb_buffer_add_utf32(buf,glyphs.unicodes + runs[i].offset, runs[i].length, 0, runs[i].length);
      hb_shape(font,buf,hbfeatures,2);
      glyph_info = hb_buffer_get_glyph_infos(buf, &glyph_count);
      glyph_pos = hb_buffer_get_glyph_positions(buf, &glyph_count);
      alloc_glyphs += glyph_count;
      tgret->glyphs = msSmallRealloc(tgret->glyphs, alloc_glyphs * sizeof(glyphObj));
      for(j=0;j<glyph_count;j++) {
        glyphObj *g = &tgret->glyphs[tgret->numglyphs + j];
        g->glyph = msGetGlyphByIndex(runs[i].face,tgret->glyph_size,glyph_info[j].codepoint);
        if(UNLIKELY(!g->glyph)) {
          /* the glyph could not be obtained: its metrics cannot be read */
          ret = MS_FAILURE;
          goto cleanup;
        }
        g->face = runs[i].face;
        g->pnt.x = penx + glyph_pos[j].x_offset/64;
        g->pnt.y = peny - glyph_pos[j].y_offset/64;
        g->rot = 0;
        penx += glyph_pos[j].x_advance/64.0;
        /* peny -= glyph_pos[j].y_advance/64; // we don't do vertical layouts */
        if(runs[i].line_number == 0 && peny - g->glyph->metrics.maxy < tgret->bounds.bbox.miny) /*compute minimal y, only for the first line */
          tgret->bounds.bbox.miny = peny - g->glyph->metrics.maxy;
        if(peny - g->glyph->metrics.miny > tgret->bounds.bbox.maxy)
          tgret->bounds.bbox.maxy = peny - g->glyph->metrics.miny;
      }
#endif
    }
    tgret->numglyphs = alloc_glyphs;
    line_descs[runs[nruns-1].line_number].length = penx;
    if(penx > tgret->bounds.bbox.maxx)
      tgret->bounds.bbox.maxx = penx;
  }

  if(tgret->numlines > 1) {
    int max_line_length = 0;
    int line = -1;
    double cur_line_offset = 0;
    int prev_default_align = MS_ALIGN_LEFT; /* if we have mixed rtl status, use the alignment of the previous
                                               line. this defaults to left-alignment if the first line is mixed */
    int cur_default_align = 0;
    for(i=0; i<tgret->numlines; i++) {
      if(line_descs[i].length > max_line_length) {
        max_line_length = line_descs[i].length;
      }
    }
    oldpeny = 3455;
    for(i=0; i<alloc_glyphs; i++) {
      /* a change of y position marks the first glyph of the next line */
      if(tgret->glyphs[i].pnt.y != oldpeny) {
        oldpeny = tgret->glyphs[i].pnt.y;
        line++;
        /* compute offset to apply to coming line */
        switch(ts->label->align) {
        case MS_ALIGN_CENTER:
          cur_line_offset = (max_line_length - line_descs[line].length)/2.0;
          break;
        case MS_ALIGN_RIGHT:
          cur_line_offset = (max_line_length - line_descs[line].length);
          break;
        case MS_ALIGN_LEFT:
          cur_line_offset = 0;
          break;
        case MS_ALIGN_DEFAULT:
        default:
          switch(line_descs[line].rtl) {
          case MS_RTL_MIXED:
            cur_default_align = prev_default_align;
            break;
          case MS_RTL_RTL:
            cur_default_align = prev_default_align = MS_RTL_RTL;
            break;
          case MS_RTL_LTR:
            cur_default_align = prev_default_align = MS_RTL_LTR;
            break;
          }
          switch(cur_default_align) {
          case MS_RTL_RTL:
            /* align to the right */
            cur_line_offset = (max_line_length - line_descs[line].length);
            break;
          case MS_RTL_LTR:
            cur_line_offset = 0;
            break;
          }
        }
      }
      tgret->glyphs[i].pnt.x += cur_line_offset;
    }
  }
  /*
   * msDebug("bounds for %s: %f %f %f %f\n",ts->annotext,tgret->bounds.bbox.minx,tgret->bounds.bbox.miny,tgret->bounds.bbox.maxx,tgret->bounds.bbox.maxy);
   */

cleanup:
#ifdef USE_HARFBUZZ
  /* released here so that error paths leaving the shaping loop do not leak it */
  if(buf) {
    hb_buffer_destroy(buf);
  }
#endif
  if(line_descs != static_line_descs) free(line_descs);
  if(glyphs.codepoints != static_codepoints) {
#ifdef USE_FRIBIDI
    free(glyphs.bidi_levels);
    free(glyphs.ctypes);
#endif
    free(glyphs.codepoints);
#ifdef USE_HARFBUZZ
    free(glyphs.scripts);
#endif
    free(glyphs.unicodes);
    free(runs);
  }
  return ret;


}
858