1 /*
2  * Copyright © 2015 Information Technology Authority (ITA) <foss@ita.gov.om>
3  * Copyright © 2016 Khaled Hosny <khaledhosny@eglug.org>
4  *
5  * Permission is hereby granted, free of charge, to any person obtaining a copy
6  * of this software and associated documentation files (the "Software"), to
7  * deal in the Software without restriction, including without limitation the
8  * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
9  * sell copies of the Software, and to permit persons to whom the Software is
10  * furnished to do so, subject to the following conditions:
11  *
12  * The above copyright notice and this permission notice shall be included in
13  * all copies or substantial portions of the Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21  * IN THE SOFTWARE.
22  *
23  */
24 
25 #ifdef HAVE_CONFIG_H
26 #include "config.h"
27 #undef HAVE_CONFIG_H  // Workaround for Fribidi 1.0.5 and earlier
28 #endif
29 
30 #include <assert.h>
31 #include <string.h>
32 
33 #include <fribidi.h>
34 #include <hb.h>
35 #include <hb-ft.h>
36 
37 #include "raqm.h"
38 
39 #if FRIBIDI_MAJOR_VERSION >= 1
40 #define USE_FRIBIDI_EX_API
41 #endif
42 
43 /**
44  * SECTION:raqm
45  * @title: Raqm
46  * @short_description: A library for complex text layout
47  * @include: raqm.h
48  *
 * Raqm is a lightweight text layout library with a strong emphasis on
 * supporting languages and writing systems that require complex text layout.
 *
 * The main object in the Raqm API is #raqm_t; it stores the state of the
 * input text, its properties, and the output of the layout process.
 *
 * To start, you create a #raqm_t object, add text and font(s) to it, run the
 * layout process, and finally query the output. For example:
57  *
58  * |[<!-- language="C" -->
59  * #include "raqm.h"
60  *
61  * int
62  * main (int argc, char *argv[])
63  * {
64  *     const char *fontfile;
65  *     const char *text;
66  *     const char *direction;
67  *     const char *language;
68  *     int ret = 1;
69  *
70  *     FT_Library library = NULL;
71  *     FT_Face face = NULL;
72  *
73  *     if (argc < 5)
74  *     {
75  *         printf ("Usage: %s FONT_FILE TEXT DIRECTION LANG\n", argv[0]);
76  *         return 1;
77  *     }
78  *
79  *     fontfile =  argv[1];
80  *     text = argv[2];
81  *     direction = argv[3];
82  *     language = argv[4];
83  *
84  *     if (FT_Init_FreeType (&library) == 0)
85  *     {
86  *       if (FT_New_Face (library, fontfile, 0, &face) == 0)
87  *       {
88  *         if (FT_Set_Char_Size (face, face->units_per_EM, 0, 0, 0) == 0)
89  *         {
90  *           raqm_t *rq = raqm_create ();
91  *           if (rq != NULL)
92  *           {
93  *             raqm_direction_t dir = RAQM_DIRECTION_DEFAULT;
94  *
95  *             if (strcmp (direction, "r") == 0)
96  *               dir = RAQM_DIRECTION_RTL;
97  *             else if (strcmp (direction, "l") == 0)
98  *               dir = RAQM_DIRECTION_LTR;
99  *
100  *             if (raqm_set_text_utf8 (rq, text, strlen (text)) &&
101  *                 raqm_set_freetype_face (rq, face) &&
102  *                 raqm_set_par_direction (rq, dir) &&
103  *                 raqm_set_language (rq, language, 0, strlen (text)) &&
104  *                 raqm_layout (rq))
105  *             {
106  *               size_t count, i;
107  *               raqm_glyph_t *glyphs = raqm_get_glyphs (rq, &count);
108  *
109  *               ret = !(glyphs != NULL || count == 0);
110  *
111  *               printf("glyph count: %zu\n", count);
112  *               for (i = 0; i < count; i++)
113  *               {
114  *                   printf ("gid#%d off: (%d, %d) adv: (%d, %d) idx: %d\n",
115  *                           glyphs[i].index,
116  *                           glyphs[i].x_offset,
117  *                           glyphs[i].y_offset,
118  *                           glyphs[i].x_advance,
119  *                           glyphs[i].y_advance,
120  *                           glyphs[i].cluster);
121  *               }
122  *             }
123  *
124  *             raqm_destroy (rq);
125  *           }
126  *         }
127  *
128  *         FT_Done_Face (face);
129  *       }
130  *
131  *       FT_Done_FreeType (library);
132  *     }
133  *
134  *     return ret;
135  * }
136  * ]|
137  * To compile this example:
138  * |[<prompt>
139  * cc -o test test.c `pkg-config --libs --cflags raqm`
140  * ]|
141  */
142 
/* Debug tracing. Define RAQM_DEBUG (for instance by uncommenting the line
 * below) to make RAQM_DBG() print to stderr; otherwise it expands to nothing. */
/*#define RAQM_DEBUG 1*/
#if defined (RAQM_DEBUG)
# define RAQM_DBG(...) fprintf (stderr, __VA_ARGS__)
#else
# define RAQM_DBG(...)
#endif

/* Test output. With RAQM_TESTING defined, RAQM_TEST() prints to stdout and
 * SCRIPT_TO_STRING() declares a local `buff` holding the NUL-terminated
 * ISO 15924 tag of @script. Because it declares a variable, the macro can be
 * used at most once per scope. Without RAQM_TESTING, RAQM_TEST() expands to
 * nothing and SCRIPT_TO_STRING() is not defined. */
#if defined (RAQM_TESTING)
# define RAQM_TEST(...) printf (__VA_ARGS__)
# define SCRIPT_TO_STRING(script) \
    char buff[5]; \
    hb_tag_to_string (hb_script_to_iso15924_tag (script), buff); \
    buff[4] = '\0';
#else
# define RAQM_TEST(...)
#endif
160 
/* Internal state bits stored in the `flags` member of struct _raqm. */
typedef enum {
  /* No flag set; the value assigned by raqm_create(). */
  RAQM_FLAG_NONE = 0,
  /* Set by raqm_set_text_utf8(). While set, the range-taking setters and
   * raqm_get_glyphs() translate between UTF-8 and UTF-32 indices. */
  RAQM_FLAG_UTF8 = (1 << 0)
} _raqm_flags_t;
165 
166 typedef struct {
167   FT_Face       ftface;
168   hb_language_t lang;
169   hb_script_t   script;
170 } _raqm_text_info;
171 
172 typedef struct _raqm_run raqm_run_t;
173 
174 struct _raqm {
175   int              ref_count;
176 
177   uint32_t        *text;
178   char            *text_utf8;
179   size_t           text_len;
180 
181   _raqm_text_info *text_info;
182 
183   raqm_direction_t base_dir;
184   raqm_direction_t resolved_dir;
185 
186   hb_feature_t    *features;
187   size_t           features_len;
188 
189   raqm_run_t      *runs;
190   raqm_glyph_t    *glyphs;
191 
192   _raqm_flags_t    flags;
193 
194   int              ft_loadflags;
195   int              invisible_glyph;
196 };
197 
198 struct _raqm_run {
199   int            pos;
200   int            len;
201 
202   hb_direction_t direction;
203   hb_script_t    script;
204   hb_font_t     *font;
205   hb_buffer_t   *buffer;
206 
207   raqm_run_t    *next;
208 };
209 
210 static uint32_t
211 _raqm_u8_to_u32_index (raqm_t   *rq,
212                        uint32_t  index);
213 
214 static bool
_raqm_init_text_info(raqm_t * rq)215 _raqm_init_text_info (raqm_t *rq)
216 {
217   hb_language_t default_lang;
218 
219   if (rq->text_info)
220     return true;
221 
222   rq->text_info = malloc (sizeof (_raqm_text_info) * rq->text_len);
223   if (!rq->text_info)
224     return false;
225 
226   default_lang = hb_language_get_default ();
227   for (size_t i = 0; i < rq->text_len; i++)
228   {
229     rq->text_info[i].ftface = NULL;
230     rq->text_info[i].lang = default_lang;
231     rq->text_info[i].script = HB_SCRIPT_INVALID;
232   }
233 
234   return true;
235 }
236 
237 static void
_raqm_free_text_info(raqm_t * rq)238 _raqm_free_text_info (raqm_t *rq)
239 {
240   if (!rq->text_info)
241     return;
242 
243   for (size_t i = 0; i < rq->text_len; i++)
244   {
245     if (rq->text_info[i].ftface)
246       FT_Done_Face (rq->text_info[i].ftface);
247   }
248 
249   free (rq->text_info);
250   rq->text_info = NULL;
251 }
252 
253 static bool
_raqm_compare_text_info(_raqm_text_info a,_raqm_text_info b)254 _raqm_compare_text_info (_raqm_text_info a,
255                          _raqm_text_info b)
256 {
257   if (a.ftface != b.ftface)
258     return false;
259 
260   if (a.lang != b.lang)
261     return false;
262 
263   if (a.script != b.script)
264     return false;
265 
266   return true;
267 }
268 
269 /**
270  * raqm_create:
271  *
272  * Creates a new #raqm_t with all its internal states initialized to their
273  * defaults.
274  *
275  * Return value:
276  * A newly allocated #raqm_t with a reference count of 1. The initial reference
277  * count should be released with raqm_destroy() when you are done using the
278  * #raqm_t. Returns %NULL in case of error.
279  *
280  * Since: 0.1
281  */
282 raqm_t *
raqm_create(void)283 raqm_create (void)
284 {
285   raqm_t *rq;
286 
287   rq = malloc (sizeof (raqm_t));
288   if (!rq)
289     return NULL;
290 
291   rq->ref_count = 1;
292 
293   rq->text = NULL;
294   rq->text_utf8 = NULL;
295   rq->text_len = 0;
296 
297   rq->text_info = NULL;
298 
299   rq->base_dir = RAQM_DIRECTION_DEFAULT;
300   rq->resolved_dir = RAQM_DIRECTION_DEFAULT;
301 
302   rq->features = NULL;
303   rq->features_len = 0;
304 
305   rq->runs = NULL;
306   rq->glyphs = NULL;
307 
308   rq->flags = RAQM_FLAG_NONE;
309 
310   rq->ft_loadflags = -1;
311   rq->invisible_glyph = 0;
312 
313   return rq;
314 }
315 
316 /**
317  * raqm_reference:
318  * @rq: a #raqm_t.
319  *
320  * Increases the reference count on @rq by one. This prevents @rq from being
321  * destroyed until a matching call to raqm_destroy() is made.
322  *
323  * Return value:
324  * The referenced #raqm_t.
325  *
326  * Since: 0.1
327  */
328 raqm_t *
raqm_reference(raqm_t * rq)329 raqm_reference (raqm_t *rq)
330 {
331   if (rq)
332     rq->ref_count++;
333 
334   return rq;
335 }
336 
337 static void
_raqm_free_runs(raqm_t * rq)338 _raqm_free_runs (raqm_t *rq)
339 {
340   raqm_run_t *runs = rq->runs;
341   while (runs)
342   {
343     raqm_run_t *run = runs;
344     runs = runs->next;
345 
346     hb_buffer_destroy (run->buffer);
347     hb_font_destroy (run->font);
348     free (run);
349   }
350 }
351 
352 /**
353  * raqm_destroy:
354  * @rq: a #raqm_t.
355  *
356  * Decreases the reference count on @rq by one. If the result is zero, then @rq
357  * and all associated resources are freed.
358  * See cairo_reference().
359  *
360  * Since: 0.1
361  */
362 void
raqm_destroy(raqm_t * rq)363 raqm_destroy (raqm_t *rq)
364 {
365   if (!rq || --rq->ref_count != 0)
366     return;
367 
368   free (rq->text);
369   free (rq->text_utf8);
370   _raqm_free_text_info (rq);
371   _raqm_free_runs (rq);
372   free (rq->glyphs);
373   free (rq);
374 }
375 
376 /**
377  * raqm_set_text:
378  * @rq: a #raqm_t.
379  * @text: a UTF-32 encoded text string.
380  * @len: the length of @text.
381  *
382  * Adds @text to @rq to be used for layout. It must be a valid UTF-32 text, any
383  * invalid character will be replaced with U+FFFD. The text should typically
384  * represent a full paragraph, since doing the layout of chunks of text
385  * separately can give improper output.
386  *
387  * Return value:
388  * %true if no errors happened, %false otherwise.
389  *
390  * Since: 0.1
391  */
392 bool
raqm_set_text(raqm_t * rq,const uint32_t * text,size_t len)393 raqm_set_text (raqm_t         *rq,
394                const uint32_t *text,
395                size_t          len)
396 {
397   if (!rq || !text)
398     return false;
399 
400   rq->text_len = len;
401 
402   /* Empty string, don’t fail but do nothing */
403   if (!len)
404     return true;
405 
406   free (rq->text);
407 
408   rq->text = malloc (sizeof (uint32_t) * rq->text_len);
409   if (!rq->text)
410     return false;
411 
412   _raqm_free_text_info (rq);
413   if (!_raqm_init_text_info (rq))
414     return false;
415 
416   memcpy (rq->text, text, sizeof (uint32_t) * rq->text_len);
417 
418   return true;
419 }
420 
421 /**
422  * raqm_set_text_utf8:
423  * @rq: a #raqm_t.
424  * @text: a UTF-8 encoded text string.
425  * @len: the length of @text in UTF-8 bytes.
426  *
427  * Same as raqm_set_text(), but for text encoded in UTF-8 encoding.
428  *
429  * Return value:
430  * %true if no errors happened, %false otherwise.
431  *
432  * Since: 0.1
433  */
434 bool
raqm_set_text_utf8(raqm_t * rq,const char * text,size_t len)435 raqm_set_text_utf8 (raqm_t         *rq,
436                     const char     *text,
437                     size_t          len)
438 {
439   uint32_t *unicode;
440   size_t ulen;
441   bool ok;
442 
443   if (!rq || !text)
444     return false;
445 
446   /* Empty string, don’t fail but do nothing */
447   if (!len)
448   {
449     rq->text_len = len;
450     return true;
451   }
452 
453   RAQM_TEST ("Text is: %s\n", text);
454 
455   rq->flags |= RAQM_FLAG_UTF8;
456 
457   rq->text_utf8 = malloc (sizeof (char) * len);
458   if (!rq->text_utf8)
459     return false;
460 
461   unicode = malloc (sizeof (uint32_t) * len);
462   if (!unicode)
463     return false;
464 
465   memcpy (rq->text_utf8, text, sizeof (char) * len);
466 
467   ulen = fribidi_charset_to_unicode (FRIBIDI_CHAR_SET_UTF8,
468                                      text, len, unicode);
469 
470   ok = raqm_set_text (rq, unicode, ulen);
471 
472   free (unicode);
473   return ok;
474 }
475 
476 /**
477  * raqm_set_par_direction:
478  * @rq: a #raqm_t.
479  * @dir: the direction of the paragraph.
480  *
481  * Sets the paragraph direction, also known as block direction in CSS. For
482  * horizontal text, this controls the overall direction in the Unicode
483  * Bidirectional Algorithm, so when the text is mainly right-to-left (with or
484  * without some left-to-right) text, then the base direction should be set to
485  * #RAQM_DIRECTION_RTL and vice versa.
486  *
487  * The default is #RAQM_DIRECTION_DEFAULT, which determines the paragraph
488  * direction based on the first character with strong bidi type (see [rule
489  * P2](http://unicode.org/reports/tr9/#P2) in Unicode Bidirectional Algorithm),
490  * which can be good enough for many cases but has problems when a mainly
491  * right-to-left paragraph starts with a left-to-right character and vice versa
492  * as the detected paragraph direction will be the wrong one, or when text does
493  * not contain any characters with string bidi types (e.g. only punctuation or
494  * numbers) as this will default to left-to-right paragraph direction.
495  *
496  * For vertical, top-to-bottom text, #RAQM_DIRECTION_TTB should be used. Raqm,
497  * however, provides limited vertical text support and does not handle rotated
498  * horizontal text in vertical text, instead everything is treated as vertical
499  * text.
500  *
501  * Return value:
502  * %true if no errors happened, %false otherwise.
503  *
504  * Since: 0.1
505  */
506 bool
raqm_set_par_direction(raqm_t * rq,raqm_direction_t dir)507 raqm_set_par_direction (raqm_t          *rq,
508                         raqm_direction_t dir)
509 {
510   if (!rq)
511     return false;
512 
513   rq->base_dir = dir;
514 
515   return true;
516 }
517 
518 /**
519  * raqm_set_language:
520  * @rq: a #raqm_t.
521  * @lang: a BCP47 language code.
522  * @start: index of first character that should use @face.
523  * @len: number of characters using @face.
524  *
525  * Sets a [BCP47 language
526  * code](https://www.w3.org/International/articles/language-tags/) to be used
527  * for @len-number of characters staring at @start.  The @start and @len are
528  * input string array indices (i.e. counting bytes in UTF-8 and scaler values
529  * in UTF-32).
530  *
531  * This method can be used repeatedly to set different languages for different
532  * parts of the text.
533  *
534  * Return value:
535  * %true if no errors happened, %false otherwise.
536  *
537  * Stability:
538  * Unstable
539  *
540  * Since: 0.2
541  */
542 bool
raqm_set_language(raqm_t * rq,const char * lang,size_t start,size_t len)543 raqm_set_language (raqm_t       *rq,
544                    const char   *lang,
545                    size_t        start,
546                    size_t        len)
547 {
548   hb_language_t language;
549   size_t end = start + len;
550 
551   if (!rq)
552     return false;
553 
554   if (!rq->text_len)
555     return true;
556 
557   if (rq->flags & RAQM_FLAG_UTF8)
558   {
559     start = _raqm_u8_to_u32_index (rq, start);
560     end = _raqm_u8_to_u32_index (rq, end);
561   }
562 
563   if (start >= rq->text_len || end > rq->text_len)
564     return false;
565 
566   if (!rq->text_info)
567     return false;
568 
569   language = hb_language_from_string (lang, -1);
570   for (size_t i = start; i < end; i++)
571   {
572     rq->text_info[i].lang = language;
573   }
574 
575   return true;
576 }
577 
578 /**
579  * raqm_add_font_feature:
580  * @rq: a #raqm_t.
581  * @feature: (transfer none): a font feature string.
582  * @len: length of @feature, -1 for %NULL-terminated.
583  *
584  * Adds a font feature to be used by the #raqm_t during text layout. This is
585  * usually used to turn on optional font features that are not enabled by
586  * default, for example `dlig` or `ss01`, but can be also used to turn off
587  * default font features.
588  *
589  * @feature is string representing a single font feature, in the syntax
590  * understood by hb_feature_from_string().
591  *
592  * This function can be called repeatedly, new features will be appended to the
593  * end of the features list and can potentially override previous features.
594  *
595  * Return value:
596  * %true if parsing @feature succeeded, %false otherwise.
597  *
598  * Since: 0.1
599  */
600 bool
raqm_add_font_feature(raqm_t * rq,const char * feature,int len)601 raqm_add_font_feature (raqm_t     *rq,
602                        const char *feature,
603                        int         len)
604 {
605   hb_bool_t ok;
606   hb_feature_t fea;
607 
608   if (!rq)
609     return false;
610 
611   ok = hb_feature_from_string (feature, len, &fea);
612   if (ok)
613   {
614     rq->features_len++;
615     rq->features = realloc (rq->features,
616                             sizeof (hb_feature_t) * (rq->features_len));
617     if (!rq->features)
618       return false;
619 
620     rq->features[rq->features_len - 1] = fea;
621   }
622 
623   return ok;
624 }
625 
626 static hb_font_t *
_raqm_create_hb_font(raqm_t * rq,FT_Face face)627 _raqm_create_hb_font (raqm_t *rq,
628                       FT_Face face)
629 {
630   hb_font_t *font = hb_ft_font_create_referenced (face);
631 
632   if (rq->ft_loadflags >= 0)
633     hb_ft_font_set_load_flags (font, rq->ft_loadflags);
634 
635   return font;
636 }
637 
638 static bool
_raqm_set_freetype_face(raqm_t * rq,FT_Face face,size_t start,size_t end)639 _raqm_set_freetype_face (raqm_t *rq,
640                          FT_Face face,
641                          size_t  start,
642                          size_t  end)
643 {
644   if (!rq)
645     return false;
646 
647   if (!rq->text_len)
648     return true;
649 
650   if (start >= rq->text_len || end > rq->text_len)
651     return false;
652 
653   if (!rq->text_info)
654     return false;
655 
656   for (size_t i = start; i < end; i++)
657   {
658     if (rq->text_info[i].ftface)
659         FT_Done_Face (rq->text_info[i].ftface);
660     rq->text_info[i].ftface = face;
661     FT_Reference_Face (face);
662   }
663 
664   return true;
665 }
666 
667 /**
668  * raqm_set_freetype_face:
669  * @rq: a #raqm_t.
670  * @face: an #FT_Face.
671  *
672  * Sets an #FT_Face to be used for all characters in @rq.
673  *
674  * See also raqm_set_freetype_face_range().
675  *
676  * Return value:
677  * %true if no errors happened, %false otherwise.
678  *
679  * Since: 0.1
680  */
681 bool
raqm_set_freetype_face(raqm_t * rq,FT_Face face)682 raqm_set_freetype_face (raqm_t *rq,
683                         FT_Face face)
684 {
685   return _raqm_set_freetype_face (rq, face, 0, rq->text_len);
686 }
687 
688 /**
689  * raqm_set_freetype_face_range:
690  * @rq: a #raqm_t.
691  * @face: an #FT_Face.
692  * @start: index of first character that should use @face.
693  * @len: number of characters using @face.
694  *
695  * Sets an #FT_Face to be used for @len-number of characters staring at @start.
696  * The @start and @len are input string array indices (i.e. counting bytes in
697  * UTF-8 and scaler values in UTF-32).
698  *
699  * This method can be used repeatedly to set different faces for different
700  * parts of the text. It is the responsibility of the client to make sure that
701  * face ranges cover the whole text.
702  *
703  * See also raqm_set_freetype_face().
704  *
705  * Return value:
706  * %true if no errors happened, %false otherwise.
707  *
708  * Since: 0.1
709  */
710 bool
raqm_set_freetype_face_range(raqm_t * rq,FT_Face face,size_t start,size_t len)711 raqm_set_freetype_face_range (raqm_t *rq,
712                               FT_Face face,
713                               size_t  start,
714                               size_t  len)
715 {
716   size_t end = start + len;
717 
718   if (!rq)
719     return false;
720 
721   if (!rq->text_len)
722     return true;
723 
724   if (rq->flags & RAQM_FLAG_UTF8)
725   {
726     start = _raqm_u8_to_u32_index (rq, start);
727     end = _raqm_u8_to_u32_index (rq, end);
728   }
729 
730   return _raqm_set_freetype_face (rq, face, start, end);
731 }
732 
733 /**
734  * raqm_set_freetype_load_flags:
735  * @rq: a #raqm_t.
736  * @flags: FreeType load flags.
737  *
738  * Sets the load flags passed to FreeType when loading glyphs, should be the
739  * same flags used by the client when rendering FreeType glyphs.
740  *
741  * This requires version of HarfBuzz that has hb_ft_font_set_load_flags(), for
742  * older version the flags will be ignored.
743  *
744  * Return value:
745  * %true if no errors happened, %false otherwise.
746  *
747  * Since: 0.3
748  */
749 bool
raqm_set_freetype_load_flags(raqm_t * rq,int flags)750 raqm_set_freetype_load_flags (raqm_t *rq,
751                               int flags)
752 {
753   if (!rq)
754     return false;
755 
756   rq->ft_loadflags = flags;
757 
758   return true;
759 }
760 
761 /**
762  * raqm_set_invisible_glyph:
763  * @rq: a #raqm_t.
764  * @gid: glyph id to use for invisible glyphs.
765  *
766  * Sets the glyph id to be used for invisible glyhphs.
767  *
768  * If @gid is negative, invisible glyphs will be suppressed from the output.
769  * This requires HarfBuzz 1.8.0 or later. If raqm is used with an earlier
770  * HarfBuzz version, the return value will be %false and the shaping behavior
771  * does not change.
772  *
773  * If @gid is zero, invisible glyphs will be rendered as space.
774  * This works on all versions of HarfBuzz.
775  *
776  * If @gid is a positive number, it will be used for invisible glyphs.
777  * This requires a version of HarfBuzz that has
778  * hb_buffer_set_invisible_glyph(). For older versions, the return value
779  * will be %false and the shaping behavior does not change.
780  *
781  * Return value:
782  * %true if no errors happened, %false otherwise.
783  *
784  * Since: 0.6
785  */
786 bool
raqm_set_invisible_glyph(raqm_t * rq,int gid)787 raqm_set_invisible_glyph (raqm_t *rq,
788                           int gid)
789 {
790   if (!rq)
791     return false;
792 
793 #ifndef HAVE_HB_BUFFER_SET_INVISIBLE_GLYPH
794   if (gid > 0)
795     return false;
796 #endif
797 
798 #if !defined(HAVE_DECL_HB_BUFFER_FLAG_REMOVE_DEFAULT_IGNORABLES) || \
799     !HAVE_DECL_HB_BUFFER_FLAG_REMOVE_DEFAULT_IGNORABLES
800   if (gid < 0)
801     return false;
802 #endif
803 
804   rq->invisible_glyph = gid;
805   return true;
806 }
807 
808 static bool
809 _raqm_itemize (raqm_t *rq);
810 
811 static bool
812 _raqm_shape (raqm_t *rq);
813 
814 /**
815  * raqm_layout:
816  * @rq: a #raqm_t.
817  *
818  * Run the text layout process on @rq. This is the main Raqm function where the
819  * Unicode Bidirectional Text algorithm will be applied to the text in @rq,
820  * text shaping, and any other part of the layout process.
821  *
822  * Return value:
823  * %true if the layout process was successful, %false otherwise.
824  *
825  * Since: 0.1
826  */
827 bool
raqm_layout(raqm_t * rq)828 raqm_layout (raqm_t *rq)
829 {
830   if (!rq)
831     return false;
832 
833   if (!rq->text_len)
834     return true;
835 
836   if (!rq->text_info)
837     return false;
838 
839   for (size_t i = 0; i < rq->text_len; i++)
840   {
841       if (!rq->text_info[i].ftface)
842           return false;
843   }
844 
845   if (!_raqm_itemize (rq))
846     return false;
847 
848   if (!_raqm_shape (rq))
849     return false;
850 
851   return true;
852 }
853 
854 static uint32_t
855 _raqm_u32_to_u8_index (raqm_t   *rq,
856                        uint32_t  index);
857 
858 /**
859  * raqm_get_glyphs:
860  * @rq: a #raqm_t.
861  * @length: (out): output array length.
862  *
863  * Gets the final result of Raqm layout process, an array of #raqm_glyph_t
864  * containing the glyph indices in the font, their positions and other possible
865  * information.
866  *
867  * Return value: (transfer none):
868  * An array of #raqm_glyph_t, or %NULL in case of error. This is owned by @rq
869  * and must not be freed.
870  *
871  * Since: 0.1
872  */
873 raqm_glyph_t *
raqm_get_glyphs(raqm_t * rq,size_t * length)874 raqm_get_glyphs (raqm_t *rq,
875                  size_t *length)
876 {
877   size_t count = 0;
878 
879   if (!rq || !rq->runs || !length)
880   {
881     if (length)
882       *length = 0;
883     return NULL;
884   }
885 
886   for (raqm_run_t *run = rq->runs; run != NULL; run = run->next)
887     count += hb_buffer_get_length (run->buffer);
888 
889   *length = count;
890 
891   if (rq->glyphs)
892     free (rq->glyphs);
893 
894   rq->glyphs = malloc (sizeof (raqm_glyph_t) * count);
895   if (!rq->glyphs)
896   {
897     *length = 0;
898     return NULL;
899   }
900 
901   RAQM_TEST ("Glyph information:\n");
902 
903   count = 0;
904   for (raqm_run_t *run = rq->runs; run != NULL; run = run->next)
905   {
906     size_t len;
907     hb_glyph_info_t *info;
908     hb_glyph_position_t *position;
909 
910     len = hb_buffer_get_length (run->buffer);
911     info = hb_buffer_get_glyph_infos (run->buffer, NULL);
912     position = hb_buffer_get_glyph_positions (run->buffer, NULL);
913 
914     for (size_t i = 0; i < len; i++)
915     {
916       rq->glyphs[count + i].index = info[i].codepoint;
917       rq->glyphs[count + i].cluster = info[i].cluster;
918       rq->glyphs[count + i].x_advance = position[i].x_advance;
919       rq->glyphs[count + i].y_advance = position[i].y_advance;
920       rq->glyphs[count + i].x_offset = position[i].x_offset;
921       rq->glyphs[count + i].y_offset = position[i].y_offset;
922       rq->glyphs[count + i].ftface = rq->text_info[info[i].cluster].ftface;
923 
924       RAQM_TEST ("glyph [%d]\tx_offset: %d\ty_offset: %d\tx_advance: %d\tfont: %s\n",
925           rq->glyphs[count + i].index, rq->glyphs[count + i].x_offset,
926           rq->glyphs[count + i].y_offset, rq->glyphs[count + i].x_advance,
927           rq->glyphs[count + i].ftface->family_name);
928     }
929 
930     count += len;
931   }
932 
933   if (rq->flags & RAQM_FLAG_UTF8)
934   {
935 #ifdef RAQM_TESTING
936     RAQM_TEST ("\nUTF-32 clusters:");
937     for (size_t i = 0; i < count; i++)
938       RAQM_TEST (" %02d", rq->glyphs[i].cluster);
939     RAQM_TEST ("\n");
940 #endif
941 
942     for (size_t i = 0; i < count; i++)
943       rq->glyphs[i].cluster = _raqm_u32_to_u8_index (rq,
944                                                      rq->glyphs[i].cluster);
945 
946 #ifdef RAQM_TESTING
947     RAQM_TEST ("UTF-8 clusters: ");
948     for (size_t i = 0; i < count; i++)
949       RAQM_TEST (" %02d", rq->glyphs[i].cluster);
950     RAQM_TEST ("\n");
951 #endif
952   }
953   return rq->glyphs;
954 }
955 
956 static bool
957 _raqm_resolve_scripts (raqm_t *rq);
958 
959 static hb_direction_t
_raqm_hb_dir(raqm_t * rq,FriBidiLevel level)960 _raqm_hb_dir (raqm_t *rq, FriBidiLevel level)
961 {
962   hb_direction_t dir = HB_DIRECTION_LTR;
963 
964   if (rq->base_dir == RAQM_DIRECTION_TTB)
965       dir = HB_DIRECTION_TTB;
966   else if (FRIBIDI_LEVEL_IS_RTL (level))
967       dir = HB_DIRECTION_RTL;
968 
969   return dir;
970 }
971 
972 typedef struct {
973   size_t pos;
974   size_t len;
975   FriBidiLevel level;
976 } _raqm_bidi_run;
977 
978 static void
_raqm_reverse_run(_raqm_bidi_run * run,const size_t len)979 _raqm_reverse_run (_raqm_bidi_run *run, const size_t len)
980 {
981   assert (run);
982 
983   for (size_t i = 0; i < len / 2; i++)
984   {
985     _raqm_bidi_run temp = run[i];
986     run[i] = run[len - 1 - i];
987     run[len - 1 - i] = temp;
988   }
989 }
990 
991 static _raqm_bidi_run *
_raqm_reorder_runs(const FriBidiCharType * types,const size_t len,const FriBidiParType base_dir,FriBidiLevel * levels,size_t * run_count)992 _raqm_reorder_runs (const FriBidiCharType *types,
993                     const size_t len,
994                     const FriBidiParType base_dir,
995                     /* input and output */
996                     FriBidiLevel *levels,
997                     /* output */
998                     size_t *run_count)
999 {
1000   FriBidiLevel level;
1001   FriBidiLevel last_level = -1;
1002   FriBidiLevel max_level = 0;
1003   size_t run_start = 0;
1004   size_t run_index = 0;
1005   _raqm_bidi_run *runs = NULL;
1006   size_t count = 0;
1007 
1008   if (len == 0)
1009   {
1010     *run_count = 0;
1011     return NULL;
1012   }
1013 
1014   assert (types);
1015   assert (levels);
1016 
1017   /* L1. Reset the embedding levels of some chars:
1018      4. any sequence of white space characters at the end of the line. */
1019   for (int i = len - 1;
1020        i >= 0 && FRIBIDI_IS_EXPLICIT_OR_BN_OR_WS (types[i]); i--)
1021   {
1022     levels[i] = FRIBIDI_DIR_TO_LEVEL (base_dir);
1023   }
1024 
1025   /* Find max_level of the line.  We don't reuse the paragraph
1026    * max_level, both for a cleaner API, and that the line max_level
1027    * may be far less than paragraph max_level. */
1028   for (int i = len - 1; i >= 0; i--)
1029   {
1030     if (levels[i] > max_level)
1031        max_level = levels[i];
1032   }
1033 
1034   for (size_t i = 0; i < len; i++)
1035   {
1036     if (levels[i] != last_level)
1037       count++;
1038 
1039     last_level = levels[i];
1040   }
1041 
1042   runs = malloc (sizeof (_raqm_bidi_run) * count);
1043 
1044   while (run_start < len)
1045   {
1046     size_t run_end = run_start;
1047     while (run_end < len && levels[run_start] == levels[run_end])
1048     {
1049       run_end++;
1050     }
1051 
1052     runs[run_index].pos = run_start;
1053     runs[run_index].level = levels[run_start];
1054     runs[run_index].len = run_end - run_start;
1055     run_start = run_end;
1056     run_index++;
1057   }
1058 
1059   /* L2. Reorder. */
1060   for (level = max_level; level > 0; level--)
1061   {
1062     for (int i = count - 1; i >= 0; i--)
1063     {
1064       if (runs[i].level >= level)
1065       {
1066         int end = i;
1067         for (i--; (i >= 0 && runs[i].level >= level); i--)
1068             ;
1069         _raqm_reverse_run (runs + i + 1, end - i);
1070       }
1071     }
1072   }
1073 
1074   *run_count = count;
1075   return runs;
1076 }
1077 
1078 static bool
_raqm_itemize(raqm_t * rq)1079 _raqm_itemize (raqm_t *rq)
1080 {
1081   FriBidiParType par_type = FRIBIDI_PAR_ON;
1082   FriBidiCharType *types;
1083 #ifdef USE_FRIBIDI_EX_API
1084   FriBidiBracketType *btypes;
1085 #endif
1086   FriBidiLevel *levels;
1087   _raqm_bidi_run *runs = NULL;
1088   raqm_run_t *last;
1089   int max_level;
1090   size_t run_count;
1091   bool ok = true;
1092 
1093 #ifdef RAQM_TESTING
1094   switch (rq->base_dir)
1095   {
1096     case RAQM_DIRECTION_RTL:
1097       RAQM_TEST ("Direction is: RTL\n\n");
1098       break;
1099     case RAQM_DIRECTION_LTR:
1100       RAQM_TEST ("Direction is: LTR\n\n");
1101       break;
1102     case RAQM_DIRECTION_TTB:
1103       RAQM_TEST ("Direction is: TTB\n\n");
1104       break;
1105     case RAQM_DIRECTION_DEFAULT:
1106     default:
1107       RAQM_TEST ("Direction is: DEFAULT\n\n");
1108       break;
1109   }
1110 #endif
1111 
1112   types = calloc (rq->text_len, sizeof (FriBidiCharType));
1113 #ifdef USE_FRIBIDI_EX_API
1114   btypes = calloc (rq->text_len, sizeof (FriBidiBracketType));
1115 #endif
1116   levels = calloc (rq->text_len, sizeof (FriBidiLevel));
1117   if (!types || !levels
1118 #ifdef USE_FRIBIDI_EX_API
1119       || !btypes
1120 #endif
1121       )
1122   {
1123     ok = false;
1124     goto done;
1125   }
1126 
1127   if (rq->base_dir == RAQM_DIRECTION_RTL)
1128     par_type = FRIBIDI_PAR_RTL;
1129   else if (rq->base_dir == RAQM_DIRECTION_LTR)
1130     par_type = FRIBIDI_PAR_LTR;
1131 
1132   if (rq->base_dir == RAQM_DIRECTION_TTB)
1133   {
1134     /* Treat every thing as LTR in vertical text */
1135     max_level = 1;
1136     memset (types, FRIBIDI_TYPE_LTR, rq->text_len);
1137     memset (levels, 0, rq->text_len);
1138     rq->resolved_dir = RAQM_DIRECTION_LTR;
1139   }
1140   else
1141   {
1142     fribidi_get_bidi_types (rq->text, rq->text_len, types);
1143 #ifdef USE_FRIBIDI_EX_API
1144     fribidi_get_bracket_types (rq->text, rq->text_len, types, btypes);
1145     max_level = fribidi_get_par_embedding_levels_ex (types, btypes,
1146                                                      rq->text_len, &par_type,
1147                                                      levels);
1148 #else
1149     max_level = fribidi_get_par_embedding_levels (types, rq->text_len,
1150                                                   &par_type, levels);
1151 #endif
1152 
1153    if (par_type == FRIBIDI_PAR_LTR)
1154      rq->resolved_dir = RAQM_DIRECTION_LTR;
1155    else
1156      rq->resolved_dir = RAQM_DIRECTION_RTL;
1157   }
1158 
1159   if (max_level == 0)
1160   {
1161     ok = false;
1162     goto done;
1163   }
1164 
1165   if (!_raqm_resolve_scripts (rq))
1166   {
1167     ok = false;
1168     goto done;
1169   }
1170 
1171   /* Get the number of bidi runs */
1172   runs = _raqm_reorder_runs (types, rq->text_len, par_type, levels, &run_count);
1173   if (!runs)
1174   {
1175     ok = false;
1176     goto done;
1177   }
1178 
1179 #ifdef RAQM_TESTING
1180   RAQM_TEST ("Number of runs before script itemization: %zu\n\n", run_count);
1181 
1182   RAQM_TEST ("Fribidi Runs:\n");
1183   for (size_t i = 0; i < run_count; i++)
1184   {
1185     RAQM_TEST ("run[%zu]:\t start: %zu\tlength: %zu\tlevel: %d\n",
1186                i, runs[i].pos, runs[i].len, runs[i].level);
1187   }
1188   RAQM_TEST ("\n");
1189 #endif
1190 
1191   last = NULL;
1192   for (size_t i = 0; i < run_count; i++)
1193   {
1194     raqm_run_t *run = calloc (1, sizeof (raqm_run_t));
1195     if (!run)
1196     {
1197       ok = false;
1198       goto done;
1199     }
1200 
1201     if (!rq->runs)
1202       rq->runs = run;
1203 
1204     if (last)
1205       last->next = run;
1206 
1207     run->direction = _raqm_hb_dir (rq, runs[i].level);
1208 
1209     if (HB_DIRECTION_IS_BACKWARD (run->direction))
1210     {
1211       run->pos = runs[i].pos + runs[i].len - 1;
1212       run->script = rq->text_info[run->pos].script;
1213       run->font = _raqm_create_hb_font (rq, rq->text_info[run->pos].ftface);
1214       for (int j = runs[i].len - 1; j >= 0; j--)
1215       {
1216         _raqm_text_info info = rq->text_info[runs[i].pos + j];
1217         if (!_raqm_compare_text_info (rq->text_info[run->pos], info))
1218         {
1219           raqm_run_t *newrun = calloc (1, sizeof (raqm_run_t));
1220           if (!newrun)
1221           {
1222             ok = false;
1223             goto done;
1224           }
1225           newrun->pos = runs[i].pos + j;
1226           newrun->len = 1;
1227           newrun->direction = _raqm_hb_dir (rq, runs[i].level);
1228           newrun->script = info.script;
1229           newrun->font = _raqm_create_hb_font (rq, info.ftface);
1230           run->next = newrun;
1231           run = newrun;
1232         }
1233         else
1234         {
1235           run->len++;
1236           run->pos = runs[i].pos + j;
1237         }
1238       }
1239     }
1240     else
1241     {
1242       run->pos = runs[i].pos;
1243       run->script = rq->text_info[run->pos].script;
1244       run->font = _raqm_create_hb_font (rq, rq->text_info[run->pos].ftface);
1245       for (size_t j = 0; j < runs[i].len; j++)
1246       {
1247         _raqm_text_info info = rq->text_info[runs[i].pos + j];
1248         if (!_raqm_compare_text_info (rq->text_info[run->pos], info))
1249         {
1250           raqm_run_t *newrun = calloc (1, sizeof (raqm_run_t));
1251           if (!newrun)
1252           {
1253             ok = false;
1254             goto done;
1255           }
1256           newrun->pos = runs[i].pos + j;
1257           newrun->len = 1;
1258           newrun->direction = _raqm_hb_dir (rq, runs[i].level);
1259           newrun->script = info.script;
1260           newrun->font = _raqm_create_hb_font (rq, info.ftface);
1261           run->next = newrun;
1262           run = newrun;
1263         }
1264         else
1265           run->len++;
1266       }
1267     }
1268 
1269     last = run;
1270     last->next = NULL;
1271   }
1272 
1273 #ifdef RAQM_TESTING
1274   run_count = 0;
1275   for (raqm_run_t *run = rq->runs; run != NULL; run = run->next)
1276     run_count++;
1277   RAQM_TEST ("Number of runs after script itemization: %zu\n\n", run_count);
1278 
1279   run_count = 0;
1280   RAQM_TEST ("Final Runs:\n");
1281   for (raqm_run_t *run = rq->runs; run != NULL; run = run->next)
1282   {
1283     SCRIPT_TO_STRING (run->script);
1284     RAQM_TEST ("run[%zu]:\t start: %d\tlength: %d\tdirection: %s\tscript: %s\tfont: %s\n",
1285                run_count++, run->pos, run->len,
1286                hb_direction_to_string (run->direction), buff,
1287                rq->text_info[run->pos].ftface->family_name);
1288   }
1289   RAQM_TEST ("\n");
1290 #endif
1291 
1292 done:
1293   free (runs);
1294   free (types);
1295 #ifdef USE_FRIBIDI_EX_API
1296   free (btypes);
1297 #endif
1298   free (levels);
1299 
1300   return ok;
1301 }
1302 
/* Stack to handle script detection.
 *
 * Used while resolving scripts to remember the opening paired characters seen
 * so far.  Entries are stored 1-based: _raqm_stack_push() increments size
 * before storing, so live entries occupy indices 1..size of both arrays.
 */
typedef struct {
  size_t       capacity;   /* maximum number of entries, set by _raqm_stack_new() */
  size_t       size;       /* current number of entries; 0 means empty */
  int         *pair_index; /* per entry: index of the opening character in paired_chars */
  hb_script_t *script;     /* per entry: script assigned to that opening character */
} _raqm_stack_t;
1310 
1311 /* Special paired characters for script detection */
1312 static size_t paired_len = 34;
1313 static const FriBidiChar paired_chars[] =
1314 {
1315   0x0028, 0x0029, /* ascii paired punctuation */
1316   0x003c, 0x003e,
1317   0x005b, 0x005d,
1318   0x007b, 0x007d,
1319   0x00ab, 0x00bb, /* guillemets */
1320   0x2018, 0x2019, /* general punctuation */
1321   0x201c, 0x201d,
1322   0x2039, 0x203a,
1323   0x3008, 0x3009, /* chinese paired punctuation */
1324   0x300a, 0x300b,
1325   0x300c, 0x300d,
1326   0x300e, 0x300f,
1327   0x3010, 0x3011,
1328   0x3014, 0x3015,
1329   0x3016, 0x3017,
1330   0x3018, 0x3019,
1331   0x301a, 0x301b
1332 };
1333 
1334 static void
_raqm_stack_free(_raqm_stack_t * stack)1335 _raqm_stack_free (_raqm_stack_t *stack)
1336 {
1337   free (stack->script);
1338   free (stack->pair_index);
1339   free (stack);
1340 }
1341 
1342 /* Stack handling functions */
1343 static _raqm_stack_t *
_raqm_stack_new(size_t max)1344 _raqm_stack_new (size_t max)
1345 {
1346   _raqm_stack_t *stack;
1347   stack = calloc (1, sizeof (_raqm_stack_t));
1348   if (!stack)
1349     return NULL;
1350 
1351   stack->script = malloc (sizeof (hb_script_t) * max);
1352   if (!stack->script)
1353   {
1354     _raqm_stack_free (stack);
1355     return NULL;
1356   }
1357 
1358   stack->pair_index = malloc (sizeof (int) * max);
1359   if (!stack->pair_index)
1360   {
1361     _raqm_stack_free (stack);
1362     return NULL;
1363   }
1364 
1365   stack->size = 0;
1366   stack->capacity = max;
1367 
1368   return stack;
1369 }
1370 
1371 static bool
_raqm_stack_pop(_raqm_stack_t * stack)1372 _raqm_stack_pop (_raqm_stack_t *stack)
1373 {
1374   if (!stack->size)
1375   {
1376     RAQM_DBG ("Stack is Empty\n");
1377     return false;
1378   }
1379 
1380   stack->size--;
1381 
1382   return true;
1383 }
1384 
1385 static hb_script_t
_raqm_stack_top(_raqm_stack_t * stack)1386 _raqm_stack_top (_raqm_stack_t *stack)
1387 {
1388   if (!stack->size)
1389   {
1390     RAQM_DBG ("Stack is Empty\n");
1391     return HB_SCRIPT_INVALID; /* XXX: check this */
1392   }
1393 
1394   return stack->script[stack->size];
1395 }
1396 
1397 static bool
_raqm_stack_push(_raqm_stack_t * stack,hb_script_t script,int pair_index)1398 _raqm_stack_push (_raqm_stack_t *stack,
1399                   hb_script_t    script,
1400                   int            pair_index)
1401 {
1402   if (stack->size == stack->capacity)
1403   {
1404     RAQM_DBG ("Stack is Full\n");
1405     return false;
1406   }
1407 
1408   stack->size++;
1409   stack->script[stack->size] = script;
1410   stack->pair_index[stack->size] = pair_index;
1411 
1412   return true;
1413 }
1414 
1415 static int
_get_pair_index(const FriBidiChar ch)1416 _get_pair_index (const FriBidiChar ch)
1417 {
1418   int lower = 0;
1419   int upper = paired_len - 1;
1420 
1421   while (lower <= upper)
1422   {
1423     int mid = (lower + upper) / 2;
1424     if (ch < paired_chars[mid])
1425       upper = mid - 1;
1426     else if (ch > paired_chars[mid])
1427       lower = mid + 1;
1428     else
1429       return mid;
1430   }
1431 
1432   return -1;
1433 }
1434 
/* True when the given _raqm_stack_t pointer holds no entries (the argument is
 * the stack, despite being named "script"). */
#define STACK_IS_EMPTY(script)     ((script)->size <= 0)
/* True when pair_index is even, i.e. it refers to the opening character of a
 * pair in paired_chars (closing characters sit at the following odd index). */
#define IS_OPEN(pair_index)        (((pair_index) & 1) == 0)
1437 
1438 /* Resolve the script for each character in the input string, if the character
1439  * script is common or inherited it takes the script of the character before it
1440  * except paired characters which we try to make them use the same script. We
1441  * then split the BiDi runs, if necessary, on script boundaries.
1442  */
1443 static bool
_raqm_resolve_scripts(raqm_t * rq)1444 _raqm_resolve_scripts (raqm_t *rq)
1445 {
1446   int last_script_index = -1;
1447   int last_set_index = -1;
1448   hb_script_t last_script = HB_SCRIPT_INVALID;
1449   _raqm_stack_t *stack = NULL;
1450   hb_unicode_funcs_t* unicode_funcs = hb_unicode_funcs_get_default ();
1451 
1452   for (size_t i = 0; i < rq->text_len; ++i)
1453     rq->text_info[i].script = hb_unicode_script (unicode_funcs, rq->text[i]);
1454 
1455 #ifdef RAQM_TESTING
1456   RAQM_TEST ("Before script detection:\n");
1457   for (size_t i = 0; i < rq->text_len; ++i)
1458   {
1459     SCRIPT_TO_STRING (rq->text_info[i].script);
1460     RAQM_TEST ("script for ch[%zu]\t%s\n", i, buff);
1461   }
1462   RAQM_TEST ("\n");
1463 #endif
1464 
1465   stack = _raqm_stack_new (rq->text_len);
1466   if (!stack)
1467     return false;
1468 
1469   for (int i = 0; i < (int) rq->text_len; i++)
1470   {
1471     if (rq->text_info[i].script == HB_SCRIPT_COMMON && last_script_index != -1)
1472     {
1473       int pair_index = _get_pair_index (rq->text[i]);
1474       if (pair_index >= 0)
1475       {
1476         if (IS_OPEN (pair_index))
1477         {
1478           /* is a paired character */
1479           rq->text_info[i].script = last_script;
1480           last_set_index = i;
1481           _raqm_stack_push (stack, rq->text_info[i].script, pair_index);
1482         }
1483         else
1484         {
1485           /* is a close paired character */
1486           /* find matching opening (by getting the last even index for current
1487            * odd index) */
1488           while (!STACK_IS_EMPTY (stack) &&
1489                  stack->pair_index[stack->size] != (pair_index & ~1))
1490           {
1491             _raqm_stack_pop (stack);
1492           }
1493           if (!STACK_IS_EMPTY (stack))
1494           {
1495             rq->text_info[i].script = _raqm_stack_top (stack);
1496             last_script = rq->text_info[i].script;
1497             last_set_index = i;
1498           }
1499           else
1500           {
1501             rq->text_info[i].script = last_script;
1502             last_set_index = i;
1503           }
1504         }
1505       }
1506       else
1507       {
1508         rq->text_info[i].script = last_script;
1509         last_set_index = i;
1510       }
1511     }
1512     else if (rq->text_info[i].script == HB_SCRIPT_INHERITED &&
1513              last_script_index != -1)
1514     {
1515       rq->text_info[i].script = last_script;
1516       last_set_index = i;
1517     }
1518     else
1519     {
1520       for (int j = last_set_index + 1; j < i; ++j)
1521         rq->text_info[j].script = rq->text_info[i].script;
1522       last_script = rq->text_info[i].script;
1523       last_script_index = i;
1524       last_set_index = i;
1525     }
1526   }
1527 
1528   /* Loop backwards and change any remaining Common or Inherit characters to
1529    * take the script if the next character.
1530    * https://github.com/HOST-Oman/libraqm/issues/95
1531    */
1532   for (int i = rq->text_len - 2; i >= 0;  --i)
1533   {
1534     if (rq->text_info[i].script == HB_SCRIPT_INHERITED ||
1535         rq->text_info[i].script == HB_SCRIPT_COMMON)
1536       rq->text_info[i].script = rq->text_info[i + 1].script;
1537   }
1538 
1539 #ifdef RAQM_TESTING
1540   RAQM_TEST ("After script detection:\n");
1541   for (size_t i = 0; i < rq->text_len; ++i)
1542   {
1543     SCRIPT_TO_STRING (rq->text_info[i].script);
1544     RAQM_TEST ("script for ch[%zu]\t%s\n", i, buff);
1545   }
1546   RAQM_TEST ("\n");
1547 #endif
1548 
1549   _raqm_stack_free (stack);
1550 
1551   return true;
1552 }
1553 
1554 static bool
_raqm_shape(raqm_t * rq)1555 _raqm_shape (raqm_t *rq)
1556 {
1557   hb_buffer_flags_t hb_buffer_flags = HB_BUFFER_FLAG_BOT | HB_BUFFER_FLAG_EOT;
1558 
1559 #if defined(HAVE_DECL_HB_BUFFER_FLAG_REMOVE_DEFAULT_IGNORABLES) && \
1560     HAVE_DECL_HB_BUFFER_FLAG_REMOVE_DEFAULT_IGNORABLES
1561   if (rq->invisible_glyph < 0)
1562     hb_buffer_flags |= HB_BUFFER_FLAG_REMOVE_DEFAULT_IGNORABLES;
1563 #endif
1564 
1565   for (raqm_run_t *run = rq->runs; run != NULL; run = run->next)
1566   {
1567     run->buffer = hb_buffer_create ();
1568 
1569     hb_buffer_add_utf32 (run->buffer, rq->text, rq->text_len,
1570                          run->pos, run->len);
1571     hb_buffer_set_script (run->buffer, run->script);
1572     hb_buffer_set_language (run->buffer, rq->text_info[run->pos].lang);
1573     hb_buffer_set_direction (run->buffer, run->direction);
1574     hb_buffer_set_flags (run->buffer, hb_buffer_flags);
1575 
1576 #ifdef HAVE_HB_BUFFER_SET_INVISIBLE_GLYPH
1577     if (rq->invisible_glyph > 0)
1578       hb_buffer_set_invisible_glyph (run->buffer, rq->invisible_glyph);
1579 #endif
1580 
1581     hb_shape_full (run->font, run->buffer, rq->features, rq->features_len,
1582                    NULL);
1583   }
1584 
1585   return true;
1586 }
1587 
1588 /* Convert index from UTF-32 to UTF-8 */
1589 static uint32_t
_raqm_u32_to_u8_index(raqm_t * rq,uint32_t index)1590 _raqm_u32_to_u8_index (raqm_t   *rq,
1591                        uint32_t  index)
1592 {
1593   FriBidiStrIndex length;
1594   char *output = malloc ((sizeof (char) * 4 * index) + 1);
1595 
1596   length = fribidi_unicode_to_charset (FRIBIDI_CHAR_SET_UTF8,
1597                                        rq->text,
1598                                        index,
1599                                        output);
1600 
1601   free (output);
1602   return length;
1603 }
1604 
1605 /* Convert index from UTF-8 to UTF-32 */
1606 static uint32_t
_raqm_u8_to_u32_index(raqm_t * rq,uint32_t index)1607 _raqm_u8_to_u32_index (raqm_t   *rq,
1608                        uint32_t  index)
1609 {
1610   FriBidiStrIndex length;
1611   uint32_t *output = malloc (sizeof (uint32_t) * (index + 1));
1612 
1613   length = fribidi_charset_to_unicode (FRIBIDI_CHAR_SET_UTF8,
1614                                        rq->text_utf8,
1615                                        index,
1616                                        output);
1617 
1618   free (output);
1619   return length;
1620 }
1621 
1622 static bool
1623 _raqm_allowed_grapheme_boundary (hb_codepoint_t l_char,
1624                                 hb_codepoint_t r_char);
1625 
1626 static bool
1627 _raqm_in_hangul_syllable (hb_codepoint_t ch);
1628 
1629 /**
1630  * raqm_index_to_position:
1631  * @rq: a #raqm_t.
1632  * @index: (inout): character index.
1633  * @x: (out): output x position.
1634  * @y: (out): output y position.
1635  *
1636  * Calculates the cursor position after the character at @index. If the character
1637  * is right-to-left, then the cursor will be at the left of it, whereas if the
1638  * character is left-to-right, then the cursor will be at the right of it.
1639  *
1640  * Return value:
1641  * %true if the process was successful, %false otherwise.
1642  *
1643  * Since: 0.2
1644  */
1645 bool
raqm_index_to_position(raqm_t * rq,size_t * index,int * x,int * y)1646 raqm_index_to_position (raqm_t *rq,
1647                         size_t *index,
1648                         int *x,
1649                         int *y)
1650 {
1651   /* We don't currently support multiline, so y is always 0 */
1652   *y = 0;
1653   *x = 0;
1654 
1655   if (rq == NULL)
1656     return false;
1657 
1658   if (rq->flags & RAQM_FLAG_UTF8)
1659     *index = _raqm_u8_to_u32_index (rq, *index);
1660 
1661   if (*index >= rq->text_len)
1662     return false;
1663 
1664   RAQM_TEST ("\n");
1665 
1666   while (*index < rq->text_len)
1667   {
1668     if (_raqm_allowed_grapheme_boundary (rq->text[*index], rq->text[*index + 1]))
1669       break;
1670 
1671     ++*index;
1672   }
1673 
1674   for (raqm_run_t *run = rq->runs; run != NULL; run = run->next)
1675   {
1676     size_t len;
1677     hb_glyph_info_t *info;
1678     hb_glyph_position_t *position;
1679     len = hb_buffer_get_length (run->buffer);
1680     info = hb_buffer_get_glyph_infos (run->buffer, NULL);
1681     position = hb_buffer_get_glyph_positions (run->buffer, NULL);
1682 
1683     for (size_t i = 0; i < len; i++)
1684     {
1685       uint32_t curr_cluster = info[i].cluster;
1686       uint32_t next_cluster = curr_cluster;
1687       *x += position[i].x_advance;
1688 
1689       if (run->direction == HB_DIRECTION_LTR)
1690       {
1691         for (size_t j = i + 1; j < len && next_cluster == curr_cluster; j++)
1692           next_cluster = info[j].cluster;
1693       }
1694       else
1695       {
1696         for (int j = i - 1; i != 0 && j >= 0 && next_cluster == curr_cluster;
1697              j--)
1698           next_cluster = info[j].cluster;
1699       }
1700 
1701       if (next_cluster == curr_cluster)
1702         next_cluster = run->pos + run->len;
1703 
1704       if (*index < next_cluster && *index >= curr_cluster)
1705       {
1706         if (run->direction == HB_DIRECTION_RTL)
1707           *x -= position[i].x_advance;
1708         *index = curr_cluster;
1709         goto found;
1710       }
1711     }
1712   }
1713 
1714 found:
1715   if (rq->flags & RAQM_FLAG_UTF8)
1716     *index = _raqm_u32_to_u8_index (rq, *index);
1717   RAQM_TEST ("The position is %d at index %zu\n",*x ,*index);
1718   return true;
1719 }
1720 
1721 /**
1722  * raqm_position_to_index:
1723  * @rq: a #raqm_t.
1724  * @x: x position.
1725  * @y: y position.
1726  * @index: (out): output character index.
1727  *
1728  * Returns the @index of the character at @x and @y position within text.
1729  * If the position is outside the text, the last character is chosen as
1730  * @index.
1731  *
1732  * Return value:
1733  * %true if the process was successful, %false in case of error.
1734  *
1735  * Since: 0.2
1736  */
1737 bool
raqm_position_to_index(raqm_t * rq,int x,int y,size_t * index)1738 raqm_position_to_index (raqm_t *rq,
1739                         int x,
1740                         int y,
1741                         size_t *index)
1742 {
1743   int delta_x = 0, current_x = 0;
1744   (void)y;
1745 
1746   if (rq == NULL)
1747     return false;
1748 
1749   if (x < 0) /* Get leftmost index */
1750   {
1751     if (rq->resolved_dir == RAQM_DIRECTION_RTL)
1752       *index = rq->text_len;
1753     else
1754       *index = 0;
1755     return true;
1756   }
1757 
1758   RAQM_TEST ("\n");
1759 
1760   for (raqm_run_t *run = rq->runs; run != NULL; run = run->next)
1761   {
1762     size_t len;
1763     hb_glyph_info_t *info;
1764     hb_glyph_position_t *position;
1765     len = hb_buffer_get_length (run->buffer);
1766     info = hb_buffer_get_glyph_infos (run->buffer, NULL);
1767     position = hb_buffer_get_glyph_positions (run->buffer, NULL);
1768 
1769     for (size_t i = 0; i < len; i++)
1770     {
1771       delta_x = position[i].x_advance;
1772       if (x < (current_x + delta_x))
1773       {
1774         bool before = false;
1775         if (run->direction == HB_DIRECTION_LTR)
1776           before = (x < current_x + (delta_x / 2));
1777         else
1778           before = (x > current_x + (delta_x / 2));
1779 
1780         if (before)
1781           *index = info[i].cluster;
1782         else
1783         {
1784           uint32_t curr_cluster = info[i].cluster;
1785           uint32_t next_cluster = curr_cluster;
1786           if (run->direction == HB_DIRECTION_LTR)
1787             for (size_t j = i + 1; j < len && next_cluster == curr_cluster; j++)
1788               next_cluster = info[j].cluster;
1789           else
1790           for (int j = i - 1; i != 0 && j >= 0 && next_cluster == curr_cluster;
1791                  j--)
1792               next_cluster = info[j].cluster;
1793 
1794           if (next_cluster == curr_cluster)
1795             next_cluster = run->pos + run->len;
1796 
1797           *index = next_cluster;
1798         }
1799         if (_raqm_allowed_grapheme_boundary (rq->text[*index],rq->text[*index + 1]))
1800         {
1801           RAQM_TEST ("The start-index is %zu  at position %d \n", *index, x);
1802             return true;
1803         }
1804 
1805         while (*index < (unsigned)run->pos + run->len)
1806         {
1807           if (_raqm_allowed_grapheme_boundary (rq->text[*index],
1808                                                rq->text[*index + 1]))
1809           {
1810             *index += 1;
1811             break;
1812           }
1813           *index += 1;
1814         }
1815         RAQM_TEST ("The start-index is %zu  at position %d \n", *index, x);
1816         return true;
1817       }
1818       else
1819         current_x += delta_x;
1820     }
1821   }
1822 
1823   /* Get rightmost index*/
1824   if (rq->resolved_dir == RAQM_DIRECTION_RTL)
1825     *index = 0;
1826   else
1827     *index = rq->text_len;
1828 
1829   RAQM_TEST ("The start-index is %zu  at position %d \n", *index, x);
1830 
1831   return true;
1832 }
1833 
/* Grapheme cluster break classes assigned by _raqm_get_grapheme_break() and
 * compared pairwise in _raqm_allowed_grapheme_boundary(). */
typedef enum
{
  RAQM_GRAPHEM_CR,                 /* U+000D */
  RAQM_GRAPHEM_LF,                 /* U+000A */
  RAQM_GRAPHEM_CONTROL,            /* other control/format characters */
  RAQM_GRAPHEM_EXTEND,             /* extending characters, incl. U+200C/U+200D */
  RAQM_GRAPHEM_REGIONAL_INDICATOR, /* U+1F1E6..U+1F1FF */
  RAQM_GRAPHEM_PREPEND,            /* never a break after these */
  RAQM_GRAPHEM_SPACING_MARK,       /* never a break before these */
  RAQM_GRAPHEM_HANGUL_SYLLABLE,    /* set when _raqm_in_hangul_syllable() is true */
  RAQM_GRAPHEM_OTHER               /* everything else */
} _raqm_grapheme_t;
1846 
1847 static _raqm_grapheme_t
1848 _raqm_get_grapheme_break (hb_codepoint_t ch,
1849                           hb_unicode_general_category_t category);
1850 
1851 static bool
_raqm_allowed_grapheme_boundary(hb_codepoint_t l_char,hb_codepoint_t r_char)1852 _raqm_allowed_grapheme_boundary (hb_codepoint_t l_char,
1853                                  hb_codepoint_t r_char)
1854 {
1855   hb_unicode_general_category_t l_category;
1856   hb_unicode_general_category_t r_category;
1857   _raqm_grapheme_t l_grapheme, r_grapheme;
1858   hb_unicode_funcs_t* unicode_funcs = hb_unicode_funcs_get_default ();
1859 
1860   l_category = hb_unicode_general_category (unicode_funcs, l_char);
1861   r_category = hb_unicode_general_category (unicode_funcs, r_char);
1862   l_grapheme = _raqm_get_grapheme_break (l_char, l_category);
1863   r_grapheme = _raqm_get_grapheme_break (r_char, r_category);
1864 
1865   if (l_grapheme == RAQM_GRAPHEM_CR && r_grapheme == RAQM_GRAPHEM_LF)
1866     return false; /*Do not break between a CR and LF GB3*/
1867   if (l_grapheme == RAQM_GRAPHEM_CONTROL || l_grapheme == RAQM_GRAPHEM_CR ||
1868       l_grapheme == RAQM_GRAPHEM_LF || r_grapheme == RAQM_GRAPHEM_CONTROL ||
1869       r_grapheme == RAQM_GRAPHEM_CR || r_grapheme == RAQM_GRAPHEM_LF)
1870     return true; /*Break before and after CONTROL GB4, GB5*/
1871   if (r_grapheme == RAQM_GRAPHEM_HANGUL_SYLLABLE)
1872     return false; /*Do not break Hangul syllable sequences. GB6, GB7, GB8*/
1873   if (l_grapheme == RAQM_GRAPHEM_REGIONAL_INDICATOR &&
1874       r_grapheme == RAQM_GRAPHEM_REGIONAL_INDICATOR)
1875     return false; /*Do not break between regional indicator symbols. GB8a*/
1876   if (r_grapheme == RAQM_GRAPHEM_EXTEND)
1877     return false; /*Do not break before extending characters. GB9*/
1878   /*Do not break before SpacingMarks, or after Prepend characters.GB9a, GB9b*/
1879   if (l_grapheme == RAQM_GRAPHEM_PREPEND)
1880     return false;
1881   if (r_grapheme == RAQM_GRAPHEM_SPACING_MARK)
1882     return false;
1883   return true; /*Otherwise, break everywhere. GB1, GB2, GB10*/
1884 }
1885 
1886 static _raqm_grapheme_t
_raqm_get_grapheme_break(hb_codepoint_t ch,hb_unicode_general_category_t category)1887 _raqm_get_grapheme_break (hb_codepoint_t ch,
1888                           hb_unicode_general_category_t category)
1889 {
1890   _raqm_grapheme_t gb_type;
1891 
1892   gb_type = RAQM_GRAPHEM_OTHER;
1893   switch ((int)category)
1894   {
1895     case HB_UNICODE_GENERAL_CATEGORY_FORMAT:
1896       if (ch == 0x200C || ch == 0x200D)
1897         gb_type = RAQM_GRAPHEM_EXTEND;
1898       else
1899         gb_type = RAQM_GRAPHEM_CONTROL;
1900       break;
1901 
1902     case HB_UNICODE_GENERAL_CATEGORY_CONTROL:
1903       if (ch == 0x000D)
1904         gb_type = RAQM_GRAPHEM_CR;
1905       else if (ch == 0x000A)
1906         gb_type = RAQM_GRAPHEM_LF;
1907       else
1908         gb_type = RAQM_GRAPHEM_CONTROL;
1909       break;
1910 
1911     case HB_UNICODE_GENERAL_CATEGORY_SURROGATE:
1912     case HB_UNICODE_GENERAL_CATEGORY_LINE_SEPARATOR:
1913     case HB_UNICODE_GENERAL_CATEGORY_PARAGRAPH_SEPARATOR:
1914     case HB_UNICODE_GENERAL_CATEGORY_UNASSIGNED:
1915       if ((ch >= 0xFFF0 && ch <= 0xFFF8) ||
1916           (ch >= 0xE0000 && ch <= 0xE0FFF))
1917         gb_type = RAQM_GRAPHEM_CONTROL;
1918       break;
1919 
1920     case HB_UNICODE_GENERAL_CATEGORY_NON_SPACING_MARK:
1921     case HB_UNICODE_GENERAL_CATEGORY_ENCLOSING_MARK:
1922     case HB_UNICODE_GENERAL_CATEGORY_SPACING_MARK:
1923       if (ch != 0x102B && ch != 0x102C && ch != 0x1038 &&
1924           (ch < 0x1062 || ch > 0x1064) && (ch < 0x1067 || ch > 0x106D) &&
1925           ch != 0x1083 && (ch < 0x1087 || ch > 0x108C) && ch != 0x108F &&
1926           (ch < 0x109A || ch > 0x109C) && ch != 0x1A61 && ch != 0x1A63 &&
1927           ch != 0x1A64 && ch != 0xAA7B && ch != 0xAA70 && ch != 0x11720 &&
1928           ch != 0x11721) /**/
1929         gb_type = RAQM_GRAPHEM_SPACING_MARK;
1930 
1931       else if (ch == 0x09BE || ch == 0x09D7 ||
1932           ch == 0x0B3E || ch == 0x0B57 || ch == 0x0BBE || ch == 0x0BD7 ||
1933           ch == 0x0CC2 || ch == 0x0CD5 || ch == 0x0CD6 ||
1934           ch == 0x0D3E || ch == 0x0D57 || ch == 0x0DCF || ch == 0x0DDF ||
1935           ch == 0x1D165 || (ch >= 0x1D16E && ch <= 0x1D172))
1936         gb_type = RAQM_GRAPHEM_EXTEND;
1937       break;
1938 
1939     case HB_UNICODE_GENERAL_CATEGORY_OTHER_LETTER:
1940       if (ch == 0x0E33 || ch == 0x0EB3)
1941         gb_type = RAQM_GRAPHEM_SPACING_MARK;
1942       break;
1943 
1944     case HB_UNICODE_GENERAL_CATEGORY_OTHER_SYMBOL:
1945       if (ch >= 0x1F1E6 && ch <= 0x1F1FF)
1946         gb_type = RAQM_GRAPHEM_REGIONAL_INDICATOR;
1947       break;
1948 
1949     default:
1950       gb_type = RAQM_GRAPHEM_OTHER;
1951       break;
1952   }
1953 
1954   if (_raqm_in_hangul_syllable (ch))
1955     gb_type = RAQM_GRAPHEM_HANGUL_SYLLABLE;
1956 
1957   return gb_type;
1958 }
1959 
/* Placeholder for Hangul syllable detection: @ch is ignored and the result is
 * always false, so _raqm_get_grapheme_break() never reports
 * RAQM_GRAPHEM_HANGUL_SYLLABLE. */
static bool
_raqm_in_hangul_syllable (hb_codepoint_t ch)
{
  (void)ch;
  return false;
}
1966 
/**
 * raqm_version:
 * @major: (out): Library major version component.
 * @minor: (out): Library minor version component.
 * @micro: (out): Library micro version component.
 *
 * Returns library version as three integer components.
 *
 * All three pointers are written unconditionally, so none of them may be
 * %NULL.
 *
 * Since: 0.7
 **/
void
raqm_version (unsigned int *major,
              unsigned int *minor,
              unsigned int *micro)
{
  *major = RAQM_VERSION_MAJOR;
  *minor = RAQM_VERSION_MINOR;
  *micro = RAQM_VERSION_MICRO;
}
1986 
/**
 * raqm_version_string:
 *
 * Returns library version as a string with three components.
 *
 * The result is the value of %RAQM_VERSION_STRING.
 *
 * Return value: library version string.
 *
 * Since: 0.7
 **/
const char *
raqm_version_string (void)
{
  return RAQM_VERSION_STRING;
}
2001 
/**
 * raqm_version_atleast:
 * @major: Library major version component.
 * @minor: Library minor version component.
 * @micro: Library micro version component.
 *
 * Checks if library version is greater than or equal to the specified
 * version, i.e. whether the library is at least that version. The result is
 * that of RAQM_VERSION_ATLEAST() applied to the same arguments.
 *
 * Return value:
 * %true if library version is greater than or equal to the specified version,
 * %false otherwise.
 *
 * Since: 0.7
 **/
bool
raqm_version_atleast (unsigned int major,
                      unsigned int minor,
                      unsigned int micro)
{
  return RAQM_VERSION_ATLEAST (major, minor, micro);
}
2023 
/**
 * RAQM_VERSION_ATLEAST:
 * @major: Library major version component.
 * @minor: Library minor version component.
 * @micro: Library micro version component.
 *
 * Checks if library version is greater than or equal to the specified
 * version, i.e. whether the library is at least that version.
 *
 * Return value:
 * %true if library version is greater than or equal to the specified version,
 * %false otherwise.
 *
 * Since: 0.7
 **/
2038 
2039 /**
2040  * RAQM_VERSION_STRING:
2041  *
2042  * Library version as a string with three components.
2043  *
2044  * Since: 0.7
2045  **/
2046 
2047 /**
2048  * RAQM_VERSION_MAJOR:
2049  *
2050  * Library major version component.
2051  *
2052  * Since: 0.7
2053  **/
2054 
2055 /**
2056  * RAQM_VERSION_MINOR:
2057  *
2058  * Library minor version component.
2059  *
2060  * Since: 0.7
2061  **/
2062 
2063 /**
2064  * RAQM_VERSION_MICRO:
2065  *
2066  * Library micro version component.
2067  *
2068  * Since: 0.7
2069  **/
2070