1 /*
2 * Copyright © 2015 Information Technology Authority (ITA) <foss@ita.gov.om>
3 * Copyright © 2016 Khaled Hosny <khaledhosny@eglug.org>
4 *
5 * Permission is hereby granted, free of charge, to any person obtaining a copy
6 * of this software and associated documentation files (the "Software"), to
7 * deal in the Software without restriction, including without limitation the
8 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
9 * sell copies of the Software, and to permit persons to whom the Software is
10 * furnished to do so, subject to the following conditions:
11 *
12 * The above copyright notice and this permission notice shall be included in
13 * all copies or substantial portions of the Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 *
23 */
24
25 #ifdef HAVE_CONFIG_H
26 #include "config.h"
27 #undef HAVE_CONFIG_H // Workaround for Fribidi 1.0.5 and earlier
28 #endif
29
30 #include <assert.h>
31 #include <string.h>
32
33 #include <fribidi.h>
34 #include <hb.h>
35 #include <hb-ft.h>
36
37 #include "raqm.h"
38
39 #if FRIBIDI_MAJOR_VERSION >= 1
40 #define USE_FRIBIDI_EX_API
41 #endif
42
43 /**
44 * SECTION:raqm
45 * @title: Raqm
46 * @short_description: A library for complex text layout
47 * @include: raqm.h
48 *
49 * Raqm is a light weight text layout library with strong emphasis on
50 * supporting languages and writing systems that require complex text layout.
51 *
52 * The main object in Raqm API is #raqm_t, it stores all the states of the
53 * input text, its properties, and the output of the layout process.
54 *
55 * To start, you create a #raqm_t object, add text and font(s) to it, run the
56 * layout process, and finally query about the output. For example:
57 *
58 * |[<!-- language="C" -->
59 * #include "raqm.h"
60 *
61 * int
62 * main (int argc, char *argv[])
63 * {
64 * const char *fontfile;
65 * const char *text;
66 * const char *direction;
67 * const char *language;
68 * int ret = 1;
69 *
70 * FT_Library library = NULL;
71 * FT_Face face = NULL;
72 *
73 * if (argc < 5)
74 * {
75 * printf ("Usage: %s FONT_FILE TEXT DIRECTION LANG\n", argv[0]);
76 * return 1;
77 * }
78 *
79 * fontfile = argv[1];
80 * text = argv[2];
81 * direction = argv[3];
82 * language = argv[4];
83 *
84 * if (FT_Init_FreeType (&library) == 0)
85 * {
86 * if (FT_New_Face (library, fontfile, 0, &face) == 0)
87 * {
88 * if (FT_Set_Char_Size (face, face->units_per_EM, 0, 0, 0) == 0)
89 * {
90 * raqm_t *rq = raqm_create ();
91 * if (rq != NULL)
92 * {
93 * raqm_direction_t dir = RAQM_DIRECTION_DEFAULT;
94 *
95 * if (strcmp (direction, "r") == 0)
96 * dir = RAQM_DIRECTION_RTL;
97 * else if (strcmp (direction, "l") == 0)
98 * dir = RAQM_DIRECTION_LTR;
99 *
100 * if (raqm_set_text_utf8 (rq, text, strlen (text)) &&
101 * raqm_set_freetype_face (rq, face) &&
102 * raqm_set_par_direction (rq, dir) &&
103 * raqm_set_language (rq, language, 0, strlen (text)) &&
104 * raqm_layout (rq))
105 * {
106 * size_t count, i;
107 * raqm_glyph_t *glyphs = raqm_get_glyphs (rq, &count);
108 *
109 * ret = !(glyphs != NULL || count == 0);
110 *
111 * printf("glyph count: %zu\n", count);
112 * for (i = 0; i < count; i++)
113 * {
114 * printf ("gid#%d off: (%d, %d) adv: (%d, %d) idx: %d\n",
115 * glyphs[i].index,
116 * glyphs[i].x_offset,
117 * glyphs[i].y_offset,
118 * glyphs[i].x_advance,
119 * glyphs[i].y_advance,
120 * glyphs[i].cluster);
121 * }
122 * }
123 *
124 * raqm_destroy (rq);
125 * }
126 * }
127 *
128 * FT_Done_Face (face);
129 * }
130 *
131 * FT_Done_FreeType (library);
132 * }
133 *
134 * return ret;
135 * }
136 * ]|
137 * To compile this example:
138 * |[<prompt>
139 * cc -o test test.c `pkg-config --libs --cflags raqm`
140 * ]|
141 */
142
/* Debug tracing: uncomment the define below to make RAQM_DBG() print its
 * printf-style arguments to stderr; when RAQM_DEBUG is not defined the macro
 * expands to nothing. */
/*#define RAQM_DEBUG 1*/
#ifdef RAQM_DEBUG
#define RAQM_DBG(...) fprintf (stderr, __VA_ARGS__)
#else
#define RAQM_DBG(...)
#endif

/* Test tracing: when RAQM_TESTING is defined, RAQM_TEST() prints its
 * printf-style arguments to stdout and SCRIPT_TO_STRING() declares a local
 * array named `buff` holding the NUL-terminated ISO 15924 tag of @script.
 * Because it declares a variable, SCRIPT_TO_STRING() can be used only once
 * per scope. Without RAQM_TESTING, RAQM_TEST() expands to nothing and
 * SCRIPT_TO_STRING() is not defined at all. */
#ifdef RAQM_TESTING
# define RAQM_TEST(...) printf (__VA_ARGS__)
# define SCRIPT_TO_STRING(script) \
    char buff[5]; \
    hb_tag_to_string (hb_script_to_iso15924_tag (script), buff); \
    buff[4] = '\0';
#else
# define RAQM_TEST(...)
#endif
160
/* Bit flags stored in raqm_t.flags. */
typedef enum {
  /* No flag set. */
  RAQM_FLAG_NONE = 0x0,
  /* The text was supplied as UTF-8, so caller-visible indices are byte
   * offsets that have to be mapped to and from UTF-32 positions. */
  RAQM_FLAG_UTF8 = 0x1
} _raqm_flags_t;
165
/* Per-character properties. raqm_t.text_info holds one of these for every
 * character of raqm_t.text. */
typedef struct {
  /* Face used for this character; NULL until a face has been set. Each entry
   * owns one FreeType reference on its face. */
  FT_Face ftface;
  /* Language of this character; starts as the HarfBuzz default language. */
  hb_language_t lang;
  /* Script of this character; starts as HB_SCRIPT_INVALID. */
  hb_script_t script;
} _raqm_text_info;
171
typedef struct _raqm_run raqm_run_t;

/* The layout object: input text, its properties, and the layout results. */
struct _raqm {
  /* Number of outstanding references; the object is freed when it hits 0. */
  int ref_count;

  /* Private UTF-32 copy of the input text. */
  uint32_t *text;
  /* Private copy of the original bytes when the text was given as UTF-8. */
  char *text_utf8;
  /* Number of characters in text. */
  size_t text_len;

  /* One entry per character of text (face, language, script). */
  _raqm_text_info *text_info;

  /* Paragraph direction requested by the client. */
  raqm_direction_t base_dir;
  /* Paragraph direction determined during itemization. */
  raqm_direction_t resolved_dir;

  /* Font features added with raqm_add_font_feature(), in insertion order. */
  hb_feature_t *features;
  size_t features_len;

  /* Singly linked list of runs produced by itemization. */
  raqm_run_t *runs;
  /* Glyph array handed out by raqm_get_glyphs(); owned by this object. */
  raqm_glyph_t *glyphs;

  /* Combination of _raqm_flags_t bits. */
  _raqm_flags_t flags;

  /* FreeType load flags applied to created fonts; negative means unset. */
  int ft_loadflags;
  /* Glyph id requested through raqm_set_invisible_glyph(). */
  int invisible_glyph;
};
197
/* One run of consecutive characters sharing direction, script, language and
 * face; runs are chained through `next`. */
struct _raqm_run {
  /* Index of the first character of the run in raqm_t.text. */
  int pos;
  /* Number of characters in the run. */
  int len;

  /* HarfBuzz direction of the run. */
  hb_direction_t direction;
  /* Script of the run. */
  hb_script_t script;
  /* HarfBuzz font created for the run's face; owned by the run. */
  hb_font_t *font;
  /* HarfBuzz buffer of the run; owned by the run.
   * NOTE(review): presumably filled by the shaping step - confirm. */
  hb_buffer_t *buffer;

  /* Next run in the list, or NULL for the last one. */
  raqm_run_t *next;
};
209
210 static uint32_t
211 _raqm_u8_to_u32_index (raqm_t *rq,
212 uint32_t index);
213
214 static bool
_raqm_init_text_info(raqm_t * rq)215 _raqm_init_text_info (raqm_t *rq)
216 {
217 hb_language_t default_lang;
218
219 if (rq->text_info)
220 return true;
221
222 rq->text_info = malloc (sizeof (_raqm_text_info) * rq->text_len);
223 if (!rq->text_info)
224 return false;
225
226 default_lang = hb_language_get_default ();
227 for (size_t i = 0; i < rq->text_len; i++)
228 {
229 rq->text_info[i].ftface = NULL;
230 rq->text_info[i].lang = default_lang;
231 rq->text_info[i].script = HB_SCRIPT_INVALID;
232 }
233
234 return true;
235 }
236
237 static void
_raqm_free_text_info(raqm_t * rq)238 _raqm_free_text_info (raqm_t *rq)
239 {
240 if (!rq->text_info)
241 return;
242
243 for (size_t i = 0; i < rq->text_len; i++)
244 {
245 if (rq->text_info[i].ftface)
246 FT_Done_Face (rq->text_info[i].ftface);
247 }
248
249 free (rq->text_info);
250 rq->text_info = NULL;
251 }
252
253 static bool
_raqm_compare_text_info(_raqm_text_info a,_raqm_text_info b)254 _raqm_compare_text_info (_raqm_text_info a,
255 _raqm_text_info b)
256 {
257 if (a.ftface != b.ftface)
258 return false;
259
260 if (a.lang != b.lang)
261 return false;
262
263 if (a.script != b.script)
264 return false;
265
266 return true;
267 }
268
269 /**
270 * raqm_create:
271 *
272 * Creates a new #raqm_t with all its internal states initialized to their
273 * defaults.
274 *
275 * Return value:
276 * A newly allocated #raqm_t with a reference count of 1. The initial reference
277 * count should be released with raqm_destroy() when you are done using the
278 * #raqm_t. Returns %NULL in case of error.
279 *
280 * Since: 0.1
281 */
282 raqm_t *
raqm_create(void)283 raqm_create (void)
284 {
285 raqm_t *rq;
286
287 rq = malloc (sizeof (raqm_t));
288 if (!rq)
289 return NULL;
290
291 rq->ref_count = 1;
292
293 rq->text = NULL;
294 rq->text_utf8 = NULL;
295 rq->text_len = 0;
296
297 rq->text_info = NULL;
298
299 rq->base_dir = RAQM_DIRECTION_DEFAULT;
300 rq->resolved_dir = RAQM_DIRECTION_DEFAULT;
301
302 rq->features = NULL;
303 rq->features_len = 0;
304
305 rq->runs = NULL;
306 rq->glyphs = NULL;
307
308 rq->flags = RAQM_FLAG_NONE;
309
310 rq->ft_loadflags = -1;
311 rq->invisible_glyph = 0;
312
313 return rq;
314 }
315
316 /**
317 * raqm_reference:
318 * @rq: a #raqm_t.
319 *
320 * Increases the reference count on @rq by one. This prevents @rq from being
321 * destroyed until a matching call to raqm_destroy() is made.
322 *
323 * Return value:
324 * The referenced #raqm_t.
325 *
326 * Since: 0.1
327 */
328 raqm_t *
raqm_reference(raqm_t * rq)329 raqm_reference (raqm_t *rq)
330 {
331 if (rq)
332 rq->ref_count++;
333
334 return rq;
335 }
336
337 static void
_raqm_free_runs(raqm_t * rq)338 _raqm_free_runs (raqm_t *rq)
339 {
340 raqm_run_t *runs = rq->runs;
341 while (runs)
342 {
343 raqm_run_t *run = runs;
344 runs = runs->next;
345
346 hb_buffer_destroy (run->buffer);
347 hb_font_destroy (run->font);
348 free (run);
349 }
350 }
351
352 /**
353 * raqm_destroy:
354 * @rq: a #raqm_t.
355 *
356 * Decreases the reference count on @rq by one. If the result is zero, then @rq
357 * and all associated resources are freed.
358 * See cairo_reference().
359 *
360 * Since: 0.1
361 */
362 void
raqm_destroy(raqm_t * rq)363 raqm_destroy (raqm_t *rq)
364 {
365 if (!rq || --rq->ref_count != 0)
366 return;
367
368 free (rq->text);
369 free (rq->text_utf8);
370 _raqm_free_text_info (rq);
371 _raqm_free_runs (rq);
372 free (rq->glyphs);
373 free (rq);
374 }
375
376 /**
377 * raqm_set_text:
378 * @rq: a #raqm_t.
379 * @text: a UTF-32 encoded text string.
380 * @len: the length of @text.
381 *
382 * Adds @text to @rq to be used for layout. It must be a valid UTF-32 text, any
383 * invalid character will be replaced with U+FFFD. The text should typically
384 * represent a full paragraph, since doing the layout of chunks of text
385 * separately can give improper output.
386 *
387 * Return value:
388 * %true if no errors happened, %false otherwise.
389 *
390 * Since: 0.1
391 */
392 bool
raqm_set_text(raqm_t * rq,const uint32_t * text,size_t len)393 raqm_set_text (raqm_t *rq,
394 const uint32_t *text,
395 size_t len)
396 {
397 if (!rq || !text)
398 return false;
399
400 rq->text_len = len;
401
402 /* Empty string, don’t fail but do nothing */
403 if (!len)
404 return true;
405
406 free (rq->text);
407
408 rq->text = malloc (sizeof (uint32_t) * rq->text_len);
409 if (!rq->text)
410 return false;
411
412 _raqm_free_text_info (rq);
413 if (!_raqm_init_text_info (rq))
414 return false;
415
416 memcpy (rq->text, text, sizeof (uint32_t) * rq->text_len);
417
418 return true;
419 }
420
421 /**
422 * raqm_set_text_utf8:
423 * @rq: a #raqm_t.
424 * @text: a UTF-8 encoded text string.
425 * @len: the length of @text in UTF-8 bytes.
426 *
427 * Same as raqm_set_text(), but for text encoded in UTF-8 encoding.
428 *
429 * Return value:
430 * %true if no errors happened, %false otherwise.
431 *
432 * Since: 0.1
433 */
434 bool
raqm_set_text_utf8(raqm_t * rq,const char * text,size_t len)435 raqm_set_text_utf8 (raqm_t *rq,
436 const char *text,
437 size_t len)
438 {
439 uint32_t *unicode;
440 size_t ulen;
441 bool ok;
442
443 if (!rq || !text)
444 return false;
445
446 /* Empty string, don’t fail but do nothing */
447 if (!len)
448 {
449 rq->text_len = len;
450 return true;
451 }
452
453 RAQM_TEST ("Text is: %s\n", text);
454
455 rq->flags |= RAQM_FLAG_UTF8;
456
457 rq->text_utf8 = malloc (sizeof (char) * len);
458 if (!rq->text_utf8)
459 return false;
460
461 unicode = malloc (sizeof (uint32_t) * len);
462 if (!unicode)
463 return false;
464
465 memcpy (rq->text_utf8, text, sizeof (char) * len);
466
467 ulen = fribidi_charset_to_unicode (FRIBIDI_CHAR_SET_UTF8,
468 text, len, unicode);
469
470 ok = raqm_set_text (rq, unicode, ulen);
471
472 free (unicode);
473 return ok;
474 }
475
476 /**
477 * raqm_set_par_direction:
478 * @rq: a #raqm_t.
479 * @dir: the direction of the paragraph.
480 *
481 * Sets the paragraph direction, also known as block direction in CSS. For
482 * horizontal text, this controls the overall direction in the Unicode
483 * Bidirectional Algorithm, so when the text is mainly right-to-left (with or
484 * without some left-to-right) text, then the base direction should be set to
485 * #RAQM_DIRECTION_RTL and vice versa.
486 *
487 * The default is #RAQM_DIRECTION_DEFAULT, which determines the paragraph
488 * direction based on the first character with strong bidi type (see [rule
489 * P2](http://unicode.org/reports/tr9/#P2) in Unicode Bidirectional Algorithm),
490 * which can be good enough for many cases but has problems when a mainly
491 * right-to-left paragraph starts with a left-to-right character and vice versa
492 * as the detected paragraph direction will be the wrong one, or when text does
493 * not contain any characters with string bidi types (e.g. only punctuation or
494 * numbers) as this will default to left-to-right paragraph direction.
495 *
496 * For vertical, top-to-bottom text, #RAQM_DIRECTION_TTB should be used. Raqm,
497 * however, provides limited vertical text support and does not handle rotated
498 * horizontal text in vertical text, instead everything is treated as vertical
499 * text.
500 *
501 * Return value:
502 * %true if no errors happened, %false otherwise.
503 *
504 * Since: 0.1
505 */
506 bool
raqm_set_par_direction(raqm_t * rq,raqm_direction_t dir)507 raqm_set_par_direction (raqm_t *rq,
508 raqm_direction_t dir)
509 {
510 if (!rq)
511 return false;
512
513 rq->base_dir = dir;
514
515 return true;
516 }
517
518 /**
519 * raqm_set_language:
520 * @rq: a #raqm_t.
521 * @lang: a BCP47 language code.
522 * @start: index of first character that should use @face.
523 * @len: number of characters using @face.
524 *
525 * Sets a [BCP47 language
526 * code](https://www.w3.org/International/articles/language-tags/) to be used
527 * for @len-number of characters staring at @start. The @start and @len are
528 * input string array indices (i.e. counting bytes in UTF-8 and scaler values
529 * in UTF-32).
530 *
531 * This method can be used repeatedly to set different languages for different
532 * parts of the text.
533 *
534 * Return value:
535 * %true if no errors happened, %false otherwise.
536 *
537 * Stability:
538 * Unstable
539 *
540 * Since: 0.2
541 */
542 bool
raqm_set_language(raqm_t * rq,const char * lang,size_t start,size_t len)543 raqm_set_language (raqm_t *rq,
544 const char *lang,
545 size_t start,
546 size_t len)
547 {
548 hb_language_t language;
549 size_t end = start + len;
550
551 if (!rq)
552 return false;
553
554 if (!rq->text_len)
555 return true;
556
557 if (rq->flags & RAQM_FLAG_UTF8)
558 {
559 start = _raqm_u8_to_u32_index (rq, start);
560 end = _raqm_u8_to_u32_index (rq, end);
561 }
562
563 if (start >= rq->text_len || end > rq->text_len)
564 return false;
565
566 if (!rq->text_info)
567 return false;
568
569 language = hb_language_from_string (lang, -1);
570 for (size_t i = start; i < end; i++)
571 {
572 rq->text_info[i].lang = language;
573 }
574
575 return true;
576 }
577
578 /**
579 * raqm_add_font_feature:
580 * @rq: a #raqm_t.
581 * @feature: (transfer none): a font feature string.
582 * @len: length of @feature, -1 for %NULL-terminated.
583 *
584 * Adds a font feature to be used by the #raqm_t during text layout. This is
585 * usually used to turn on optional font features that are not enabled by
586 * default, for example `dlig` or `ss01`, but can be also used to turn off
587 * default font features.
588 *
589 * @feature is string representing a single font feature, in the syntax
590 * understood by hb_feature_from_string().
591 *
592 * This function can be called repeatedly, new features will be appended to the
593 * end of the features list and can potentially override previous features.
594 *
595 * Return value:
596 * %true if parsing @feature succeeded, %false otherwise.
597 *
598 * Since: 0.1
599 */
600 bool
raqm_add_font_feature(raqm_t * rq,const char * feature,int len)601 raqm_add_font_feature (raqm_t *rq,
602 const char *feature,
603 int len)
604 {
605 hb_bool_t ok;
606 hb_feature_t fea;
607
608 if (!rq)
609 return false;
610
611 ok = hb_feature_from_string (feature, len, &fea);
612 if (ok)
613 {
614 rq->features_len++;
615 rq->features = realloc (rq->features,
616 sizeof (hb_feature_t) * (rq->features_len));
617 if (!rq->features)
618 return false;
619
620 rq->features[rq->features_len - 1] = fea;
621 }
622
623 return ok;
624 }
625
626 static hb_font_t *
_raqm_create_hb_font(raqm_t * rq,FT_Face face)627 _raqm_create_hb_font (raqm_t *rq,
628 FT_Face face)
629 {
630 hb_font_t *font = hb_ft_font_create_referenced (face);
631
632 if (rq->ft_loadflags >= 0)
633 hb_ft_font_set_load_flags (font, rq->ft_loadflags);
634
635 return font;
636 }
637
638 static bool
_raqm_set_freetype_face(raqm_t * rq,FT_Face face,size_t start,size_t end)639 _raqm_set_freetype_face (raqm_t *rq,
640 FT_Face face,
641 size_t start,
642 size_t end)
643 {
644 if (!rq)
645 return false;
646
647 if (!rq->text_len)
648 return true;
649
650 if (start >= rq->text_len || end > rq->text_len)
651 return false;
652
653 if (!rq->text_info)
654 return false;
655
656 for (size_t i = start; i < end; i++)
657 {
658 if (rq->text_info[i].ftface)
659 FT_Done_Face (rq->text_info[i].ftface);
660 rq->text_info[i].ftface = face;
661 FT_Reference_Face (face);
662 }
663
664 return true;
665 }
666
667 /**
668 * raqm_set_freetype_face:
669 * @rq: a #raqm_t.
670 * @face: an #FT_Face.
671 *
672 * Sets an #FT_Face to be used for all characters in @rq.
673 *
674 * See also raqm_set_freetype_face_range().
675 *
676 * Return value:
677 * %true if no errors happened, %false otherwise.
678 *
679 * Since: 0.1
680 */
681 bool
raqm_set_freetype_face(raqm_t * rq,FT_Face face)682 raqm_set_freetype_face (raqm_t *rq,
683 FT_Face face)
684 {
685 return _raqm_set_freetype_face (rq, face, 0, rq->text_len);
686 }
687
688 /**
689 * raqm_set_freetype_face_range:
690 * @rq: a #raqm_t.
691 * @face: an #FT_Face.
692 * @start: index of first character that should use @face.
693 * @len: number of characters using @face.
694 *
695 * Sets an #FT_Face to be used for @len-number of characters staring at @start.
696 * The @start and @len are input string array indices (i.e. counting bytes in
697 * UTF-8 and scaler values in UTF-32).
698 *
699 * This method can be used repeatedly to set different faces for different
700 * parts of the text. It is the responsibility of the client to make sure that
701 * face ranges cover the whole text.
702 *
703 * See also raqm_set_freetype_face().
704 *
705 * Return value:
706 * %true if no errors happened, %false otherwise.
707 *
708 * Since: 0.1
709 */
710 bool
raqm_set_freetype_face_range(raqm_t * rq,FT_Face face,size_t start,size_t len)711 raqm_set_freetype_face_range (raqm_t *rq,
712 FT_Face face,
713 size_t start,
714 size_t len)
715 {
716 size_t end = start + len;
717
718 if (!rq)
719 return false;
720
721 if (!rq->text_len)
722 return true;
723
724 if (rq->flags & RAQM_FLAG_UTF8)
725 {
726 start = _raqm_u8_to_u32_index (rq, start);
727 end = _raqm_u8_to_u32_index (rq, end);
728 }
729
730 return _raqm_set_freetype_face (rq, face, start, end);
731 }
732
733 /**
734 * raqm_set_freetype_load_flags:
735 * @rq: a #raqm_t.
736 * @flags: FreeType load flags.
737 *
738 * Sets the load flags passed to FreeType when loading glyphs, should be the
739 * same flags used by the client when rendering FreeType glyphs.
740 *
741 * This requires version of HarfBuzz that has hb_ft_font_set_load_flags(), for
742 * older version the flags will be ignored.
743 *
744 * Return value:
745 * %true if no errors happened, %false otherwise.
746 *
747 * Since: 0.3
748 */
749 bool
raqm_set_freetype_load_flags(raqm_t * rq,int flags)750 raqm_set_freetype_load_flags (raqm_t *rq,
751 int flags)
752 {
753 if (!rq)
754 return false;
755
756 rq->ft_loadflags = flags;
757
758 return true;
759 }
760
761 /**
762 * raqm_set_invisible_glyph:
763 * @rq: a #raqm_t.
764 * @gid: glyph id to use for invisible glyphs.
765 *
766 * Sets the glyph id to be used for invisible glyhphs.
767 *
768 * If @gid is negative, invisible glyphs will be suppressed from the output.
769 * This requires HarfBuzz 1.8.0 or later. If raqm is used with an earlier
770 * HarfBuzz version, the return value will be %false and the shaping behavior
771 * does not change.
772 *
773 * If @gid is zero, invisible glyphs will be rendered as space.
774 * This works on all versions of HarfBuzz.
775 *
776 * If @gid is a positive number, it will be used for invisible glyphs.
777 * This requires a version of HarfBuzz that has
778 * hb_buffer_set_invisible_glyph(). For older versions, the return value
779 * will be %false and the shaping behavior does not change.
780 *
781 * Return value:
782 * %true if no errors happened, %false otherwise.
783 *
784 * Since: 0.6
785 */
786 bool
raqm_set_invisible_glyph(raqm_t * rq,int gid)787 raqm_set_invisible_glyph (raqm_t *rq,
788 int gid)
789 {
790 if (!rq)
791 return false;
792
793 #ifndef HAVE_HB_BUFFER_SET_INVISIBLE_GLYPH
794 if (gid > 0)
795 return false;
796 #endif
797
798 #if !defined(HAVE_DECL_HB_BUFFER_FLAG_REMOVE_DEFAULT_IGNORABLES) || \
799 !HAVE_DECL_HB_BUFFER_FLAG_REMOVE_DEFAULT_IGNORABLES
800 if (gid < 0)
801 return false;
802 #endif
803
804 rq->invisible_glyph = gid;
805 return true;
806 }
807
808 static bool
809 _raqm_itemize (raqm_t *rq);
810
811 static bool
812 _raqm_shape (raqm_t *rq);
813
814 /**
815 * raqm_layout:
816 * @rq: a #raqm_t.
817 *
818 * Run the text layout process on @rq. This is the main Raqm function where the
819 * Unicode Bidirectional Text algorithm will be applied to the text in @rq,
820 * text shaping, and any other part of the layout process.
821 *
822 * Return value:
823 * %true if the layout process was successful, %false otherwise.
824 *
825 * Since: 0.1
826 */
827 bool
raqm_layout(raqm_t * rq)828 raqm_layout (raqm_t *rq)
829 {
830 if (!rq)
831 return false;
832
833 if (!rq->text_len)
834 return true;
835
836 if (!rq->text_info)
837 return false;
838
839 for (size_t i = 0; i < rq->text_len; i++)
840 {
841 if (!rq->text_info[i].ftface)
842 return false;
843 }
844
845 if (!_raqm_itemize (rq))
846 return false;
847
848 if (!_raqm_shape (rq))
849 return false;
850
851 return true;
852 }
853
854 static uint32_t
855 _raqm_u32_to_u8_index (raqm_t *rq,
856 uint32_t index);
857
858 /**
859 * raqm_get_glyphs:
860 * @rq: a #raqm_t.
861 * @length: (out): output array length.
862 *
863 * Gets the final result of Raqm layout process, an array of #raqm_glyph_t
864 * containing the glyph indices in the font, their positions and other possible
865 * information.
866 *
867 * Return value: (transfer none):
868 * An array of #raqm_glyph_t, or %NULL in case of error. This is owned by @rq
869 * and must not be freed.
870 *
871 * Since: 0.1
872 */
873 raqm_glyph_t *
raqm_get_glyphs(raqm_t * rq,size_t * length)874 raqm_get_glyphs (raqm_t *rq,
875 size_t *length)
876 {
877 size_t count = 0;
878
879 if (!rq || !rq->runs || !length)
880 {
881 if (length)
882 *length = 0;
883 return NULL;
884 }
885
886 for (raqm_run_t *run = rq->runs; run != NULL; run = run->next)
887 count += hb_buffer_get_length (run->buffer);
888
889 *length = count;
890
891 if (rq->glyphs)
892 free (rq->glyphs);
893
894 rq->glyphs = malloc (sizeof (raqm_glyph_t) * count);
895 if (!rq->glyphs)
896 {
897 *length = 0;
898 return NULL;
899 }
900
901 RAQM_TEST ("Glyph information:\n");
902
903 count = 0;
904 for (raqm_run_t *run = rq->runs; run != NULL; run = run->next)
905 {
906 size_t len;
907 hb_glyph_info_t *info;
908 hb_glyph_position_t *position;
909
910 len = hb_buffer_get_length (run->buffer);
911 info = hb_buffer_get_glyph_infos (run->buffer, NULL);
912 position = hb_buffer_get_glyph_positions (run->buffer, NULL);
913
914 for (size_t i = 0; i < len; i++)
915 {
916 rq->glyphs[count + i].index = info[i].codepoint;
917 rq->glyphs[count + i].cluster = info[i].cluster;
918 rq->glyphs[count + i].x_advance = position[i].x_advance;
919 rq->glyphs[count + i].y_advance = position[i].y_advance;
920 rq->glyphs[count + i].x_offset = position[i].x_offset;
921 rq->glyphs[count + i].y_offset = position[i].y_offset;
922 rq->glyphs[count + i].ftface = rq->text_info[info[i].cluster].ftface;
923
924 RAQM_TEST ("glyph [%d]\tx_offset: %d\ty_offset: %d\tx_advance: %d\tfont: %s\n",
925 rq->glyphs[count + i].index, rq->glyphs[count + i].x_offset,
926 rq->glyphs[count + i].y_offset, rq->glyphs[count + i].x_advance,
927 rq->glyphs[count + i].ftface->family_name);
928 }
929
930 count += len;
931 }
932
933 if (rq->flags & RAQM_FLAG_UTF8)
934 {
935 #ifdef RAQM_TESTING
936 RAQM_TEST ("\nUTF-32 clusters:");
937 for (size_t i = 0; i < count; i++)
938 RAQM_TEST (" %02d", rq->glyphs[i].cluster);
939 RAQM_TEST ("\n");
940 #endif
941
942 for (size_t i = 0; i < count; i++)
943 rq->glyphs[i].cluster = _raqm_u32_to_u8_index (rq,
944 rq->glyphs[i].cluster);
945
946 #ifdef RAQM_TESTING
947 RAQM_TEST ("UTF-8 clusters: ");
948 for (size_t i = 0; i < count; i++)
949 RAQM_TEST (" %02d", rq->glyphs[i].cluster);
950 RAQM_TEST ("\n");
951 #endif
952 }
953 return rq->glyphs;
954 }
955
956 static bool
957 _raqm_resolve_scripts (raqm_t *rq);
958
959 static hb_direction_t
_raqm_hb_dir(raqm_t * rq,FriBidiLevel level)960 _raqm_hb_dir (raqm_t *rq, FriBidiLevel level)
961 {
962 hb_direction_t dir = HB_DIRECTION_LTR;
963
964 if (rq->base_dir == RAQM_DIRECTION_TTB)
965 dir = HB_DIRECTION_TTB;
966 else if (FRIBIDI_LEVEL_IS_RTL (level))
967 dir = HB_DIRECTION_RTL;
968
969 return dir;
970 }
971
/* A maximal sequence of consecutive characters with the same bidi embedding
 * level. */
typedef struct {
  /* Index of the first character of the run. */
  size_t pos;
  /* Number of characters in the run. */
  size_t len;
  /* Embedding level shared by all characters of the run. */
  FriBidiLevel level;
} _raqm_bidi_run;
977
978 static void
_raqm_reverse_run(_raqm_bidi_run * run,const size_t len)979 _raqm_reverse_run (_raqm_bidi_run *run, const size_t len)
980 {
981 assert (run);
982
983 for (size_t i = 0; i < len / 2; i++)
984 {
985 _raqm_bidi_run temp = run[i];
986 run[i] = run[len - 1 - i];
987 run[len - 1 - i] = temp;
988 }
989 }
990
991 static _raqm_bidi_run *
_raqm_reorder_runs(const FriBidiCharType * types,const size_t len,const FriBidiParType base_dir,FriBidiLevel * levels,size_t * run_count)992 _raqm_reorder_runs (const FriBidiCharType *types,
993 const size_t len,
994 const FriBidiParType base_dir,
995 /* input and output */
996 FriBidiLevel *levels,
997 /* output */
998 size_t *run_count)
999 {
1000 FriBidiLevel level;
1001 FriBidiLevel last_level = -1;
1002 FriBidiLevel max_level = 0;
1003 size_t run_start = 0;
1004 size_t run_index = 0;
1005 _raqm_bidi_run *runs = NULL;
1006 size_t count = 0;
1007
1008 if (len == 0)
1009 {
1010 *run_count = 0;
1011 return NULL;
1012 }
1013
1014 assert (types);
1015 assert (levels);
1016
1017 /* L1. Reset the embedding levels of some chars:
1018 4. any sequence of white space characters at the end of the line. */
1019 for (int i = len - 1;
1020 i >= 0 && FRIBIDI_IS_EXPLICIT_OR_BN_OR_WS (types[i]); i--)
1021 {
1022 levels[i] = FRIBIDI_DIR_TO_LEVEL (base_dir);
1023 }
1024
1025 /* Find max_level of the line. We don't reuse the paragraph
1026 * max_level, both for a cleaner API, and that the line max_level
1027 * may be far less than paragraph max_level. */
1028 for (int i = len - 1; i >= 0; i--)
1029 {
1030 if (levels[i] > max_level)
1031 max_level = levels[i];
1032 }
1033
1034 for (size_t i = 0; i < len; i++)
1035 {
1036 if (levels[i] != last_level)
1037 count++;
1038
1039 last_level = levels[i];
1040 }
1041
1042 runs = malloc (sizeof (_raqm_bidi_run) * count);
1043
1044 while (run_start < len)
1045 {
1046 size_t run_end = run_start;
1047 while (run_end < len && levels[run_start] == levels[run_end])
1048 {
1049 run_end++;
1050 }
1051
1052 runs[run_index].pos = run_start;
1053 runs[run_index].level = levels[run_start];
1054 runs[run_index].len = run_end - run_start;
1055 run_start = run_end;
1056 run_index++;
1057 }
1058
1059 /* L2. Reorder. */
1060 for (level = max_level; level > 0; level--)
1061 {
1062 for (int i = count - 1; i >= 0; i--)
1063 {
1064 if (runs[i].level >= level)
1065 {
1066 int end = i;
1067 for (i--; (i >= 0 && runs[i].level >= level); i--)
1068 ;
1069 _raqm_reverse_run (runs + i + 1, end - i);
1070 }
1071 }
1072 }
1073
1074 *run_count = count;
1075 return runs;
1076 }
1077
1078 static bool
_raqm_itemize(raqm_t * rq)1079 _raqm_itemize (raqm_t *rq)
1080 {
1081 FriBidiParType par_type = FRIBIDI_PAR_ON;
1082 FriBidiCharType *types;
1083 #ifdef USE_FRIBIDI_EX_API
1084 FriBidiBracketType *btypes;
1085 #endif
1086 FriBidiLevel *levels;
1087 _raqm_bidi_run *runs = NULL;
1088 raqm_run_t *last;
1089 int max_level;
1090 size_t run_count;
1091 bool ok = true;
1092
1093 #ifdef RAQM_TESTING
1094 switch (rq->base_dir)
1095 {
1096 case RAQM_DIRECTION_RTL:
1097 RAQM_TEST ("Direction is: RTL\n\n");
1098 break;
1099 case RAQM_DIRECTION_LTR:
1100 RAQM_TEST ("Direction is: LTR\n\n");
1101 break;
1102 case RAQM_DIRECTION_TTB:
1103 RAQM_TEST ("Direction is: TTB\n\n");
1104 break;
1105 case RAQM_DIRECTION_DEFAULT:
1106 default:
1107 RAQM_TEST ("Direction is: DEFAULT\n\n");
1108 break;
1109 }
1110 #endif
1111
1112 types = calloc (rq->text_len, sizeof (FriBidiCharType));
1113 #ifdef USE_FRIBIDI_EX_API
1114 btypes = calloc (rq->text_len, sizeof (FriBidiBracketType));
1115 #endif
1116 levels = calloc (rq->text_len, sizeof (FriBidiLevel));
1117 if (!types || !levels
1118 #ifdef USE_FRIBIDI_EX_API
1119 || !btypes
1120 #endif
1121 )
1122 {
1123 ok = false;
1124 goto done;
1125 }
1126
1127 if (rq->base_dir == RAQM_DIRECTION_RTL)
1128 par_type = FRIBIDI_PAR_RTL;
1129 else if (rq->base_dir == RAQM_DIRECTION_LTR)
1130 par_type = FRIBIDI_PAR_LTR;
1131
1132 if (rq->base_dir == RAQM_DIRECTION_TTB)
1133 {
1134 /* Treat every thing as LTR in vertical text */
1135 max_level = 1;
1136 memset (types, FRIBIDI_TYPE_LTR, rq->text_len);
1137 memset (levels, 0, rq->text_len);
1138 rq->resolved_dir = RAQM_DIRECTION_LTR;
1139 }
1140 else
1141 {
1142 fribidi_get_bidi_types (rq->text, rq->text_len, types);
1143 #ifdef USE_FRIBIDI_EX_API
1144 fribidi_get_bracket_types (rq->text, rq->text_len, types, btypes);
1145 max_level = fribidi_get_par_embedding_levels_ex (types, btypes,
1146 rq->text_len, &par_type,
1147 levels);
1148 #else
1149 max_level = fribidi_get_par_embedding_levels (types, rq->text_len,
1150 &par_type, levels);
1151 #endif
1152
1153 if (par_type == FRIBIDI_PAR_LTR)
1154 rq->resolved_dir = RAQM_DIRECTION_LTR;
1155 else
1156 rq->resolved_dir = RAQM_DIRECTION_RTL;
1157 }
1158
1159 if (max_level == 0)
1160 {
1161 ok = false;
1162 goto done;
1163 }
1164
1165 if (!_raqm_resolve_scripts (rq))
1166 {
1167 ok = false;
1168 goto done;
1169 }
1170
1171 /* Get the number of bidi runs */
1172 runs = _raqm_reorder_runs (types, rq->text_len, par_type, levels, &run_count);
1173 if (!runs)
1174 {
1175 ok = false;
1176 goto done;
1177 }
1178
1179 #ifdef RAQM_TESTING
1180 RAQM_TEST ("Number of runs before script itemization: %zu\n\n", run_count);
1181
1182 RAQM_TEST ("Fribidi Runs:\n");
1183 for (size_t i = 0; i < run_count; i++)
1184 {
1185 RAQM_TEST ("run[%zu]:\t start: %zu\tlength: %zu\tlevel: %d\n",
1186 i, runs[i].pos, runs[i].len, runs[i].level);
1187 }
1188 RAQM_TEST ("\n");
1189 #endif
1190
1191 last = NULL;
1192 for (size_t i = 0; i < run_count; i++)
1193 {
1194 raqm_run_t *run = calloc (1, sizeof (raqm_run_t));
1195 if (!run)
1196 {
1197 ok = false;
1198 goto done;
1199 }
1200
1201 if (!rq->runs)
1202 rq->runs = run;
1203
1204 if (last)
1205 last->next = run;
1206
1207 run->direction = _raqm_hb_dir (rq, runs[i].level);
1208
1209 if (HB_DIRECTION_IS_BACKWARD (run->direction))
1210 {
1211 run->pos = runs[i].pos + runs[i].len - 1;
1212 run->script = rq->text_info[run->pos].script;
1213 run->font = _raqm_create_hb_font (rq, rq->text_info[run->pos].ftface);
1214 for (int j = runs[i].len - 1; j >= 0; j--)
1215 {
1216 _raqm_text_info info = rq->text_info[runs[i].pos + j];
1217 if (!_raqm_compare_text_info (rq->text_info[run->pos], info))
1218 {
1219 raqm_run_t *newrun = calloc (1, sizeof (raqm_run_t));
1220 if (!newrun)
1221 {
1222 ok = false;
1223 goto done;
1224 }
1225 newrun->pos = runs[i].pos + j;
1226 newrun->len = 1;
1227 newrun->direction = _raqm_hb_dir (rq, runs[i].level);
1228 newrun->script = info.script;
1229 newrun->font = _raqm_create_hb_font (rq, info.ftface);
1230 run->next = newrun;
1231 run = newrun;
1232 }
1233 else
1234 {
1235 run->len++;
1236 run->pos = runs[i].pos + j;
1237 }
1238 }
1239 }
1240 else
1241 {
1242 run->pos = runs[i].pos;
1243 run->script = rq->text_info[run->pos].script;
1244 run->font = _raqm_create_hb_font (rq, rq->text_info[run->pos].ftface);
1245 for (size_t j = 0; j < runs[i].len; j++)
1246 {
1247 _raqm_text_info info = rq->text_info[runs[i].pos + j];
1248 if (!_raqm_compare_text_info (rq->text_info[run->pos], info))
1249 {
1250 raqm_run_t *newrun = calloc (1, sizeof (raqm_run_t));
1251 if (!newrun)
1252 {
1253 ok = false;
1254 goto done;
1255 }
1256 newrun->pos = runs[i].pos + j;
1257 newrun->len = 1;
1258 newrun->direction = _raqm_hb_dir (rq, runs[i].level);
1259 newrun->script = info.script;
1260 newrun->font = _raqm_create_hb_font (rq, info.ftface);
1261 run->next = newrun;
1262 run = newrun;
1263 }
1264 else
1265 run->len++;
1266 }
1267 }
1268
1269 last = run;
1270 last->next = NULL;
1271 }
1272
1273 #ifdef RAQM_TESTING
1274 run_count = 0;
1275 for (raqm_run_t *run = rq->runs; run != NULL; run = run->next)
1276 run_count++;
1277 RAQM_TEST ("Number of runs after script itemization: %zu\n\n", run_count);
1278
1279 run_count = 0;
1280 RAQM_TEST ("Final Runs:\n");
1281 for (raqm_run_t *run = rq->runs; run != NULL; run = run->next)
1282 {
1283 SCRIPT_TO_STRING (run->script);
1284 RAQM_TEST ("run[%zu]:\t start: %d\tlength: %d\tdirection: %s\tscript: %s\tfont: %s\n",
1285 run_count++, run->pos, run->len,
1286 hb_direction_to_string (run->direction), buff,
1287 rq->text_info[run->pos].ftface->family_name);
1288 }
1289 RAQM_TEST ("\n");
1290 #endif
1291
1292 done:
1293 free (runs);
1294 free (types);
1295 #ifdef USE_FRIBIDI_EX_API
1296 free (btypes);
1297 #endif
1298 free (levels);
1299
1300 return ok;
1301 }
1302
1303 /* Stack to handle script detection */
1304 typedef struct {
1305 size_t capacity;
1306 size_t size;
1307 int *pair_index;
1308 hb_script_t *script;
1309 } _raqm_stack_t;
1310
1311 /* Special paired characters for script detection */
1312 static size_t paired_len = 34;
1313 static const FriBidiChar paired_chars[] =
1314 {
1315 0x0028, 0x0029, /* ascii paired punctuation */
1316 0x003c, 0x003e,
1317 0x005b, 0x005d,
1318 0x007b, 0x007d,
1319 0x00ab, 0x00bb, /* guillemets */
1320 0x2018, 0x2019, /* general punctuation */
1321 0x201c, 0x201d,
1322 0x2039, 0x203a,
1323 0x3008, 0x3009, /* chinese paired punctuation */
1324 0x300a, 0x300b,
1325 0x300c, 0x300d,
1326 0x300e, 0x300f,
1327 0x3010, 0x3011,
1328 0x3014, 0x3015,
1329 0x3016, 0x3017,
1330 0x3018, 0x3019,
1331 0x301a, 0x301b
1332 };
1333
1334 static void
_raqm_stack_free(_raqm_stack_t * stack)1335 _raqm_stack_free (_raqm_stack_t *stack)
1336 {
1337 free (stack->script);
1338 free (stack->pair_index);
1339 free (stack);
1340 }
1341
1342 /* Stack handling functions */
1343 static _raqm_stack_t *
_raqm_stack_new(size_t max)1344 _raqm_stack_new (size_t max)
1345 {
1346 _raqm_stack_t *stack;
1347 stack = calloc (1, sizeof (_raqm_stack_t));
1348 if (!stack)
1349 return NULL;
1350
1351 stack->script = malloc (sizeof (hb_script_t) * max);
1352 if (!stack->script)
1353 {
1354 _raqm_stack_free (stack);
1355 return NULL;
1356 }
1357
1358 stack->pair_index = malloc (sizeof (int) * max);
1359 if (!stack->pair_index)
1360 {
1361 _raqm_stack_free (stack);
1362 return NULL;
1363 }
1364
1365 stack->size = 0;
1366 stack->capacity = max;
1367
1368 return stack;
1369 }
1370
1371 static bool
_raqm_stack_pop(_raqm_stack_t * stack)1372 _raqm_stack_pop (_raqm_stack_t *stack)
1373 {
1374 if (!stack->size)
1375 {
1376 RAQM_DBG ("Stack is Empty\n");
1377 return false;
1378 }
1379
1380 stack->size--;
1381
1382 return true;
1383 }
1384
1385 static hb_script_t
_raqm_stack_top(_raqm_stack_t * stack)1386 _raqm_stack_top (_raqm_stack_t *stack)
1387 {
1388 if (!stack->size)
1389 {
1390 RAQM_DBG ("Stack is Empty\n");
1391 return HB_SCRIPT_INVALID; /* XXX: check this */
1392 }
1393
1394 return stack->script[stack->size];
1395 }
1396
1397 static bool
_raqm_stack_push(_raqm_stack_t * stack,hb_script_t script,int pair_index)1398 _raqm_stack_push (_raqm_stack_t *stack,
1399 hb_script_t script,
1400 int pair_index)
1401 {
1402 if (stack->size == stack->capacity)
1403 {
1404 RAQM_DBG ("Stack is Full\n");
1405 return false;
1406 }
1407
1408 stack->size++;
1409 stack->script[stack->size] = script;
1410 stack->pair_index[stack->size] = pair_index;
1411
1412 return true;
1413 }
1414
1415 static int
_get_pair_index(const FriBidiChar ch)1416 _get_pair_index (const FriBidiChar ch)
1417 {
1418 int lower = 0;
1419 int upper = paired_len - 1;
1420
1421 while (lower <= upper)
1422 {
1423 int mid = (lower + upper) / 2;
1424 if (ch < paired_chars[mid])
1425 upper = mid - 1;
1426 else if (ch > paired_chars[mid])
1427 lower = mid + 1;
1428 else
1429 return mid;
1430 }
1431
1432 return -1;
1433 }
1434
/* True when the given stack (a pointer to a struct with a `size` member)
 * holds no entries. */
#define STACK_IS_EMPTY(stack) ((stack)->size <= 0)
/* True when a paired_chars index refers to an opening character: openers
 * occupy the even indices, closers the odd ones. */
#define IS_OPEN(pair_index) (!((pair_index) & 1))
1437
1438 /* Resolve the script for each character in the input string, if the character
1439 * script is common or inherited it takes the script of the character before it
1440 * except paired characters which we try to make them use the same script. We
1441 * then split the BiDi runs, if necessary, on script boundaries.
1442 */
1443 static bool
_raqm_resolve_scripts(raqm_t * rq)1444 _raqm_resolve_scripts (raqm_t *rq)
1445 {
1446 int last_script_index = -1;
1447 int last_set_index = -1;
1448 hb_script_t last_script = HB_SCRIPT_INVALID;
1449 _raqm_stack_t *stack = NULL;
1450 hb_unicode_funcs_t* unicode_funcs = hb_unicode_funcs_get_default ();
1451
1452 for (size_t i = 0; i < rq->text_len; ++i)
1453 rq->text_info[i].script = hb_unicode_script (unicode_funcs, rq->text[i]);
1454
1455 #ifdef RAQM_TESTING
1456 RAQM_TEST ("Before script detection:\n");
1457 for (size_t i = 0; i < rq->text_len; ++i)
1458 {
1459 SCRIPT_TO_STRING (rq->text_info[i].script);
1460 RAQM_TEST ("script for ch[%zu]\t%s\n", i, buff);
1461 }
1462 RAQM_TEST ("\n");
1463 #endif
1464
1465 stack = _raqm_stack_new (rq->text_len);
1466 if (!stack)
1467 return false;
1468
1469 for (int i = 0; i < (int) rq->text_len; i++)
1470 {
1471 if (rq->text_info[i].script == HB_SCRIPT_COMMON && last_script_index != -1)
1472 {
1473 int pair_index = _get_pair_index (rq->text[i]);
1474 if (pair_index >= 0)
1475 {
1476 if (IS_OPEN (pair_index))
1477 {
1478 /* is a paired character */
1479 rq->text_info[i].script = last_script;
1480 last_set_index = i;
1481 _raqm_stack_push (stack, rq->text_info[i].script, pair_index);
1482 }
1483 else
1484 {
1485 /* is a close paired character */
1486 /* find matching opening (by getting the last even index for current
1487 * odd index) */
1488 while (!STACK_IS_EMPTY (stack) &&
1489 stack->pair_index[stack->size] != (pair_index & ~1))
1490 {
1491 _raqm_stack_pop (stack);
1492 }
1493 if (!STACK_IS_EMPTY (stack))
1494 {
1495 rq->text_info[i].script = _raqm_stack_top (stack);
1496 last_script = rq->text_info[i].script;
1497 last_set_index = i;
1498 }
1499 else
1500 {
1501 rq->text_info[i].script = last_script;
1502 last_set_index = i;
1503 }
1504 }
1505 }
1506 else
1507 {
1508 rq->text_info[i].script = last_script;
1509 last_set_index = i;
1510 }
1511 }
1512 else if (rq->text_info[i].script == HB_SCRIPT_INHERITED &&
1513 last_script_index != -1)
1514 {
1515 rq->text_info[i].script = last_script;
1516 last_set_index = i;
1517 }
1518 else
1519 {
1520 for (int j = last_set_index + 1; j < i; ++j)
1521 rq->text_info[j].script = rq->text_info[i].script;
1522 last_script = rq->text_info[i].script;
1523 last_script_index = i;
1524 last_set_index = i;
1525 }
1526 }
1527
1528 /* Loop backwards and change any remaining Common or Inherit characters to
1529 * take the script if the next character.
1530 * https://github.com/HOST-Oman/libraqm/issues/95
1531 */
1532 for (int i = rq->text_len - 2; i >= 0; --i)
1533 {
1534 if (rq->text_info[i].script == HB_SCRIPT_INHERITED ||
1535 rq->text_info[i].script == HB_SCRIPT_COMMON)
1536 rq->text_info[i].script = rq->text_info[i + 1].script;
1537 }
1538
1539 #ifdef RAQM_TESTING
1540 RAQM_TEST ("After script detection:\n");
1541 for (size_t i = 0; i < rq->text_len; ++i)
1542 {
1543 SCRIPT_TO_STRING (rq->text_info[i].script);
1544 RAQM_TEST ("script for ch[%zu]\t%s\n", i, buff);
1545 }
1546 RAQM_TEST ("\n");
1547 #endif
1548
1549 _raqm_stack_free (stack);
1550
1551 return true;
1552 }
1553
1554 static bool
_raqm_shape(raqm_t * rq)1555 _raqm_shape (raqm_t *rq)
1556 {
1557 hb_buffer_flags_t hb_buffer_flags = HB_BUFFER_FLAG_BOT | HB_BUFFER_FLAG_EOT;
1558
1559 #if defined(HAVE_DECL_HB_BUFFER_FLAG_REMOVE_DEFAULT_IGNORABLES) && \
1560 HAVE_DECL_HB_BUFFER_FLAG_REMOVE_DEFAULT_IGNORABLES
1561 if (rq->invisible_glyph < 0)
1562 hb_buffer_flags |= HB_BUFFER_FLAG_REMOVE_DEFAULT_IGNORABLES;
1563 #endif
1564
1565 for (raqm_run_t *run = rq->runs; run != NULL; run = run->next)
1566 {
1567 run->buffer = hb_buffer_create ();
1568
1569 hb_buffer_add_utf32 (run->buffer, rq->text, rq->text_len,
1570 run->pos, run->len);
1571 hb_buffer_set_script (run->buffer, run->script);
1572 hb_buffer_set_language (run->buffer, rq->text_info[run->pos].lang);
1573 hb_buffer_set_direction (run->buffer, run->direction);
1574 hb_buffer_set_flags (run->buffer, hb_buffer_flags);
1575
1576 #ifdef HAVE_HB_BUFFER_SET_INVISIBLE_GLYPH
1577 if (rq->invisible_glyph > 0)
1578 hb_buffer_set_invisible_glyph (run->buffer, rq->invisible_glyph);
1579 #endif
1580
1581 hb_shape_full (run->font, run->buffer, rq->features, rq->features_len,
1582 NULL);
1583 }
1584
1585 return true;
1586 }
1587
1588 /* Convert index from UTF-32 to UTF-8 */
1589 static uint32_t
_raqm_u32_to_u8_index(raqm_t * rq,uint32_t index)1590 _raqm_u32_to_u8_index (raqm_t *rq,
1591 uint32_t index)
1592 {
1593 FriBidiStrIndex length;
1594 char *output = malloc ((sizeof (char) * 4 * index) + 1);
1595
1596 length = fribidi_unicode_to_charset (FRIBIDI_CHAR_SET_UTF8,
1597 rq->text,
1598 index,
1599 output);
1600
1601 free (output);
1602 return length;
1603 }
1604
1605 /* Convert index from UTF-8 to UTF-32 */
1606 static uint32_t
_raqm_u8_to_u32_index(raqm_t * rq,uint32_t index)1607 _raqm_u8_to_u32_index (raqm_t *rq,
1608 uint32_t index)
1609 {
1610 FriBidiStrIndex length;
1611 uint32_t *output = malloc (sizeof (uint32_t) * (index + 1));
1612
1613 length = fribidi_charset_to_unicode (FRIBIDI_CHAR_SET_UTF8,
1614 rq->text_utf8,
1615 index,
1616 output);
1617
1618 free (output);
1619 return length;
1620 }
1621
1622 static bool
1623 _raqm_allowed_grapheme_boundary (hb_codepoint_t l_char,
1624 hb_codepoint_t r_char);
1625
1626 static bool
1627 _raqm_in_hangul_syllable (hb_codepoint_t ch);
1628
1629 /**
1630 * raqm_index_to_position:
1631 * @rq: a #raqm_t.
1632 * @index: (inout): character index.
1633 * @x: (out): output x position.
1634 * @y: (out): output y position.
1635 *
1636 * Calculates the cursor position after the character at @index. If the character
1637 * is right-to-left, then the cursor will be at the left of it, whereas if the
1638 * character is left-to-right, then the cursor will be at the right of it.
1639 *
1640 * Return value:
1641 * %true if the process was successful, %false otherwise.
1642 *
1643 * Since: 0.2
1644 */
1645 bool
raqm_index_to_position(raqm_t * rq,size_t * index,int * x,int * y)1646 raqm_index_to_position (raqm_t *rq,
1647 size_t *index,
1648 int *x,
1649 int *y)
1650 {
1651 /* We don't currently support multiline, so y is always 0 */
1652 *y = 0;
1653 *x = 0;
1654
1655 if (rq == NULL)
1656 return false;
1657
1658 if (rq->flags & RAQM_FLAG_UTF8)
1659 *index = _raqm_u8_to_u32_index (rq, *index);
1660
1661 if (*index >= rq->text_len)
1662 return false;
1663
1664 RAQM_TEST ("\n");
1665
1666 while (*index < rq->text_len)
1667 {
1668 if (_raqm_allowed_grapheme_boundary (rq->text[*index], rq->text[*index + 1]))
1669 break;
1670
1671 ++*index;
1672 }
1673
1674 for (raqm_run_t *run = rq->runs; run != NULL; run = run->next)
1675 {
1676 size_t len;
1677 hb_glyph_info_t *info;
1678 hb_glyph_position_t *position;
1679 len = hb_buffer_get_length (run->buffer);
1680 info = hb_buffer_get_glyph_infos (run->buffer, NULL);
1681 position = hb_buffer_get_glyph_positions (run->buffer, NULL);
1682
1683 for (size_t i = 0; i < len; i++)
1684 {
1685 uint32_t curr_cluster = info[i].cluster;
1686 uint32_t next_cluster = curr_cluster;
1687 *x += position[i].x_advance;
1688
1689 if (run->direction == HB_DIRECTION_LTR)
1690 {
1691 for (size_t j = i + 1; j < len && next_cluster == curr_cluster; j++)
1692 next_cluster = info[j].cluster;
1693 }
1694 else
1695 {
1696 for (int j = i - 1; i != 0 && j >= 0 && next_cluster == curr_cluster;
1697 j--)
1698 next_cluster = info[j].cluster;
1699 }
1700
1701 if (next_cluster == curr_cluster)
1702 next_cluster = run->pos + run->len;
1703
1704 if (*index < next_cluster && *index >= curr_cluster)
1705 {
1706 if (run->direction == HB_DIRECTION_RTL)
1707 *x -= position[i].x_advance;
1708 *index = curr_cluster;
1709 goto found;
1710 }
1711 }
1712 }
1713
1714 found:
1715 if (rq->flags & RAQM_FLAG_UTF8)
1716 *index = _raqm_u32_to_u8_index (rq, *index);
1717 RAQM_TEST ("The position is %d at index %zu\n",*x ,*index);
1718 return true;
1719 }
1720
1721 /**
1722 * raqm_position_to_index:
1723 * @rq: a #raqm_t.
1724 * @x: x position.
1725 * @y: y position.
1726 * @index: (out): output character index.
1727 *
1728 * Returns the @index of the character at @x and @y position within text.
1729 * If the position is outside the text, the last character is chosen as
1730 * @index.
1731 *
1732 * Return value:
1733 * %true if the process was successful, %false in case of error.
1734 *
1735 * Since: 0.2
1736 */
1737 bool
raqm_position_to_index(raqm_t * rq,int x,int y,size_t * index)1738 raqm_position_to_index (raqm_t *rq,
1739 int x,
1740 int y,
1741 size_t *index)
1742 {
1743 int delta_x = 0, current_x = 0;
1744 (void)y;
1745
1746 if (rq == NULL)
1747 return false;
1748
1749 if (x < 0) /* Get leftmost index */
1750 {
1751 if (rq->resolved_dir == RAQM_DIRECTION_RTL)
1752 *index = rq->text_len;
1753 else
1754 *index = 0;
1755 return true;
1756 }
1757
1758 RAQM_TEST ("\n");
1759
1760 for (raqm_run_t *run = rq->runs; run != NULL; run = run->next)
1761 {
1762 size_t len;
1763 hb_glyph_info_t *info;
1764 hb_glyph_position_t *position;
1765 len = hb_buffer_get_length (run->buffer);
1766 info = hb_buffer_get_glyph_infos (run->buffer, NULL);
1767 position = hb_buffer_get_glyph_positions (run->buffer, NULL);
1768
1769 for (size_t i = 0; i < len; i++)
1770 {
1771 delta_x = position[i].x_advance;
1772 if (x < (current_x + delta_x))
1773 {
1774 bool before = false;
1775 if (run->direction == HB_DIRECTION_LTR)
1776 before = (x < current_x + (delta_x / 2));
1777 else
1778 before = (x > current_x + (delta_x / 2));
1779
1780 if (before)
1781 *index = info[i].cluster;
1782 else
1783 {
1784 uint32_t curr_cluster = info[i].cluster;
1785 uint32_t next_cluster = curr_cluster;
1786 if (run->direction == HB_DIRECTION_LTR)
1787 for (size_t j = i + 1; j < len && next_cluster == curr_cluster; j++)
1788 next_cluster = info[j].cluster;
1789 else
1790 for (int j = i - 1; i != 0 && j >= 0 && next_cluster == curr_cluster;
1791 j--)
1792 next_cluster = info[j].cluster;
1793
1794 if (next_cluster == curr_cluster)
1795 next_cluster = run->pos + run->len;
1796
1797 *index = next_cluster;
1798 }
1799 if (_raqm_allowed_grapheme_boundary (rq->text[*index],rq->text[*index + 1]))
1800 {
1801 RAQM_TEST ("The start-index is %zu at position %d \n", *index, x);
1802 return true;
1803 }
1804
1805 while (*index < (unsigned)run->pos + run->len)
1806 {
1807 if (_raqm_allowed_grapheme_boundary (rq->text[*index],
1808 rq->text[*index + 1]))
1809 {
1810 *index += 1;
1811 break;
1812 }
1813 *index += 1;
1814 }
1815 RAQM_TEST ("The start-index is %zu at position %d \n", *index, x);
1816 return true;
1817 }
1818 else
1819 current_x += delta_x;
1820 }
1821 }
1822
1823 /* Get rightmost index*/
1824 if (rq->resolved_dir == RAQM_DIRECTION_RTL)
1825 *index = 0;
1826 else
1827 *index = rq->text_len;
1828
1829 RAQM_TEST ("The start-index is %zu at position %d \n", *index, x);
1830
1831 return true;
1832 }
1833
/* Grapheme cluster break classes distinguished by
 * _raqm_get_grapheme_break and consumed by
 * _raqm_allowed_grapheme_boundary. */
typedef enum
{
  RAQM_GRAPHEM_CR,                 /* U+000D */
  RAQM_GRAPHEM_LF,                 /* U+000A */
  RAQM_GRAPHEM_CONTROL,            /* control and format characters */
  RAQM_GRAPHEM_EXTEND,             /* extending characters, incl. ZWNJ/ZWJ */
  RAQM_GRAPHEM_REGIONAL_INDICATOR, /* U+1F1E6..U+1F1FF */
  RAQM_GRAPHEM_PREPEND,            /* prepended characters */
  RAQM_GRAPHEM_SPACING_MARK,       /* spacing combining marks */
  RAQM_GRAPHEM_HANGUL_SYLLABLE,    /* part of a Hangul syllable */
  RAQM_GRAPHEM_OTHER               /* everything else */
} _raqm_grapheme_t;
1846
1847 static _raqm_grapheme_t
1848 _raqm_get_grapheme_break (hb_codepoint_t ch,
1849 hb_unicode_general_category_t category);
1850
1851 static bool
_raqm_allowed_grapheme_boundary(hb_codepoint_t l_char,hb_codepoint_t r_char)1852 _raqm_allowed_grapheme_boundary (hb_codepoint_t l_char,
1853 hb_codepoint_t r_char)
1854 {
1855 hb_unicode_general_category_t l_category;
1856 hb_unicode_general_category_t r_category;
1857 _raqm_grapheme_t l_grapheme, r_grapheme;
1858 hb_unicode_funcs_t* unicode_funcs = hb_unicode_funcs_get_default ();
1859
1860 l_category = hb_unicode_general_category (unicode_funcs, l_char);
1861 r_category = hb_unicode_general_category (unicode_funcs, r_char);
1862 l_grapheme = _raqm_get_grapheme_break (l_char, l_category);
1863 r_grapheme = _raqm_get_grapheme_break (r_char, r_category);
1864
1865 if (l_grapheme == RAQM_GRAPHEM_CR && r_grapheme == RAQM_GRAPHEM_LF)
1866 return false; /*Do not break between a CR and LF GB3*/
1867 if (l_grapheme == RAQM_GRAPHEM_CONTROL || l_grapheme == RAQM_GRAPHEM_CR ||
1868 l_grapheme == RAQM_GRAPHEM_LF || r_grapheme == RAQM_GRAPHEM_CONTROL ||
1869 r_grapheme == RAQM_GRAPHEM_CR || r_grapheme == RAQM_GRAPHEM_LF)
1870 return true; /*Break before and after CONTROL GB4, GB5*/
1871 if (r_grapheme == RAQM_GRAPHEM_HANGUL_SYLLABLE)
1872 return false; /*Do not break Hangul syllable sequences. GB6, GB7, GB8*/
1873 if (l_grapheme == RAQM_GRAPHEM_REGIONAL_INDICATOR &&
1874 r_grapheme == RAQM_GRAPHEM_REGIONAL_INDICATOR)
1875 return false; /*Do not break between regional indicator symbols. GB8a*/
1876 if (r_grapheme == RAQM_GRAPHEM_EXTEND)
1877 return false; /*Do not break before extending characters. GB9*/
1878 /*Do not break before SpacingMarks, or after Prepend characters.GB9a, GB9b*/
1879 if (l_grapheme == RAQM_GRAPHEM_PREPEND)
1880 return false;
1881 if (r_grapheme == RAQM_GRAPHEM_SPACING_MARK)
1882 return false;
1883 return true; /*Otherwise, break everywhere. GB1, GB2, GB10*/
1884 }
1885
1886 static _raqm_grapheme_t
_raqm_get_grapheme_break(hb_codepoint_t ch,hb_unicode_general_category_t category)1887 _raqm_get_grapheme_break (hb_codepoint_t ch,
1888 hb_unicode_general_category_t category)
1889 {
1890 _raqm_grapheme_t gb_type;
1891
1892 gb_type = RAQM_GRAPHEM_OTHER;
1893 switch ((int)category)
1894 {
1895 case HB_UNICODE_GENERAL_CATEGORY_FORMAT:
1896 if (ch == 0x200C || ch == 0x200D)
1897 gb_type = RAQM_GRAPHEM_EXTEND;
1898 else
1899 gb_type = RAQM_GRAPHEM_CONTROL;
1900 break;
1901
1902 case HB_UNICODE_GENERAL_CATEGORY_CONTROL:
1903 if (ch == 0x000D)
1904 gb_type = RAQM_GRAPHEM_CR;
1905 else if (ch == 0x000A)
1906 gb_type = RAQM_GRAPHEM_LF;
1907 else
1908 gb_type = RAQM_GRAPHEM_CONTROL;
1909 break;
1910
1911 case HB_UNICODE_GENERAL_CATEGORY_SURROGATE:
1912 case HB_UNICODE_GENERAL_CATEGORY_LINE_SEPARATOR:
1913 case HB_UNICODE_GENERAL_CATEGORY_PARAGRAPH_SEPARATOR:
1914 case HB_UNICODE_GENERAL_CATEGORY_UNASSIGNED:
1915 if ((ch >= 0xFFF0 && ch <= 0xFFF8) ||
1916 (ch >= 0xE0000 && ch <= 0xE0FFF))
1917 gb_type = RAQM_GRAPHEM_CONTROL;
1918 break;
1919
1920 case HB_UNICODE_GENERAL_CATEGORY_NON_SPACING_MARK:
1921 case HB_UNICODE_GENERAL_CATEGORY_ENCLOSING_MARK:
1922 case HB_UNICODE_GENERAL_CATEGORY_SPACING_MARK:
1923 if (ch != 0x102B && ch != 0x102C && ch != 0x1038 &&
1924 (ch < 0x1062 || ch > 0x1064) && (ch < 0x1067 || ch > 0x106D) &&
1925 ch != 0x1083 && (ch < 0x1087 || ch > 0x108C) && ch != 0x108F &&
1926 (ch < 0x109A || ch > 0x109C) && ch != 0x1A61 && ch != 0x1A63 &&
1927 ch != 0x1A64 && ch != 0xAA7B && ch != 0xAA70 && ch != 0x11720 &&
1928 ch != 0x11721) /**/
1929 gb_type = RAQM_GRAPHEM_SPACING_MARK;
1930
1931 else if (ch == 0x09BE || ch == 0x09D7 ||
1932 ch == 0x0B3E || ch == 0x0B57 || ch == 0x0BBE || ch == 0x0BD7 ||
1933 ch == 0x0CC2 || ch == 0x0CD5 || ch == 0x0CD6 ||
1934 ch == 0x0D3E || ch == 0x0D57 || ch == 0x0DCF || ch == 0x0DDF ||
1935 ch == 0x1D165 || (ch >= 0x1D16E && ch <= 0x1D172))
1936 gb_type = RAQM_GRAPHEM_EXTEND;
1937 break;
1938
1939 case HB_UNICODE_GENERAL_CATEGORY_OTHER_LETTER:
1940 if (ch == 0x0E33 || ch == 0x0EB3)
1941 gb_type = RAQM_GRAPHEM_SPACING_MARK;
1942 break;
1943
1944 case HB_UNICODE_GENERAL_CATEGORY_OTHER_SYMBOL:
1945 if (ch >= 0x1F1E6 && ch <= 0x1F1FF)
1946 gb_type = RAQM_GRAPHEM_REGIONAL_INDICATOR;
1947 break;
1948
1949 default:
1950 gb_type = RAQM_GRAPHEM_OTHER;
1951 break;
1952 }
1953
1954 if (_raqm_in_hangul_syllable (ch))
1955 gb_type = RAQM_GRAPHEM_HANGUL_SYLLABLE;
1956
1957 return gb_type;
1958 }
1959
1960 static bool
_raqm_in_hangul_syllable(hb_codepoint_t ch)1961 _raqm_in_hangul_syllable (hb_codepoint_t ch)
1962 {
1963 (void)ch;
1964 return false;
1965 }
1966
1967 /**
1968 * raqm_version:
1969 * @major: (out): Library major version component.
1970 * @minor: (out): Library minor version component.
1971 * @micro: (out): Library micro version component.
1972 *
1973 * Returns library version as three integer components.
1974 *
1975 * Since: 0.7
1976 **/
1977 void
raqm_version(unsigned int * major,unsigned int * minor,unsigned int * micro)1978 raqm_version (unsigned int *major,
1979 unsigned int *minor,
1980 unsigned int *micro)
1981 {
1982 *major = RAQM_VERSION_MAJOR;
1983 *minor = RAQM_VERSION_MINOR;
1984 *micro = RAQM_VERSION_MICRO;
1985 }
1986
1987 /**
1988 * raqm_version_string:
1989 *
1990 * Returns library version as a string with three components.
1991 *
1992 * Return value: library version string.
1993 *
1994 * Since: 0.7
1995 **/
1996 const char *
raqm_version_string(void)1997 raqm_version_string (void)
1998 {
1999 return RAQM_VERSION_STRING;
2000 }
2001
/**
 * raqm_version_atleast:
 * @major: Library major version component.
 * @minor: Library minor version component.
 * @micro: Library micro version component.
 *
 * Compares the library version with the specified version by evaluating
 * RAQM_VERSION_ATLEAST() on the three components.
 *
 * Return value:
 * %true if RAQM_VERSION_ATLEAST() holds for the specified version, that is,
 * if the library version is at least the specified version, %false
 * otherwise.
 *
 * Since: 0.7
 **/
bool
raqm_version_atleast (unsigned int major,
                      unsigned int minor,
                      unsigned int micro)
{
  if (RAQM_VERSION_ATLEAST (major, minor, micro))
    return true;

  return false;
}
2023
2024 /**
2025 * RAQM_VERSION_ATLEAST:
2026 * @major: Library major version component.
2027 * @minor: Library minor version component.
2028 * @micro: Library micro version component.
2029 *
 * Checks if library version is greater than or equal to the specified
 * version.
 *
 * Return value:
 * %true if library version is greater than or equal to the specified
 * version, %false otherwise.
2035 *
2036 * Since: 0.7
2037 **/
2038
2039 /**
2040 * RAQM_VERSION_STRING:
2041 *
2042 * Library version as a string with three components.
2043 *
2044 * Since: 0.7
2045 **/
2046
2047 /**
2048 * RAQM_VERSION_MAJOR:
2049 *
2050 * Library major version component.
2051 *
2052 * Since: 0.7
2053 **/
2054
2055 /**
2056 * RAQM_VERSION_MINOR:
2057 *
2058 * Library minor version component.
2059 *
2060 * Since: 0.7
2061 **/
2062
2063 /**
2064 * RAQM_VERSION_MICRO:
2065 *
2066 * Library micro version component.
2067 *
2068 * Since: 0.7
2069 **/
2070