1 /*******************************************************
2 
3    CoolReader Engine C-compatible API
4 
5    lvtextfm.cpp:  Text formatter
6 
7    (c) Vadim Lopatin, 2000-2011
8    This source code is distributed under the terms of
9    GNU General Public License
10    See LICENSE file for details
11 
12 *******************************************************/
13 
14 #include <stdlib.h>
15 #include <stdio.h>
16 #include <string.h>
17 
18 
19 #include "../include/crsetup.h"
20 #include "../include/lvfnt.h"
21 #include "../include/lvtextfm.h"
22 #include "../include/lvdrawbuf.h"
23 #include "../include/fb2def.h"
24 
25 #ifdef __cplusplus
26 #include "../include/lvimg.h"
27 #include "../include/lvtinydom.h"
28 #include "../include/lvrend.h"
29 #include "../include/textlang.h"
30 #endif
31 
32 #if USE_HARFBUZZ==1
33 #include <hb.h>
34 #endif
35 
36 #if (USE_FRIBIDI==1)
37 #if BUNDLED_FRIBIDI==1
38 #include "fribidi.h"
39 #else
40 #include <fribidi/fribidi.h>
41 #endif
42 #endif
43 
44 #define SPACE_WIDTH_SCALE_PERCENT 100
45 #define MIN_SPACE_CONDENSING_PERCENT 50
46 #define UNUSED_SPACE_THRESHOLD_PERCENT 5
47 #define MAX_ADDED_LETTER_SPACING_PERCENT 0
48 
49 
50 // to debug formatter
51 
52 #if defined(_DEBUG) && 0
53 #define TRACE_LINE_SPLITTING 1
54 #else
55 #define TRACE_LINE_SPLITTING 0
56 #endif
57 
58 #if TRACE_LINE_SPLITTING==1
59 #ifdef _MSC_VER
60 #define TR(...) CRLog::trace(__VA_ARGS__)
61 #else
62 #define TR(x...) CRLog::trace(x)
63 #endif
64 #else
65 #ifdef _MSC_VER
66 #define TR(...)
67 #else
68 #define TR(x...)
69 #endif
70 #endif
71 
72 #define FRM_ALLOC_SIZE 16
73 #define FLT_ALLOC_SIZE 4
74 
75 // lvfreetypeface.cpp
76 #if USE_HARFBUZZ==1
77 extern bool isHBScriptCursive( hb_script_t script );
78 #endif
79 
lvtextAllocFormattedLine()80 formatted_line_t * lvtextAllocFormattedLine( )
81 {
82     formatted_line_t * pline = (formatted_line_t *)calloc(1, sizeof(*pline));
83     return pline;
84 }
85 
lvtextAllocFormattedLineCopy(formatted_word_t * words,int word_count)86 formatted_line_t * lvtextAllocFormattedLineCopy( formatted_word_t * words, int word_count )
87 {
88     formatted_line_t * pline = (formatted_line_t *)calloc(1, sizeof(*pline));
89     lUInt32 size = (word_count + FRM_ALLOC_SIZE-1) / FRM_ALLOC_SIZE * FRM_ALLOC_SIZE;
90     pline->words = (formatted_word_t*)malloc( sizeof(formatted_word_t)*(size) );
91     memcpy( pline->words, words, word_count * sizeof(formatted_word_t) );
92     return pline;
93 }
94 
lvtextFreeFormattedLine(formatted_line_t * pline)95 void lvtextFreeFormattedLine( formatted_line_t * pline )
96 {
97     if (pline->words)
98         free( pline->words );
99     free(pline);
100 }
101 
lvtextAddFormattedWord(formatted_line_t * pline)102 formatted_word_t * lvtextAddFormattedWord( formatted_line_t * pline )
103 {
104     int size = (pline->word_count + FRM_ALLOC_SIZE-1) / FRM_ALLOC_SIZE * FRM_ALLOC_SIZE;
105     if ( pline->word_count >= size)
106     {
107         size += FRM_ALLOC_SIZE;
108         pline->words = cr_realloc( pline->words, size );
109     }
110     return &pline->words[ pline->word_count++ ];
111 }
112 
lvtextAddFormattedLine(formatted_text_fragment_t * pbuffer)113 formatted_line_t * lvtextAddFormattedLine( formatted_text_fragment_t * pbuffer )
114 {
115     int size = (pbuffer->frmlinecount + FRM_ALLOC_SIZE-1) / FRM_ALLOC_SIZE * FRM_ALLOC_SIZE;
116     if (pbuffer->frmlinecount >= size)
117     {
118         size += FRM_ALLOC_SIZE;
119         pbuffer->frmlines = cr_realloc( pbuffer->frmlines, size );
120     }
121     return (pbuffer->frmlines[ pbuffer->frmlinecount++ ] = lvtextAllocFormattedLine());
122 }
123 
lvtextAddFormattedLineCopy(formatted_text_fragment_t * pbuffer,formatted_word_t * words,int words_count)124 formatted_line_t * lvtextAddFormattedLineCopy( formatted_text_fragment_t * pbuffer, formatted_word_t * words, int words_count )
125 {
126     int size = (pbuffer->frmlinecount + FRM_ALLOC_SIZE-1) / FRM_ALLOC_SIZE * FRM_ALLOC_SIZE;
127     if ( pbuffer->frmlinecount >= size)
128     {
129         size += FRM_ALLOC_SIZE;
130         pbuffer->frmlines = cr_realloc( pbuffer->frmlines, size );
131     }
132     return (pbuffer->frmlines[ pbuffer->frmlinecount++ ] = lvtextAllocFormattedLineCopy(words, words_count));
133 }
134 
lvtextAllocEmbeddedFloat()135 embedded_float_t * lvtextAllocEmbeddedFloat( )
136 {
137     embedded_float_t * flt = (embedded_float_t *)calloc(1, sizeof(*flt));
138     return flt;
139 }
140 
lvtextAddEmbeddedFloat(formatted_text_fragment_t * pbuffer)141 embedded_float_t * lvtextAddEmbeddedFloat( formatted_text_fragment_t * pbuffer )
142 {
143     int size = (pbuffer->floatcount + FLT_ALLOC_SIZE-1) / FLT_ALLOC_SIZE * FLT_ALLOC_SIZE;
144     if (pbuffer->floatcount >= size)
145     {
146         size += FLT_ALLOC_SIZE;
147         pbuffer->floats = cr_realloc( pbuffer->floats, size );
148     }
149     return (pbuffer->floats[ pbuffer->floatcount++ ] = lvtextAllocEmbeddedFloat());
150 }
151 
152 
lvtextAllocFormatter(lUInt16 width)153 formatted_text_fragment_t * lvtextAllocFormatter( lUInt16 width )
154 {
155     formatted_text_fragment_t * pbuffer = (formatted_text_fragment_t*)calloc(1, sizeof(*pbuffer));
156     pbuffer->width = width;
157     pbuffer->strut_height = 0;
158     pbuffer->strut_baseline = 0;
159     pbuffer->is_reusable = true;
160     pbuffer->light_formatting = false;
161     int defMode = MAX_IMAGE_SCALE_MUL > 1 ? (ARBITRARY_IMAGE_SCALE_ENABLED==1 ? 2 : 1) : 0;
162     int defMult = MAX_IMAGE_SCALE_MUL;
163     // Notes from thornyreader:
164     // mode: 0=disabled, 1=integer scaling factors, 2=free scaling
165     // scale: 0=auto based on font size, 1=no zoom, 2=scale up to *2, 3=scale up to *3
166     pbuffer->img_zoom_in_mode_block = defMode; /**< can zoom in block images: 0=disabled, 1=integer scale, 2=free scale */
167     pbuffer->img_zoom_in_scale_block = defMult; /**< max scale for block images zoom in: 1, 2, 3 */
168     pbuffer->img_zoom_in_mode_inline = defMode; /**< can zoom in inline images: 0=disabled, 1=integer scale, 2=free scale */
169     pbuffer->img_zoom_in_scale_inline = defMult; /**< max scale for inline images zoom in: 1, 2, 3 */
170     pbuffer->img_zoom_out_mode_block = defMode; /**< can zoom out block images: 0=disabled, 1=integer scale, 2=free scale */
171     pbuffer->img_zoom_out_scale_block = defMult; /**< max scale for block images zoom out: 1, 2, 3 */
172     pbuffer->img_zoom_out_mode_inline = defMode; /**< can zoom out inline images: 0=disabled, 1=integer scale, 2=free scale */
173     pbuffer->img_zoom_out_scale_inline = defMult; /**< max scale for inline images zoom out: 1, 2, 3 */
174     pbuffer->space_width_scale_percent = SPACE_WIDTH_SCALE_PERCENT; // 100% (keep original width)
175     pbuffer->min_space_condensing_percent = MIN_SPACE_CONDENSING_PERCENT; // 50%
176     pbuffer->unused_space_threshold_percent = UNUSED_SPACE_THRESHOLD_PERCENT; // 5%
177     pbuffer->max_added_letter_spacing_percent = MAX_ADDED_LETTER_SPACING_PERCENT; // 0%
178 
179     return pbuffer;
180 }
181 
lvtextFreeFormatter(formatted_text_fragment_t * pbuffer)182 void lvtextFreeFormatter( formatted_text_fragment_t * pbuffer )
183 {
184     if (pbuffer->srctext)
185     {
186         for (int i=0; i<pbuffer->srctextlen; i++)
187         {
188             if (pbuffer->srctext[i].flags & LTEXT_FLAG_OWNTEXT)
189                 free( (void*)pbuffer->srctext[i].t.text );
190         }
191         free( pbuffer->srctext );
192     }
193     if (pbuffer->frmlines)
194     {
195         for (int i=0; i<pbuffer->frmlinecount; i++)
196         {
197             lvtextFreeFormattedLine( pbuffer->frmlines[i] );
198         }
199         free( pbuffer->frmlines );
200     }
201     if (pbuffer->floats)
202     {
203         for (int i=0; i<pbuffer->floatcount; i++)
204         {
205             if (pbuffer->floats[i]->links) {
206                 delete pbuffer->floats[i]->links;
207             }
208             free(pbuffer->floats[i]);
209         }
210         free( pbuffer->floats );
211     }
212     free(pbuffer);
213 }
214 
215 
lvtextAddSourceLine(formatted_text_fragment_t * pbuffer,lvfont_handle font,TextLangCfg * lang_cfg,const lChar32 * text,lUInt32 len,lUInt32 color,lUInt32 bgcolor,lUInt32 flags,lInt16 interval,lInt16 valign_dy,lInt16 indent,void * object,lUInt16 offset,lInt16 letter_spacing)216 void lvtextAddSourceLine( formatted_text_fragment_t * pbuffer,
217    lvfont_handle   font,     /* handle of font to draw string */
218    TextLangCfg *   lang_cfg,
219    const lChar32 * text,     /* pointer to unicode text string */
220    lUInt32         len,      /* number of chars in text, 0 for auto(strlen) */
221    lUInt32         color,    /* color */
222    lUInt32         bgcolor,  /* bgcolor */
223    lUInt32         flags,    /* flags */
224    lInt16          interval, /* line height in screen pixels */
225    lInt16          valign_dy, /* drift y from baseline */
226    lInt16          indent,    /* first line indent (or all but first, when negative) */
227    void *          object,    /* pointer to custom object */
228    lUInt16         offset,
229    lInt16          letter_spacing
230                          )
231 {
232     int srctextsize = (pbuffer->srctextlen + FRM_ALLOC_SIZE-1) / FRM_ALLOC_SIZE * FRM_ALLOC_SIZE;
233     if ( pbuffer->srctextlen >= srctextsize)
234     {
235         srctextsize += FRM_ALLOC_SIZE;
236         pbuffer->srctext = cr_realloc( pbuffer->srctext, srctextsize );
237     }
238     src_text_fragment_t * pline = &pbuffer->srctext[ pbuffer->srctextlen++ ];
239     pline->t.font = font;
240 //    if (font) {
241 //        // DEBUG: check for crash
242 //        CRLog::trace("c font = %08x  txt = %08x", (lUInt32)font, (lUInt32)text);
243 //        ((LVFont*)font)->getVisualAligmentWidth();
244 //    }
245 //    if (font == NULL && ((flags & LTEXT_WORD_IS_OBJECT) == 0)) {
246 //        CRLog::fatal("No font specified for text");
247 //    }
248     if ( !lang_cfg )
249         lang_cfg = TextLangMan::getTextLangCfg(); // use main_lang
250     pline->lang_cfg = lang_cfg;
251     if (!len) for (len=0; text[len]; len++) ;
252     if (flags & LTEXT_FLAG_OWNTEXT)
253     {
254         /* make own copy of text */
255         // We do a bit ugly to avoid clang-tidy warning "call to 'malloc' has an
256         // allocation size of 0 bytes" without having to add checks for NULL pointer
257         // (in lvrend.cpp, we're normalling not adding empty text with LTEXT_FLAG_OWNTEXT)
258         lUInt32 alloc_len = len > 0 ? len : 1;
259         pline->t.text = (lChar32*)malloc( alloc_len * sizeof(lChar32) );
260         memcpy((void*)pline->t.text, text, len * sizeof(lChar32));
261     }
262     else
263     {
264         pline->t.text = text;
265     }
266     pline->index = (lUInt16)(pbuffer->srctextlen-1);
267     pline->object = object;
268     pline->t.len = (lUInt16)len;
269     pline->indent = indent;
270     pline->flags = flags;
271     pline->interval = interval;
272     pline->valign_dy = valign_dy;
273     pline->t.offset = offset;
274     pline->color = color;
275     pline->bgcolor = bgcolor;
276     pline->letter_spacing = letter_spacing;
277 }
278 
lvtextAddSourceObject(formatted_text_fragment_t * pbuffer,lInt16 width,lInt16 height,lUInt32 flags,lInt16 interval,lInt16 valign_dy,lInt16 indent,void * object,TextLangCfg * lang_cfg,lInt16 letter_spacing)279 void lvtextAddSourceObject(
280    formatted_text_fragment_t * pbuffer,
281    lInt16         width,
282    lInt16         height,
283    lUInt32         flags,     /* flags */
284    lInt16          interval,  /* line height in screen pixels */
285    lInt16          valign_dy, /* drift y from baseline */
286    lInt16          indent,    /* first line indent (or all but first, when negative) */
287    void *          object,    /* pointer to custom object */
288    TextLangCfg *   lang_cfg,
289    lInt16          letter_spacing
290                          )
291 {
292     int srctextsize = (pbuffer->srctextlen + FRM_ALLOC_SIZE-1) / FRM_ALLOC_SIZE * FRM_ALLOC_SIZE;
293     if ( pbuffer->srctextlen >= srctextsize)
294     {
295         srctextsize += FRM_ALLOC_SIZE;
296         pbuffer->srctext = cr_realloc( pbuffer->srctext, srctextsize );
297     }
298     src_text_fragment_t * pline = &pbuffer->srctext[ pbuffer->srctextlen++ ];
299     pline->index = (lUInt16)(pbuffer->srctextlen-1);
300     pline->o.width = width;
301     pline->o.height = height;
302     pline->object = object;
303     pline->indent = indent;
304     pline->flags = flags | LTEXT_SRC_IS_OBJECT;
305     pline->interval = interval;
306     pline->valign_dy = valign_dy;
307     pline->letter_spacing = letter_spacing;
308     if ( !lang_cfg )
309         lang_cfg = TextLangMan::getTextLangCfg(); // use main_lang
310     pline->lang_cfg = lang_cfg;
311 }
312 
313 
314 #define DEPRECATED_LINE_BREAK_WORD_COUNT    3
315 #define DEPRECATED_LINE_BREAK_SPACE_LIMIT   64
316 
317 
318 #ifdef __cplusplus
319 
320 #define DUMMY_IMAGE_SIZE 16
321 
AddSourceObject(lUInt32 flags,lInt16 interval,lInt16 valign_dy,lInt16 indent,void * object,TextLangCfg * lang_cfg,lInt16 letter_spacing)322 void LFormattedText::AddSourceObject(
323             lUInt32         flags,     /* flags */
324             lInt16          interval,  /* line height in screen pixels */
325             lInt16          valign_dy, /* drift y from baseline */
326             lInt16          indent,    /* first line indent (or all but first, when negative) */
327             void *          object,    /* pointer to custom object */
328             TextLangCfg *   lang_cfg,
329             lInt16          letter_spacing
330      )
331 {
332     ldomNode * node = (ldomNode*)object;
333     if (!node || node->isNull()) {
334         TR("LFormattedText::AddSourceObject(): node is NULL!");
335         return;
336     }
337 
338     if (flags & LTEXT_SRC_IS_FLOAT) { // not an image but a float:'ing node
339         // Nothing much to do with it at this point
340         lvtextAddSourceObject(m_pbuffer, 0, 0,
341             flags, interval, valign_dy, indent, object, lang_cfg, letter_spacing );
342             // lvtextAddSourceObject will itself add to flags: | LTEXT_SRC_IS_OBJECT
343             // (only flags & object parameter will be used, the others are not,
344             // but they matter if this float is the first node in a paragraph,
345             // as the code may grab them from the first source)
346         return;
347     }
348     if (flags & LTEXT_SRC_IS_INLINE_BOX) { // not an image but a inline-block wrapping node
349         // Nothing much to do with it at this point: we can't yet render it to
350         // get its width & neight, as they might be in % of our main width, that
351         // we don't know yet (but only when ->Format() is called).
352         lvtextAddSourceObject(m_pbuffer, 0, 0,
353             flags, interval, valign_dy, indent, object, lang_cfg, letter_spacing );
354             // lvtextAddSourceObject will itself add to flags: | LTEXT_SRC_IS_OBJECT
355         return;
356     }
357 
358     LVImageSourceRef img = node->getObjectImageSource();
359     if ( img.isNull() )
360         img = LVCreateDummyImageSource( node, DUMMY_IMAGE_SIZE, DUMMY_IMAGE_SIZE );
361     lInt16 width = (lUInt16)img->GetWidth();
362     lInt16 height = (lUInt16)img->GetHeight();
363 
364     // Scale image native size according to gRenderDPI
365     width = scaleForRenderDPI(width);
366     height = scaleForRenderDPI(height);
367 
368     css_style_ref_t style = node->getStyle();
369     lInt16 w = 0, h = 0;
370     int em = node->getFont()->getSize();
371     w = lengthToPx(style->width, 100, em);
372     h = lengthToPx(style->height, 100, em);
373     // width in % will be computed in measureText() as a % of m_pbuffer->width
374     // For height in %, it's more complicated... see:
375     //   https://www.w3.org/TR/CSS2/visudet.html#the-width-property
376     //   https://www.w3.org/TR/CSS2/visudet.html#the-height-property
377     //   https://www.w3.org/TR/CSS2/visudet.html#inline-replaced-height
378     //   https://drafts.csswg.org/css-sizing-3/#extrinsic
379     if (style->width.type == css_val_percent)
380         w = -w;
381     if (style->height.type == css_val_percent)
382         h = w*height/width;
383 
384     if ( w*h==0 ) {
385         if ( w==0 ) {
386             if ( h==0 ) { // use image native size
387                 h = height;
388                 w = width;
389             } else { // use style height, keep aspect ratio
390                 w = width*h/height;
391             }
392         } else if ( h==0 ) { // use style width, keep aspect ratio
393             h = w*height/width;
394             if (h == 0) h = height;
395         }
396     }
397     width = w;
398     height = h;
399 
400     lvtextAddSourceObject(m_pbuffer, width, height,
401         flags, interval, valign_dy, indent, object, lang_cfg, letter_spacing );
402 }
403 
404 class LVFormatter {
405 public:
406     //LVArray<lUInt16>  widths_buf;
407     //LVArray<lUInt8>   flags_buf;
408     formatted_text_fragment_t * m_pbuffer;
409     int       m_length;
410     int       m_size;
411     bool      m_staticBufs;
412     static bool      m_staticBufs_inUse;
413     #if (USE_LIBUNIBREAK==1)
414     static bool      m_libunibreak_init_done;
415     #endif
416     lChar32 * m_text;
417     lUInt16 * m_flags;
418     src_text_fragment_t * * m_srcs;
419     lUInt16 * m_charindex;
420     int  *     m_widths;
421     int  m_y;
422     int  m_max_img_height;
423     bool m_has_images;
424     bool m_has_float_to_position;
425     bool m_has_ongoing_float;
426     bool m_no_clear_own_floats;
427     bool m_allow_strut_confining;
428     bool m_has_multiple_scripts;
429     int  m_usable_left_overflow;
430     int  m_usable_right_overflow;
431     bool m_hanging_punctuation;
432     bool m_indent_first_line_done;
433     int  m_indent_after_first_line;
434     int  m_indent_current;
435     int  m_specified_para_dir;
436     #if (USE_FRIBIDI==1)
437         // Bidi/RTL support
438         FriBidiCharType *    m_bidi_ctypes;
439         FriBidiBracketType * m_bidi_btypes;
440         FriBidiLevel *       m_bidi_levels;
441         FriBidiParType       m_para_bidi_type;
442     #endif
443     // These default to false and LTR when USE_FRIBIDI==0,
444     // just to avoid too many "#if (USE_FRIBIDI==1)"
445     bool m_has_bidi; // true when Bidi (or pure RTL) detected
446     bool m_para_dir_is_rtl; // boolean shortcut of m_para_bidi_type
447 
// These are not unicode codepoints: these values are put where we
// store text indexes in the source text node.
// So, when checking for these, also check for m_flags[i] & LCHAR_IS_OBJECT.
// Note that m_charindex, being lUInt16, assumes text nodes are not longer
// than 65535 chars. Things will get messy with longer text nodes...
453 #define OBJECT_CHAR_INDEX     ((lUInt16)0xFFFF)
454 #define FLOAT_CHAR_INDEX      ((lUInt16)0xFFFE)
455 #define INLINEBOX_CHAR_INDEX  ((lUInt16)0xFFFD)
456 
LVFormatter(formatted_text_fragment_t * pbuffer)457     LVFormatter(formatted_text_fragment_t * pbuffer)
458     : m_pbuffer(pbuffer), m_length(0), m_size(0), m_staticBufs(true), m_y(0)
459     {
460         #if (USE_LIBUNIBREAK==1)
461         if (!m_libunibreak_init_done) {
462             m_libunibreak_init_done = true;
463             // Have libunibreak build up a few lookup tables for quicker computation
464             init_linebreak();
465         }
466         #endif
467         if (m_staticBufs_inUse)
468             m_staticBufs = false;
469         m_text = NULL;
470         m_flags = NULL;
471         m_srcs = NULL;
472         m_charindex = NULL;
473         m_widths = NULL;
474         m_has_images = false,
475         m_max_img_height = -1;
476         m_has_float_to_position = false;
477         m_has_ongoing_float = false;
478         m_no_clear_own_floats = false;
479         m_has_multiple_scripts = false;
480         m_usable_left_overflow = 0;
481         m_usable_right_overflow = 0;
482         m_hanging_punctuation = false;
483         m_specified_para_dir = REND_DIRECTION_UNSET;
484         #if (USE_FRIBIDI==1)
485             m_bidi_ctypes = NULL;
486             m_bidi_btypes = NULL;
487             m_bidi_levels = NULL;
488         #endif
489     }
490 
~LVFormatter()491     ~LVFormatter()
492     {
493     }
494 
495     // Embedded floats positioning helpers.
496     // Returns y of the bottom of the lowest float
getFloatsMaxBottomY()497     int getFloatsMaxBottomY() {
498         int max_b_y = m_y;
499         for (int i=0; i<m_pbuffer->floatcount; i++) {
500             embedded_float_t * flt = m_pbuffer->floats[i];
501             // Ignore fake floats (no src) made from outer floats footprint
502             if ( flt->srctext != NULL ) {
503                 int b_y = flt->y + flt->height;
504                 if (b_y > max_b_y)
505                     max_b_y = b_y;
506             }
507         }
508         return max_b_y;
509     }
510     // Returns min y for next float
getNextFloatMinY(css_clear_t clear)511     int getNextFloatMinY(css_clear_t clear) {
512         int y = m_y; // current line y
513         for (int i=0; i<m_pbuffer->floatcount; i++) {
514             embedded_float_t * flt = m_pbuffer->floats[i];
515             if (flt->to_position) // ignore not yet positioned floats
516                 continue;
517             // A later float should never be positioned above an earlier float
518             if ( flt->y > y )
519                 y = flt->y;
520             if ( clear > css_c_none) {
521                 if ( (clear == css_c_both) || (clear == css_c_left && !flt->is_right)
522                                            || (clear == css_c_right && flt->is_right) ) {
523                     int b_y = flt->y + flt->height;
524                     if (b_y > y)
525                         y = b_y;
526                 }
527             }
528         }
529         return y;
530     }
531     // Returns available width (for text or a new float) available at y
532     // and between y and y+h
533     // Also set offset_x to the x where this width is available
getAvailableWidthAtY(int start_y,int h,int & offset_x)534     int getAvailableWidthAtY(int start_y, int h, int & offset_x) {
535         if (m_pbuffer->floatcount == 0) { // common short path when no float
536             offset_x = 0;
537             return m_pbuffer->width;
538         }
539         int fl_left_max_x = 0;
540         int fl_right_min_x = m_pbuffer->width;
541         // We need to scan line by line from start_y to start_y+h to be sure
542         int y = start_y;
543         while (y <= start_y + h) {
544             for (int i=0; i<m_pbuffer->floatcount; i++) {
545                 embedded_float_t * flt = m_pbuffer->floats[i];
546                 if (flt->to_position) // ignore not yet positioned floats
547                     continue;
548                 if (flt->y <= y && flt->y + flt->height > y) { // this float is spanning this y
549                     if (flt->is_right) {
550                         if (flt->x < fl_right_min_x)
551                             fl_right_min_x = flt->x;
552                     }
553                     else {
554                         if (flt->x + flt->width > fl_left_max_x)
555                             fl_left_max_x = flt->x + flt->width;
556                     }
557                 }
558             }
559             y += 1;
560         }
561         offset_x = fl_left_max_x;
562         return fl_right_min_x - fl_left_max_x;
563     }
564     // Returns next y after start_y where required_width is available
565     // Also set offset_x to the x where that width is available
getYWithAvailableWidth(int start_y,int required_width,int required_height,int & offset_x,bool get_right_offset_x=false)566     int getYWithAvailableWidth(int start_y, int required_width, int required_height, int & offset_x, bool get_right_offset_x=false) {
567         int y = start_y;
568         int w;
569         while (true) {
570             w = getAvailableWidthAtY(y, required_height, offset_x);
571             if (w >= required_width) // found it
572                 break;
573             if (w == m_pbuffer->width) { // We're past all floats
574                 // returns this y even if required_width is larger than
575                 // m_pbuffer->width and it will overflow
576                 offset_x = 0;
577                 break;
578             }
579             y += 1;
580         }
581         if (get_right_offset_x) {
582             int left_floats_w = offset_x;
583             int right_floats_w = m_pbuffer->width - left_floats_w - w;
584             offset_x = m_pbuffer->width - right_floats_w - required_width;
585             if (offset_x < 0) // overflow
586                 offset_x = 0;
587         }
588         return y;
589     }
    // The following positioning code is not the most efficient, as it
    // calls the previous functions, which do many of the same kind of loops.
    // But it's the clearest way to express the decision flow.
593 
594     /// Embedded (among other inline elements) floats management
addFloat(src_text_fragment_t * src,int currentTextWidth)595     void addFloat(src_text_fragment_t * src, int currentTextWidth) {
596         embedded_float_t * flt =  lvtextAddEmbeddedFloat( m_pbuffer );
597         flt->srctext = src;
598 
599         ldomNode * node = (ldomNode *) src->object;
600         flt->is_right = node->getStyle()->float_ == css_f_right;
601         // clear was not moved to the floatBox: get it from its single child
602         flt->clear = node->getChildNode(0)->getStyle()->clear;
603 
604         // Thanks to the wrapping floatBox element, which has no
605         // margin, we can set its RenderRectAccessor to be exactly
606         // our embedded_float coordinates and sizes.
607         //   If the wrapped element has margins, its renderRectAccessor
608         //   will be positioned/sized at the level of borders or padding,
609         //   as crengine does naturally with:
610         //       fmt.setWidth(width - margin_left - margin_right);
611         //       fmt.setHeight(height - margin_top - margin_bottom);
612         //       fmt.setX(x + margin_left);
613         //       fmt.setY(y + margin_top);
614         // So, the RenderRectAccessor(floatBox) can act as a cache
615         // of previously rendered and positioned floats!
616         int width;
617         int height;
618         // This formatting code is called when rendering, but can also be called when
619         // looking for links, highlighting... so it may happen that floats have
620         // already been rendered and positioned, and we already know their width
621         // and height.
622         bool already_rendered = false;
623         { // in its own scope, so this RenderRectAccessor is forgotten when left
624             RenderRectAccessor fmt( node );
625             if ( RENDER_RECT_HAS_FLAG(fmt, BOX_IS_RENDERED) )
626                 already_rendered = true;
627             // We could also directly use fmt.getX/Y() if it has already been
628             // positioned, and avoid the positioning code below.
629             // But let's be fully deterministic with that, and redo it.
630         }
631         if ( !already_rendered ) {
632             LVRendPageContext alt_context( NULL, m_pbuffer->page_height, false );
633             // We render the float with the specified direction (from upper dir=), even
634             // if UNSET (and not with the direction determined by fribidi from the text).
635             // We provide 0,0 as the usable left/right overflows, so no glyph/hanging
636             // punctuation will leak outside the floatBox.
637             renderBlockElement( alt_context, node, 0, 0, m_pbuffer->width, 0, 0, m_specified_para_dir );
638             // (renderBlockElement will ensure style->height if requested.)
639             // Gather footnotes links accumulated by alt_context
640             // (We only need to gather links in the rendering phase, for
641             // page splitting, so no worry if we don't when already_rendered)
642             lString32Collection * link_ids = alt_context.getLinkIds();
643             if (link_ids->length() > 0) {
644                 flt->links = new lString32Collection();
645                 for ( int n=0; n<link_ids->length(); n++ ) {
646                     flt->links->add( link_ids->at(n) );
647                 }
648             }
649         }
650         // (renderBlockElement() above may update our RenderRectAccessor(),
651         // so (re)get it only now)
652         RenderRectAccessor fmt( node );
653         width = fmt.getWidth();
654         height = fmt.getHeight();
655 
656         flt->width = width;
657         flt->height = height;
658         flt->to_position = true;
659 
660         if ( node->getChildCount() > 0 ) {
661             // The margins were used to position the original
662             // float node in its wrapping floatBox - so get it
663             // back from their relative positions
664             RenderRectAccessor cfmt(node->getChildNode(0));
665             if ( flt->is_right )
666                 flt->inward_margin = cfmt.getX();
667             else
668                 flt->inward_margin = width - (cfmt.getX() + cfmt.getWidth());
669         }
670 
671         // If there are already floats to position, don't position any more for now
672         if ( !m_has_float_to_position ) {
673             if ( getNextFloatMinY(flt->clear) == m_y ) {
674                 // No previous float, nor any clear:'ing, prevents having this one
675                 // on current line,
676                 // See if it can still fit on this line, accounting for the current
677                 // width used by the text before this inline float (getCurrentLineWidth()
678                 // accounts for already positioned floats on this line)
679                 if ( currentTextWidth + flt->width <= getCurrentLineWidth() ) {
680                     // Call getYWithAvailableWidth() just to get x
681                     int x;
682                     int y = getYWithAvailableWidth(m_y, flt->width + currentTextWidth, 0, x, flt->is_right);
683                     if (y == m_y) { // should always be true, but just to be sure
684                         if (flt->is_right) // correct x: add currentTextWidth we added
685                             x = x + currentTextWidth;  // to the width for computation
686                         flt->x = x;
687                         flt->y = y;
688                         flt->to_position = false;
689                         fmt.setX(flt->x);
690                         fmt.setY(flt->y);
691                         if (flt->is_right)
692                             RENDER_RECT_SET_FLAG(fmt, FLOATBOX_IS_RIGHT);
693                         else
694                             RENDER_RECT_UNSET_FLAG(fmt, FLOATBOX_IS_RIGHT);
695                         RENDER_RECT_SET_FLAG(fmt, BOX_IS_RENDERED);
696                         // Small trick for elements with negative margins (invert dropcaps)
697                         // that would overflow above flt->x, to avoid a page split by
698                         // sticking the line to the hopefully present margin-top that
699                         // precedes this paragraph
700                         // (we may want to deal with that more generically by storing these
701                         // overflows so we can ensure no page split on the other following
702                         // lines as long as they are not consumed)
703                         RenderRectAccessor cfmt( node->getChildNode(0));
704                         if (cfmt.getY() < 0)
705                             m_has_ongoing_float = true;
706                         return; // all done with this float
707                     }
708                 }
709             }
710             m_has_float_to_position = true;
711         }
712     }
positionDelayedFloats()713     void positionDelayedFloats() {
714         // m_y has been updated, position delayed floats
715         if (!m_has_float_to_position)
716             return;
717         for (int i=0; i<m_pbuffer->floatcount; i++) {
718             embedded_float_t * flt = m_pbuffer->floats[i];
719             if (!flt->to_position)
720                 continue;
721             int x = 0;
722             int y = getNextFloatMinY(flt->clear);
723             y = getYWithAvailableWidth(y, flt->width, flt->height, x, flt->is_right);
724             flt->x = x;
725             flt->y = y;
726             flt->to_position = false;
727             ldomNode * node = (ldomNode *) flt->srctext->object;
728             RenderRectAccessor fmt( node );
729             fmt.setX(flt->x);
730             fmt.setY(flt->y);
731             if (flt->is_right)
732                 RENDER_RECT_SET_FLAG(fmt, FLOATBOX_IS_RIGHT);
733             else
734                 RENDER_RECT_UNSET_FLAG(fmt, FLOATBOX_IS_RIGHT);
735             RENDER_RECT_SET_FLAG(fmt, BOX_IS_RENDERED);
736         }
737         m_has_float_to_position = false;
738     }
finalizeFloats()739     void finalizeFloats() {
740         // Adds blank lines to fill the vertical space still occupied by our own
741         // inner floats (we don't fill the height of outer floats (float_footprint)
742         // as they can still apply over our siblings.)
743         fillAndMoveToY( getFloatsMaxBottomY() );
744     }
fillAndMoveToY(int target_y)745     void fillAndMoveToY(int target_y) {
746         // Adds blank lines to fill the vertical space from current m_y to target_y.
747         // We need to use 1px lines to get a chance to allow a page wrap at
748         // vertically stacked floats boundaries
749         if ( target_y <= m_y ) // bogus: we won't rewind y
750             return;
751         bool has_ongoing_float;
752         while ( m_y < target_y ) {
753             formatted_line_t * frmline =  lvtextAddFormattedLine( m_pbuffer );
754             frmline->y = m_y;
755             frmline->x = 0;
756             frmline->height = 1;
757             frmline->baseline = 1; // no word to draw, does not matter
758             // Check if there are floats spanning that y, so we
759             // can avoid a page split
760             has_ongoing_float = false;
761             for (int i=0; i<m_pbuffer->floatcount; i++) {
762                 embedded_float_t * flt = m_pbuffer->floats[i];
763                 if (flt->to_position) // ignore not yet positioned floats (even if
764                     continue;         // there shouldn't be any when this is called)
765                 if (flt->y < m_y && flt->y + flt->height > m_y) {
766                     has_ongoing_float = true;
767                     break;
768                 }
769                 // flt->y == m_y is fine: the float starts on this line,
770                 // we can split on it
771             }
772             if (has_ongoing_float) {
773                 frmline->flags |= LTEXT_LINE_SPLIT_AVOID_BEFORE;
774             }
775             m_y += 1;
776             m_pbuffer->height = m_y;
777         }
778         checkOngoingFloat();
779     }
floatClearText(int flags)780     void floatClearText( int flags ) {
781         // Handling of "clear: left/right/both" is different if the 'clear:'
782         // is carried by a <BR> or by a float'ing box (for floating boxes, it
783         // is done in addFloat()). Here, we deal with <BR style="clear:..">.
784         // If a <BR/> has a "clear:", it moves the text below the floats, and the
785         // text continues from there.
786         // (Only a <BR> can carry a clear: among the non-floating inline elements.)
787         if ( flags & LTEXT_SRC_IS_CLEAR_LEFT ) {
788             int y = getNextFloatMinY( css_c_left );
789             if (y > m_y)
790                 fillAndMoveToY( y );
791         }
792         if ( flags & LTEXT_SRC_IS_CLEAR_RIGHT ) {
793             int y = getNextFloatMinY( css_c_right );
794             if (y > m_y)
795                 fillAndMoveToY( y );
796         }
797     }
getCurrentLineWidth()798     int getCurrentLineWidth() {
799         int x;
800         // m_pbuffer->strut_height is all we can check for at this point,
801         // but the text that will be put on this line may exceed it if
802         // there's some vertical-align or font size change involved.
803         // So, the line could be pushed down and conflict with a float below.
804         // But this will do for now...
805         return getAvailableWidthAtY(m_y, m_pbuffer->strut_height, x);
806     }
getCurrentLineX()807     int getCurrentLineX() {
808         int x;
809         getAvailableWidthAtY(m_y, m_pbuffer->strut_height, x);
810         return x;
811     }
isCurrentLineWithFloat()812     bool isCurrentLineWithFloat() {
813         int x;
814         int w = getAvailableWidthAtY(m_y, m_pbuffer->strut_height, x);
815         return w < m_pbuffer->width;
816     }
isCurrentLineWithFloatOnLeft()817     bool isCurrentLineWithFloatOnLeft() {
818         int x;
819         getAvailableWidthAtY(m_y, m_pbuffer->strut_height, x);
820         return x > 0;
821     }
isCurrentLineWithFloatOnRight()822     bool isCurrentLineWithFloatOnRight() {
823         int x;
824         int w = getAvailableWidthAtY(m_y, m_pbuffer->strut_height, x);
825         return x + w < m_pbuffer->width;
826     }
checkOngoingFloat()827     void checkOngoingFloat() {
828         // Check if there is still some float spanning at current m_y
829         // If there is, next added line will ensure no page split
830         // between it and the previous line
831         m_has_ongoing_float = false;
832         for (int i=0; i<m_pbuffer->floatcount; i++) {
833             embedded_float_t * flt = m_pbuffer->floats[i];
834             if (flt->to_position) // ignore not yet positioned floats, as they
835                 continue;         // are not yet running past m_y
836             if (flt->y < m_y && flt->y + flt->height > m_y) {
837                 m_has_ongoing_float = true;
838                 break;
839             }
840             // flt->y == m_y is fine: the float starts on this line,
841             // no need to avoid page split by next line
842         }
843     }
844     // We prefer to not use the fully usable left overflow, but keep
845     // a bit of the margin it comes from
846     #define USABLE_OVERFLOW_USABLE_RATIO 0.8
847     // Use this for testing computations and get visually perfect fitting
848     // #define USABLE_OVERFLOW_USABLE_RATIO 1
getCurrentLineUsableOverflows(int & usable_left_overflow,int & usable_right_overflow)849     void getCurrentLineUsableOverflows( int & usable_left_overflow, int & usable_right_overflow ) {
850         if (m_pbuffer->floatcount > 0) {
851             // We have left or right floats on this line, that might
852             // make m_usable_left/right_overflow no more relevant.
853             // We'll allow the main text to overflow in these floats'
854             // inward margin (the float element content itself is also
855             // allowed to overflow in it, so its margin is shared;
856             // hopefully, both overflowing in it at the same position
857             // will be rare).
858             // Note that if the float that sets the text min or max x
859             // have some large inward margin, an other further float
860             // with less inward margin might be the one that should
861             // limit the usable overflow.
862             int fl_left_max_x = 0;
863             int fl_left_max_x_overflow = - m_usable_left_overflow;
864             int fl_right_min_x = m_pbuffer->width;
865             int fl_right_min_x_overflow = m_pbuffer->width + m_usable_right_overflow;
866             // We need to scan pixel line by pixel line along the strut height to be sure
867             int y = m_y;
868             int end_y = y + m_pbuffer->strut_height;
869             while (y <= end_y) {
870                 for (int i=0; i<m_pbuffer->floatcount; i++) {
871                     embedded_float_t * flt = m_pbuffer->floats[i];
872                     if (flt->to_position) // ignore not yet positioned floats
873                         continue;
874                     if (flt->y <= y && flt->y + flt->height > y) { // this float is spanning this y
875                         if (flt->is_right) {
876                             if (flt->x < fl_right_min_x)
877                                 fl_right_min_x = flt->x;
878                             if (flt->x + flt->inward_margin < fl_right_min_x_overflow)
879                                 fl_right_min_x_overflow = flt->x + flt->inward_margin;
880                                 // (inward_margin is the left margin of a right float)
881                         }
882                         else {
883                             if (flt->x + flt->width > fl_left_max_x)
884                                 fl_left_max_x = flt->x + flt->width;
885                             if (flt->x + flt->width - flt->inward_margin > fl_left_max_x_overflow)
886                                 fl_left_max_x_overflow = flt->x + flt->width - flt->inward_margin;
887                                 // (inward_margin is the right margin of a left float)
888                         }
889                     }
890                 }
891                 y += 1;
892             }
893             usable_left_overflow  = fl_left_max_x - fl_left_max_x_overflow;
894             usable_right_overflow = fl_right_min_x_overflow - fl_right_min_x;
895         }
896         else {
897             usable_left_overflow  = m_usable_left_overflow;
898             usable_right_overflow = m_usable_right_overflow;
899         }
900         usable_left_overflow  =  usable_left_overflow * USABLE_OVERFLOW_USABLE_RATIO;
901         usable_right_overflow = usable_right_overflow * USABLE_OVERFLOW_USABLE_RATIO;
902     }
903 
904     /// allocate buffers for paragraph
allocate(int start,int end)905     void allocate( int start, int end )
906     {
907         int pos = 0;
908         int i;
909         // PASS 1: calculate total length (characters + objects)
910         for ( i=start; i<end; i++ ) {
911             src_text_fragment_t * src = &m_pbuffer->srctext[i];
912             if ( src->flags & LTEXT_SRC_IS_FLOAT ) {
913                 pos++;
914             }
915             else if ( src->flags & LTEXT_SRC_IS_INLINE_BOX ) {
916                 pos++;
917             }
918             else if ( src->flags & LTEXT_SRC_IS_OBJECT ) {
919                 pos++;
920                 if (!m_has_images) {
921                     // Compute images max height only when we meet an image,
922                     // and only for the first one as it's the same for all
923                     // images in this paragraph
924                     ldomNode * node = (ldomNode *) src->object;
925                     if ( node && !node->isNull() ) {
926                         // We have to limit the image height so that the line
927                         // that contains it does fit in the page without any
928                         // uneeded page break
929                         m_max_img_height = m_pbuffer->page_height;
930                         // remove parent nodes' margin/border/padding
931                         m_max_img_height -= node->getSurroundingAddedHeight();
932                         // remove height taken by the strut baseline
933                         m_max_img_height -= (m_pbuffer->strut_height - m_pbuffer->strut_baseline);
934                         m_has_images = true;
935                     }
936                 }
937             }
938             else {
939                 pos += src->t.len;
940             }
941         }
942 
943         // allocate buffers
944         m_length = pos;
945 
946         TR("allocate(%d)", m_length);
947         // We start with static buffers, but when m_length reaches STATIC_BUFS_SIZE,
948         // we switch to dynamic buffers and we keep using them (realloc'ating when
949         // needed).
950         // The code in this file will fill these buffers with m_length items, so
951         // from index [0] to [m_length-1], and read them back.
952         // Willingly or not (bug?), this code may also access the buffer one slot
953         // further at [m_length], and we need to set this slot to zero to avoid
954         // a segfault. So, we need to reserve this additional slot when
955         // allocating dynamic buffers, or checking if the static buffers can be
956         // used.
957         // (memset()'ing all buffers on their full allocated size to 0 would work
958         // too, but there's a small performance hit when doing so. Just setting
959         // to zero the additional slot seems enough, as all previous slots seems
960         // to be correctly filled.)
961 
962 #define STATIC_BUFS_SIZE 8192
963 #define ITEMS_RESERVED 16
964 
965         // "m_length+1" to keep room for the additional slot to be zero'ed
966         if ( !m_staticBufs || m_length+1 > STATIC_BUFS_SIZE ) {
967             // if (!m_staticBufs && m_text == NULL) printf("allocating dynamic buffers\n");
968             if ( m_length+1 > m_size ) {
969                 // realloc
970                 m_size = m_length+ITEMS_RESERVED;
971                 m_text = cr_realloc(m_staticBufs ? NULL : m_text, m_size);
972                 m_flags = cr_realloc(m_staticBufs ? NULL : m_flags, m_size);
973                 m_charindex = cr_realloc(m_staticBufs ? NULL : m_charindex, m_size);
974                 m_srcs = cr_realloc(m_staticBufs ? NULL : m_srcs, m_size);
975                 m_widths = cr_realloc(m_staticBufs ? NULL : m_widths, m_size);
976                 #if (USE_FRIBIDI==1)
977                     // Note: we could here check for RTL chars (and have a flag
978                     // to then not do it in copyText()) so we don't need to allocate
979                     // the following ones if we won't be using them.
980                     m_bidi_ctypes = cr_realloc(m_staticBufs ? NULL : m_bidi_ctypes, m_size);
981                     m_bidi_btypes = cr_realloc(m_staticBufs ? NULL : m_bidi_btypes, m_size);
982                     m_bidi_levels = cr_realloc(m_staticBufs ? NULL : m_bidi_levels, m_size);
983                 #endif
984             }
985             m_staticBufs = false;
986         } else {
987             // static buffer space
988             static lChar32 m_static_text[STATIC_BUFS_SIZE];
989             static lUInt16 m_static_flags[STATIC_BUFS_SIZE];
990             static src_text_fragment_t * m_static_srcs[STATIC_BUFS_SIZE];
991             static lUInt16 m_static_charindex[STATIC_BUFS_SIZE];
992             static int m_static_widths[STATIC_BUFS_SIZE];
993             #if (USE_FRIBIDI==1)
994                 static FriBidiCharType m_static_bidi_ctypes[STATIC_BUFS_SIZE];
995                 static FriBidiBracketType m_static_bidi_btypes[STATIC_BUFS_SIZE];
996                 static FriBidiLevel m_static_bidi_levels[STATIC_BUFS_SIZE];
997             #endif
998             m_text = m_static_text;
999             m_flags = m_static_flags;
1000             m_charindex = m_static_charindex;
1001             m_srcs = m_static_srcs;
1002             m_widths = m_static_widths;
1003             m_staticBufs = true;
1004             m_staticBufs_inUse = true;
1005             // printf("using static buffers\n");
1006             #if (USE_FRIBIDI==1)
1007                 m_bidi_ctypes = m_static_bidi_ctypes;
1008                 m_bidi_btypes = m_static_bidi_btypes;
1009                 m_bidi_levels = m_static_bidi_levels;
1010             #endif
1011         }
1012         memset( m_flags, 0, sizeof(lUInt16)*m_length ); // start with all flags set to zero
1013 
1014         // We set to zero the additional slot that the code may peek at (with
1015         // the checks against m_length we did, we know this slot is allocated).
1016         // (This can be removed if we find this was a bug and can fix it)
1017         m_flags[m_length] = 0;
1018         m_text[m_length] = 0;
1019         m_charindex[m_length] = 0;
1020         m_srcs[m_length] = NULL;
1021         m_widths[m_length] = 0;
1022         #if (USE_FRIBIDI==1)
1023             m_bidi_ctypes[m_length] = 0;
1024             m_bidi_btypes[m_length] = 0;
1025             m_bidi_levels[m_length] = 0;
1026         #endif
1027     }
1028 
1029     /// copy text of current paragraph to buffers
copyText(int start,int end)1030     void copyText( int start, int end )
1031     {
1032         #if (USE_LIBUNIBREAK==1)
1033         struct LineBreakContext lbCtx;
1034         // Let's init it before the first char, by adding a leading Zero-Width Joiner
1035         // (Word Joiner, non-breakable) which should not change the behaviour with
1036         // the real first char coming up. We then can just use lb_process_next_char()
1037         // with the real text.
1038         // The lang lb_props will be plugged in from the TextLangCfg of the
1039         // coming up text node. We provide NULL in the meantime.
1040         lb_init_break_context(&lbCtx, 0x200D, NULL); // ZERO WIDTH JOINER
1041         #endif
1042 
1043         m_has_bidi = false; // will be set if fribidi detects it is bidirectional text
1044         m_para_dir_is_rtl = false;
1045         #if (USE_FRIBIDI==1)
1046         bool has_rtl = false; // if no RTL char, no need for expensive bidi processing
1047         // todo: according to https://www.w3.org/TR/css-text-3/#bidi-linebox
1048         // the bidi direction, if determined from the text itself (no dir= from
1049         // outer containers) must follow up to next paragraphs (separated by <BR/> or newlines).
1050         // Here in lvtextfm, each gets its own call to copyText(), so we might need some state.
1051         // This link also points out that line box direction and its text content direction
1052         // might be different... Could be we have that right (or not).
1053         // If this para final node or some upper block node specifies dir=rtl, assume fribidi
1054         // is needed, and avoid checking for rtl chars
1055         if ( m_specified_para_dir == REND_DIRECTION_RTL ) {
1056             has_rtl = true;
1057         }
1058         #endif
1059 
1060         bool has_non_space = false; // If we have non-empty text, we can do strut confining
1061 
1062         int pos = 0;
1063         int i;
1064         bool prev_was_space = true; // start with true, to get rid of all leading spaces
1065         bool is_locked_spacing = false;
1066         int last_non_collapsed_space_pos = 0; // reset to -1 if first char is not a space
1067         int last_non_space_pos = -1; // to get rid of all trailing spaces
1068         src_text_fragment_t * prev_src = NULL;
1069 
1070         for ( i=start; i<end; i++ ) {
1071             src_text_fragment_t * src = &m_pbuffer->srctext[i];
1072 
1073             // We will compute wrap rules as if there were no "white-space: nowrap", as
1074             // we might end up not ensuring nowrap. We just flag all chars (but the last
1075             // one) inside a text node with "nowrap" with LCHAR_DEPRECATED_WRAP_AFTER,
1076             // and processParagraph() will deal with chars that have both ALLOW_WRAP_AFTER
1077             // and DEPRECATED_WRAP_AFTER.
1078             bool nowrap = src->flags & LTEXT_FLAG_NOWRAP;
1079             if ( nowrap && pos > 0 ) {
1080                 // We still need to do the right thing at boundaries between 2 nodes
1081                 // with nowrap - and update flags on the last char of previous node.
1082                 // If NOWRAP|NOWRAP: wrap after last char of 1st node is permitted
1083                 // If NOWRAP|WRAP  : wrap after last char of 1st node is permitted
1084                 // If   WRAP|NOWRAP: wrap after last char of 1st node is permitted
1085                 // If   WRAP|WRAP  : it depends
1086                 bool handled = false;
1087                 if ( prev_src && (prev_src->flags & LTEXT_FLAG_NOWRAP) ) {
1088                     // We don't have much context about these text nodes.
1089                     // 2 consecutive text nodes might both have "white-space: nowrap",
1090                     // but it might be allowed to wrap between them if the node that
1091                     // contains them isn't "nowrap".
1092                     // So, try to do it that way:
1093                     // - if both have it, and not their common parent container (so
1094                     //   it's not inherited): a wrap should be allowed between them.
1095                     // - if both have it, and their parent container too, a wrap
1096                     //   shouldn't be allowed between them
1097                     ldomNode * prev_node = (ldomNode *)prev_src->object;
1098                     ldomNode * this_node = (ldomNode *)src->object;
1099                     if ( prev_node && this_node ) {
1100                         ldomXRange r = ldomXRange( ldomXPointer(prev_node,0), ldomXPointer(this_node,0) );
1101                         ldomNode * parent = r.getNearestCommonParent();
1102                         if ( parent && parent->getStyle()->white_space == css_ws_nowrap ) {
1103                             m_flags[pos-1] |= LCHAR_DEPRECATED_WRAP_AFTER;
1104                             handled = true;
1105                         }
1106                     }
1107                     else {
1108                         // One of the 2 nodes is some generated content (list marker,
1109                         // quote char, BDI wrapping chars) that does not map to a
1110                         // document node (and we can't reach its parent from here).
1111                         // Not sure if this would be always good, but let's assume
1112                         // we want nowrap continuity.
1113                         m_flags[pos-1] |= LCHAR_DEPRECATED_WRAP_AFTER;
1114                         handled = true;
1115                     }
1116                 }
1117                 if ( !handled && src->flags & (LTEXT_SRC_IS_INLINE_BOX|LTEXT_SRC_IS_OBJECT) ) {
1118                     // Not per-spec, but might be handy:
1119                     // If an image or our internal inlineBox element has been set
1120                     // to "white-space: nowrap", it's most probably that it has
1121                     // inherited it from its parent node - as it's quite unprobable
1122                     // in real-life that an image was set to "white-space: nowrap"
1123                     // itself, as it would have no purpose. As for inlineBox,
1124                     // the original element that has "display: inline-block;
1125                     // white-space: nowrap" is actually the child of the inlineBox,
1126                     // and will have it - but they are not propagated up to the
1127                     // inlineBox wrapper.
1128                     // So, assume that if such image or inlineBox has it, while
1129                     // its parent does not, it's because it has been set via
1130                     // a Style tweak, and that we have used that trick in the
1131                     // aim to prevent a wrap around it. libunibreak defaults to
1132                     // allowing a wrap on both sides of such replaced elements;
1133                     // this allows to easily change this when needed.
1134                     // (Use-case seen: book with footnotes links that are
1135                     // set "display:inline-block", which libunibreak could
1136                     // put at start of line - while we'd rather want them
1137                     // stuck to the word they follow).
1138                     ldomNode * this_node = (ldomNode *)src->object;
1139                     if ( this_node ) {
1140                         ldomNode * parent = this_node->getParentNode();
1141                         if ( parent && parent->getStyle()->white_space != css_ws_nowrap ) {
1142                             m_flags[pos-1] |= LCHAR_DEPRECATED_WRAP_AFTER; // avoid wrap before it
1143                             m_flags[pos]   |= LCHAR_DEPRECATED_WRAP_AFTER; // avoid wrap after it
1144                         }
1145                     }
1146                 }
1147             }
1148 
1149             if ( src->flags & LTEXT_SRC_IS_FLOAT ) {
1150                 m_text[pos] = 0;
1151                 m_srcs[pos] = src;
1152                 m_charindex[pos] = FLOAT_CHAR_INDEX; //0xFFFE;
1153                 m_flags[pos] = LCHAR_IS_OBJECT;
1154                     // Note: m_flags was a lUInt8, and there were already 8 LCHAR_IS_* bits/flags
1155                     //   so we couldn't add our own. But using LCHAR_IS_OBJECT should not hurt,
1156                     //   as we do the FLOAT tests before it is used.
1157                     //   m_charindex[pos] is the one to use to detect FLOATs
1158                     // m_flags has since be updated to lUint16, but no real need
1159                     // to change what we did for floats to use a new flag.
1160                 pos++;
1161                 // No need to update prev_was_space or last_non_space_pos
1162                 // No need for libunibreak object replacement character
1163             }
1164             else if ( src->flags & LTEXT_SRC_IS_INLINE_BOX ) {
1165                 // Note: we shouldn't meet any EmbeddedBlock inlineBox here (and in
1166                 // processParagraph(), addLine() and alignLine()) as they are dealt
1167                 // with specifically in splitParagraphs() by processEmbeddedBlock().
1168                 m_text[pos] = 0;
1169                 m_srcs[pos] = src;
1170                 m_charindex[pos] = INLINEBOX_CHAR_INDEX; //0xFFFD;
1171                 m_flags[pos] = LCHAR_IS_OBJECT;
1172                 #if (USE_LIBUNIBREAK==1)
1173                     // Let libunibreak know there was an object, for the followup text
1174                     // to set LCHAR_ALLOW_WRAP_AFTER on it.
1175                     // (it will allow wrap before and after an object, unless it's near
1176                     // some punctuation/quote/paren, whose rules will be ensured it seems).
1177                     int brk = lb_process_next_char(&lbCtx, (utf32_t)0xFFFC); // OBJECT REPLACEMENT CHARACTER
1178                     if (pos > 0) {
1179                         if (brk == LINEBREAK_ALLOWBREAK)
1180                             m_flags[pos-1] |= LCHAR_ALLOW_WRAP_AFTER;
1181                         else
1182                             m_flags[pos-1] &= ~LCHAR_ALLOW_WRAP_AFTER;
1183                     }
1184                 #else
1185                     m_flags[pos] |= LCHAR_ALLOW_WRAP_AFTER;
1186                 #endif
1187                 last_non_space_pos = pos;
1188                 last_non_collapsed_space_pos = -1;
1189                 prev_was_space = false;
1190                 is_locked_spacing = false;
1191                 pos++;
1192             }
1193             else if ( src->flags & LTEXT_SRC_IS_OBJECT ) {
1194                 m_text[pos] = 0;
1195                 m_srcs[pos] = src;
1196                 m_charindex[pos] = OBJECT_CHAR_INDEX; //0xFFFF;
1197                 m_flags[pos] = LCHAR_IS_OBJECT;
1198                 #if (USE_LIBUNIBREAK==1)
1199                     // Let libunibreak know there was an object
1200                     int brk = lb_process_next_char(&lbCtx, (utf32_t)0xFFFC); // OBJECT REPLACEMENT CHARACTER
1201                     if (pos > 0) {
1202                         if (brk == LINEBREAK_ALLOWBREAK)
1203                             m_flags[pos-1] |= LCHAR_ALLOW_WRAP_AFTER;
1204                         else
1205                             m_flags[pos-1] &= ~LCHAR_ALLOW_WRAP_AFTER;
1206                     }
1207                 #else
1208                     m_flags[pos] |= LCHAR_ALLOW_WRAP_AFTER;
1209                 #endif
1210                 last_non_space_pos = pos;
1211                 last_non_collapsed_space_pos = -1;
1212                 prev_was_space = false;
1213                 is_locked_spacing = false;
1214                 pos++;
1215             }
1216             else {
1217                 #if (USE_LIBUNIBREAK==1)
1218                 // We hack into lbCtx private member and switch its lbpLang
1219                 // on-the-fly to the props for a possibly new language.
1220                 lbCtx.lbpLang = src->lang_cfg->getLBProps();
1221                 #endif
1222 
1223                 int len = src->t.len;
1224                 lStr_ncpy( m_text+pos, src->t.text, len );
1225                 if ( i==0 || (src->flags & LTEXT_FLAG_NEWLINE) )
1226                     m_flags[pos] = LCHAR_MANDATORY_NEWLINE;
1227 
1228                 // On non PRE-formatted text, our XML parser have already removed
1229                 // consecutive spaces, \t, \r and \n in each single text node
1230                 // (inside and at boundaries), keeping only (if any) one leading
1231                 // space and one trailing space.
1232                 // These text nodes were simply appended (by lvrend) as is into
1233                 // the src_text_fragment_t->t.text that we are processing here.
1234                 // It may happen then that we, here, do get consecutive spaces, eg with:
1235                 //   "<div> Some <span> text </span> and <span> </span> even more. </div>"
1236                 // which would give us here:
1237                 //   " Some  text  and   even more "
1238                 //
1239                 // https://www.w3.org/TR/css-text-3/#white-space-processing states, for
1240                 // non-PRE paragraphs:
1241                 // (a "segment break" is just a \n in the HTML source)
1242                 //   (a) A sequence of segment breaks and other white space between two Chinese,
1243                 //       Japanese, or Yi characters collapses into nothing.
1244                 // (So it looks like CJY is CJK minus K - with Korean, if there is a
1245                 // space between K chars, it should be kept.)
1246                 //   (b) A zero width space before or after a white space sequence containing a
1247                 //       segment break causes the entire sequence of white space to collapse
1248                 //       into a zero width space.
1249                 //   (c) Otherwise, consecutive white space collapses into a single space.
1250                 //
1251                 // For now, we only implement (c).
1252                 // (b) can't really be implemented, as we don't know at this point
1253                 // if there was a segment break or not, as any would have already been
1254                 // converted to a space.
1255                 // (a) is not implemented, but some notes and comments are below (may be
1256                 // not too much bothering for CJK users if nothing was done to fix that?)
1257                 //
1258                 // It also states:
1259                 //     Any space immediately following another collapsible space - even one
1260                 //     outside the boundary of the inline containing that space, provided both
1261                 //     spaces are within the same inline formatting context - is collapsed to
1262                 //     have zero advance width. (It is invisible, but retains its soft wrap
1263                 //     opportunity, if any.)
1264                 // (lvtextfm actually deals with a single "inline formatting context", what
1265                 // crengine calls a "final block".)
1266                 //
1267                 // It also states:
1268                 //     - A sequence of collapsible spaces at the beginning of a line is removed.
1269                 //     - A sequence of collapsible spaces at the end of a line is removed.
1270                 //
1271                 // The specs don't say which, among the consecutive collapsible spaces, to
1272                 // keep, so let's keep the first one (they may have different width,
1273                 // eg with: <big> some </big> <small> text </small> )
1274                 //
1275                 // Note: we can't "remove" any char: m_text, src_text_fragment_t->t.text
1276                 // and the ldomNode text node own text need all to be in-sync: a shift
1277                 // because of a removed char in any of them will cause wrong XPointers
1278                 // and Rects (displaced highlights, etc...)
1279                 // We can just "replace" a char (only in m_text, gone after this paragraph
1280                 // processing) or flag (in m_flags for the time of paragraph processing,
1281                 // in word->flags if needed later for drawing).
1282 
1283                 bool preformatted = (src->flags & LTEXT_FLAG_PREFORMATTED);
1284                 for ( int k=0; k<len; k++ ) {
1285                     lChar32 c = m_text[pos];
1286 
1287                     // If not on a 'pre' text node, we should strip trailing
1288                     // spaces and collapse consecutive spaces (other spaces
1289                     // like UNICODE_NO_BREAK_SPACE should not collapse).
1290                     bool is_space = (c == ' ');
1291                     if ( is_space && !preformatted ) {
1292                         if ( prev_was_space ) {
1293                             // On non-pre text nodes, flag spaces following a space
1294                             // so we can discard them later.
1295                             // Note: the behaviour with consecutive spaces in a mix
1296                             // of pre and non-pre text nodes has not been tested,
1297                             // and what we do here might be wrong.
1298                             // Note: with a mix of normal spaces and non-break-spaces,
1299                             // we seem to behave just as Firefox.
1300                             // Note: for the empty lines or indentation we might add
1301                             // with 'txform->AddSourceLine(U" "...)', we need to
1302                             // provide LTEXT_FLAG_PREFORMATTED if we don't want them
1303                             // to be collapsed.
1304                             m_flags[pos] = LCHAR_IS_COLLAPSED_SPACE | LCHAR_ALLOW_WRAP_AFTER;
1305                             // m_text[pos] = '_'; // uncomment when debugging
1306                             // (We can replace the char to see it in printf() (m_text is not the
1307                             // text that is drawn, it's measured but we correct the measure
1308                             // by setting a zero width, it's just used here for analysis.
1309                             // But best to let it as-is except for debugging)
1310                         }
1311                         else {
1312                             last_non_collapsed_space_pos = pos;
1313                         }
1314                         // Locked spacing can be set on any space among contiguous spaces,
1315                         // but will be useful only on the non-collapsed one. We propagate
1316                         // it on all previous and following spaces so we don't have to
1317                         // redo-it after any BiDi re-ordering (not sure thus this will
1318                         // be alright...)
1319                         // (This is for now only used with FB2 run-in footnotes to ensure
1320                         // a constant width between the footnote number and its following
1321                         // text, but could be used with list item markers/numbers.)
1322                         if ( src->flags & LTEXT_LOCKED_SPACING )
1323                             is_locked_spacing = true;
1324                         if ( is_locked_spacing ) {
1325                             m_flags[pos] |= LCHAR_LOCKED_SPACING;
1326                             if ( last_non_collapsed_space_pos >= 0 ) { // update previous spaces
1327                                 for ( int j=last_non_collapsed_space_pos; j<pos; j++ ) {
1328                                     m_flags[j] |= LCHAR_LOCKED_SPACING;
1329                                 }
1330                             }
1331                         }
1332                     }
1333                     else {
                        // don't strip trailing spaces if pre
1335                         last_non_space_pos = pos;
1336                         last_non_collapsed_space_pos = -1;
1337                         is_locked_spacing = false;
1338                         if ( !has_non_space ) {
1339                             if ( !is_space && c != UNICODE_NO_BREAK_SPACE ) {
1340                                 has_non_space = true;
1341                             }
1342                         }
1343                     }
1344                     prev_was_space = is_space || (c == '\n');
1345                         // We might meet '\n' in PRE text, which shouldn't make any space
1346                         // collapsed - except when "white-space: pre-line". So, have
1347                         // a space following a \n be allowed to collapse.
1348 
1349                     /* non-optimized implementation of "(a) A sequence of segment breaks
1350                      * and other white space between two Chinese, Japanese, or Yi characters
1351                      * collapses into nothing", not excluding Korea chars
1352                      * (to be tested/optimized by a CJK dev)
1353                     if ( ch == ' ' && k>0 && k<len-1
1354                             && (isCJKIdeograph(m_text[pos-1]) || isCJKIdeograph(m_text[pos+1])) ) {
1355                         m_flags[pos] = LCHAR_IS_COLLAPSED_SPACE | LCHAR_ALLOW_WRAP_AFTER;
1356                         // m_text[pos] = '_';
1357                     }
1358                     */
1359 
1360                     // if ( ch == '-' || ch == 0x2010 || ch == '.' || ch == '+' || ch==UNICODE_NO_BREAK_SPACE )
1361                     //     m_flags[pos] |= LCHAR_DEPRECATED_WRAP_AFTER;
1362 
1363                     // Some of these (in the 2 commented lines just above) will be set
1364                     // in lvfntman measureText().
1365                     // We might want to have them all done here, for clarity.
1366                     // We may also want to flags CJK chars to distinguish
1367                     // left|right punctuations, and those that can have their
1368                     // ideograph width expanded/collapsed if needed.
1369 
1370                     // We flag some chars as we want them to be ignored: some font
1371                     // would render a glyph (like "[PDI]") for some control chars
1372                     // that shouldn't be rendered (Harfbuzz would skip them by itself,
1373                     // but we also want to skip them when using FreeType directly).
1374                     // We don't skip them when filling these buffer, as some of them
1375                     // can give valuable information to the bidi algorithm.
1376                     // Ignore the unicode direction hints (that we may have added ourselves
1377                     // in lvrend.cpp when processing <bdi>, <bdo> and the dir= attribute).
1378                     // Try to balance the searches:
1379                     if ( c >= 0x202A ) {
1380                         if ( c <= 0x2069 ) {
1381                             if ( c <= 0x202E ) m_flags[pos] = LCHAR_IS_TO_IGNORE;      // 202A>202E
1382                             else if ( c >= 0x2066 ) m_flags[pos] = LCHAR_IS_TO_IGNORE; // 2066>2069
1383                         }
1384                     }
1385                     else if ( c <= 0x009F ) {
1386                         // Also ignore some ASCII and Unicode control chars
1387                         // in the ranges 00>1F and 7F>9F, except a few.
1388                         // (Some of these can be found in old documents or
1389                         // badly converted ones)
1390                         if ( c <= 0x001F ) {
1391                             // Let \t \n \r be (they might have already been
1392                             // expanded to spaces, converted or skipped)
1393                             if ( c != 0x000A && c!= 0x000D && c!= 0x0009 )
1394                                 m_flags[pos] = LCHAR_IS_TO_IGNORE; // 0000>001F except those above
1395                         }
1396                         else if ( c >= 0x007F ) {
1397                             m_flags[pos] = LCHAR_IS_TO_IGNORE;     // 007F>009F
1398                         }
1399                     }
1400                     // We might want to add some others when we happen to meet them.
1401                     // todo: see harfbuzz hb-unicode.hh is_default_ignorable() for how
1402                     // to do this kind of check fast
1403 
1404                     // Note: the overhead of using one of the following is quite minimal, so do if needed
1405                     /*
1406                     utf8proc_category_t uc = utf8proc_category(c);
1407                     if (uc == UTF8PROC_CATEGORY_CF)
1408                         printf("format char %x\n", c);
1409                     else if (uc == UTF8PROC_CATEGORY_CC)
1410                         printf("control char %x\n", c);
1411                     // Alternative, using HarfBuzz:
1412                     int uc = hb_unicode_general_category(hb_unicode_funcs_get_default(), c);
1413                     if (uc == HB_UNICODE_GENERAL_CATEGORY_FORMAT)
1414                         printf("format char %x\n", c);
1415                     else if (uc == HB_UNICODE_GENERAL_CATEGORY_CONTROL)
1416                         printf("control char %x\n", c);
1417                     */
1418 
1419                     #if (USE_LIBUNIBREAK==1)
1420                     if ( nowrap ) {
1421                         // If "white-space: nowrap", we flag everything but the last char
1422                         // (So, for a 1 char long text node, no flag.)
1423                         if ( k < len-1 ) {
1424                             m_flags[pos] |= LCHAR_DEPRECATED_WRAP_AFTER;
1425                         }
1426                     }
1427                     lChar32 ch = m_text[pos];
1428                     if ( src->lang_cfg->hasLBCharSubFunc() ) {
1429                         // Lang specific function may want to substitute char (for
1430                         // libunibreak only) to tweak line breaking around it
1431                         ch = src->lang_cfg->getLBCharSubFunc()(&lbCtx, m_text, pos, len-1 - k);
1432                     }
1433                     int brk = lb_process_next_char(&lbCtx, (utf32_t)ch);
1434                     if ( pos > 0 ) {
1435                         // printf("between <%c%c>: brk %d\n", m_text[pos-1], m_text[pos], brk);
1436                         // printf("between <%x.%x>: brk %d\n", m_text[pos-1], m_text[pos], brk);
1437                         if (brk != LINEBREAK_ALLOWBREAK) {
1438                             m_flags[pos-1] &= ~LCHAR_ALLOW_WRAP_AFTER;
1439                         }
1440                         else {
1441                             m_flags[pos-1] |= LCHAR_ALLOW_WRAP_AFTER;
1442                             // brk is set on the last space in a sequence of multiple spaces.
1443                             //   between <ne>: brk 2
1444                             //   between <ed>: brk 2
1445                             //   between <d.>: brk 2
1446                             //   between <. >: brk 2
1447                             //   between <  >: brk 2
1448                             //   between <  >: brk 2
1449                             //   between < T>: brk 1
1450                             //   between <Th>: brk 2
1451                             //   between <he>: brk 2
1452                             //   between <ey>: brk 2
1453                             //   between <y >: brk 2
1454                             //   between <  >: brk 2
1455                             //   between < h>: brk 1
1456                             //   between <ha>: brk 2
1457                             //   between <av>: brk 2
1458                             //   between <ve>: brk 2
1459                             //   between <e >: brk 2
1460                             //   between < a>: brk 1
1461                             //   between <as>: brk 2
1462                             // Given the algorithm described in addLine(), we want the break
1463                             // after the first space, so the following collapsed spaces can
1464                             // be at start of next line where they will be ignored.
1465                             // (Not certain this is really needed, but let's do it, as the
1466                             // code expecting that has been quite well tested and fixed over
1467                             // the months, so let's avoid adding uncertainty.)
1468                             if ( m_flags[pos-1] & LCHAR_IS_COLLAPSED_SPACE ) {
1469                                 // We have spaces before, and if we are allowed to break,
                                // the break is allowed on all preceding spaces.
1471                                 int j = pos-2;
1472                                 while ( j >= 0 && ( (m_flags[j] & LCHAR_IS_COLLAPSED_SPACE) || m_text[j] == ' ' ) ) {
1473                                     m_flags[j] |= LCHAR_ALLOW_WRAP_AFTER;
1474                                     j--;
1475                                 }
1476                             }
1477                         }
1478                     }
1479                     #endif
1480 
1481                     #if (USE_FRIBIDI==1)
1482                         // Also try to detect if we have RTL chars, so that if we don't have any,
1483                         // we don't need to invoke expensive fribidi processing below (which
1484                         // may add a 50% duration increase to the text rendering phase).
1485                         // Looking at fribidi/lib/bidi-type.tab.i and its rules for tagging
1486                         // a char as RTL, only the following ranges will trigger it:
1487                         //   0590>08FF      Hebrew, Arabic, Syriac, Thaana, Nko, Samaritan...
1488                         //   200F 202B      Right-To-Left mark/embedding control chars
1489                         //   202E 2067      Right-To-Left override/isolate control chars
1490                         //   FB1D>FDFF      Hebrew and Arabic presentation forms
1491                         //   FE70>FEFF      Arabic presentation forms
1492                         //   10800>10FFF    Other rare scripts possibly RTL
1493                         //   1E800>1EEBB    Other rare scripts possibly RTL
1494                         // (There may be LTR chars in these ranges, but it's fine, we'll
1495                         // invoke fribidi, which will say there's no bidi.)
1496                         if ( !has_rtl ) {
1497                             // Try to balance the searches
1498                             if ( c >= 0x0590 ) {
1499                                 if ( c <= 0x2067 ) {
1500                                     if ( c <= 0x08FF ) has_rtl = true;
1501                                     else if ( c >= 0x200F ) {
1502                                         if ( c == 0x200F || c == 0x202B || c == 0x202E || c == 0x2067 ) has_rtl = true;
1503                                     }
1504                                 }
1505                                 else if ( c >= 0xFB1D ) {
1506                                     if ( c <= 0xFDFF ) has_rtl = true;
1507                                     else if ( c <= 0xFEFF ) {
1508                                         if ( c >= 0xFE70) has_rtl = true;
1509                                     }
1510                                     else if ( c <= 0x1EEBB ) {
1511                                         if (c >= 0x1E800) has_rtl = true;
1512                                         else if ( c <= 0x10FFF && c >= 0x10800 ) has_rtl = true;
1513                                     }
1514                                 }
1515                             }
1516                         }
1517                     #endif
1518 
1519                     m_charindex[pos] = k;
1520                     m_srcs[pos] = src;
1521                     pos++;
1522                 }
1523             }
1524             prev_src = src;
1525         }
1526         // Also flag as collapsed all spaces at the end of text
1527         pos = pos-1; // get back last pos++
1528         if (last_non_space_pos >= 0 && last_non_space_pos+1 <= pos) {
1529             for ( int k=last_non_space_pos+1; k<=pos; k++ ) {
1530                 if (m_flags[k] == LCHAR_IS_OBJECT)
1531                     continue; // don't unflag floats
1532                 m_flags[k] = LCHAR_IS_COLLAPSED_SPACE | LCHAR_ALLOW_WRAP_AFTER;
1533                 // m_text[k] = '='; // uncomment when debugging
1534             }
1535         }
1536         TR("%s", LCSTR(lString32(m_text, m_length)));
1537 
1538         // Whether any "-cr-hint: strut-confined" should be applied: only when
1539         // we have non-space-only text in the paragraph - standalone images
1540         // possibly separated by spaces don't need to be reduced in size.
1541         // And only when we actually have a strut set (list item markers
1542         // with "list-style-position: outside" don't have any set).
1543         m_allow_strut_confining = has_non_space && m_pbuffer->strut_height > 0;
1544 
1545         #if (USE_FRIBIDI==1)
1546         if ( has_rtl ) {
1547             // Trust the direction determined by renderBlockElementEnhanced() from the
1548             // upper nodes dir= attributes or CSS style->direction.
1549             if ( m_specified_para_dir == REND_DIRECTION_RTL ) {
1550                 m_para_bidi_type = FRIBIDI_PAR_RTL; // Strong RTL
1551             }
1552             else if ( m_specified_para_dir == REND_DIRECTION_LTR ) {
1553                 m_para_bidi_type = FRIBIDI_PAR_LTR; // Strong LTR
1554             }
1555             else { // REND_DIRECTION_UNSET
1556                 m_para_bidi_type = FRIBIDI_PAR_WLTR; // Weak LTR (= auto with a bias toward LTR)
1557             }
1558 
1559             // Compute bidi levels
1560             fribidi_get_bidi_types( (const FriBidiChar*)m_text, m_length, m_bidi_ctypes);
1561             fribidi_get_bracket_types( (const FriBidiChar*)m_text, m_length, m_bidi_ctypes, m_bidi_btypes);
1562             int max_level = fribidi_get_par_embedding_levels_ex(m_bidi_ctypes, m_bidi_btypes,
1563                                 m_length, (FriBidiParType*)&m_para_bidi_type, m_bidi_levels);
1564             // If computed max level == 1, we are in plain and only LTR, so no need for
1565             // more bidi work later.
1566             if ( max_level > 1 ) {
1567                 m_has_bidi = true;
1568             }
1569             if ( m_para_bidi_type == FRIBIDI_PAR_RTL || m_para_bidi_type == FRIBIDI_PAR_WRTL )
1570                 m_para_dir_is_rtl = true;
1571 
1572             // fribidi_shape(FRIBIDI_FLAG_SHAPE_MIRRORING, m_bidi_levels, m_length, NULL, (FriBidiChar*)m_text);
1573             // No use mirroring at this point I think, as it's not the text that will
1574             // be drawn. Hoping parens & al. have the same widths when mirrored.
1575             // We'll do that in addLine() when processing words when meeting
1576             // a rtl one, with fribidi_get_mirror_char().
1577 
1578             /* For debugging:
1579                 printf("par_type %d , max_level %d\n", m_para_bidi_type, max_level);
1580                 for (int i=0; i<m_length; i++)
1581                     printf("%d", m_bidi_levels[i]);
1582                 printf("\n");
1583             // We get:
1584             //   pure LTR: par_type 272 , max_level 1  0000000000
1585             //   pure RTL: par_type 273 , max_level 2  1111111111
1586             //   LTR at start with later some RTL: par_type 272 , max_level 2  00000111111000000000000000
1587             //   RTL at start with later some LTR: par_type 273 , max_level 3  1111111111112222222222222221
1588             */
1589         }
1590         #endif
1591     }
1592 
resizeImage(int & width,int & height,int maxw,int maxh,bool isInline)1593     void resizeImage( int & width, int & height, int maxw, int maxh, bool isInline )
1594     {
1595         //CRLog::trace("Resize image (%dx%d) max %dx%d %s", width, height, maxw, maxh, isInline ? "inline" : "block");
1596         bool arbitraryImageScaling = false;
1597         int maxScale = 1;
1598         bool zoomIn = width<maxw && height<maxh;
1599         if ( isInline ) {
1600             if ( zoomIn ) {
1601                 if ( m_pbuffer->img_zoom_in_mode_inline==0 )
1602                     return; // no zoom
1603                 arbitraryImageScaling = m_pbuffer->img_zoom_in_mode_inline == 2;
1604                 // maxScale = m_pbuffer->img_zoom_in_scale_inline;
1605             } else {
1606 //                if ( m_pbuffer->img_zoom_out_mode_inline==0 )
1607 //                    return; // no zoom
1608                 arbitraryImageScaling = m_pbuffer->img_zoom_out_mode_inline == 2;
1609                 // maxScale = m_pbuffer->img_zoom_out_scale_inline;
1610             }
1611         } else {
1612             if ( zoomIn ) {
1613                 if ( m_pbuffer->img_zoom_in_mode_block==0 )
1614                     return; // no zoom
1615                 arbitraryImageScaling = m_pbuffer->img_zoom_in_mode_block == 2;
1616                 // maxScale = m_pbuffer->img_zoom_in_scale_block;
1617             } else {
1618 //                if ( m_pbuffer->img_zoom_out_mode_block==0 )
1619 //                    return; // no zoom
1620                 arbitraryImageScaling = m_pbuffer->img_zoom_out_mode_block == 2;
1621                 // maxScale = m_pbuffer->img_zoom_out_scale_block;
1622             }
1623         }
1624         resizeImage( width, height, maxw, maxh, arbitraryImageScaling, maxScale );
1625     }
1626 
resizeImage(int & width,int & height,int maxw,int maxh,bool arbitraryImageScaling,int maxScaleMult)1627     void resizeImage( int & width, int & height, int maxw, int maxh, bool arbitraryImageScaling, int maxScaleMult )
1628     {
1629         if (width == 0 || height == 0) {
1630             // Avoid a floating point exception (division by zero) crash.
1631             printf("CRE WARNING: resizeImage(width=0 or height=0)\n");
1632             return;
1633         }
1634         if (width < 0 || height < 0) {
1635             // Avoid invalid resizing if we are provided with negative values
1636             printf("CRE WARNING: resizeImage(width<0 or height<0)\n");
1637             return;
1638         }
1639         if (maxw < 0 || maxh < 0) {
1640             // Avoid invalid resizing if we are provided with negative max values
1641             printf("CRE WARNING: resizeImage(maxw<0 or maxh<0)\n");
1642             return;
1643         }
1644         //CRLog::trace("Resize image (%dx%d) max %dx%d %s  *%d", width, height, maxw, maxh, arbitraryImageScaling ? "arbitrary" : "integer", maxScaleMult);
1645         if ( maxScaleMult<1 ) maxScaleMult = 1;
1646         if ( arbitraryImageScaling ) {
1647             int pscale_x = 1000 * maxw / width;
1648             int pscale_y = 1000 * maxh / height;
1649             int pscale = pscale_x < pscale_y ? pscale_x : pscale_y;
1650             int maxscale = maxScaleMult * 1000;
1651             if ( pscale>maxscale )
1652                 pscale = maxscale;
1653             height = height * pscale / 1000;
1654             width = width * pscale / 1000;
1655         } else {
1656             if (maxw == 0 || maxh == 0) {
1657                 // Avoid a floating point exception (division by zero) crash.
1658                 printf("CRE WARNING: resizeImage(maxw=0 or maxh=0)\n");
1659                 return;
1660             }
1661             int scale_div = 1;
1662             int scale_mul = 1;
1663             int div_x = (width * 1000 / maxw);
1664             int div_y = (height * 1000 / maxh);
1665             if ( maxScaleMult>=3 && height*3 < maxh - 20
1666                     && width*3 < maxw - 20 ) {
1667                 scale_mul = 3;
1668             } else if ( maxScaleMult>=2 && height * 2 < maxh - 20
1669                     && width * 2 < maxw - 20 ) {
1670                 scale_mul = 2;
1671             } else if (div_x>1 || div_y>1) {
1672                 if (div_x>div_y)
1673                     scale_div = div_x;
1674                 else
1675                     scale_div = div_y;
1676             }
1677             height = height * 1000 * scale_mul / scale_div;
1678             width = width * 1000 * scale_mul / scale_div;
1679         }
1680     }
1681 
1682     /// measure word
measureWord(formatted_word_t * word,int & width)1683     bool measureWord(formatted_word_t * word, int & width)
1684     {
1685         src_text_fragment_t * srcline = &m_pbuffer->srctext[word->src_text_index];
1686         LVFont * srcfont= (LVFont *) srcline->t.font;
1687         const lChar32 * str = srcline->t.text + word->t.start;
1688         // Avoid malloc by using static buffers. Returns false if word too long.
1689         #define MAX_MEASURED_WORD_SIZE 127
1690         static lUInt16 widths[MAX_MEASURED_WORD_SIZE+1];
1691         static lUInt8 flags[MAX_MEASURED_WORD_SIZE+1];
1692         if (word->t.len > MAX_MEASURED_WORD_SIZE)
1693             return false;
1694         lUInt32 hints = WORD_FLAGS_TO_FNT_FLAGS(word->flags);
1695         srcfont->measureText(
1696                 str,
1697                 word->t.len,
1698                 widths, flags,
1699                 0x7FFF,
1700                 '?',
1701                 srcline->lang_cfg,
1702                 srcline->letter_spacing,
1703                 false,
1704                 hints );
1705         width = widths[word->t.len-1];
1706         return true;
1707     }
1708 
1709     /// measure text of current paragraph
measureText()1710     void measureText()
1711     {
1712         int i;
1713         src_text_fragment_t * lastSrc = NULL;
1714         LVFont * lastFont = NULL;
1715         lInt16 lastLetterSpacing = 0;
1716         int start = 0;
1717         int lastWidth = 0;
1718         #define MAX_TEXT_CHUNK_SIZE 4096
1719         static lUInt16 widths[MAX_TEXT_CHUNK_SIZE+1];
1720         static lUInt8 flags[MAX_TEXT_CHUNK_SIZE+1];
1721         int tabIndex = -1;
1722         #if (USE_FRIBIDI==1)
1723             FriBidiLevel lastBidiLevel = 0;
1724             FriBidiLevel newBidiLevel;
1725         #endif
1726         #if (USE_HARFBUZZ==1)
1727             bool checkIfHarfbuzz = true;
1728             bool usingHarfbuzz = false;
1729             // Unicode script change (note: hb_script_t is uint32_t)
1730             lUInt32 prevScript = HB_SCRIPT_COMMON;
1731             hb_unicode_funcs_t* _hb_unicode_funcs = hb_unicode_funcs_get_default();
1732             bool prevSpecificScriptIsCursive = false;
1733         #endif
1734         int first_word_len = 0; // set to -1 when done with it (only used to check
1735                                 // for single char first word, see below)
1736         for ( i=0; i<=m_length; i++ ) {
1737             LVFont * newFont = NULL;
1738             lInt16 newLetterSpacing = 0;
1739             src_text_fragment_t * newSrc = NULL;
1740             if ( tabIndex<0 && m_text[i]=='\t' ) {
1741                 tabIndex = i;
1742             }
1743             bool isObject = false;
1744             bool prevCharIsObject = false;
1745             if ( i<m_length ) {
1746                 newSrc = m_srcs[i];
1747                 isObject = m_flags[i] & LCHAR_IS_OBJECT; // image, float or inline box
1748                 newFont = isObject ? NULL : (LVFont *)newSrc->t.font;
1749                 newLetterSpacing = newSrc->letter_spacing; // 0 for objects
1750                 #if (USE_HARFBUZZ==1)
1751                     // Check if we are using Harfbuzz kerning with the first font met
1752                     if ( checkIfHarfbuzz && newFont ) {
1753                         if ( newFont->getShapingMode() == SHAPING_MODE_HARFBUZZ ) {
1754                             usingHarfbuzz = true;
1755                         }
1756                         checkIfHarfbuzz = false;
1757                     }
1758                 #endif
1759             }
1760             if (i > 0)
1761                 prevCharIsObject = m_flags[i-1] & LCHAR_IS_OBJECT; // image, float or inline box
1762             if ( !lastFont )
1763                 lastFont = newFont;
1764             if (i == 0) {
1765                 lastSrc = newSrc;
1766                 lastLetterSpacing = newLetterSpacing;
1767             }
1768             bool srcChangedAndUsingHarfbuzz = false;
1769             #if (USE_HARFBUZZ==1)
1770                 // When 2 contiguous text nodes have the same font, we measure the
1771                 // whole combined segment. But when making words, we split on
1772                 // text node change. When using full harfbuzz, we don't want it
1773                 // to make ligatures at such text nodes boundaries: we need to
1774                 // measure each text node individually.
1775                 if ( usingHarfbuzz && newSrc != lastSrc && newFont && newFont == lastFont ) {
1776                     srcChangedAndUsingHarfbuzz = true;
1777                 }
1778             #endif
1779             bool bidiLevelChanged = false;
1780             int lastDirection = 0; // unknown
1781             #if (USE_FRIBIDI==1)
1782                 lastDirection = 1; // direction known: LTR if no bidi found
1783                 if (m_has_bidi) {
1784                     newBidiLevel = m_bidi_levels[i];
1785                     if (i == 0)
1786                         lastBidiLevel = newBidiLevel;
1787                     else if ( newBidiLevel != lastBidiLevel )
1788                         bidiLevelChanged = true;
1789                     if ( FRIBIDI_LEVEL_IS_RTL(lastBidiLevel) )
1790                         lastDirection = -1; // RTL
1791                 }
1792             #endif
1793             // When measuring with Harfbuzz, we should also split on Unicode script change,
1794             // even in a same bidi level (mixed hebrew and arabic in a single text node
1795             // should be handled as multiple segments, or Harfbuzz would shape the whole
1796             // text with the script of the first kind of text it meets).
1797             bool scriptChanged = false;
1798             #if (USE_HARFBUZZ==1)
1799                 if ( usingHarfbuzz && !isObject ) {
1800                     // While we have the hb_script here, we'll update m_flags[i]
1801                     // with LCHAR_LOCKED_SPACING if the script is cursive
1802                     hb_script_t script = hb_unicode_script(_hb_unicode_funcs, m_text[i]);
1803                     if ( script != HB_SCRIPT_COMMON && script != HB_SCRIPT_INHERITED && script != HB_SCRIPT_UNKNOWN ) {
1804                         if ( script != prevScript ) {
1805                             if ( prevScript != HB_SCRIPT_COMMON ) {
1806                                 // We previously met a real script, and we're meeting a new one
1807                                 scriptChanged = true;
1808                                 m_has_multiple_scripts = true;
1809                                 // When only a single script found in a paragraph, we don't need
1810                                 // to do that same kind of work in AddLine() to split on script
1811                                 // change, as there's only one.
1812                             }
1813                             prevSpecificScriptIsCursive = isHBScriptCursive(script);
1814                         }
1815                         prevScript = script; // Real script met
1816                         if ( prevSpecificScriptIsCursive )
1817                             m_flags[i] |= LCHAR_LOCKED_SPACING;
1818                     }
1819                     // else: assume HB_SCRIPT_COMMON/INHERITED/UNKNOWN, even among cursive glyphs,
1820                     // can be letter_space'd for justification.
1821                 }
1822             #endif
1823             // Note: some additional tweaks (like disabling letter-spacing when
1824             // a cursive script is detected) are done in measureText() and drawTextString().
1825 
1826             // Make a new segment to measure when any property changes from previous char
1827             if ( i>start && (   newFont != lastFont
1828                              || newLetterSpacing != lastLetterSpacing
1829                              || srcChangedAndUsingHarfbuzz
1830                              || bidiLevelChanged
1831                              || scriptChanged
1832                              || isObject
1833                              || prevCharIsObject
1834                              || i >= start+MAX_TEXT_CHUNK_SIZE
1835                              || (m_flags[i] & LCHAR_IS_TO_IGNORE)
1836                              || (m_flags[i] & LCHAR_MANDATORY_NEWLINE) ) ) {
1837                 // measure start..i-1 chars
1838                 bool measuring_object = m_flags[i-1] & LCHAR_IS_OBJECT;
1839                 if ( !measuring_object && lastFont ) { // text node
                        // In our context, we'll always have a non-NULL lastFont, but
                        // have it checked explicitly to avoid a clang-tidy warning.
1842                     // measure text
1843                     // Note: we provide text in the logical order, and measureText()
1844                     // will apply kerning in that order, which might be wrong if some
1845                     // text fragment happens to be RTL (except for Harfbuzz which will
1846                     // do the right thing).
1847                     int len = i - start;
1848                     // Provide direction and start/end of paragraph hints, for Harfbuzz
1849                     lUInt32 hints = 0;
1850                     if ( start == 0 ) hints |= LFNT_HINT_BEGINS_PARAGRAPH;
1851                     if ( i == m_length ) hints |= LFNT_HINT_ENDS_PARAGRAPH;
1852                     if ( lastDirection ) {
1853                         hints |= LFNT_HINT_DIRECTION_KNOWN;
1854                         if ( lastDirection < 0 )
1855                             hints |= LFNT_HINT_DIRECTION_IS_RTL;
1856                     }
1857                     int chars_measured = lastFont->measureText(
1858                             m_text + start,
1859                             len,
1860                             widths, flags,
1861                             0x7FFF, //pbuffer->width,
1862                             '?',
1863                             lastSrc->lang_cfg,
1864                             lastLetterSpacing,
1865                             false,
1866                             hints
1867                             );
1868                     if ( chars_measured<len ) {
1869                         // printf("######### chars_measured %d < %d\n", chars_measured, len);
1870                         // too long line
1871                         int newlen = chars_measured;
1872                         i = start + newlen;
1873                         len = newlen;
1874                         // As we're going to continue measuring this text node,
1875                         // reset newFont (the font of the next text node), so
1876                         // it does not replace lastFont at the end of the loop.
1877                         newFont = NULL;
1878                         // If we didn't measure the full text, src, letter spacing and
1879                         // bidi level are to stay the same
1880                         newSrc = lastSrc;
1881                         newLetterSpacing = lastLetterSpacing;
1882                         #if (USE_FRIBIDI==1)
1883                             if (m_has_bidi)
1884                                 newBidiLevel = lastBidiLevel;
1885                         #endif
1886                     }
1887 
1888                     // Deal with chars flagged as collapsed spaces:
1889                     // make each zero-width, so they are not accounted
1890                     // in the words width and position calculation.
1891                     // Note: widths[] (obtained from lastFont->measureText)
1892                     // and the m_widths[] we build have cumulative widths
1893                     // (width[k] is the length of the rendered text from
1894                     // chars 0 to k included).
1895                     // Also handle space width scaling if requested.
1896                     bool scale_space_width = m_pbuffer->space_width_scale_percent != 100;
1897                     if ( scale_space_width && lastSrc ) { // but not if <pre>
1898                         if ( lastSrc->flags & LTEXT_FLAG_PREFORMATTED )
1899                             scale_space_width = false;
1900                     }
1901                     int cumulative_width_removed = 0;
1902                     int prev_orig_measured_width = 0;
1903                     int char_width = 0; // current single char width
1904                     for ( int k=0; k<len; k++ ) {
1905                         // printf("%c %x f=%d w=%d\n", m_text[start+k], m_text[start+k], flags[k], widths[k]);
1906                         char_width = widths[k] - prev_orig_measured_width;
1907                         prev_orig_measured_width = widths[k];
1908                         if ( m_flags[start + k] & LCHAR_IS_COLLAPSED_SPACE) {
1909                             cumulative_width_removed += char_width;
1910                             // make it zero width: same cumulative width as previous char's
1911                             widths[k] = k>0 ? widths[k-1] : 0;
1912                             flags[k] = 0; // remove SPACE/WRAP/... flags
1913                         }
1914                         else if ( flags[k] & LCHAR_IS_SPACE ) {
1915                             // LCHAR_IS_SPACE has just been guessed, and is available in flags[], not yet in m_flags[]
1916                             if ( scale_space_width ) {
1917                                 int scaled_width = char_width * m_pbuffer->space_width_scale_percent / 100;
1918                                 // We can just account for the space reduction (or increase) in cumulative_width_removed
1919                                 cumulative_width_removed += char_width - scaled_width;
1920                             }
1921                             // remove, from the measured cumulative width, what we just, and previously, removed
1922                             widths[k] -= cumulative_width_removed;
1923                             if ( first_word_len >= 0 ) { // This is the space (or nbsp) after first word
1924                                 if ( first_word_len == 1 ) { // Previous word is a single char
1925                                     if ( k > 0 && isLeftPunctuation(m_text[k-1]) ) {
1926                                         // This space follows one of the common opening quotation marks or
1927                                         // dashes used to introduce a quotation or a part of a dialog:
1928                                         // https://en.wikipedia.org/wiki/Quotation_mark
1929                                         // Don't allow this space to change width, so text justification
1930                                         // doesn't move away next word, so that other similar paragraphs
1931                                         // get their real first words vertically aligned.
1932                                         flags[k] |= LCHAR_LOCKED_SPACING;
1933                                         // Also prevent that quotation mark or dash from getting
1934                                         // additional letter spacing for justification
1935                                         flags[k-1] |= LCHAR_LOCKED_SPACING;
1936                                         //
1937                                         // Note: we do this check here, with the text still in logical
1938                                         // order, so we get that working with RTL text too (where, in
1939                                         // visual order, we'll have lost track of which word is the
1940                                         // first word - untested though).
1941                                     }
1942                                 }
1943                                 first_word_len = -1; // We don't need to deal with this anymore
1944                             }
1945                         }
1946                         else {
1947                             // remove, from the measured cumulative width, what we previously removed
1948                             widths[k] -= cumulative_width_removed;
1949                             if ( first_word_len >= 0 ) {
1950                                 // Not a collapsed space and not a space: this will be part of first word
1951                                 first_word_len++;
1952                             }
1953                         }
1954                         m_widths[start + k] = lastWidth + widths[k];
1955                         #if (USE_LIBUNIBREAK==1)
1956                         // Reset these flags if lastFont->measureText() has set them, as we trust
1957                         // only libunibreak (which is more clever with hyphens, that our code flag
1958                         // with LCHAR_DEPRECATED_WRAP_AFTER).
1959                         flags[k] &= ~(LCHAR_ALLOW_WRAP_AFTER|LCHAR_DEPRECATED_WRAP_AFTER);
1960                         #endif
1961                         m_flags[start + k] |= flags[k];
1962                         // printf("  => w=%d\n", m_widths[start + k]);
1963                     }
1964 
1965                     /* If the following was ever needed, it was wrong to do it at this step
1966                      * of measureText(), as we then get additional fixed spacing that we may
1967                      * not need in some contexts. So don't do it: browsers do not.
1968                      * We'll handle that if LTEXT_FIT_GLYPHS when positioning words
1969                      * (not implemented for now.)
1970 
1971                     // This checks whether we're the last char of a text node, and if
1972                     // this node is italic, it adds the glyph italic overflow to the
1973                     // last char width.
1974                     // This might not be needed if the next text node is also italic,
1975                     // or if there is a space at start of next text node, and it might
1976                     // be needed at start of node too as the italic can overflow there too.
1977                     // It might also confuse our adjustment at start or end of line.
1978                     int dw = getAdditionalCharWidth(i-1, m_length);
1979                     if ( lastDirection < 0 ) // ignore it for RTL (as right side bearing is measured)
1980                         dw = 0;
1981                     if ( dw ) {
1982                         m_widths[i-1] += dw;
1983                         lastWidth += dw;
1984                     }
1985                     */
1986 
1987                     lastWidth += widths[len-1]; //len<m_length ? len : len-1];
1988                 }
1989                 else if ( measuring_object ) {
1990                     // We have start=i-1 and m_flags[i-1] & LCHAR_IS_OBJECT
1991                     if (start != i-1) {
1992                         crFatalError(126, "LCHAR_IS_OBJECT with start!=i-1");
1993                     }
1994                     if ( m_charindex[start] == FLOAT_CHAR_INDEX ) {
1995                         // Embedded floats can have a zero width in this process of
1996                         // text measurement. They'll be measured when positioned.
1997                         m_widths[start] = lastWidth;
1998                     }
1999                     else if ( m_charindex[start] == INLINEBOX_CHAR_INDEX ) {
2000                         // Render this inlineBox to get its width, similarly to how we
2001                         // render floats in addFloat(). See there for more comments.
2002                         src_text_fragment_t * src = m_srcs[start];
2003                         ldomNode * node = (ldomNode *) src->object;
2004                         bool already_rendered = false;
2005                         { // in its own scope, so this RenderRectAccessor is forgotten when left
2006                             RenderRectAccessor fmt( node );
2007                             if ( RENDER_RECT_HAS_FLAG(fmt, BOX_IS_RENDERED) ) {
2008                                 already_rendered = true;
2009                             }
2010                         }
2011                         if ( !already_rendered ) {
2012                             LVRendPageContext alt_context( NULL, m_pbuffer->page_height, false );
2013                             // inline-block and inline-table have a baseline, that renderBlockElement()
2014                             // will compute and give us back.
2015                             int baseline = REQ_BASELINE_FOR_INLINE_BLOCK;
2016                             if ( node->getChildNode(0)->getStyle()->display == css_d_inline_table ) {
2017                                 baseline = REQ_BASELINE_FOR_TABLE;
2018                             }
2019                             else if ( node->getParentNode()->getStyle()->display == css_d_ruby
2020                                         && node->getChildNode(0)->getRendMethod() == erm_table ) {
2021                                 // Ruby sub-tables don't carry css_d_inline_table, so check rend method;
2022                                 // (a table could be in a "display: inline-block" container, and it
2023                                 // would be erm_table - but we should still use REQ_BASELINE_FOR_INLINE_BLOCK,
2024                                 // so check that the parent is really css_d_ruby)
2025                                 baseline = REQ_BASELINE_FOR_TABLE;
2026                             }
2027                             // We render the inlineBox with the specified direction (from upper dir=), even
2028                             // if UNSET (and not with the direction determined by fribidi from the text).
2029                             // We provide 0,0 as the usable left/right overflows, so no glyph/hanging
2030                             // punctuation will leak outside the inlineBox (we might provide the widths
2031                             // of any blank space on either side, but here is too early as it might be
2032                             // shuffled by BiDi reordering.)
2033                             renderBlockElement( alt_context, node, 0, 0, m_pbuffer->width, 0, 0, m_specified_para_dir, &baseline );
2034                             // (renderBlockElement will ensure style->height if requested.)
2035 
2036                             // Note: this inline box we just rendered can have some overflow
2037                             // (i.e. if it has some negative margins). As these overflows are
2038                             // usually small, we'll handle that in LFormattedText::Draw() by
2039                             // just dropping the page rect clip when drawing it, so that the
2040                             // overflowing content might be drawn in the page margins.
2041                             // (Otherwise, we'd need to upgrade our frmline to store a line
2042                             // top and bottom overflows, use LTEXT_LINE_SPLIT_AVOID_BEFORE/AFTER
2043                             // to stick that line to previous or next, with the risk of bringing
2044                             // a large top margin to top of page just to display that small
2045                             // overflow in it...)
2046 
2047                             RenderRectAccessor fmt( node );
2048                             fmt.setBaseline(baseline);
2049                             RENDER_RECT_SET_FLAG(fmt, BOX_IS_RENDERED);
2050                             // We'll have alignLine() do the fmt.setX/Y once it is fully positioned
2051 
2052                             // We'd like to gather footnote links accumulated by alt_context
2053                             // (we do that for floats), but it's quite more complicated:
2054                             // we have them here too early, and we would need to associate
2055                             // the links to this "char" index, so needing in LVFormatter
2056                             // something like:
2057                             //   LVHashTable<lUInt32, lString32Collection> m_inlinebox_links
2058                             // When adding this inlineBox to a frmline, we could then get back
2059                             // the links, and associate them to the frmline (so, needing a
2060                             // new field holding a lString32Collection, which would hold
2061                             // all the links in all the inlineBoxs part of that line).
2062                             // Finally, in renderBlockElementEnhanced, when adding
2063                             // links for words, we'd also need to add the one found
2064                             // in the frmline's lString32Collection.
2065                             // A bit complicated, for a probably very rare case, so
2066                             // let's just forget it and not have footnotes from inlineBox
2067                             // among our in-page footnotes...
2068                         }
2069                         // (renderBlockElement() above may update our RenderRectAccessor(),
2070                         // so (re)get it only now)
2071                         RenderRectAccessor fmt( node );
2072                         int width = fmt.getWidth();
2073                         int height = fmt.getHeight();
2074                         int baseline = fmt.getBaseline();
2075                         m_srcs[start]->o.width = width;
2076                         m_srcs[start]->o.height = height;
2077                         m_srcs[start]->o.baseline = baseline;
2078                         lastWidth += width;
2079                         m_widths[start] = lastWidth;
2080                     }
2081                     else {
2082                         // measure image
2083                         // assume i==start+1
2084                         int width = m_srcs[start]->o.width;
2085                         int height = m_srcs[start]->o.height;
2086                         // Negative width and height mean the value is a % (of our final block width)
2087                         width = width<0 ? (-width * (m_pbuffer->width) / 100) : width;
2088                         height = height<0 ? (-height * (m_pbuffer->width) / 100) : height;
2089                         /*
2090                         printf("measureText img: o.w=%d o.h=%d > w=%d h=%d (max %d %d is_inline=%d) %s\n",
2091                             m_srcs[start]->o.width, m_srcs[start]->o.height, width, height,
2092                             m_pbuffer->width, m_max_img_height, m_length>1,
2093                             UnicodeToLocal(ldomXPointer((ldomNode*)m_srcs[start]->object, 0).toString()).c_str());
2094                         */
2095                         resizeImage(width, height, m_pbuffer->width, m_max_img_height, m_length>1);
2096                         if ( (m_srcs[start]->flags & LTEXT_STRUT_CONFINED) && m_allow_strut_confining ) {
2097                             // Text with "-cr-hint: strut-confined" might just be vertically shifted,
2098                             // but won't change widths. But images who will change height must also
2099                             // have their width reduced to keep their aspect ratio.
2100                             if ( height > m_pbuffer->strut_height ) {
2101                                 // Don't make image taller than initial strut height, so adjust width
2102                                 // to keep aspect ratio.
2103                                 width = width * m_pbuffer->strut_height / height;
2104                                 height = m_pbuffer->strut_height;
2105                             }
2106                         }
2107                         // Store the possibly resized dimensions back, so we don't have
2108                         // to recompute them later
2109                         m_srcs[start]->o.width = width;
2110                         m_srcs[start]->o.height = height;
2111                         lastWidth += width;
2112                         m_widths[start] = lastWidth;
2113                     }
2114                 }
2115                 else {
2116                     // Should not happen
2117                     crFatalError(127, "Attempting to measure Text node without a font");
2118                 }
2119                 start = i;
2120                 #if (USE_HARFBUZZ==1)
2121                     prevScript = HB_SCRIPT_COMMON; // Reset as next segment can start with any script
2122                 #endif
2123             }
2124             // Skip measuring chars to ignore.
2125             if ( m_flags[i] & LCHAR_IS_TO_IGNORE) {
2126                 m_widths[start] = lastWidth;
2127                 start++;
2128                 // This whole function here is very convoluted, it could really
2129                 // be made simpler and be more readable.
2130                 // This simple test here feels out of place, but it seems to
2131                 // work in the various cases (ignorable char at start, standalone,
2132                 // multiples, or at end).
2133             }
2134             //
2135             if (newFont)
2136                 lastFont = newFont;
2137             lastSrc = newSrc;
2138             lastLetterSpacing = newLetterSpacing;
2139             #if (USE_FRIBIDI==1)
2140                 if (m_has_bidi)
2141                     lastBidiLevel = newBidiLevel;
2142             #endif
2143         }
2144         if ( tabIndex >= 0 && m_srcs[0]->indent < 0) {
2145             // Used by obsolete rendering of css_d_list_item_legacy when css_lsp_outside,
2146             // where the marker width is provided as negative/hanging indent.
2147             int tabPosition = -m_srcs[0]->indent; // has been set to marker_width
2148             if ( tabPosition>0 && tabPosition > m_widths[tabIndex] ) {
2149                 int dx = tabPosition - m_widths[tabIndex];
2150                 for ( i=tabIndex; i<m_length; i++ )
2151                     m_widths[i] += dx;
2152             }
2153         }
2154 //        // debug dump
2155 //        lString32 buf;
2156 //        for ( int i=0; i<m_length; i++ ) {
2157 //            buf << U" " << lChar32(m_text[i]) << U" " << lString32::itoa(m_widths[i]);
2158 //        }
2159 //        TR("%s", LCSTR(buf));
2160     }
2161 
// NOTE(review): the uses of these two constants are outside this part of the
// file. Presumably words shorter than MIN_WORD_LEN_TO_HYPHENATE chars are not
// considered for hyphenation, and MAX_WORD_SIZE bounds the length of a word
// handed to the hyphenation code -- confirm at the use sites.
#define MIN_WORD_LEN_TO_HYPHENATE 4
#define MAX_WORD_SIZE 64
2164 
2165     /// align line: add or reduce widths of spaces to achieve desired text alignment
alignLine(formatted_line_t * frmline,int alignment,int rightIndent=0,bool hasInlineBoxes=false)2166     void alignLine( formatted_line_t * frmline, int alignment, int rightIndent=0, bool hasInlineBoxes=false ) {
2167         // Fetch current line x offset and max width
2168         int x_offset;
2169         int width = getAvailableWidthAtY(m_y, m_pbuffer->strut_height, x_offset);
2170         // printf("alignLine %d+%d < %d\n", frmline->x, frmline->width, width);
2171 
2172         // (frmline->x may be different from x_offset when non-zero text-indent)
2173         int usable_width = width - (frmline->x - x_offset) - rightIndent; // remove both sides indents
2174         int extra_width = usable_width - frmline->width;
2175 
2176         // We might want to prevent this when LangCfg == "de" (in german,
2177         // letter spacing is used for emphasis)
2178         if ( m_pbuffer->max_added_letter_spacing_percent > 0 // only if allowed
2179                         && alignment == LTEXT_ALIGN_WIDTH    // only when justifying
2180                         && frmline->word_count > 1           // not if single word (expanded, but not taking the full width is ugly)
2181                         && 100 * extra_width > m_pbuffer->unused_space_threshold_percent * usable_width ) {
            // extra_width is more than 5% of usable_width: we would be adding too much spacing.
            // But we're allowed to add some letter spacing into words to reduce spacing
            // between words.
2185             // (We do that only when this line is justified - we could do it too when the
2186             // line is left- or right-aligned, but we do not know here if this is not the
2187             // last line of a paragraph, left aligned, that would not need to be expanded.)
2188             // We loop and increase letter spacing, and we stop as soon as we are
2189             // under the unused_space_threshold_percent (5%). If some iteration
2190             // brings us below min_extra_width (spaces shrunk too much), we go
2191             // back to the previous letter_spacing (which may put us back with
2192             // the unused extra space > 5%, but that is preferable).
2193             //
2194             // First, gather some info
2195             int min_extra_width = 0; // negative value (from the allowed spaces condensing)
2196             int max_font_size = 0;
2197             for ( int i=0; i<(int)frmline->word_count; i++ ) {
2198                 formatted_word_t * word = &frmline->words[i];
2199                 if ( word->distinct_glyphs <= 0 ) // image, inline box, cursive word
2200                     continue;
2201                 min_extra_width += word->min_width - word->width;
2202                 src_text_fragment_t * srcline = &m_pbuffer->srctext[word->src_text_index];
2203                 LVFont * font = (LVFont *)srcline->t.font;
2204                 int font_size = font->getSize();
2205                 if ( font_size > max_font_size )
2206                     max_font_size = font_size;
2207                 // Store this word font size in this temporary slot (that is not used anymore)
2208                 word->_top_to_baseline = font_size;
2209             }
2210             int added_spacing = 0;
2211             int letter_spacing_ratio = 0;
2212             while ( true ) {
2213                 letter_spacing_ratio++;
2214                 added_spacing = 0;
2215                 bool can_try_larger = false;
2216                 for ( int i=0; i<(int)frmline->word_count; i++ ) {
2217                     formatted_word_t * word = &frmline->words[i];
2218                     if ( word->distinct_glyphs <= 0 ) // image, inline box, cursive word
2219                         continue;
2220                     // Store previous value in _baseline_to_bottom (also not used anymore) in case of
2221                     // excess and the need to use previous value (so we don't have to recompute it)
2222                     word->_baseline_to_bottom = word->added_letter_spacing;
2223                     // We apply letter_spacing proportionally to the font size (words
2224                     // in a smaller font size won't get any in the loop first steps)
2225                     int word_font_size = word->_top_to_baseline;
2226                     word->added_letter_spacing = letter_spacing_ratio * word_font_size / max_font_size;
2227                     int word_max_letter_spacing = word_font_size * m_pbuffer->max_added_letter_spacing_percent / 100;
2228                     if ( word->added_letter_spacing > word_max_letter_spacing  )
2229                         word->added_letter_spacing = word_max_letter_spacing;
2230                     else
2231                         can_try_larger = true;
2232                     added_spacing += word->distinct_glyphs * word->added_letter_spacing;
2233                 }
2234                 int new_extra_width = extra_width - added_spacing;
2235                 if ( new_extra_width < min_extra_width ) { // too much added, not enough for spaces
2236                     // Get back values from previous step (which was fine)
2237                     added_spacing = 0;
2238                     for ( int i=0; i<(int)frmline->word_count; i++ ) {
2239                         formatted_word_t * word = &frmline->words[i];
2240                         if ( word->distinct_glyphs <= 0 ) // image, inline box, cursive word
2241                             continue;
2242                         word->added_letter_spacing = word->_baseline_to_bottom;
2243                         added_spacing += word->distinct_glyphs * word->added_letter_spacing;
2244                     }
2245                     break;
2246                 }
2247                 if ( !can_try_larger ) // all allowed max letter_spacing reached
2248                     break;
2249                 if ( 100 * new_extra_width <= m_pbuffer->unused_space_threshold_percent * usable_width ) {
2250                     // < 5%, we're good
2251                     break;
2252                 }
2253             }
2254             if ( added_spacing ) {
2255                 // Fix up words positions and widths
2256                 int shift_x = 0;
2257                 for ( int i=0; i<(int)frmline->word_count; i++ ) {
2258                     formatted_word_t * word = &frmline->words[i];
2259                     if ( word->distinct_glyphs > 0 ) {
2260                         int added_width = word->distinct_glyphs * word->added_letter_spacing;
2261                         if ( i == frmline->word_count-1 ) {
2262                             // For the last word on a justified line, we want to not see
2263                             // any letter_spacing added after last glyph.
2264                             // The font will draw it, but we just want to position this
2265                             // word so it's drawn outside: just remove one letter_spacing.
2266                             // But not if this last word gets a hyphen, or the hyphen
2267                             // (not part of the word but added when drawing) would be
2268                             // shifted to the left.
2269                             if ( !(word->flags & LTEXT_WORD_CAN_HYPH_BREAK_LINE_AFTER) ) {
2270                                 added_width -= word->added_letter_spacing;
2271                             }
2272                         }
2273                         word->width += added_width;
2274                         word->min_width += added_width;
2275                         word->x += shift_x;
2276                         shift_x += added_width;
2277                         frmline->width += added_width;
2278                         extra_width -= added_width;
2279                     }
2280                     else {
2281                         // Images, inline box, cursive words still need to be shifted
2282                         word->x += shift_x;
2283                     }
2284                 }
2285             }
2286         }
2287         extra_width = usable_width - frmline->width;
2288 
2289         if ( extra_width < 0 ) {
2290             // line is too wide
2291             // reduce spaces to fit line
2292             int extraSpace = -extra_width;
2293             int totalSpace = 0;
2294             int i;
2295             for ( i=0; i<(int)frmline->word_count-1; i++ ) {
2296                 if ( frmline->words[i].flags & LTEXT_WORD_CAN_ADD_SPACE_AFTER ) {
2297                     int dw = frmline->words[i].width - frmline->words[i].min_width;
2298                     if (dw>0) {
2299                         totalSpace += dw;
2300                     }
2301                 }
2302             }
2303             if ( totalSpace>0 ) {
2304                 int delta = 0;
2305                 for ( i=0; i<(int)frmline->word_count; i++ ) {
2306                     frmline->words[i].x -= delta;
2307                     if ( frmline->words[i].flags & LTEXT_WORD_CAN_ADD_SPACE_AFTER ) {
2308                         int dw = frmline->words[i].width - frmline->words[i].min_width;
2309                         if (dw>0 && totalSpace>0) {
2310                             int n = dw * extraSpace / totalSpace;
2311                             totalSpace -= dw;
2312                             extraSpace -= n;
2313                             delta += n;
2314                             frmline->width -= n;
2315                         }
2316                     }
2317                 }
2318             }
2319         }
2320         else if ( alignment==LTEXT_ALIGN_LEFT ) {
2321             // no additional alignment necessary
2322         }
2323         else if ( alignment==LTEXT_ALIGN_CENTER ) {
2324             frmline->x += extra_width / 2;
2325         }
2326         else if ( alignment==LTEXT_ALIGN_RIGHT ) {
2327             frmline->x += extra_width;
2328         }
2329         else {
2330             // LTEXT_ALIGN_WIDTH
2331             if ( extra_width > 0 ) {
2332                 // distribute additional space
2333                 int extraSpace = extra_width;
2334                 int addSpacePoints = 0;
2335                 int i;
2336                 for ( i=0; i<(int)frmline->word_count-1; i++ ) {
2337                     if ( frmline->words[i].flags & LTEXT_WORD_CAN_ADD_SPACE_AFTER )
2338                         addSpacePoints++;
2339                 }
2340                 if ( addSpacePoints>0 ) {
2341                     int addSpaceDiv = extraSpace / addSpacePoints;
2342                     int addSpaceMod = extraSpace % addSpacePoints;
2343                     int delta = 0;
2344                     for ( i=0; i<(int)frmline->word_count; i++ ) {
2345                         frmline->words[i].x += delta;
2346                         if ( frmline->words[i].flags & LTEXT_WORD_CAN_ADD_SPACE_AFTER ) {
2347                             delta += addSpaceDiv;
2348                             if ( addSpaceMod>0 ) {
2349                                 addSpaceMod--;
2350                                 delta++;
2351                             }
2352                         }
2353                     }
2354                     frmline->width += extraSpace;
2355                 }
2356             }
2357         }
2358         if ( hasInlineBoxes ) {
2359             // Now that we have the final x of each word, we can update
2360             // the RenderRectAccessor x/y of each word that is a inlineBox
2361             // (needed to correctly draw highlighted text in them).
2362             for ( int i=0; i<frmline->word_count; i++ ) {
2363                 if ( frmline->words[i].flags & LTEXT_WORD_IS_INLINE_BOX ) {
2364                     formatted_word_t * word = &frmline->words[i];
2365                     src_text_fragment_t * srcline = &m_pbuffer->srctext[word->src_text_index];
2366                     ldomNode * node = (ldomNode *) srcline->object;
2367                     RenderRectAccessor fmt( node );
2368                     fmt.setX( frmline->x + word->x );
2369                     fmt.setY( frmline->y + frmline->baseline - word->o.baseline + word->y );
2370                 }
2371             }
2372         }
2373     }
2374 
2375     /// split line into words, add space for width alignment
addLine(int start,int end,int x,src_text_fragment_t * para,bool first,bool last,bool preFormattedOnly,bool isLastPara,bool hasInlineBoxes)2376     void addLine( int start, int end, int x, src_text_fragment_t * para, bool first, bool last, bool preFormattedOnly, bool isLastPara, bool hasInlineBoxes )
2377     {
2378         // No need to do some x-alignment work if light formatting, when we
2379         // are only interested in computing block height and positioning
2380         // floats: 'is_reusable' will be unset, and any attempt at reusing
2381         // this formatting for drawing will cause a non-light re-formatting.
2382         // Except when there are inlineBoxes in the text: we need to correctly
2383         // position them to have their x/y saved in their RenderRectAccessor
2384         // (so getRect() can work accurately before the page is drawn).
2385         bool light_formatting = m_pbuffer->light_formatting && !hasInlineBoxes;
2386 
2387         // todo: we can avoid some more work below when light_formatting (and
2388         // possibly the BiDi re-ordering we need for ordering footnotes, as
2389         // if we don't re-order, we'll always have them in the logical order,
2390         // and we can just append them in lvrend.cpp instead of checking
2391         // where to insert them if RTL - but we'd still have to do that
2392         // if some inlinebox prevent doing light formatting :(.)
2393 
2394         // int maxWidth = getCurrentLineWidth(); // if needed for debug printf() below
2395 
2396         // Provided x is the line indent: as we're making words in the visual
2397         // order here, it will be line start x for LTR paragraphs; but for RTL
2398         // ones, we'll handle it as some reserved space on the right.
2399         int rightIndent = 0;
2400         if ( m_para_dir_is_rtl ) {
2401             rightIndent = x;
2402             // maxWidth -= x; // put x/first char indent on the right: reduce width
2403             x = getCurrentLineX(); // use shift induced by left floats
2404         }
2405         else {
2406             x += getCurrentLineX(); // add shift induced by left floats
2407         }
2408         // Get overflows, needed to position first and last words
2409         int usable_left_overflow;
2410         int usable_right_overflow;
2411         getCurrentLineUsableOverflows(usable_left_overflow, usable_right_overflow);
2412 
2413         // Find out text alignment to ensure for this line
2414         int align = para->flags & LTEXT_FLAG_NEWLINE;
2415 
2416         // Note that with Firefox, text-align-last applies to the first line when
2417         // it is also the last (so, it is used for a single line paragraph).
2418         // Also, when "text-align-last: justify", Firefox does justify the last
2419         // (or single) line.
2420         // We support private keywords to not behave like that for standalone lines.
2421         bool if_not_first = para->flags & LTEXT_LAST_LINE_IF_NOT_FIRST;
2422         if ( last && ( if_not_first ? !first : true ) ) { // Last line of paragraph (it is also first when standalone)
2423             // https://drafts.csswg.org/css-text-3/#text-align-last-property
2424             //  "If 'auto' is specified, content on the affected line is aligned
2425             //  per text-align-all unless text-align-all is set to justify,
2426             //  in which case it is start-aligned. All other values are
2427             //  interpreted as described for text-align. "
2428             int last_align = (para->flags >> LTEXT_LAST_LINE_ALIGN_SHIFT) & LTEXT_FLAG_NEWLINE;
2429             if ( last_align ) {
2430                 // specified (or inherited) to something other than 'auto': use it
2431                 align = last_align;
2432             }
2433             else { // text-align-last: auto (inherited default)
2434                 // Keep using value from text-align, except when it is set to 'justify'
2435                 if ( align == LTEXT_ALIGN_WIDTH ) {
2436                     // Justification is in use, and this line is the last
2437                     // (or a single line): align it to the left (or to the
2438                     // right if FriBiDi detected this paragraph is RTL)
2439                     align = m_para_dir_is_rtl ? LTEXT_ALIGN_RIGHT : LTEXT_ALIGN_LEFT;
2440                 }
2441             }
2442         }
2443 
2444         // Override it for PRE lines (or in case align has not been set)
2445         if ( preFormattedOnly || !align )
2446             align = m_para_dir_is_rtl ? LTEXT_ALIGN_RIGHT : LTEXT_ALIGN_LEFT;
2447 
2448         TR("addLine(%d, %d) y=%d  align=%d", start, end, m_y, align);
2449         // printf("addLine(%d, %d) y=%d  align=%d maxWidth=%d\n", start, end, m_y, align, maxWidth);
2450 
2451         // Note: parameter needReduceSpace and variable splitBySpaces (which
2452         // was always true) have been removed, as we always split by space:
2453         // even if we end up not changing spaces' widths, we need to make
2454         // individual words (as they may use different fonts, and for many
2455         // other reasons).
2456 
2457         // If BiDi detected, re-order line from logical order into visual order
2458         bool trustDirection = false;
2459         bool lineIsBidi = false;
2460         #if (USE_FRIBIDI==1)
2461         trustDirection = true;
2462         bool restore_last_width = false;
2463         int last_width_to_restore;
2464         if (m_has_bidi) {
2465             // We don't want to mess too much with the follow up code, so we
2466             // do the following, which might be expensive for full RTL documents:
2467             // we just reorder all chars, flags, width and references to
2468             // the original nodes, according to how fribidi decides the visual
2469             // order of chars should be.
2470             // We can mess with the m_* arrays (the range that spans the current
2471             // line) as they won't be used anymore after this function.
2472             // Except for the width of the last char (that we may modify
2473             // while zeroing the widths of collapsed spaces) that will be
2474             // used as the starting width of next line. We'll restore it
2475             // when done with this line.
2476             last_width_to_restore = m_widths[end-1];
2477             restore_last_width = true;
2478 
2479             // From fribidi documentation:
2480             // fribidi_reorder_line() reorders the characters in a line of text
2481             // from logical to final visual order. Note:
2482             // - the embedding levels may change a bit
2483             // - the bidi types and embedding levels are not reordered
2484             // - last parameter is a map of string indices which is reordered to
2485             //   reflect where each glyph ends up
2486             //
2487             // For re-ordering, we need some temporary buffers.
2488             // We use static buffers, and don't bother with dynamic buffers
2489             // in case we would overflow the static buffers.
2490             // (4096, if some glyphs spans 4 composing unicode codepoints, would
2491             // make 1000 glyphs, which with a small font of width 4px, would
2492             // allow them to be displayed on a 4000px screen.
2493             // Increase that if not enough.)
2494             #define MAX_LINE_SIZE 4096
2495             if ( end-start > MAX_LINE_SIZE ) {
2496                 // Show a warning and truncate to avoid a segfault.
2497                 printf("CRE WARNING: bidi processing line overflow (%d > %d)\n", end-start, MAX_LINE_SIZE);
2498                 end = start + MAX_LINE_SIZE;
2499             }
2500             static lChar32 bidi_tmp_text[MAX_LINE_SIZE];
2501             static lUInt16 bidi_tmp_flags[MAX_LINE_SIZE];
2502             static src_text_fragment_t * bidi_tmp_srcs[MAX_LINE_SIZE];
2503             static lUInt16 bidi_tmp_charindex[MAX_LINE_SIZE];
2504             static int     bidi_tmp_widths[MAX_LINE_SIZE];
2505             // Map of string indices which is reordered to reflect where each
2506             // glyph ends up. Note that fribidi will access it starting
2507             // from 0 (and not from 'start'): this would need us to allocate
2508             // it the size of the full m_text (instead of MAX_LINE_SIZE)!
2509             // But we can trick that by providing a fake start address,
2510             // shifted by 'start' (which is ugly and could cause a segfault
2511             // if some other part than [start:end] would be accessed, but
2512             // we know fribidi doesn't - by contract as it shouldn't reorder
2513             // any other part except between start:end).
2514             static FriBidiStrIndex bidi_indices_map[MAX_LINE_SIZE];
2515             for (int i=start; i<end; i++) {
2516                 bidi_indices_map[i-start] = i;
2517             }
2518             FriBidiStrIndex * _virtual_bidi_indices_map = bidi_indices_map - start;
2519 
2520             FriBidiFlags bidi_flags = 0;
2521             // We're not using bidi_flags=FRIBIDI_FLAG_REORDER_NSM (which is mostly
2522             // needed for code drawing the resulting reordered result) as it would
2523             // mess with our indices map, and the final result would be messy.
2524             // (Looks like even Freetype drawing does not need the BIDI rule
2525             // L3 (combining-marks-must-come-after-base-char) as it draws finely
2526             // RTL when we draw the combining marks before base char.)
2527             int max_level = fribidi_reorder_line(bidi_flags, m_bidi_ctypes, end-start, start,
2528                                 m_para_bidi_type, m_bidi_levels, NULL, _virtual_bidi_indices_map);
2529             if (max_level > 1) {
2530                 lineIsBidi = true;
2531                 // bidi_tmp_* will contain things in the visual order, from which
2532                 // we will make words (exactly as if it had been LTR that way)
2533                 for (int i=start; i<end; i++) {
2534                     int bidx = i - start;
2535                     int j = bidi_indices_map[bidx]; // original indice in m_text, m_flags, m_bidi_levels...
2536                     bidi_tmp_text[bidx] = m_text[j];
2537                     bidi_tmp_srcs[bidx] = m_srcs[j];
2538                     bidi_tmp_charindex[bidx] = m_charindex[j];
2539                     // Add a flag if this char is part of a RTL segment
2540                     if ( FRIBIDI_LEVEL_IS_RTL( m_bidi_levels[j] ) )
2541                         m_flags[j] |= LCHAR_IS_RTL;
2542                     else
2543                         m_flags[j] &= ~LCHAR_IS_RTL;
2544                     bidi_tmp_flags[bidx] = m_flags[j];
2545                     // bidi_tmp_widths will contain each individual char width, that we
2546                     // compute from the accumulated width. We'll make it a new
2547                     // accumulated width in next loop
2548                     bidi_tmp_widths[bidx] = m_widths[j] - (j > 0 ? m_widths[j-1] : 0);
2549                     // todo: we should probably also need to update/move the
2550                     // LCHAR_IS_CLUSTER_TAIL flag... haven't really checked
2551                     // (might be easier or harder due to the fact that we
2552                     // don't use FRIBIDI_FLAG_REORDER_NSM?)
2553                 }
2554 
2555                 // It looks like fribidi is quite good enough at taking
2556                 // care of collapsed spaces! No real extra space seen
2557                 // when testing, except at start and end.
2558                 // Anyway, we handle collapsed spaces and their widths
2559                 // as we would expect them to be with LTR text just out
2560                 // of copyText().
2561                 bool prev_was_space = true; // start as true to make leading spaces collapsed
2562                 int prev_non_collapsed_space = -1;
2563                 int w = start > 0 ? m_widths[start-1] : 0;
2564                 for (int i=start; i<end; i++) {
2565                     int bidx = i - start;
2566                     m_text[i] = bidi_tmp_text[bidx];
2567                     m_flags[i] = bidi_tmp_flags[bidx];
2568                     m_srcs[i] = bidi_tmp_srcs[bidx];
2569                     m_charindex[i] = bidi_tmp_charindex[bidx];
2570                     // Handle consecutive spaces at start and in the text
2571                     if ( (m_srcs[i]->flags & LTEXT_FLAG_PREFORMATTED) ) {
2572                         prev_was_space = false;
2573                         prev_non_collapsed_space = -1;
2574                         m_flags[i] &= ~LCHAR_IS_COLLAPSED_SPACE;
2575                     }
2576                     else {
2577                         if ( m_text[i] == ' ' ) {
2578                             if (prev_was_space) {
2579                                 m_flags[i] |= LCHAR_IS_COLLAPSED_SPACE;
2580                                 // Put this (now collapsed, but possibly previously non-collapsed)
2581                                 // space width on the preceding now non-collapsed space
2582                                 int w_orig = bidi_tmp_widths[bidx];
2583                                 bidi_tmp_widths[bidx] = 0;
2584                                 if ( prev_non_collapsed_space >= 0 ) {
2585                                     m_widths[prev_non_collapsed_space] += w_orig;
2586                                     w += w_orig;
2587                                 }
2588                             }
2589                             else {
2590                                 m_flags[i] &= ~LCHAR_IS_COLLAPSED_SPACE;
2591                                 prev_was_space = true;
2592                                 prev_non_collapsed_space = i;
2593                             }
2594                         }
2595                         else {
2596                             prev_was_space = false;
2597                             prev_non_collapsed_space = -1;
2598                             m_flags[i] &= ~LCHAR_IS_COLLAPSED_SPACE;
2599                         }
2600                     }
2601                     w += bidi_tmp_widths[bidx];
2602                     m_widths[i] = w;
2603                     // printf("%x:f%x,w%d ", m_text[i], m_flags[i], m_widths[i]);
2604                 }
2605                 // Also flag as collapsed the trailing spaces on the reordered line
2606                 if (prev_non_collapsed_space >= 0) {
2607                     int prev_width = prev_non_collapsed_space > 0 ? m_widths[prev_non_collapsed_space-1] :0 ;
2608                     for (int i=prev_non_collapsed_space; i<end; i++) {
2609                         m_flags[i] |= LCHAR_IS_COLLAPSED_SPACE;
2610                         m_widths[i] = prev_width;
2611                     }
2612                 }
2613 
2614             }
2615             // Note: we reordered m_text and others, which are used from now on only
2616             // to properly split words. When drawing the text, these are no more used,
2617             // and the string is taken directly from the copy of the text node string
2618             // stored as src_text_fragment_t->t.text, so FreeType and HarfBuzz will
2619             // get the text in logical order (as HarfBuzz expects it).
2620             // Also, when parens/brackets are involved in RTL text, only HarfBuzz
2621             // will correctly mirror them. When not using Harfbuzz, we'll mirror
2622             // mirrorable chars below when a word is RTL.
2623         }
2624         #endif
2625 
2626         // Note: not certain why or how useful this lastnonspace (used below) is.
2627         int lastnonspace = 0;
2628         for ( int k=end-1; k>=start; k-- ) {
2629             // Also not certain if we should skip floats or LCHAR_IS_OBJECT
2630             if ( !(m_flags[k] & LCHAR_IS_SPACE) ) {
2631                 lastnonspace = k;
2632                 break;
2633             }
2634         }
2635 
2636         // Create/add a new line to buffer
2637         formatted_line_t * frmline =  lvtextAddFormattedLine( m_pbuffer );
2638         frmline->y = m_y;
2639         frmline->x = x;
2640         // This new line starts with a minimal height and baseline, as set from the
2641         // paragraph parent node (by lvrend.cpp renderFinalBlock()). These may get
2642         // increased if some inline elements need more, but not decreased.
2643         frmline->height = m_pbuffer->strut_height;
2644         frmline->baseline = m_pbuffer->strut_baseline;
2645         if (m_has_ongoing_float)
2646             // Avoid page split when some float that started on a previous line
2647             // still spans this line
2648             frmline->flags |= LTEXT_LINE_SPLIT_AVOID_BEFORE;
2649         if ( lineIsBidi ) {
2650             // Flag that line, so createXPointer() and getRect() know it's not
2651             // a regular one and can't assume words and text nodes are linear.
2652             frmline->flags |= LTEXT_LINE_IS_BIDI;
2653         }
2654         if ( m_para_dir_is_rtl ) {
2655             frmline->flags |= LTEXT_LINE_PARA_IS_RTL;
2656             // Might be useful (we may have a bidi line in a LTR paragraph).
2657             // (Used for ordering in-page footnote links)
2658         }
2659 
2660         if ( preFormattedOnly && (start == end) ) {
2661             // Specific for preformatted text when consecutive \n\n:
2662             // start == end, and we have no source text to point to,
2663             // but we should draw an empty line (we can't just simply
2664             // increase m_y and m_pbuffer->height, we need to have
2665             // a frmline as Draw() loops thru these lines - a frmline
2666             // with no word will do).
2667             src_text_fragment_t * srcline = m_srcs[start];
2668             if (srcline->interval > 0) { // should always be the case
2669                 if (srcline->interval > frmline->height) // keep strut_height if greater
2670                     frmline->height = srcline->interval;
2671             }
2672             else { // fall back to line-height: normal
2673                 LVFont * font = (LVFont*)srcline->t.font;
2674                 frmline->height = font->getHeight();
2675             }
2676             m_y += frmline->height;
2677             m_pbuffer->height = m_y;
2678             return;
2679         }
2680 
2681         src_text_fragment_t * lastSrc = m_srcs[start];
2682         // We can just skip FLOATs in addLine(), as they were taken
2683         // care of in processParagraph() to just reduce the available width
2684         // So skip floats at start:
2685         while (lastSrc && (lastSrc->flags & LTEXT_SRC_IS_FLOAT) ) {
2686             start++;
2687             lastSrc = m_srcs[start];
2688         }
2689         if (!lastSrc) { // nothing but floats
2690             // A line has already been added: just make it zero-height.
2691             frmline->height = 0;
2692             return;
2693         }
2694         // Ignore space at start of line (this rarely happens, as line
2695         // splitting discards the space on which a split is made - but it
2696         // can happen in other rare wrap cases like lastDeprecatedWrap)
2697         if ( (m_flags[start] & LCHAR_IS_SPACE) && !(lastSrc->flags & LTEXT_FLAG_PREFORMATTED) ) {
2698             // But do it only if we're going to stay in same text node (if not
2699             // the space may have some reason - there's sometimes a no-break-space
2700             // before an image)
2701             if (start < end-1 && m_srcs[start+1] == m_srcs[start]) {
2702                 start++;
2703                 lastSrc = m_srcs[start];
2704             }
2705         }
2706 
2707         // Some words vertical-align positioning might need to be fixed
2708         // only once the whole line has been laid out
2709         bool delayed_valign_computation = false;
2710 
2711         // Make out words, making a new one when some properties change
2712         int wstart = start;
2713         bool firstWord = true;
2714         bool lastWord = false;
2715         bool lastIsSpace = false;
2716         bool isSpace = false;
2717         bool space = false;
2718         // Bidi
2719         bool lastIsRTL = false;
2720         bool isRTL = false;
2721         bool bidiLogicalIndicesShift = false;
2722         // Unicode script change
2723         bool scriptChanged = false;
2724         #if (USE_HARFBUZZ==1)
2725             lUInt32 prevScript = HB_SCRIPT_COMMON;
2726             hb_unicode_funcs_t* _hb_unicode_funcs = hb_unicode_funcs_get_default();
2727         #endif
2728         // Ignorables
2729         bool isToIgnore = false;
2730         // Used when LTEXT_FIT_GLYPHS and preceding or following word is an image or inline box
2731         int prev_word_overflow = 0;
2732         bool prev_word_is_object = false;
2733         for ( int i=start; i<=end; i++ ) { // loop thru each char
2734             src_text_fragment_t * newSrc = i<end ? m_srcs[i] : NULL;
2735             if ( i<end ) {
2736                 isSpace = (m_flags[i] & LCHAR_IS_SPACE)!=0; // current char is a space
2737                 space = lastIsSpace && !isSpace && i<=lastnonspace;
2738                 // /\ previous char was a space, current char is not a space
2739                 //     Note: last check was initially "&& i<lastnonspace", but with
2740                 //     this, a line containing "thing inside a " (ending with a
2741                 //     1-char word) would be considered only 2 words ("thing" and
2742                 //     "inside a") and, when justify'ing text, space would not be
2743                 //     distributed between "inside" and "a"...
2744                 //     Not really sure what's the purpose of this last test...
2745                 #if (USE_HARFBUZZ==1)
2746                     // To be done only when we met multiple scripts in a same paragraph
2747                     // while measuring (which we checked only when using Harfbuzz kerning)
2748                     if ( m_has_multiple_scripts && !(m_flags[i] & LCHAR_IS_OBJECT) ) {
2749                         hb_script_t script = hb_unicode_script(_hb_unicode_funcs, m_text[i]);
2750                         if ( script != HB_SCRIPT_COMMON && script != HB_SCRIPT_INHERITED && script != HB_SCRIPT_UNKNOWN ) {
2751                             if ( prevScript != HB_SCRIPT_COMMON && script != prevScript ) {
2752                                 scriptChanged = true;
2753                             }
2754                             prevScript = script;
2755                         }
2756                     }
2757                 #endif
2758                 isToIgnore = m_flags[i] & LCHAR_IS_TO_IGNORE;
2759                 isRTL = m_flags[i] & LCHAR_IS_RTL;
2760                 bidiLogicalIndicesShift = false;
2761                 if ( lineIsBidi && isRTL == lastIsRTL && i > 0) {
2762                     // The bidi algo may have reordered logical chars, and
2763                     // put side by side same-direction chars that were
2764                     // not consecutive in the original logical text.
2765                     // We need to make a new word when we see these
2766                     // reordered indices shifting by more than +/- 1,
2767                     // as when drawing the words, we'll use the source
2768                     // text nodes' logical text.
2769                     if ( isRTL ) { // indices should be decreasing by 1
2770                         if ( m_charindex[i] != m_charindex[i-1] - 1 )
2771                             bidiLogicalIndicesShift = true;
2772                     }
2773                     else { // LTR: indices should be increasing by 1
2774                         if ( m_charindex[i] != m_charindex[i-1] + 1 )
2775                             bidiLogicalIndicesShift = true;
2776                     }
2777                     // (m_charindex[i-1] might be bad when i-1 is from
2778                     // another text node, or an object - but no need
2779                     // for checking that as it will have triggered
2780                     // another condition for making a word.)
2781                 }
2782             }
2783             else {
2784                 lastWord = true;
2785             }
2786 
2787             // This loop goes thru each char, and create a new word when it meets:
2788             // - a non-space char that follows a space (this non-space char will be
2789             //   the first char of next word).
2790             // - a char from a different text node (eg: "<span>first</span>next")
2791             // - a CJK char (whether or not preceded by a space): each becomes a word
2792             // - the end of text, which makes the last word
2793             //
2794             // It so grabs all spaces (0 or 1 with our XML parser) following
2795             // the current real word, and includes it in the word. So a word
2796             // includes its following space if any, but should not start with
2797             // a space. The trailing space is needed for the word processing
2798             // code below to properly set flags and guess the amount of spaces
2799             // that can be increased or reduced for proper alignment.
2800             // Also, these words being then stacked to each other to build the
2801             // line, the ending space should be kept to be drawn and seen
2802             // between each word (some words may not be separated by space when
2803             // from different text nodes or CJK).
2804             // Note: a "word" in our current context is just a unit of text that
2805             // should be rendered together, and can be moved on the x-axis for
2806             // alignment purpose (the 2 french words "qu'autrefois" make a
2807             // single "word" here, the single word "quelconque", if hyphenated
2808             // as "quel-conque" will make one "word" on this line and another
2809             // "word" on the next line.
2810             //
2811             // In a sequence of collapsing spaces, only the first was kept as
2812             // a LCHAR_IS_SPACE. The following ones were flagged as
2813             // LCHAR_IS_COLLAPSED_SPACE, and thus are not LCHAR_IS_SPACE.
2814             // With the algorithm described just above, these collapsed spaces
2815             // can then only be at the start of a word.
2816             // Their calculated width has been made to 0, but the drawing code
2817             // (LFormattedText::Draw() below) will use the original srctext text
2818             // to draw the string: we can't override this original text (it is
2819             // made read-only with the use of 'const') to replace the space with
2820             // a zero-width char (which may not be zero-width in a monospace font).
2821             // So, we need to adjust each word start index to get rid of the
2822             // collapsed spaces.
2823             //
            // Note: if we were to make a space between 2 CJK chars a collapsed
2825             // space, we would have it at the end of each word, which may
2826             // be fine without additional work needed (not verified):
2827             // having a zero-width, it would not change the width of the
2828             // CJKchar/word, and would not affect the next CJKchar/word position.
2829             // It would be drawn as a space, but the next CJKchar would override
2830             // it when it is drawn next.
2831 
2832             if ( i>wstart && (   newSrc!=lastSrc
2833                               || space
2834                               || lastWord
2835                               || isCJKIdeograph(m_text[i])
2836                               || isRTL != lastIsRTL
2837                               || bidiLogicalIndicesShift
2838                               || scriptChanged
2839                               || isToIgnore
2840                              ) ) {
2841                 // New HTML source node, space met just before, last word, or CJK char:
2842                 // create and add new word with chars from wstart to i-1
2843 
2844                 #if (USE_HARFBUZZ==1)
2845                     if ( m_has_multiple_scripts ) {
2846                         // Reset as next segment can start with any script
2847                         prevScript = HB_SCRIPT_COMMON;
2848                         scriptChanged = false;
2849                     }
2850                 #endif
2851 
2852                 // Remove any collapsed space at start of word: they
2853                 // may have a zero width and not influence positioning,
2854                 // but they will be drawn as a space by Draw(). We need
2855                 // to increment the start index into the src_text_fragment_t
2856                 // for Draw() to start rendering the text from this position.
2857                 // Also skip floating nodes and chars flagged as to be ignored.
2858                 while (wstart < i) {
2859                     if ( !(m_flags[wstart] & LCHAR_IS_COLLAPSED_SPACE) &&
2860                          !(m_flags[wstart] & LCHAR_IS_TO_IGNORE) &&
2861                             !(m_srcs[wstart]->flags & LTEXT_SRC_IS_FLOAT) )
2862                         break;
2863                     // printf("_"); // to see when we remove one, before the TR() below
2864                     wstart++;
2865                 }
2866                 if (wstart == i) { // word is only collapsed spaces or ignorable chars
2867                     // No need to create it.
2868                     // Except if it is the last word, and we have not yet added any:
2869                     // we need a word for the line to have a height (frmline->height)
2870                     // so that the following line is one line below the empty line we
2871                     // made (eg, when <br/><br/>)
2872                     // However, we don't do that if it would be the last empty line in
2873                     // the last paragraph (paragraphs here are just sections of the final
2874                     // block cut by <BR>): most browsers don't display the line break
2875                     // implied by the BR when we have: "<div>some text<br/> </div>more text"
2876                     // or "<div>some text<br/> <span> </span> </div>more text".
2877                     if (lastWord && firstWord) {
2878                         if (!isLastPara) {
2879                             wstart--; // make a single word with a single collapsed space
2880                             if (m_flags[wstart] & LCHAR_IS_TO_IGNORE) {
2881                                 // In this (edgy) case, we would be rendering this char we
2882                                 // want to ignore.
2883                                 // This is a bit hacky, but no other solution: just
2884                                 // replace that ignorable char with a space in the
2885                                 // src text
2886                                 *((lChar32 *) (m_srcs[wstart]->t.text + m_charindex[wstart])) = U' ';
2887                             }
2888                         }
2889                         else { // Last or single para with no word
2890                             // A line has already been added: just make
2891                             // it zero height.
2892                             frmline->height = 0;
2893                             frmline->baseline = 0;
2894                             continue;
2895                             // We'll then just exit the loop as we are lastWord
2896                         }
2897                     }
2898                     else {
2899                         // no word made, get ready for next loop
2900                         lastSrc = newSrc;
2901                         lastIsSpace = isSpace;
2902                         lastIsRTL = isRTL;
2903                         continue;
2904                     }
2905                 }
2906 
2907                 // Create/add a new word to this frmline
2908                 formatted_word_t * word = lvtextAddFormattedWord(frmline);
2909                 src_text_fragment_t * srcline = m_srcs[wstart]; // should be identical to lastSrc
2910                 word->src_text_index = srcline->index;
2911 
2912                 // This LTEXT_VALIGN_ flag is now only of use with objects (images)
2913                 int vertical_align_flag = srcline->flags & LTEXT_VALIGN_MASK;
2914                 // These will be used later to adjust the main line baseline and height:
2915                 int top_to_baseline; // distance from this word top to its own baseline (formerly named 'b')
2916                 int baseline_to_bottom; // descender below baseline for this word (formerly named 'h')
2917                 // For each word, we'll have to check and adjust line height and baseline,
2918                 // except when LTEXT_VALIGN_TOP and LTEXT_VALIGN_BOTTOM where it has to
2919                 // be delayed until the full line is laid out. Until that, we store some
2920                 // info into word->_top_to_baseline and word->_baseline_to_bottom.
2921                 bool adjust_line_box = true;
2922                 // We will make sure elements with "-cr-hint: strut-confined"
2923                 // do not change the strut baseline and height
2924                 bool strut_confined = (srcline->flags & LTEXT_STRUT_CONFINED) && m_allow_strut_confining;
2925 
2926                 if ( srcline->flags & LTEXT_SRC_IS_OBJECT ) {
2927                     // object: image or inline-block box (floats have been skipped above)
2928 
2929                     // This is set or used only when LTEXT_FIT_GLYPHS
2930                     if ( prev_word_overflow ) {
2931                         frmline->width += prev_word_overflow;
2932                         frmline->words[frmline->word_count-2].width += prev_word_overflow;
2933                         frmline->words[frmline->word_count-2].min_width += prev_word_overflow;
2934                         prev_word_overflow = 0;
2935                     }
2936                     prev_word_is_object = true; // to be used when processing next word
2937 
2938                     word->distinct_glyphs = 0;
2939                     word->x = frmline->width;
2940                     word->width = srcline->o.width;
2941                     word->min_width = word->width;
2942                     word->o.height = srcline->o.height;
2943                     if ( srcline->flags & LTEXT_SRC_IS_INLINE_BOX ) { // inline-block
2944                         word->flags = LTEXT_WORD_IS_INLINE_BOX;
2945                         // For inline-block boxes, the baseline may not be the bottom; it has
2946                         // been computed in measureText().
2947                         word->o.baseline = srcline->o.baseline;
2948                         top_to_baseline = word->o.baseline;
2949                         baseline_to_bottom = word->o.height - word->o.baseline;
2950                         // We can't really ensure strut_confined with inline-block boxes,
2951                         // or we could miss content (it would be overwritten by next lines)
2952                     }
2953                     else { // image
2954                         word->flags = LTEXT_WORD_IS_OBJECT;
                        // The image dimensions have already been resized to fit
                        // into m_pbuffer->width (and strut confining if requested).
                        // Note: it can happen when there is some text-indent that
                        // the image width exceeds the available width: it might be
                        // shown overflowing or overriding other content.
2960                         word->width = srcline->o.width;
2961                         word->o.height = srcline->o.height;
2962                         // todo: adjust m_max_img_height with this image valign_dy/vertical_align_flag
2963                         // Per specs, the baseline is the bottom of the image
2964                         top_to_baseline = word->o.height;
2965                         baseline_to_bottom = 0;
2966                     }
2967 
2968                     // srcline->valign_dy sets the baseline, except in a few specific cases
2969                     // word->y has to be set to where the baseline should be
2970                     // For vertical-align: top or bottom, delay computation as we need to
2971                     // know the final frmline height and baseline, which might change
2972                     // with upcoming words.
2973                     if ( vertical_align_flag == LTEXT_VALIGN_TOP ) {
2974                         // was (before we delayed computation):
2975                         // word->y = top_to_baseline - frmline->baseline;
2976                         adjust_line_box = false;
2977                         delayed_valign_computation = true;
2978                         word->flags |= LTEXT_WORD_VALIGN_TOP;
2979                         if ( strut_confined )
2980                             word->flags |= LTEXT_WORD_STRUT_CONFINED;
2981                         word->_top_to_baseline = top_to_baseline;
2982                         word->_baseline_to_bottom = baseline_to_bottom;
2983                         word->y = top_to_baseline;
2984                     }
2985                     else if ( vertical_align_flag == LTEXT_VALIGN_BOTTOM ) {
2986                         // was (before we delayed computation):
2987                         // word->y = frmline->height - frmline->baseline;
2988                         adjust_line_box = false;
2989                         delayed_valign_computation = true;
2990                         word->flags |= LTEXT_WORD_VALIGN_BOTTOM;
2991                         if ( strut_confined )
2992                             word->flags |= LTEXT_WORD_STRUT_CONFINED;
2993                         word->_top_to_baseline = top_to_baseline;
2994                         word->_baseline_to_bottom = baseline_to_bottom;
2995                         word->y = - baseline_to_bottom;
2996                     }
2997                     else if ( vertical_align_flag == LTEXT_VALIGN_TEXT_TOP ) {
2998                         // srcline->valign_dy has been set to where top of image or box should be
2999                         word->y = srcline->valign_dy + top_to_baseline;
3000                     }
3001                     else if ( vertical_align_flag == LTEXT_VALIGN_TEXT_BOTTOM ) {
3002                         // srcline->valign_dy has been set to where bottom of image or box should be
3003                         word->y = srcline->valign_dy - baseline_to_bottom;
3004                     }
3005                     else if ( vertical_align_flag == LTEXT_VALIGN_MIDDLE ) {
3006                         // srcline->valign_dy has been set to where the middle of image or box should be
3007                         word->y = srcline->valign_dy - (top_to_baseline + baseline_to_bottom)/2 + top_to_baseline;
3008                     }
3009                     else { // otherwise, align baseline according to valign_dy (computed in lvrend.cpp)
3010                         word->y = srcline->valign_dy;
3011                     }
3012 
3013                     // Inline image or inline-block: ensure any "page-break-before/after: avoid"
3014                     // specified on them (the specs say those apply to "block-level elements
3015                     // in the normal flow of the root element. User agents may also apply it
3016                     // to other elements like table-row elements", so it's mostly assumed that
3017                     // they won't apply on inline elements and we'll never meet them - but as
3018                     // it doesn't say we should not, let's ensure them if provided - and
3019                     // only "avoid" as it may have some purpose to stick a full-width image
3020                     // or inline-block to the previous or next line).
3021                     ldomNode * node = (ldomNode *) srcline->object;
3022                     if ( node && srcline->flags & LTEXT_SRC_IS_INLINE_BOX ) {
3023                         // We have not propagated page_break styles from the original
3024                         // inline-block to its inlineBox wrapper
3025                         node = node->getChildNode(0);
3026                     }
3027                     if ( node ) {
3028                         css_style_ref_t style = node->getStyle();
3029                         if ( style->page_break_before == css_pb_avoid )
3030                             frmline->flags |= LTEXT_LINE_SPLIT_AVOID_BEFORE;
3031                         if ( style->page_break_after == css_pb_avoid )
3032                             frmline->flags |= LTEXT_LINE_SPLIT_AVOID_AFTER;
3033                     }
3034                 }
3035                 else {
3036                     // word
3037                     // wstart points to the previous first non-space char
3038                     // i points to a non-space char that will be in next word
3039                     // i-1 may be a space, or not (when different html tag/text nodes stuck to each other)
3040                     word->flags = 0;
3041 
3042                     // Handle vertical positioning of this word
3043                     LVFont * font = (LVFont*)srcline->t.font;
3044                     int vertical_align_flag = srcline->flags & LTEXT_VALIGN_MASK;
3045                     int line_height = srcline->interval;
3046                     int fh = font->getHeight();
3047                     if ( strut_confined && line_height > m_pbuffer->strut_height ) {
3048                         // If we'll be confining text inside the strut, get rid of any
                        // excessive line-height for the following computations.
3050                         // But we should keep it at least fh so drawn text doesn't
3051                         // overflow the box we'll try to confine into the strut.
3052                         line_height = fh > m_pbuffer->strut_height ? fh : m_pbuffer->strut_height;
3053                     }
3054                     // As we do only +/- arithmetic, the following values being negative should be fine.
3055                     // Accounts for line-height (adds what most documentation calls half-leading to top
3056                     // and to bottom  - note that "leading" is a typography term referring to "lead" the
3057                     // metal, and not to lead/leader/head/header - so the half use for bottom should not
3058                     // be called half-tailing :):
3059                     int half_leading = (line_height - fh) / 2;
3060                     int half_leading_bottom = line_height - fh - half_leading;
3061                     top_to_baseline = font->getBaseline() + half_leading;
3062                     baseline_to_bottom = line_height - top_to_baseline;
3063                     // For vertical-align: top or bottom, delay computation as we need to
3064                     // know the final frmline height and baseline, which might change
3065                     // with upcoming words.
3066                     if ( vertical_align_flag == LTEXT_VALIGN_TOP ) {
3067                         // was (before we delayed computation):
3068                         // word->y = font->getBaseline() - frmline->baseline + half_leading;
3069                         adjust_line_box = false;
3070                         delayed_valign_computation = true;
3071                         word->flags |= LTEXT_WORD_VALIGN_TOP;
3072                         if ( strut_confined )
3073                             word->flags |= LTEXT_WORD_STRUT_CONFINED;
3074                         word->_top_to_baseline = top_to_baseline;
3075                         word->_baseline_to_bottom = baseline_to_bottom;
3076                         word->y = font->getBaseline() + half_leading;
3077                     }
3078                     else if ( vertical_align_flag == LTEXT_VALIGN_BOTTOM ) {
3079                         // was (before we delayed computation):
3080                         // word->y = frmline->height - fh + font->getBaseline() - frmline->baseline - half_leading;
3081                         adjust_line_box = false;
3082                         delayed_valign_computation = true;
3083                         word->flags |= LTEXT_WORD_VALIGN_BOTTOM;
3084                         if ( strut_confined )
3085                             word->flags |= LTEXT_WORD_STRUT_CONFINED;
3086                         word->_top_to_baseline = top_to_baseline;
3087                         word->_baseline_to_bottom = baseline_to_bottom;
3088                         word->y = - fh + font->getBaseline() - half_leading_bottom;
3089                     }
3090                     else {
3091                         // For others, vertical-align computation is done in lvrend.cpp renderFinalBlock()
3092                         word->y = srcline->valign_dy;
3093                     }
3094                     // printf("baseline_to_bottom=%d top_to_baseline=%d word->y=%d txt=|%s|\n", baseline_to_bottom,
3095                     //   top_to_baseline, word->y, UnicodeToLocal(lString32(srcline->t.text, srcline->t.len)).c_str());
3096 
3097                     // Set word start and end (start+len-1) indices in the source text node
3098                     if ( !m_has_bidi ) {
3099                         // No bidi, everything is linear
3100                         word->t.start = m_charindex[wstart];
3101                         word->t.len = i - wstart;
3102                     }
3103                     else if ( m_flags[wstart] & LCHAR_IS_RTL ) {
3104                         // Bidi and first char RTL.
3105                         // As we split on bidi level change, the full word is RTL.
3106                         // As we split on src text fragment, we are sure all chars
3107                         // are in the same text node.
3108                         // charindex may have been reordered, and may not be sync'ed with wstart/i-1,
3109                         // but it is linearly decreasing between i-1 and wstart
3110                         word->t.start = m_charindex[i-1];
3111                         word->t.len = m_charindex[wstart] - m_charindex[i-1] + 1;
3112                         word->flags |= LTEXT_WORD_DIRECTION_IS_RTL; // Draw glyphs in reverse order
3113                         #if (USE_FRIBIDI==1)
                        // If not using Harfbuzz, proceed to mirror parens & al (don't
3115                         // do that if Harfbuzz is used, as it does that by itself, and
3116                         // would mirror back our mirrored chars!)
3117                         if ( font->getShapingMode() != SHAPING_MODE_HARFBUZZ) {
3118                             lChar32 * str = (lChar32*)(srcline->t.text + word->t.start);
3119                             FriBidiChar mirror;
3120                             for (int i=0; i < word->t.len; i++) {
3121                                 if ( fribidi_get_mirror_char( (FriBidiChar)(str[i]), &mirror) )
3122                                     str[i] = (lChar32)mirror;
3123                             }
3124                         }
3125                         #endif
3126                     }
3127                     else {
3128                         // Bidi and first char LTR. Same comments as above, except for last one:
3129                         // it is linearly increasing between wstart and i-1
3130                         word->t.start = m_charindex[wstart];
3131                         word->t.len = m_charindex[i-1] + 1 - m_charindex[wstart];
3132                     }
3133 
3134                     // Flag word that are the start of a link (for in-page footnotes)
3135                     if ( word->t.start==0 && srcline->flags & LTEXT_IS_LINK ) {
3136                         word->flags |= LTEXT_WORD_IS_LINK_START;
3137                         // todo: we might miss some links if the source text starts with a space
3138                     }
3139 
3140                     // Below this are stuff that could be skipped if light_formatting
3141                     // (We need bidi and the above adjustment only to get correctly ordered
3142                     // in-page footnotes links.)
3143 
3144                     // For Harfbuzz, which may shape differently words at start or end of paragraph.
3145                     // todo: this is probably wrong if some multi bidi levels re-ordering has been done
3146                     if ( first ) { // first line of paragraph
3147                         if ( m_para_dir_is_rtl ? lastWord : firstWord )
3148                             word->flags |= LTEXT_WORD_BEGINS_PARAGRAPH;
3149                     }
3150                     if ( last ) { // last line of paragraph
3151                         if ( m_para_dir_is_rtl ? firstWord : lastWord )
3152                             word->flags |= LTEXT_WORD_ENDS_PARAGRAPH;
3153                     }
3154                     if ( trustDirection)
3155                         word->flags |= LTEXT_WORD_DIRECTION_KNOWN;
3156 
3157                     // We need to compute how many glyphs can have letter_spacing added, that
3158                     // might be done in alignLine() (or not). We have to do it now even if
3159                     // not used, as we won't have that information anymore in alignLine().
3160                     word->added_letter_spacing = 0;
3161                     word->distinct_glyphs = word->t.len; // start with all chars are distinct glyphs
3162                     bool seen_non_space = false;
3163                     int tailing_spaces = 0;
3164                     for ( int j=i-1; j >= wstart; j-- ) {
3165                         if ( m_flags[j] & LCHAR_LOCKED_SPACING ) {
3166                             // A single char flagged with this makes the whole word non tweakable
3167                             word->distinct_glyphs = 0;
3168                             tailing_spaces = 0; // prevent tailing spaces correction
3169                             break;
3170                         }
3171                         if ( !seen_non_space && (m_flags[j] & LCHAR_IS_SPACE) ) {
3172                             // We'd rather not include the space that ends most words.
3173                             word->distinct_glyphs--;
3174                             // But some words can be made of a single space, that we'd rather
3175                             // not ignore when adjusting spacing.
3176                             tailing_spaces++;
3177                             continue;
3178                         }
3179                         seen_non_space = true;
3180                         if ( m_flags[j] & (LCHAR_IS_CLUSTER_TAIL|LCHAR_IS_COLLAPSED_SPACE|LCHAR_IS_TO_IGNORE) ) {
3181                             word->distinct_glyphs--;
3182                         }
3183                     }
3184                     if ( !seen_non_space && tailing_spaces ) {
3185                         word->distinct_glyphs += tailing_spaces;
3186                     }
3187 
3188                     // If we're asked to fit glyphs (avoid glyphs from overflowing line edges and
3189                     // on neighbour text nodes), we might need to tweak words x and width
3190                     bool fit_glyphs = srcline->flags & LTEXT_FIT_GLYPHS;
3191 
3192                     if ( fit_glyphs && !firstWord && prev_word_is_object ) {
3193                         int lsb = font->getLeftSideBearing(m_text[wstart]);
3194                         if ( lsb < 0 ) {
3195                             // Prev word was an image or inline box: avoid first glyph
3196                             // from overflowing in it by shifting this new word start
3197                             // on the right
3198                             frmline->width += -lsb;
3199                         }
3200                     }
3201 
3202                     if ( firstWord && (align == LTEXT_ALIGN_LEFT || align == LTEXT_ALIGN_WIDTH) ) {
3203                         // Adjust line start x if needed
3204                         // No need to do it when line is centered or right aligned (doing so
3205                         // might increase the line width and change space widths for no reason).
3206                         // We currently have no chance to get an added hyphen for hyphenation
3207                         // at start of line, as we handle only hyphenation with LTR text.
3208                         // It feels we have to do it even for the first line with text-indent,
3209                         // as some page might have multiple consecutive single lines that can
3210                         // benefit from hanging so the margin looks clean too.
3211                         int lsb = font->getLeftSideBearing(m_text[wstart]);
3212                         int left_overflow = lsb < 0 ? -lsb : 0;
3213                         if ( fit_glyphs ) {
3214                             // We don't want any part of the glyph to overflow in the left margin.
3215                             // We correct only overflows - keeping underflows (so, not having
3216                             // the glyph blackbox really fit the edge) respects the natural
3217                             // alignment.
3218                             // We also prevent hanging punctuation as it de facto overflows.
3219                             // (We used to correct it only for italic fonts, where "J" or "f"
                            // can have huge negative overflow for their part below baseline
3221                             // and so leak on the left. On the left, we were also correcting
3222                             // underflows, so fitting italic glyphs to the left edge - but we
3223                             // don't anymore as it doesn't really feel needed.)
3224                             frmline->x += left_overflow; // so that the glyph's overflow is at original frmline->x
3225                             // printf("%c lsb=%d\n", m_text[wstart], font->getLeftSideBearing(m_text[wstart]));
3226                         }
3227                         else {
3228                             // We prevent hanging punctuation on the common opening quotation marks
3229                             // or dashes that we flagged with LCHAR_LOCKED_SPACING (most of these
3230                             // are characters that can hang) - and on fully-pre lines and when
3231                             // the font is monospace.
3232                             // Note that some CJK fonts might have full-width glyphs for some of our
3233                             // common hanging chars, but not for others, and this might look bad with
3234                             // them, and different whether it is used as the main font or as a fallback.
3235                             // (Noto Sans CJK SC has full-width glyphs for single or double quotation
3236                             // marks (‘ ’ “ ”), but not for all our other hanging chars.)
3237                             // Reducing CJK half-blank full-width glyphs's width should be handled
3238                             // more generically elsewhere.
3239                             // We try to avoid hanging these with some heuristic below.
3240                             bool allow_hanging = m_hanging_punctuation &&
3241                                                  !preFormattedOnly &&
3242                                                  !(m_flags[wstart] & LCHAR_LOCKED_SPACING) &&
3243                                                  font->getFontFamily() != css_ff_monospace;
3244                             int shift_x = 0;
3245                             if ( allow_hanging ) {
3246                                 bool check_font;
3247                                 int percent = srcline->lang_cfg->getHangingPercent(false, check_font, m_text, wstart, end-wstart-1);
3248                                 if ( percent && check_font && left_overflow > 0 ) {
3249                                     // Some fonts might already have enough negative
3250                                     // left side bearing for some chars, that would
3251                                     // make them naturally hang on the left.
3252                                     percent = 0;
3253                                 }
3254                                 if ( percent ) {
3255                                     int first_char_width = m_widths[wstart] - (wstart>0 ? m_widths[wstart-1] : 0);
3256                                     shift_x = first_char_width * percent / 100;
3257                                     if ( shift_x == 0 ) // Force at least 1px if division rounded it to 0
3258                                         shift_x = 1;
3259                                     // Cancel it if this char looks like it might be full-width
3260                                     // (0.9 * font size, in case HarfBuzz has reduced the advance)
3261                                     // and it has a lot of positive left side bearing (left half
3262                                     // of the glyph blank) - see above.
3263                                     if ( first_char_width > 0.9 * font->getSize() && lsb > 0.4 * first_char_width ) {
3264                                         shift_x = 0;
3265                                     }
3266                                 }
3267                             }
3268                             if ( shift_x - lsb > usable_left_overflow ) {
3269                                 shift_x = usable_left_overflow + lsb;
3270                             }
3271                             frmline->x -= shift_x;
3272                         }
3273                     }
3274 
3275                     // Word x position on line: for now, we just stack words after each other.
3276                     // They will be adjusted if needed in alignLine()
3277                     word->x = frmline->width;
3278 
3279                     // Set and adjust word natural width (and min_width which might be used in alignLine())
3280                     word->width = m_widths[i>0 ? i-1 : 0] - (wstart>0 ? m_widths[wstart-1] : 0);
3281                     word->min_width = word->width;
3282                     TR("addLine - word(%d, %d) x=%d (%d..%d)[%d] |%s|", wstart, i, frmline->width, wstart>0 ? m_widths[wstart-1] : 0, m_widths[i-1], word->width, LCSTR(lString32(m_text+wstart, i-wstart)));
3283                     if ( m_flags[wstart] & LCHAR_IS_CLUSTER_TAIL ) {
3284                         // The start of this word is part of a ligature that started
3285                         // in a previous word: some hyphenation wrap happened on
3286                         // this ligature, which will not be rendered as such.
3287                         // We are the second part of the hyphenated word, and our first
3288                         // char(s) have a width of 0 (for being part of the ligature):
3289                         // we need to re-measure this half of the original word.
3290                         int new_width;
3291                         if ( measureWord(word, new_width) ) {
3292                             word->width = new_width;
3293                             word->min_width = word->width;
3294                         }
3295                     }
3296                     if ( m_flags[i-1] & LCHAR_ALLOW_HYPH_WRAP_AFTER ) {
3297                         if ( m_flags[i] & LCHAR_IS_CLUSTER_TAIL ) {
3298                             // The end of this word is part of a ligature that, because
                            // of hyphenation, has been split onto the next word.
3300                             // We are the first part of the hyphenated word, and
3301                             // our last char(s) have been assigned the width of the
3302                             // ligature glyph, which will not be rendered as such:
3303                             // we need to re-measure this half of the original word.
3304                             int new_width;
3305                             if ( measureWord(word, new_width) ) {
3306                                 word->width = new_width;
3307                             }
3308                         }
3309                         word->width += font->getHyphenWidth();
3310                         word->min_width = word->width;
3311                         word->flags |= LTEXT_WORD_CAN_HYPH_BREAK_LINE_AFTER;
3312                     }
3313 
3314                     bool preformatted = srcline->flags & LTEXT_FLAG_PREFORMATTED;
3315                     if ( m_flags[i-1] & LCHAR_IS_SPACE ) {
3316                         // Current word ends with a space.
3317                         // Each word ending with a space (except in some conditions) can
3318                         // have its width reduced by a fraction of this space width or
3319                         // increased if needed (for text justification), so actually
3320                         // making that space larger or smaller.
3321                         // Note: checking if the first word of first line is one of the
3322                         // common opening quotation marks or dashes is done in measureText(),
3323                         // to have it work also with BiDi/RTL text (checking that here
3324                         // would be too late, as reordering has been done).
3325                         if ( !(m_flags[i-1] & LCHAR_LOCKED_SPACING) ) {
3326                             word->flags |= LTEXT_WORD_CAN_ADD_SPACE_AFTER;
3327                             int dw = getMaxCondensedSpaceTruncation(i-1);
3328                             if (dw>0) {
3329                                 word->min_width = word->width - dw;
3330                             }
3331                         }
3332                         if ( lastWord && !preformatted ) {
3333                             // If last word of line, remove any trailing space
3334                             // from word's width (but not with preformatted, in
3335                             // case of text-align:right where we don't want to
3336                             // lose any trailing space)
3337                             word->width = m_widths[i>1 ? i-2 : 0] - (wstart>0 ? m_widths[wstart-1] : 0);
3338                             word->min_width = word->width;
3339                         }
3340                     }
3341                     else if ( !firstWord && m_flags[wstart] & LCHAR_IS_SPACE ) {
3342                         // Current word starts with a space (looks like this should not happen):
3343                         // we can increase the space between previous word and this one if needed
3344                         //if ( word->t.len<2 || m_text[i-1]!=UNICODE_NO_BREAK_SPACE || m_text[i-2]!=UNICODE_NO_BREAK_SPACE)
3345                         //if ( m_text[wstart]==UNICODE_NO_BREAK_SPACE && m_text[wstart+1]==UNICODE_NO_BREAK_SPACE)
3346                         //    CRLog::trace("Double nbsp text[-1]=%04x", m_text[wstart-1]);
3347                         //else
3348                         frmline->words[frmline->word_count-2].flags |= LTEXT_WORD_CAN_ADD_SPACE_AFTER;
3349                     }
3350                     else if ( !firstWord && isCJKIdeograph(m_text[i]) ) {
3351                         // Current word is a CJK char: we can increase the space
3352                         // between previous word and this one if needed
3353                         frmline->words[frmline->word_count-2].flags |= LTEXT_WORD_CAN_ADD_SPACE_AFTER;
3354                     }
3355                     // if ( m_flags[i-1] & LCHAR_ALLOW_WRAP_AFTER )
3356                     //     word->flags |= LTEXT_WORD_CAN_BREAK_LINE_AFTER; // not used anywhere
3357 
3358                     if ( lastWord && (align == LTEXT_ALIGN_RIGHT || align == LTEXT_ALIGN_WIDTH) ) {
3359                         // Adjust line end if needed.
3360                         // If we need to adjust last word's last char, we need to put the delta
3361                         // in this word->width, which will make it into frmline->width.
3362 
3363                         // Find the real last drawn glyph
3364                         int lastnonspace = i-1;
3365                         for ( int k=i-1; k>=wstart; k-- ) {
3366                             if ( !(m_flags[k] & LCHAR_IS_SPACE) ) {
3367                                 lastnonspace = k;
3368                                 break;
3369                             }
3370                         }
3371                         bool ends_with_hyphen = m_flags[lastnonspace] & LCHAR_ALLOW_HYPH_WRAP_AFTER;
3372                         int rsb = 0; // don't bother with hyphen rsb, which can't overflow
3373                         int right_overflow = 0;
3374                         if ( !ends_with_hyphen ) {
3375                             rsb = font->getRightSideBearing(m_text[lastnonspace]);
3376                             if ( rsb < 0 )
3377                                 right_overflow = -rsb;
3378                         }
3379                         if ( fit_glyphs ) {
3380                             // We don't want any part of the glyph to overflow in the right margin.
3381                             // (We used to correct it only for italic fonts, where "J" or "f"
                            // can have huge negative overflow for their part above baseline
3383                             // and so leak on the right. We were previously also correcting only
3384                             // overflows and not underflows.)
3385                             word->width += right_overflow;
3386                         }
3387                         else {
3388                             // We prevent hanging punctuation in a few cases (see above)
3389                             bool allow_hanging = m_hanging_punctuation &&
3390                                                  !preFormattedOnly &&
3391                                                  font->getFontFamily() != css_ff_monospace;
3392                             int shift_w = 0;
3393                             if ( allow_hanging ) {
3394                                 if ( ends_with_hyphen ) {
3395                                     int percent = srcline->lang_cfg->getHyphenHangingPercent();
3396                                     if ( percent ) {
3397                                         shift_w = font->getHyphenWidth() * percent / 100;
3398                                         if ( shift_w == 0 ) // Force at least 1px if division rounded it to 0
3399                                             shift_w = 1;
3400                                     }
3401                                     // Note: some part of text in bold or in a bigger font size inside
3402                                     // a paragraph may stand out more than the regular text, and this
                                    // is quite noticeable with the hyphen.
3404                                     // We might want to limit or force hyphen hanging to what it should
3405                                     // be with the main paragraph font, but that might not work well in
3406                                     // some situations.
3407                                     // See https://github.com/koreader/crengine/pull/355#issuecomment-656760791
3408                                 }
3409                                 else {
3410                                     bool check_font;
3411                                     int percent = srcline->lang_cfg->getHangingPercent(true, check_font, m_text, lastnonspace, end-lastnonspace-1);
3412                                     if ( percent && check_font && right_overflow > 0 ) {
3413                                         // Some fonts might already have enough negative
3414                                         // right side bearing for some chars, that would
3415                                         // make them naturally hang on the right.
3416                                         percent = 0;
3417                                     }
3418                                     if ( percent ) {
3419                                         int last_char_width = m_widths[lastnonspace] - (lastnonspace>0 ? m_widths[lastnonspace-1] : 0);
3420                                         shift_w = last_char_width * percent / 100;
3421                                         if ( shift_w == 0 ) // Force at least 1px if division rounded it to 0
3422                                             shift_w = 1;
3423                                         // Cancel it if this char looks like it might be full-width
3424                                         // (0.9 * font size, in case HarfBuzz has reduced the advance)
3425                                         // and it has a lot of positive right side bearing (right half
3426                                         // of the glyph blank) - see comment above in 'firstWord' handling.
3427                                         if ( last_char_width > 0.9 * font->getSize() && rsb > 0.4 * last_char_width ) {
3428                                             shift_w = 0;
3429                                         }
3430                                     }
3431                                 }
3432                             }
3433                             if ( shift_w - rsb > usable_right_overflow ) {
3434                                 shift_w = usable_right_overflow + rsb;
3435                             }
3436                             word->width -= shift_w;
3437                         }
3438                     }
3439 
3440                     // This is set or used only when LTEXT_FIT_GLYPHS
3441                     prev_word_is_object = false;
3442                     prev_word_overflow = 0;
3443                     if ( fit_glyphs && !lastWord ) {
3444                         int rsb = font->getRightSideBearing(m_text[i-1]);
3445                         if ( rsb < 0 ) {
3446                             // This may be added to shit word width if next
3447                             // word is an image or an inline box
3448                             prev_word_overflow = -rsb;
3449                         }
3450                     }
3451 
3452                     /* Hanging punctuation (with CJK specifics) old code:
3453                      *
3454                     bool visualAlignmentEnabled = m_hanging_punctuation && (align != LTEXT_ALIGN_CENTER);
3455                     if ( visualAlignmentEnabled && lastWord ) { // if floating punctuation enabled
3456                         int endp = i-1;
3457                         int lastc = m_text[endp];
3458                         int wAlign = font->getVisualAligmentWidth();
3459                         word->width += wAlign/2;
3460                         while ( (m_flags[endp] & LCHAR_IS_SPACE) && endp>0 ) { // || lastc=='\r' || lastc=='\n'
3461                             word->width -= m_widths[endp] - m_widths[endp-1];
3462                             endp--;
3463                             lastc = m_text[endp];
3464                         }
3465                         // We reduce the word width from the hanging char width, so it's naturally pushed
3466                         // outside in the margin by the alignLine code
3467                         if ( word->flags & LTEXT_WORD_CAN_HYPH_BREAK_LINE_AFTER ) {
3468                             word->width -= font->getHyphenWidth(); // TODO: strange fix - need some other solution
3469                         }
3470                         else if ( lastc=='.' || lastc==',' || lastc=='!' || lastc==':' || lastc==';' || lastc=='?') {
3471                             FONT_GUARD
3472                             int w = font->getCharWidth(lastc);
3473                             TR("floating: %c w=%d", lastc, w);
3474                             if (frmline->width + w + wAlign + x >= maxWidth)
3475                                 word->width -= w; //fix russian "?" at line end
3476                         }
3477                         else if ( lastc==0x2019 || lastc==0x201d ||   // ’ ” right quotation marks
3478                                   lastc==0x3001 || lastc==0x3002 ||   // 、 。 ideographic comma and full stop
3479                                   lastc==0x300d || lastc==0x300f ||   // 」 』 ideographic right bracket
3480                                   lastc==0xff01 || lastc==0xff0c ||   // ! , fullwidth ! and ,
3481                                   lastc==0xff1a || lastc==0xff1b ) {  // : ; fullwidth : and ;
3482                             FONT_GUARD
3483                             int w = font->getCharWidth(lastc);
3484                             if (frmline->width + w + wAlign + x >= maxWidth)
3485                                 word->width -= w;
3486                             else if (w!=0) {
3487                                 // (This looks like some awkward way of detecting if the line
3488                                 // is made out of solely same-fixed-width CJK ideographs,
3489                                 // which will fail if there's enough variable-width western
3490                                 // chars to fail the rounded division vs nb of char comparison.)
3491                                 if (end - start == int((maxWidth - wAlign) / w))
3492                                     word->width -= w; // Chinese floating punctuation
3493                                 else if (x/w >= 1 && (end-start==int(maxWidth-wAlign-x)/w)-1)
3494                                     word->width -= w; // first line with text-indent
3495                             }
3496                         }
3497                         if (frmline->width!=0 && last && align!=LTEXT_ALIGN_CENTER) {
3498                             // (Chinese) add spaces between words in last line or single line
3499                             // (so they get visually aligned on a grid with the char on the
3500                             // previous justified lines)
3501                             FONT_GUARD
3502                             int properwordcount = maxWidth/font->getSize() - 2;
3503                             int extraSpace = maxWidth - properwordcount*font->getSize() - wAlign;
3504                             int exccess = (frmline->width + x + word->width + extraSpace) - maxWidth;
3505                             if ( exccess>0 && exccess<maxWidth ) { // prevent the line exceeds screen boundary
3506                                 extraSpace -= exccess;
3507                             }
3508                             if ( extraSpace>0 ) {
3509                                 int addSpacePoints = 0;
3510                                 int a;
3511                                 int points=0;
3512                                 for ( a=0; a<(int)frmline->word_count-1; a++ ) {
3513                                     if ( frmline->words[a].flags & LTEXT_WORD_CAN_ADD_SPACE_AFTER )
3514                                         points++;
3515                                 }
3516                                 addSpacePoints = properwordcount - (frmline->word_count - 1 - points);
3517                                 if (addSpacePoints > 0) {
3518                                     int addSpaceDiv = extraSpace / addSpacePoints;
3519                                     int addSpaceMod = extraSpace % addSpacePoints;
3520                                     int delta = 0;
3521                                     for (a = 0; a < (int) frmline->word_count; a++) {
3522                                         frmline->words[a].x +=  delta;
3523                                         {
3524                                             delta += addSpaceDiv;
3525                                             if (addSpaceMod > 0) {
3526                                                 addSpaceMod--;
3527                                                 delta++;
3528                                             }
3529                                         }
3530                                     }
3531                                 }
3532                             }
3533                             word->width+=extraSpace;
3534                         }
3535                         if ( first && font->getSize()!=0 && (maxWidth/font->getSize()-2)!=0 ) {
3536                             // proportionally enlarge text-indent when visualAlignment or
3537                             // floating punctuation is enabled
3538                             FONT_GUARD
3539                             int cnt = ((x-wAlign/2)%font->getSize()==0) ? (x-wAlign/2)/font->getSize() : 0;
3540                                 // ugly way to caculate text-indent value, I can not get text-indent from here
3541                             int p = cnt*(cnt+1)/2;
3542                             int asd = (2*font->getSize()-font->getCharWidth(lastc)) / (maxWidth/font->getSize()-2);
3543                             int width = p*asd + cnt; //same math as delta above
3544                             if (width>0)
3545                                 frmline->x+=width;
3546                         }
3547                         word->min_width = word->width;
3548                     } // done if floating punctuation enabled
3549                     * End of old code for handling hanging punctuation
3550                     */
3551 
3552                     // printf("addLine - word(%d, %d) x=%d (%d..%d)[%d>%d %x] |%s|\n", wstart, i,
3553                     //      frmline->width, wstart>0 ? m_widths[wstart-1] : 0, m_widths[i-1], word->width,
3554                     //      word->min_width, word->flags, LCSTR(lString32(m_text+wstart, i-wstart)));
3555                 }
3556 
3557                 // Word added: adjust frmline height and baseline to account for this word
3558                 if ( adjust_line_box ) {
3559                     // Adjust full line box height and baseline if needed:
3560                     // frmline->height is the current line height
3561                     // frmline->baseline is the distance from line top to the main baseline of the line
3562                     // top_to_baseline (normally positive number) is the distance from this word top to its own baseline.
3563                     // baseline_to_bottom (normally positive number) is the descender below baseline for this word
3564                     // word->y is the distance from this word baseline to the line main baseline
3565                     //   it is positive when word is subscript, negative when word is superscript
3566                     //
3567                     // negative word->y means it's superscript, so the line's baseline might need to go
3568                     // down (increase) to make room for the superscript
3569                     int needed_baseline = top_to_baseline - word->y;
3570                     if ( needed_baseline > frmline->baseline ) {
3571                         // shift the line baseline and height by the amount needed at top
3572                         int shift_down = needed_baseline - frmline->baseline;
3573                         // if (frmline->baseline) printf("pushed down +%d\n", shift_down);
3574                         // if (frmline->baseline && srcline->object)
3575                         //     printf("%s\n", UnicodeToLocal(ldomXPointer((ldomNode*)srcline->object, 0).toString()).c_str());
3576                         if ( !strut_confined ) {
3577                             // move line away from the strut baseline
3578                             frmline->baseline += shift_down;
3579                             frmline->height += shift_down;
3580                         }
3581                         else { // except if "-cr-hint: strut-confined":
3582                             // Keep the strut, move the word down
3583                             word->y += shift_down;
3584                         }
3585                     }
3586                     // positive word->y means it's subscript, so the line's baseline does not need to be
3587                     // changed, but more room below might be needed to display the subscript: increase
3588                     // line height so next line is pushed down and dont overwrite the subscript
3589                     int needed_height = frmline->baseline + baseline_to_bottom + word->y;
3590                     if ( needed_height > frmline->height ) {
3591                         // printf("extended down +%d\n", needed_height-frmline->height);
3592                         if ( !strut_confined ) {
3593                             frmline->height = needed_height;
3594                         }
3595                         else { // except if "-cr-hint: strut-confined":
3596                             // We'd rather move the word up, but it shouldn't go
3597                             // above the top of the line, so it's not drawn over
3598                             // previous line text. If it's taller than line height,
3599                             // it's ok to have it overflow bottom: some part of
3600                             // it might be overwritten by next line, which we'd
3601                             // rather have fully readable.
3602                             word->y -= needed_height - frmline->height;
3603                             int top_dy = top_to_baseline - word->y - frmline->baseline;
3604                             if ( top_dy > 0 )
3605                                 word->y += top_dy;
3606                         }
3607                     }
3608                 }
3609 
3610                 frmline->width += word->width;
3611                 firstWord = false;
3612 
3613                 lastSrc = newSrc;
3614                 wstart = i;
3615             }
3616             lastIsSpace = isSpace;
3617             lastIsRTL = isRTL;
3618         }
3619         // All words added
3620 
3621         if ( delayed_valign_computation ) {
3622             // Delayed computation and line box adjustment when we have some words
3623             // (or images, or inline-boxes) with vertical-align: top or bottom.
3624             // First, see if we need to adjust frmline->baseline and frmline->height,
3625             // similarly as done above if adjust_line_box:
3626             for ( int i=0; i<frmline->word_count; i++ ) {
3627                 if ( frmline->words[i].flags & (LTEXT_WORD_VALIGN_TOP|LTEXT_WORD_VALIGN_BOTTOM) ) {
3628                     formatted_word_t * word = &frmline->words[i];
3629                     if ( word->flags & LTEXT_WORD_STRUT_CONFINED )
3630                         continue; // don't have such words affect current line height & baseline
3631                     // Update incomplete word->y with current frmline baseline & height,
3632                     // just as it would have been done if not delayed
3633                     int cur_word_y;
3634                     if ( word->flags & LTEXT_WORD_VALIGN_TOP )
3635                         cur_word_y = word->y - frmline->baseline;
3636                     else if ( word->flags & LTEXT_WORD_VALIGN_BOTTOM )
3637                         cur_word_y = word->y + frmline->height - frmline->baseline;
3638                     else // should not happen
3639                         cur_word_y = word->y;
3640                     int needed_baseline = word->_top_to_baseline - cur_word_y;
3641                     if ( needed_baseline > frmline->baseline ) {
3642                         // shift the line baseline and height by the amount needed at top
3643                         int shift_down = needed_baseline - frmline->baseline;
3644                         frmline->baseline += shift_down;
3645                         frmline->height += shift_down;
3646                     }
3647                     int needed_height = frmline->baseline + word->_baseline_to_bottom + cur_word_y;
3648                     if ( needed_height > frmline->height ) {
3649                         frmline->height = needed_height;
3650                     }
3651                 }
3652             }
3653             // Then, get the final word->y (baseline) that aligns the word to top or bottom of frmline
3654             for ( int i=0; i<frmline->word_count; i++ ) {
3655                 if ( frmline->words[i].flags & (LTEXT_WORD_VALIGN_TOP|LTEXT_WORD_VALIGN_BOTTOM) ) {
3656                     formatted_word_t * word = &frmline->words[i];
3657                     if ( word->flags & LTEXT_WORD_VALIGN_TOP ) {
3658                         word->y = word->y - frmline->baseline;
3659                     }
3660                     else if ( word->flags & LTEXT_WORD_VALIGN_BOTTOM ) {
3661                         word->y = word->y + frmline->height - frmline->baseline;
3662                     }
3663                     if ( word->flags & LTEXT_WORD_STRUT_CONFINED ) {
3664                         // If this word is taller than final line height,
3665                         // we'd rather have it overflows bottom.
3666                         int top_dy = word->_top_to_baseline - word->y - frmline->baseline;
3667                         if ( top_dy > 0 )
3668                             word->y += top_dy; // move it down
3669                     }
3670                 }
3671             }
3672         }
3673 
3674         if ( !light_formatting ) {
3675             // Fix up words position and width to ensure requested alignment and indent
3676             alignLine( frmline, align, rightIndent, hasInlineBoxes );
3677         }
3678 
3679         // Get ready for next line
3680         m_y += frmline->height;
3681         m_pbuffer->height = m_y;
3682         checkOngoingFloat();
3683         positionDelayedFloats();
3684         #if (USE_FRIBIDI==1)
3685         if ( restore_last_width ) // bidi: restore last width to not mess with next line
3686             m_widths[end-1] = last_width_to_restore;
3687         #endif
3688     }
3689 
getMaxCondensedSpaceTruncation(int pos)3690     int getMaxCondensedSpaceTruncation(int pos) {
3691         if (pos<0 || pos>=m_length || !(m_flags[pos] & LCHAR_IS_SPACE))
3692             return 0;
3693         if (m_pbuffer->min_space_condensing_percent==100)
3694             return 0;
3695         int w = (m_widths[pos] - (pos > 0 ? m_widths[pos-1] : 0));
3696         int dw = w * (100 - m_pbuffer->min_space_condensing_percent) / 100;
3697         if ( dw>0 ) {
3698             // typographic rule: don't use spaces narrower than 1/4 of font size
3699             /* 20191126: disabled, to allow experimenting with lower %
3700             LVFont * fnt = (LVFont *)m_srcs[pos]->t.font;
3701             int fntBasedSpaceWidthDiv2 = fnt->getSize() * 3 / 4;
3702             if ( dw>fntBasedSpaceWidthDiv2 )
3703                 dw = fntBasedSpaceWidthDiv2;
3704             */
3705             return dw;
3706         }
3707         return 0;
3708     }
3709 
isCJKIdeograph(lChar32 c)3710     bool isCJKIdeograph(lChar32 c) {
3711         return c >= UNICODE_CJK_IDEOGRAPHS_BEGIN &&
3712                c <= UNICODE_CJK_IDEOGRAPHS_END   &&
3713                ( c <= UNICODE_CJK_PUNCTUATION_HALF_AND_FULL_WIDTH_BEGIN ||
3714                  c >= UNICODE_CJK_PUNCTUATION_HALF_AND_FULL_WIDTH_END );
3715     }
3716 
3717     #if (USE_LIBUNIBREAK!=1)
isCJKPunctuation(lChar32 c)3718     bool isCJKPunctuation(lChar32 c) {
3719         return ( c >= UNICODE_CJK_PUNCTUATION_BEGIN && c <= UNICODE_CJK_PUNCTUATION_END ) ||
3720                ( c >= UNICODE_GENERAL_PUNCTUATION_BEGIN && c <= UNICODE_GENERAL_PUNCTUATION_END &&
3721                     c!=0x2018 && c!=0x201a && c!=0x201b &&    // ‘ ‚ ‛  left quotation marks
3722                     c!=0x201c && c!=0x201e && c!=0x201f &&    // “ „ ‟  left double quotation marks
3723                     c!=0x2035 && c!=0x2036 && c!=0x2037 &&    // ‵ ‶ ‷ reversed single/double/triple primes
3724                     c!=0x2039 && c!=0x2045 && c!=0x204c  ) || // ‹ ⁅ ⁌ left angle quot mark, bracket, bullet
3725                ( c >= UNICODE_CJK_PUNCTUATION_HALF_AND_FULL_WIDTH_BEGIN &&
3726                  c <= UNICODE_CJK_PUNCTUATION_HALF_AND_FULL_WIDTH_END ) ||
3727                ( c == 0x00b7 ); // · middle dot
3728     }
3729 
isCJKLeftPunctuation(lChar32 c)3730     bool isCJKLeftPunctuation(lChar32 c) {
3731         return c==0x2018 || c==0x201c || // ‘ “ left single and double quotation marks
3732                c==0x3008 || c==0x300a || c==0x300c || c==0x300e || c==0x3010 || // 〈 《 「 『 【 CJK left brackets
3733                c==0xff08; // ( fullwidth left parenthesis
3734     }
3735     #endif
3736 
isLeftPunctuation(lChar32 c)3737     bool isLeftPunctuation(lChar32 c) {
3738         // Opening quotation marks and dashes that we don't want a followup space to
3739         // have its width changed
3740         return ( c >= 0x2010 && c <= 0x2027 ) || // Hyphens, dashes, quotation marks, bullets...
3741                ( c >= 0x2032 && c <= 0x205E ) || // Primes, bullets...
3742                ( c >= 0x002A && c <= 0x002F ) || // Ascii * + , - . /
3743                  c == 0x00AB || c == 0x00BB   || // Quotation marks (including right pointing, for german text)
3744                  c == 0x0022 || c == 0x0027 || c == 0x0023; // Ascii " ' #
3745 
3746     }
3747 
3748     /// Split paragraph into lines
processParagraph(int start,int end,bool isLastPara)3749     void processParagraph( int start, int end, bool isLastPara )
3750     {
3751         TR("processParagraph(%d, %d)", start, end);
3752 
3753         // ensure buffer size is ok for paragraph
3754         allocate( start, end );
3755         // copy paragraph text to buffer
3756         copyText( start, end );
3757         // measure paragraph text
3758         measureText();
3759 
3760         // We keep as 'para' the first source text, as it carries
3761         // the text alignment to use with all added lines.
3762         src_text_fragment_t * para = &m_pbuffer->srctext[start];
3763 
3764         // detect case with inline preformatted text inside block with line feeds -- override align=left for this case
3765         bool preFormattedOnly = true;
3766         for ( int i=start; i<end; i++ ) {
3767             if ( !(m_pbuffer->srctext[i].flags & LTEXT_FLAG_PREFORMATTED) ) {
3768                 preFormattedOnly = false;
3769                 break;
3770             }
3771         }
3772         if ( preFormattedOnly ) {
3773             bool lfFound = false;
3774             for ( int i=0; i<m_length; i++ ) {
3775                 if ( m_text[i]=='\n' ) {
3776                     lfFound = true;
3777                     break;
3778                 }
3779             }
3780             preFormattedOnly = preFormattedOnly && lfFound;
3781         }
3782 
3783         // Not per-specs, but when floats reduce the available width, skip y until
3784         // we have the width to draw at least a few chars on a line.
3785         // We use N x strut_height because it's one easily acccessible font metric here.
3786         int minWidth = 3 * m_pbuffer->strut_height;
3787 
3788         // split paragraph into lines, export lines
3789         int pos = 0;
3790         #if (USE_LIBUNIBREAK!=1)
3791         int upSkipPos = -1;
3792         #endif
3793 
3794         // Note: we no longer adjust here x and width to account for first or
3795         // last italic glyphs side bearings or hanging punctuation, as here,
3796         // we're still just walking the text in logical order, which might
3797         // be re-ordered when BiDi.
3798         // We'll handle that in AddLine() where we'll make words in visual
3799         // order; the small shifts we might have on the final width vs the
3800         // width measured here will hopefully be compensated on the space chars.
3801 
3802         while ( pos<m_length ) { // each loop makes a line
3803             // x is this line indent. We use it like a x coordinates below, but
3804             // we'll use it on the right in addLine() if para is RTL.
3805             int x;
3806             if (para->flags & LTEXT_LEGACY_RENDERING) {
3807                 x = para->indent > 0 ? (pos == 0 ? para->indent : 0 ) : (pos==0 ? 0 : -para->indent);
3808             } else {
3809                 x = m_indent_current;
3810                 if ( !m_indent_first_line_done ) {
3811                     m_indent_first_line_done = true;
3812                     m_indent_current = m_indent_after_first_line;
3813                 }
3814             }
3815             int w0 = pos>0 ? m_widths[pos-1] : 0; // measured cumulative width at start of this line
3816             int lastNormalWrap = -1;
3817             int lastDeprecatedWrap = -1; // Different usage whether USE_LIBUNIBREAK or not (see below)
3818             int lastHyphWrap = -1;
3819             int lastMandatoryWrap = -1;
3820             int spaceReduceWidth = 0; // max total line width which can be reduced by narrowing of spaces
3821             int firstInlineBoxPos = -1;
3822 
3823             int maxWidth = getCurrentLineWidth();
3824             if (maxWidth <= minWidth) {
3825                 // Find y with available minWidth
3826                 int unused_x;
3827                 // We need to provide a height to find some width available over
3828                 // this height, but we don't know yet the height of text (that
3829                 // may have some vertical-align or use a bigger font) or images
3830                 // that will end up on this line (line height is handled later,
3831                 // by AddLine()), we can only ask for the only height we know
3832                 // about: m_pbuffer->strut_height...
3833                 // todo: find a way to be sure or react to that
3834                 int new_y = getYWithAvailableWidth(m_y, minWidth, m_pbuffer->strut_height, unused_x);
3835                 fillAndMoveToY( new_y );
3836                 maxWidth = getCurrentLineWidth();
3837             }
3838 
3839             // Find candidates where end of line is possible
3840             bool seen_non_collapsed_space = false;
3841             bool seen_first_rendered_char = false;
3842             int i;
3843             for ( i=pos; i<m_length; i++ ) {
3844                 if ( m_text[i]=='\n' ) { // might happen in <pre>formatted only (?)
3845                     lastMandatoryWrap = i;
3846                     break;
3847                 }
3848                 lUInt16 flags = m_flags[i];
3849                 if ( flags & LCHAR_IS_OBJECT ) {
3850                     if ( m_charindex[i] == FLOAT_CHAR_INDEX ) { // float
3851                         src_text_fragment_t * src = m_srcs[i];
3852                         // Not sure if we can be called again on the same LVFormatter
3853                         // object, but the whole code allows for re-formatting and
3854                         // they should give the same result.
3855                         // So, use a flag to not re-add already processed floats.
3856                         if ( !(src->flags & LTEXT_SRC_IS_FLOAT_DONE) ) {
3857                             int currentWidth = x + m_widths[i]-w0 - spaceReduceWidth;
3858                             addFloat( src, currentWidth );
3859                             src->flags |= LTEXT_SRC_IS_FLOAT_DONE;
3860                             maxWidth = getCurrentLineWidth();
3861                         }
3862                         // We don't set lastNormalWrap when collapsed spaces,
3863                         // so let's not for floats either.
3864                         // But we need to when the float is the last source (as
3865                         // done below, otherwise we would not update wrapPos and
3866                         // we'd get another ghost line, and this real last line
3867                         // might be wrongly justified).
3868                         if ( i==m_length-1 ) {
3869                             lastNormalWrap = i;
3870                         }
3871                         continue;
3872                     }
3873                     if ( m_charindex[i] == INLINEBOX_CHAR_INDEX && firstInlineBoxPos < 0 ) {
3874                         firstInlineBoxPos = i;
3875                     }
3876                 }
3877                 // We would not need to bother with LCHAR_IS_COLLAPSED_SPACE, as they have zero
3878                 // width and so can be grabbed here. They carry LCHAR_ALLOW_WRAP_AFTER just like
3879                 // a space, so they will set lastNormalWrap.
3880                 // But we don't want any collapsed space at start to make a new line if the
3881                 // following text is a long word that doesn't fit in the available width (which
3882                 // can happen in a small table cell). So, ignore them at start of line:
3883                 if (!seen_non_collapsed_space) {
3884                     if (flags & LCHAR_IS_COLLAPSED_SPACE)
3885                         continue;
3886                     seen_non_collapsed_space = true;
3887                 }
3888                 if ( !seen_first_rendered_char ) {
3889                     seen_first_rendered_char = true;
3890                     // First real non ignoreable char (collapsed spaces skipped):
3891                     // it might be a wide image or inlineBox. Check that we have
3892                     // enough current width to have it on this line, otherwise,
3893                     // move down until we find a y where it would fit (but only
3894                     // if we're sure we'll find some)
3895                     int needed_width = x + m_widths[i]-w0;
3896                     if ( needed_width > maxWidth && needed_width <= m_pbuffer->width ) {
3897                         // Find y with available needed_width
3898                         int unused_x;
3899                         // todo: provide the height of the image or inline-box
3900                         int new_y = getYWithAvailableWidth(m_y, needed_width, m_pbuffer->strut_height, unused_x);
3901                         fillAndMoveToY( new_y );
3902                         maxWidth = getCurrentLineWidth();
3903                     }
3904                 }
3905 
3906                 bool grabbedExceedingSpace = false;
3907                 if ( x + m_widths[i]-w0 > maxWidth + spaceReduceWidth ) {
3908                     // It's possible the char at i is a space whose width exceeds maxWidth,
3909                     // but it should be a candidate for lastNormalWrap (otherwise, the
3910                     // previous word will be hyphenated and we will get spaces widen for
3911                     // text justification)
3912                     if ( (flags & LCHAR_IS_SPACE) && (flags & LCHAR_ALLOW_WRAP_AFTER) ) // don't break yet
3913                         grabbedExceedingSpace = true;
3914                     else
3915                         break;
3916                 }
3917                 #if (USE_LIBUNIBREAK==1)
3918                 // Note: with libunibreak, we can't assume anymore that LCHAR_ALLOW_WRAP_AFTER is synonym to IS_SPACE.
3919                 if (flags & LCHAR_ALLOW_WRAP_AFTER) {
3920                     if (flags & LCHAR_DEPRECATED_WRAP_AFTER) {
3921                         // Allowed by libunibreak, but prevented by "white-space: nowrap" on
3922                         // this text node parent. Store this opportunity as lastDeprecatedWrap,
3923                         // that we will use only if no lastNormalWrap found.
3924                         lastDeprecatedWrap = i;
3925                     }
3926                     else {
3927                         lastNormalWrap = i;
3928                     }
3929                 }
3930                 #else
3931                 // A space or a CJK ideograph make a normal allowed wrap
3932                 // Note: upstream has added in:
3933                 //   https://github.com/buggins/coolreader/commit/e2a1cf3306b6b083467d77d99dad751dc3aa07d9
3934                 // to the next if:
3935                 //  || lGetCharProps(m_text[i]) == 0
3936                 // but this does not look right, as any other unicode char would allow wrap.
3937                 if ((flags & LCHAR_ALLOW_WRAP_AFTER) || isCJKIdeograph(m_text[i])) {
3938                     // Need to check if previous and next non-space char request a wrap on
3939                     // this space (or CJK char) to be avoided
3940                     bool avoidWrap = false;
3941                     // Look first at following char(s)
3942                     for (int j = i+1; j < m_length; j++) {
3943                         if ( m_flags[j] & LCHAR_IS_OBJECT ) {
3944                             if (m_charindex[j] == FLOAT_CHAR_INDEX) // skip floats
3945                                 continue;
3946                             else // allow wrap between space/CJK and image or inline-box
3947                                 break;
3948                         }
3949                         if ( !(m_flags[j] & LCHAR_ALLOW_WRAP_AFTER) ) { // not another (collapsible) space
3950                             avoidWrap = lGetCharProps(m_text[j]) & CH_PROP_AVOID_WRAP_BEFORE;
3951                             break;
3952                         }
3953                     }
3954                     if (!avoidWrap && i < m_length-1) { // Look at preceding char(s)
3955                         // (but not if it is the last char, where a wrap is fine
3956                         // even if it ends after a CH_PROP_AVOID_WRAP_AFTER char)
3957                         for (int j = i-1; j >= 0; j--) {
3958                             if ( m_flags[j] & LCHAR_IS_OBJECT ) {
3959                                 if (m_charindex[j] == FLOAT_CHAR_INDEX) // skip floats
3960                                     continue;
3961                                 else // allow wrap after a space following an image or inline-box
3962                                     break;
3963                             }
3964                             if ( !(m_flags[j] & LCHAR_ALLOW_WRAP_AFTER) ) { // not another (collapsible) space
3965                                 avoidWrap = lGetCharProps(m_text[j]) & CH_PROP_AVOID_WRAP_AFTER;
3966                                 break;
3967                             }
3968                         }
3969                     }
3970                     if (!avoidWrap)
3971                         lastNormalWrap = i;
3972                     // We could use lastDeprecatedWrap, but it then get too much real chances to be used:
3973                     // else lastDeprecatedWrap = i;
3974                     // Note that a wrap can happen AFTER a '-' (that has CH_PROP_AVOID_WRAP_AFTER)
3975                     // when lastDeprecatedWrap is prefered below.
3976                 }
3977                 else if ( flags & LCHAR_DEPRECATED_WRAP_AFTER ) {
3978                     // Different meaning than when USE_LIBUNIBREAK: it is set
3979                     // by lastFont->measureText() on some hyphens.
3980                     // (To keep this legacy behaviour and not complexify things, we don't
3981                     // ensure "white-space: nowrap" when not using libunibreak.)
3982                     lastDeprecatedWrap = i; // Hyphens make a less priority wrap
3983                 }
3984                 #endif // not USE_LIBUNIBREAK==1
3985                 if ( i==m_length-1 ) // Last char always provides a normal wrap
3986                     lastNormalWrap = i;
3987                 if ( !grabbedExceedingSpace &&
3988                         m_pbuffer->min_space_condensing_percent != 100 &&
3989                         i < m_length-1 &&
3990                         ( m_flags[i] & LCHAR_IS_SPACE ) &&
3991                         !(m_flags[i+1] & LCHAR_IS_SPACE) ) {
3992                     // Each space not followed by a space is candidate for space condensing
3993                     int dw = getMaxCondensedSpaceTruncation(i);
3994                     if ( dw>0 )
3995                         spaceReduceWidth += dw;
3996                     // TODO do that too for CJK punctuation whose glyph might be half blank
3997                 }
3998                 if (grabbedExceedingSpace)
3999                     break; // delayed break
4000             }
4001 
4002             // Glyph at i exceeds available width, or mandatory break. We have
4003             // found a lastNormWrap, and computed spaceReduceWidth.
4004 
4005             // It feels there's no need to do anything if there's been one single float
4006             // that took all the width: we moved i and can wrap.
4007             if (i<=pos)
4008                 i = pos + 1; // allow at least one character to be shown on line
4009             int wordpos = i-1; // Last char which fits: hyphenation does not need to check further
4010 
4011             #if (USE_LIBUNIBREAK==1)
4012                 // If no normal wrap found, and if we have a deprecated wrap (a normal wrap
4013                 // as determined by libunibreak, but prevented by "white-space: nowrap",
4014                 // it's because the line has no wrap opportunity outside nodes with
4015                 // "white-space: nowrap".
4016                 // We need to wrap, and it's best to do so at a regular opportunity rather
4017                 // than at some arbitrary point: do as it there were no "nowrap".
4018                 if ( lastNormalWrap < 0 && lastDeprecatedWrap > 0 ) {
4019                     lastNormalWrap = lastDeprecatedWrap;
4020                 }
4021             #endif
4022             int normalWrapWidth = lastNormalWrap > 0 ? x + m_widths[lastNormalWrap]-w0 : 0;
4023             int unusedSpace = maxWidth - normalWrapWidth;
4024             int unusedPercent = maxWidth > 0 ? unusedSpace * 100 / maxWidth : 0;
4025             #if (USE_LIBUNIBREAK!=1)
4026                 // (Different usage of deprecatedWrap than above)
4027                 int deprecatedWrapWidth = lastDeprecatedWrap > 0 ? x + m_widths[lastDeprecatedWrap]-w0 : 0;
4028                 if ( deprecatedWrapWidth > normalWrapWidth && unusedPercent > 3 ) { // only 3%
4029                     lastNormalWrap = lastDeprecatedWrap;
4030                 }
4031             #endif
4032 
4033             // If, with normal wrapping, more than 5% of the line would not be used,
4034             // try to find a word (from where we stopped back to lastNormalWrap) to
4035             // hyphenate, if hyphenation is not forbidden by CSS.
4036             // todo: decide if we should hyphenate if bidi is happening up to now
4037             if ( lastMandatoryWrap<0 && lastNormalWrap<m_length-1 && unusedPercent > m_pbuffer->unused_space_threshold_percent ) {
4038                 // There may be more than one word between wordpos and lastNormalWrap (or
4039                 // pos, the start of this line): if hyphenation is not possible with
4040                 // the right most one, we have to try the previous words.
4041                 // #define DEBUG_HYPH_EXTRA_LOOPS // Uncomment for debugging loops
4042                 #ifdef DEBUG_HYPH_EXTRA_LOOPS
4043                     int debug_loop_num = 0;
4044                 #endif
4045                 int wordpos_min = lastNormalWrap > pos ? lastNormalWrap : pos;
4046                 while ( wordpos > wordpos_min ) {
4047                     if ( m_srcs[wordpos]->flags & LTEXT_SRC_IS_OBJECT ) {
4048                         wordpos--; // skip images & floats
4049                         continue;
4050                     }
4051                     #ifdef DEBUG_HYPH_EXTRA_LOOPS
4052                         debug_loop_num++;
4053                         if (debug_loop_num > 1)
4054                             printf("hyph loop #%d checking: %s\n", debug_loop_num,
4055                                 LCSTR(lString32(m_text+wordpos_min, i-wordpos_min+1)));
4056                     #endif
4057                     if ( !(m_srcs[wordpos]->flags & LTEXT_HYPHENATE) || (m_srcs[wordpos]->flags & LTEXT_FLAG_NOWRAP) ) {
4058                         // The word at worpos can't be hyphenated, but it might be
4059                         // allowed on some earlier word in another text node.
4060                         // As this is a rare situation (they are mostly all hyphenat'able,
4061                         // or none of them are), and to skip some loops, as the min size
4062                         // of a word to go look for hyphenation is 4, skip by 4 chars.
4063                         wordpos = wordpos - MIN_WORD_LEN_TO_HYPHENATE;
4064                         continue;
4065                     }
4066                     // lStr_findWordBounds() will find the word contained at wordpos
4067                     // (or the previous word if wordpos happens to be a space or some
4068                     // punctuation) by looking only for alpha chars in m_text.
4069                     // Note: it actually does that with the char at wordpos-1 - not sure
4070                     // if we shoud correct it, here or there - or if this is fine - but
4071                     // let's go with it as-is as it might be a safety and might help
4072                     // us not be stuck in some infinite loop here.
4073                     int wstart, wend;
4074                     lStr_findWordBounds( m_text, m_length, wordpos, wstart, wend );
4075                     if ( wend <= lastNormalWrap ) {
4076                         // We passed back lastNormalWrap: no need to look for more
4077                         break;
4078                     }
4079                     int len = wend - wstart;
4080                     if ( len < MIN_WORD_LEN_TO_HYPHENATE ) {
4081                         // Too short word found, skip it
4082                         wordpos = wstart - 1;
4083                         continue;
4084                     }
4085                     if ( wstart >= wordpos ) {
4086                         // Shouldn't happen, but let's be sure we don't get stuck
4087                         wordpos = wordpos - MIN_WORD_LEN_TO_HYPHENATE;
4088                         continue;
4089                     }
4090                     #ifdef DEBUG_HYPH_EXTRA_LOOPS
4091                         if (debug_loop_num > 1)
4092                             printf("  hyphenating: %s\n", LCSTR(lString32(m_text+wstart, len)));
4093                     #endif
4094                     #if TRACE_LINE_SPLITTING==1
4095                         TR("wordBounds(%s) unusedSpace=%d wordWidth=%d",
4096                                 LCSTR(lString32(m_text+wstart, len)), unusedSpace, m_widths[wend]-m_widths[wstart]);
4097                     #endif
4098                     // We have a valid word to look for hyphenation
4099                     if ( len > MAX_WORD_SIZE ) // hyphenate() stops/truncates at 64 chars
4100                         len = MAX_WORD_SIZE;
4101                     // ->hyphenate(), which is used by some other parts of the code,
4102                     // expects a lUInt8 array. We added flagSize=1|2 so it can set the correct
4103                     // flags on our upgraded (from lUInt8 to lUInt16) m_flags.
4104                     lUInt8 * flags = (lUInt8*) (m_flags + wstart);
4105                     // Fill static array with cumulative widths relative to word start
4106                     static lUInt16 widths[MAX_WORD_SIZE];
4107                     int wordStart_w = wstart>0 ? m_widths[wstart-1] : 0;
4108                     for ( int i=0; i<len; i++ ) {
4109                         widths[i] = m_widths[wstart+i] - wordStart_w;
4110                     }
4111                     int max_width = maxWidth + spaceReduceWidth - (x + (wordStart_w - w0));
4112                     // In some rare cases, a word here can be made with parts from multiple text nodes.
4113                     // Use the font of the first text node to compute the hyphen width, which
4114                     // might then be wrong - but that will be smoothed by alignLine().
4115                     // (lStr_findWordBounds() might grab objects or inlineboxes as part of
4116                     // the word, so skip them when looking for a font)
4117                     int _hyphen_width = 0;
4118                     for ( int i=wstart; i<wend; i++ ) {
4119                         if ( !(m_srcs[i]->flags & LTEXT_SRC_IS_OBJECT) ) {
4120                             _hyphen_width = ((LVFont*)m_srcs[i]->t.font)->getHyphenWidth();
4121                             break;
4122                         }
4123                     }
4124                     // Use the hyph method of the source node that contains wordpos
4125                     if ( m_srcs[wordpos]->lang_cfg->getHyphMethod()->hyphenate(m_text+wstart, len, widths, flags, _hyphen_width, max_width, 2) ) {
4126                         // We need to reset the flag for the multiple hyphenation
4127                         // opportunities we will not be using (or they could cause
4128                         // spurious spaces, as a word here may be multiple words
4129                         // in AddLine() if parts from different text nodes).
4130                         for ( int i=0; i<len; i++ ) {
4131                             if ( m_flags[wstart+i] & LCHAR_ALLOW_HYPH_WRAP_AFTER ) {
4132                                 if ( widths[i] + _hyphen_width > max_width ) {
4133                                     TR("hyphen found, but max width reached at char %d", i);
4134                                     m_flags[wstart+i] &= ~LCHAR_ALLOW_HYPH_WRAP_AFTER; // reset flag
4135                                 }
4136                                 else if ( wstart + i > pos+1 ) {
4137                                     if ( lastHyphWrap >= 0 ) { // reset flag on previous candidate
4138                                         m_flags[lastHyphWrap] &= ~LCHAR_ALLOW_HYPH_WRAP_AFTER;
4139                                     }
4140                                     lastHyphWrap = wstart + i;
4141                                     // Keep looking for some other candidates in that word
4142                                 }
4143                                 else if ( wstart + i >= pos ) {
4144                                     m_flags[wstart+i] &= ~LCHAR_ALLOW_HYPH_WRAP_AFTER; // reset flag
4145                                 }
4146                                 // Don't reset those < pos as they are part of previous line
4147                             }
4148                         }
4149                         if ( lastHyphWrap >= 0 ) {
4150                             // Found in this word, no need to look at previous words
4151                             break;
4152                         }
4153                     }
4154                     TR("no hyphen found - max_width=%d", max_width);
4155                     // Look at previous words if any
4156                     wordpos = wstart - 1;
4157                 }
4158             }
4159 
4160             // Decide best position to end this line
4161             int wrapPos = lastHyphWrap;
4162             if ( lastMandatoryWrap>=0 )
4163                 wrapPos = lastMandatoryWrap;
4164             else {
4165                 if ( wrapPos < lastNormalWrap )
4166                     wrapPos = lastNormalWrap;
4167                 if ( wrapPos < 0 ) // no wrap opportunity (e.g. very long non-hyphenable word)
4168                     wrapPos = i-1;
4169                 #if (USE_LIBUNIBREAK!=1)
4170                 if ( wrapPos <= upSkipPos ) {
4171                     // Ensure that what, when dealing with previous line, we pushed to
4172                     // next line (below) is actually on this new line.
4173                     //CRLog::trace("guard old wrapPos at %d", wrapPos);
4174                     wrapPos = upSkipPos+1;
4175                     //CRLog::trace("guard new wrapPos at %d", wrapPos);
4176                     upSkipPos = -1;
4177                 }
4178                 #endif
4179             }
4180             // End (not included) of current line
4181             int endp = wrapPos + (lastMandatoryWrap<0 ? 1 : 0);
4182 
4183             // Specific handling of CJK punctuation that should not happen at start or
4184             // end of line. When using libunibreak, we trust it to handle them correctly.
4185             #if (USE_LIBUNIBREAK!=1)
4186             // The following looks left (up) and right (down) if there are any chars/punctuation
4187             // that should be prevented from being at the end of line or start of line, and if
4188             // yes adjust wrapPos so they are pushed to next line, or brought to this line.
4189             // It might be a bit of a duplication of what's done above (for latin punctuations)
4190             // in the avoidWrap section.
4191             int downSkipCount = 0;
4192             int upSkipCount = 0;
4193             if (endp > 1 && isCJKLeftPunctuation(*(m_text + endp))) {
4194                 // Next char will be fine at the start of next line.
4195                 //CRLog::trace("skip skip punctuation %s, at index %d", LCSTR(lString32(m_text+endp, 1)), endp);
4196             } else if (endp > 1 && endp < m_length - 1 && isCJKLeftPunctuation(*(m_text + endp - 1))) {
4197                 // Most right char is left punctuation: go back 1 char so this one
4198                 // goes onto next line.
4199                 upSkipPos = endp;
4200                 endp--; wrapPos--;
4201                 //CRLog::trace("up skip left punctuation %s, at index %d", LCSTR(lString32(m_text+endp, 1)), endp);
4202             } else if (endp > 1 && isCJKPunctuation(*(m_text + endp))) {
4203                 // Next char (start of next line) is some right punctuation that
4204                 // is not allowed at start of line.
4205                 // Look if it's better to wrap before (up) or after (down), and how
4206                 // much up or down we find an adequate wrap position, and decide
4207                 // which to use.
4208                 for (int epos = endp; epos<m_length; epos++, downSkipCount++) {
4209                    if ( !isCJKPunctuation(*(m_text + epos)) ) break;
4210                    //CRLog::trace("down skip punctuation %s, at index %d", LCSTR(lString32(m_text + epos, 1)), epos);
4211                 }
4212                 for (int epos = endp; epos>=start; epos--, upSkipCount++) {
4213                    if ( !isCJKPunctuation(*(m_text + epos)) ) break;
4214                    //CRLog::trace("up skip punctuation %s, at index %d", LCSTR(lString32(m_text + epos, 1)), epos);
4215                 }
4216                 if (downSkipCount <= upSkipCount && downSkipCount <= 2 && false ) {
4217                             // last check was "&& m_hanging_punctuation", but we
4218                             // have to skip that in this old code after the hanging
4219                             // punctuation handling changes
4220                    // Less skips if we bring next char on this line, and hanging
4221                    // punctuation is enabled so this punctuation will naturally
4222                    // find it's place in the reserved right area.
4223                    endp += downSkipCount;
4224                    wrapPos += downSkipCount;
4225                    //CRLog::trace("finally down skip punctuations %d", downSkipCount);
4226                 } else if (upSkipCount <= 2) {
4227                    // Otherwise put it on next line (spaces or inter-ideograph spaces
4228                    // will be expanded for justification).
4229                    upSkipPos = endp;
4230                    endp -= upSkipCount;
4231                    wrapPos -= upSkipCount;
4232                    //CRLog::trace("finally up skip punctuations %d", upSkipCount);
4233                 }
4234             }
4235             #endif
4236             if (endp > m_length)
4237                 endp = m_length;
4238 
4239             // Best position to end this line found.
4240             bool hasInlineBoxes = firstInlineBoxPos >= 0 && firstInlineBoxPos < endp;
4241             addLine(pos, endp, x, para, pos==0, wrapPos>=m_length-1, preFormattedOnly, isLastPara, hasInlineBoxes);
4242             pos = wrapPos + 1; // start of next line
4243 
4244             #if (USE_LIBUNIBREAK==1)
4245             // (Only when using libunibreak, which we trust decisions to wrap on hyphens.)
4246             if ( m_srcs[wrapPos]->lang_cfg->duplicateRealHyphenOnNextLine() && pos > 0 && pos < m_length-1 ) {
4247                 if ( m_text[wrapPos] == '-' || m_text[wrapPos] == UNICODE_HYPHEN ) {
4248                     pos--; // Have that last hyphen also at the start of next line
4249                            // (small caveat: the duplicated hyphen at start of next
4250                            // line won't be part of the highlighted text)
4251                     // And forbid a break after this duplicated hyphen (this avoids
4252                     // a possible infinite loop and out of memory when no allowed
4253                     // wrap is found on next line, as we would continuously AddLine()
4254                     // lines with only this hyphen)
4255                     m_flags[pos] &= ~LCHAR_ALLOW_WRAP_AFTER;
4256                 }
4257             }
4258             #endif
4259         }
4260     }
4261 
processEmbeddedBlock(int idx)4262     void processEmbeddedBlock( int idx )
4263     {
4264         ldomNode * node = (ldomNode *) m_pbuffer->srctext[idx].object;
4265         // Use current width available at current y position for the whole block
4266         // (Firefox would lay out this block content around the floats met along
4267         // the way, but it would be quite tedious to do the same... so, we don't).
4268         int width = getCurrentLineWidth();
4269         int block_x = getCurrentLineX();
4270         int cur_y = m_y;
4271 
4272         bool already_rendered = false;
4273         { // in its own scope, so this RenderRectAccessor is forgotten when left
4274             RenderRectAccessor fmt( node );
4275             if ( RENDER_RECT_HAS_FLAG(fmt, BOX_IS_RENDERED) ) {
4276                 already_rendered = true;
4277             }
4278         }
4279         // On the first rendering (after type settings changes), we want to forward
4280         // this block individual lines to the main page splitting context.
4281         // But on later calls (once already_rendered), used for drawing or text
4282         // selection, we want to have a single line with the inlineBox.
4283         // We'll mark the first rendering with is_reusable=false, so that we go
4284         // reformatting this final node when we need to draw it.
4285         // (We could mix the individual lines with the main inlineBox line, but
4286         // that would need added code at various places to ignore one or the
4287         // others depending on what's needed there.)
4288         if ( !already_rendered ) {
4289             LVRendPageContext context( NULL, m_pbuffer->page_height );
4290             // We don't know if the upper LVRendPageContext wants lines or not,
4291             // so assume it does (the main flow does).
4292             int rend_flags = node->getDocument()->getRenderBlockRenderingFlags();
4293             // We want to avoid negative margins (if allowed in global flags) and
4294             // going back the flow y, as the transfered lines would not reflect
4295             // that, and we could get some small mismatches and glitches.
4296             rend_flags &= ~BLOCK_RENDERING_ALLOW_NEGATIVE_COLLAPSED_MARGINS;
4297             int baseline = REQ_BASELINE_FOR_TABLE; // baseline of block is baseline of its first line
4298             // The same usable overflows provided for the container (possibly
4299             // adjusted for floats) can be used for this full-width inlineBox.
4300             int usable_left_overflow;
4301             int usable_right_overflow;
4302             getCurrentLineUsableOverflows(usable_left_overflow, usable_right_overflow);
4303             renderBlockElement( context, node, 0, 0, width, usable_left_overflow, usable_right_overflow,
4304                                 m_specified_para_dir, &baseline, rend_flags);
4305             RenderRectAccessor fmt( node );
4306             fmt.setX(block_x);
4307             fmt.setY(m_y);
4308             fmt.setBaseline(baseline);
4309             RENDER_RECT_SET_FLAG(fmt, BOX_IS_RENDERED);
4310             // Transfer individual lines from this sub-context into real frmlines (they
4311             // will be transferred to the upper context by renderBlockElementEnhanced())
4312             if ( context.getLines() ) {
4313                 LVPtrVector<LVRendLineInfo> * lines = context.getLines();
4314                 for ( int i=0; i < lines->length(); i++ ) {
4315                     LVRendLineInfo * line = lines->get(i);
4316                     formatted_line_t * frmline = lvtextAddFormattedLine( m_pbuffer );
4317                     frmline->x = block_x;
4318                     frmline->y = cur_y + line->getStart();
4319                     frmline->height = line->getHeight();
4320                     frmline->flags = line->getFlags();
4321                     if (m_has_ongoing_float)
4322                         frmline->flags |= LTEXT_LINE_SPLIT_AVOID_BEFORE;
4323                     // Unfortunaltely, we can't easily forward footnotes links
4324                     // gathered by this sub-context via frmlines.
4325                     // printf("emb line %d>%d\n", frmline->y, frmline->height);
4326                     m_y += frmline->height;
4327                     // We only check for already positioned floats to ensure
4328                     // no page break along them. We'll positioned yet-to-be
4329                     // positioned floats only when done with this embedded block.
4330                     checkOngoingFloat();
4331                 }
4332             }
4333             // Next time we have to use this LFormattedText for drawing, have it
4334             // trashed: we'll re-format it by going into the following 'else'.
4335             m_pbuffer->is_reusable = false;
4336         }
4337         else {
4338             RenderRectAccessor fmt( node );
4339             int height = fmt.getHeight();
4340             formatted_line_t * frmline = lvtextAddFormattedLine( m_pbuffer );
4341             frmline->x = block_x;
4342             frmline->width = width; // single word width
4343             frmline->y = cur_y;
4344             frmline->height = height;
4345             frmline->flags = 0; // no flags needed once page split has been done
4346             // printf("final line %d>%d\n", frmline->y, frmline->height);
4347             // This line has a single word: the inlineBox.
4348             formatted_word_t * word = lvtextAddFormattedWord(frmline);
4349             word->src_text_index = idx;
4350             word->flags = LTEXT_WORD_IS_INLINE_BOX;
4351             word->x = 0;
4352             word->width = width;
4353             m_y = cur_y + height;
4354             m_pbuffer->height = m_y;
4355         }
4356         // Not tested how this would work with floats...
4357         checkOngoingFloat();
4358         positionDelayedFloats();
4359     }
4360 
4361     /// split source data into paragraphs
splitParagraphs()4362     void splitParagraphs()
4363     {
4364         int start = 0;
4365         int i;
4366 
4367         int srctextlen = m_pbuffer->srctextlen;
4368         int clear_after_last_flag = 0;
4369         if ( srctextlen>0 && (m_pbuffer->srctext[srctextlen-1].flags & LTEXT_SRC_IS_CLEAR_LAST) ) {
4370             // Ignorable source line added to carry a last <br clear=>.
4371             clear_after_last_flag = m_pbuffer->srctext[srctextlen-1].flags & LTEXT_SRC_IS_CLEAR_BOTH;
4372             srctextlen -= 1; // Don't process this last srctext
4373         }
4374 
4375         for ( i=1; i<=srctextlen; i++ ) {
4376             // Split on LTEXT_FLAG_NEWLINE, mostly set when <BR/> met
4377             // (we check m_pbuffer->srctext[i], the next srctext that we are not
4378             // adding to the current paragraph, as <BR> and its clear= are carried
4379             // by the following text.)
4380             bool isLastPara = (i == srctextlen);
4381             if ( isLastPara || (m_pbuffer->srctext[i].flags & LTEXT_FLAG_NEWLINE) ) {
4382                 if ( m_pbuffer->srctext[start].flags & LTEXT_SRC_IS_CLEAR_BOTH ) {
4383                     // (LTEXT_SRC_IS_CLEAR_BOTH is a mask, will match _LEFT and _RIGHT too)
4384                     floatClearText( m_pbuffer->srctext[start].flags & LTEXT_SRC_IS_CLEAR_BOTH );
4385                 }
4386                 // We do not need to go thru processParagraph() to handle an embedded block
4387                 // (bogus block element children of an inline element): we have a dedicated
4388                 // handler for it.
4389                 bool isEmbeddedBlock = false;
4390                 if ( i == start + 1 ) {
4391                     // Embedded block among inlines had been surrounded by LTEXT_FLAG_NEWLINE,
4392                     // so we'll get one standalone here.
4393                     if ( m_pbuffer->srctext[start].flags & LTEXT_SRC_IS_INLINE_BOX ) {
4394                         // We used LTEXT_SRC_IS_INLINE_BOX for embedded blocks too (to not
4395                         // waste a bit in the lUInt32 for LTEXT_SRC_IS_EMBEDDED_BLOCK that
4396                         // we would only be using here), so do this check to see if it
4397                         // really is an embedded block.
4398                         ldomNode * node = (ldomNode *) m_pbuffer->srctext[start].object;
4399                         if ( node->isEmbeddedBlockBoxingInlineBox() ) {
4400                             isEmbeddedBlock = true;
4401                         }
4402                     }
4403                 }
4404                 if ( isEmbeddedBlock )
4405                     processEmbeddedBlock( start );
4406                 else
4407                     processParagraph( start, i, isLastPara );
4408                 start = i;
4409             }
4410         }
4411         if ( !m_no_clear_own_floats ) {
4412             // Clear our own floats so they are fully contained in this final block.
4413             finalizeFloats();
4414         }
4415         if ( clear_after_last_flag ) {
4416             floatClearText( clear_after_last_flag );
4417         }
4418     }
4419 
dealloc()4420     void dealloc()
4421     {
4422         if ( !m_staticBufs ) {
4423             free( m_text );
4424             free( m_flags );
4425             free( m_srcs );
4426             free( m_charindex );
4427             free( m_widths );
4428             m_text = NULL;
4429             m_flags = NULL;
4430             m_srcs = NULL;
4431             m_charindex = NULL;
4432             m_widths = NULL;
4433             #if (USE_FRIBIDI==1)
4434                 free( m_bidi_ctypes );
4435                 free( m_bidi_btypes );
4436                 free( m_bidi_levels );
4437                 m_bidi_ctypes = NULL;
4438                 m_bidi_btypes = NULL;
4439                 m_bidi_levels = NULL;
4440             #endif
4441             m_staticBufs = true;
4442             // printf("freeing dynamic buffers\n");
4443         }
4444         else {
4445             m_staticBufs_inUse = false;
4446             // printf("releasing static buffers\n");
4447         }
4448     }
4449 
4450     /// format source data
format()4451     int format()
4452     {
4453         // split and process all paragraphs
4454         splitParagraphs();
4455         // cleanup
4456         dealloc();
4457         TR("format() finished: h=%d  lines=%d", m_y, m_pbuffer->frmlinecount);
4458         return m_y;
4459     }
4460 };
4461 
// Out-of-class definitions of LVFormatter's static data members.
// m_staticBufs_inUse: reset to false by LVFormatter::dealloc() when the
// formatter was working with the static buffers (m_staticBufs true).
bool LVFormatter::m_staticBufs_inUse = false;
#if (USE_LIBUNIBREAK==1)
// NOTE(review): presumably remembers that libunibreak has been initialized
// once; it is only defined here, confirm where it gets set.
bool LVFormatter::m_libunibreak_init_done = false;
#endif
4466 
freeFrmLines(formatted_text_fragment_t * m_pbuffer)4467 static void freeFrmLines( formatted_text_fragment_t * m_pbuffer )
4468 {
4469     // clear existing formatted data, if any
4470     if (m_pbuffer->frmlines)
4471     {
4472         for (int i=0; i<m_pbuffer->frmlinecount; i++)
4473         {
4474             lvtextFreeFormattedLine( m_pbuffer->frmlines[i] );
4475         }
4476         free( m_pbuffer->frmlines );
4477     }
4478     m_pbuffer->frmlines = NULL;
4479     m_pbuffer->frmlinecount = 0;
4480 
4481     // Also clear floats
4482     if (m_pbuffer->floats)
4483     {
4484         for (int i=0; i<m_pbuffer->floatcount; i++)
4485         {
4486             if (m_pbuffer->floats[i]->links) {
4487                 delete m_pbuffer->floats[i]->links;
4488             }
4489             free( m_pbuffer->floats[i] );
4490         }
4491         free( m_pbuffer->floats );
4492     }
4493     m_pbuffer->floats = NULL;
4494     m_pbuffer->floatcount = 0;
4495 }
4496 
4497 // experimental formatter
Format(lUInt16 width,lUInt16 page_height,int para_direction,int usable_left_overflow,int usable_right_overflow,bool hanging_punctuation,BlockFloatFootprint * float_footprint)4498 lUInt32 LFormattedText::Format(lUInt16 width, lUInt16 page_height, int para_direction,
4499                 int usable_left_overflow, int usable_right_overflow, bool hanging_punctuation,
4500                 BlockFloatFootprint * float_footprint)
4501 {
4502     // clear existing formatted data, if any
4503     freeFrmLines( m_pbuffer );
4504     // setup new page size
4505     m_pbuffer->width = width;
4506     m_pbuffer->height = 0;
4507     m_pbuffer->page_height = page_height;
4508     m_pbuffer->is_reusable = !m_pbuffer->light_formatting;
4509     // format text
4510     LVFormatter formatter( m_pbuffer );
4511 
4512     // Set (as properties of the whole final block) the text-indent computed
4513     // values for the first line and for the next lines, by taking it
4514     // from the first src_text_fragment_t added (see comment in lvrend.cpp
4515     // renderFinalBlock() why we do it that way - while it might be better
4516     // if it were provided as a parameter to LFormattedText::Format()).
4517     int indent = m_pbuffer->srctextlen > 0 ? m_pbuffer->srctext[0].indent : 0;
4518     formatter.m_indent_first_line_done = false;
4519     if ( indent >= 0 ) { // positive indent affects only first line
4520         formatter.m_indent_current = indent;
4521         formatter.m_indent_after_first_line = 0;
4522     }
4523     else { // negative indent affects all but first lines
4524         formatter.m_indent_current = 0;
4525         formatter.m_indent_after_first_line = -indent;
4526     }
4527 
4528     // Set specified para direction (can be REND_DIRECTION_UNSET, in which case
4529     // it will be detected by fribidi)
4530     formatter.m_specified_para_dir = para_direction;
4531 
4532     formatter.m_usable_left_overflow = usable_left_overflow;
4533     formatter.m_usable_right_overflow = usable_right_overflow;
4534     formatter.m_hanging_punctuation = hanging_punctuation;
4535 
4536     if (float_footprint) {
4537         formatter.m_no_clear_own_floats = float_footprint->no_clear_own_floats;
4538 
4539         // BlockFloatFootprint provides a set of floats to represent
4540         // outer floats possibly having some footprint over the final
4541         // block that is to be formatted.
4542         // See FlowState->getFloatFootprint() for details.
4543         // So, for each of them, just add an embedded_float_t (without
4544         // a scrtext as they are not ours) to the buffer so our
4545         // positioning code can handle them.
4546         for (int i=0; i<float_footprint->floats_cnt; i++) {
4547             embedded_float_t * flt =  lvtextAddEmbeddedFloat( m_pbuffer );
4548             flt->srctext = NULL; // not our own float
4549             flt->x = float_footprint->floats[i][0];
4550             flt->y = float_footprint->floats[i][1];
4551             flt->width = float_footprint->floats[i][2];
4552             flt->height = float_footprint->floats[i][3];
4553             flt->is_right = (bool)(float_footprint->floats[i][4]);
4554             flt->inward_margin = float_footprint->floats[i][5];
4555         }
4556     }
4557 
4558     lUInt32 h = formatter.format();
4559 
4560     if ( float_footprint && float_footprint->no_clear_own_floats ) {
4561         // If we did not finalize/clear our embedded floats, forward
4562         // them to FlowState so it can ensure layout around them of
4563         // other block or final nodes.
4564         for (int i=0; i<m_pbuffer->floatcount; i++) {
4565             embedded_float_t * flt = m_pbuffer->floats[i];
4566             if (flt->srctext == NULL) // ignore outer floats given to us by flow
4567                 continue;
4568             float_footprint->forwardOverflowingFloat(flt->x, flt->y, flt->width, flt->height,
4569                                         flt->is_right, (ldomNode *)flt->srctext->object);
4570         }
4571     }
4572 
4573     return h;
4574 }
4575 
setImageScalingOptions(img_scaling_options_t * options)4576 void LFormattedText::setImageScalingOptions( img_scaling_options_t * options )
4577 {
4578     m_pbuffer->img_zoom_in_mode_block = options->zoom_in_block.mode;
4579     m_pbuffer->img_zoom_in_scale_block = options->zoom_in_block.max_scale;
4580     m_pbuffer->img_zoom_in_mode_inline = options->zoom_in_inline.mode;
4581     m_pbuffer->img_zoom_in_scale_inline = options->zoom_in_inline.max_scale;
4582     m_pbuffer->img_zoom_out_mode_block = options->zoom_out_block.mode;
4583     m_pbuffer->img_zoom_out_scale_block = options->zoom_out_block.max_scale;
4584     m_pbuffer->img_zoom_out_mode_inline = options->zoom_out_inline.mode;
4585     m_pbuffer->img_zoom_out_scale_inline = options->zoom_out_inline.max_scale;
4586 }
4587 
setSpaceWidthScalePercent(int spaceWidthScalePercent)4588 void LFormattedText::setSpaceWidthScalePercent(int spaceWidthScalePercent)
4589 {
4590     if (spaceWidthScalePercent>=10 && spaceWidthScalePercent<=500)
4591         m_pbuffer->space_width_scale_percent = spaceWidthScalePercent;
4592 }
4593 
setMinSpaceCondensingPercent(int minSpaceCondensingPercent)4594 void LFormattedText::setMinSpaceCondensingPercent(int minSpaceCondensingPercent)
4595 {
4596     if (minSpaceCondensingPercent>=25 && minSpaceCondensingPercent<=100)
4597         m_pbuffer->min_space_condensing_percent = minSpaceCondensingPercent;
4598 }
4599 
setUnusedSpaceThresholdPercent(int unusedSpaceThresholdPercent)4600 void LFormattedText::setUnusedSpaceThresholdPercent(int unusedSpaceThresholdPercent)
4601 {
4602     if (unusedSpaceThresholdPercent>=0 && unusedSpaceThresholdPercent<=20)
4603         m_pbuffer->unused_space_threshold_percent = unusedSpaceThresholdPercent;
4604 }
4605 
setMaxAddedLetterSpacingPercent(int maxAddedLetterSpacingPercent)4606 void LFormattedText::setMaxAddedLetterSpacingPercent(int maxAddedLetterSpacingPercent)
4607 {
4608     if (maxAddedLetterSpacingPercent>=0 && maxAddedLetterSpacingPercent<=20)
4609         m_pbuffer->max_added_letter_spacing_percent = maxAddedLetterSpacingPercent;
4610 }
4611 
4612 /// set colors for selection and bookmarks
setHighlightOptions(text_highlight_options_t * v)4613 void LFormattedText::setHighlightOptions(text_highlight_options_t * v)
4614 {
4615     m_pbuffer->highlight_options.selectionColor = v->selectionColor;
4616     m_pbuffer->highlight_options.commentColor = v->commentColor;
4617     m_pbuffer->highlight_options.correctionColor = v->correctionColor;
4618     m_pbuffer->highlight_options.bookmarkHighlightMode = v->bookmarkHighlightMode;
4619 }
4620 
4621 
DrawBookmarkTextUnderline(LVDrawBuf & drawbuf,int x0,int y0,int x1,int y1,int y,int flags,text_highlight_options_t * options)4622 void DrawBookmarkTextUnderline(LVDrawBuf & drawbuf, int x0, int y0, int x1, int y1, int y, int flags, text_highlight_options_t * options) {
4623     if (!(flags & (4 | 8)))
4624         return;
4625     if (options->bookmarkHighlightMode == highlight_mode_none)
4626         return;
4627     bool isGray = drawbuf.GetBitsPerPixel() <= 8;
4628     lUInt32 cl = 0x000000;
4629     if (isGray) {
4630         if (options->bookmarkHighlightMode == highlight_mode_solid)
4631             cl = (flags & 4) ? 0xCCCCCC : 0xAAAAAA;
4632     } else {
4633         cl = (flags & 4) ? options->commentColor : options->correctionColor;
4634     }
4635 
4636     if (options->bookmarkHighlightMode == highlight_mode_solid) {
4637         // solid fill
4638         lUInt32 cl2 = (cl & 0xFFFFFF) | 0xA0000000;
4639         drawbuf.FillRect(x0, y0, x1, y1, cl2);
4640     }
4641 
4642     if (options->bookmarkHighlightMode == highlight_mode_underline) {
4643         // underline
4644         cl = (cl & 0xFFFFFF);
4645         lUInt32 cl2 = cl | 0x80000000;
4646         int step = 4;
4647         int index = 0;
4648         for (int x = x0; x < x1; x += step ) {
4649 
4650             int x2 = x + step;
4651             if (x2 > x1)
4652                 x2 = x1;
4653             if (flags & 8) {
4654                 // correction
4655                 int yy = (index & 1) ? y - 1 : y;
4656                 drawbuf.FillRect(x, yy-1, x+1, yy, cl2);
4657                 drawbuf.FillRect(x+1, yy-1, x2-1, yy, cl);
4658                 drawbuf.FillRect(x2-1, yy-1, x2, yy, cl2);
4659             } else if (flags & 4) {
4660                 if (index & 1)
4661                     drawbuf.FillRect(x, y-1, x2 + 1, y, cl);
4662             }
4663             index++;
4664         }
4665     }
4666 }
4667 
getAbsMarksFromMarks(ldomMarkedRangeList * marks,ldomMarkedRangeList * absmarks,ldomNode * node)4668 static void getAbsMarksFromMarks(ldomMarkedRangeList * marks, ldomMarkedRangeList * absmarks, ldomNode * node) {
4669     // Provided ldomMarkedRangeList * marks are ranges made from the words
4670     // of a selection currently being made (native highlights by crengine).
4671     // Their coordinates have been translated from absolute to relative
4672     // to the final node, by the DrawDocument() that called
4673     // LFormattedText::Draw() for this final node.
4674     // In LFormattedText::Draw(), when we need to call DrawDocument() to
4675     // draw floats or inlineBoxes, we need to translate them back to
4676     // absolute coordinates (DrawDocument() will translate them again
4677     // to relative coordinates in the drawn float or inlineBox).
4678     // (They are matched in LFormattedText::Draw() against the lineRect,
4679     // which have coordinates in the context of where we are drawing.)
4680     // The 'node' provided to this function must be a floatBox or inlineBox:
4681     // its parent is either the final node that contains them, or some
4682     // inline node contained in it.
4683 
4684     // We need to know the current final node that contains the provided
4685     // node, and its absolute coordinates
4686     ldomNode * final_node = node->getParentNode();
4687     for ( ; final_node; final_node = final_node->getParentNode() ) {
4688         int rm = final_node->getRendMethod();
4689         if ( rm == erm_final )
4690             break;
4691     }
4692     lvRect final_node_rect = lvRect();
4693     if ( final_node )
4694         final_node->getAbsRect( final_node_rect, true );
4695 
4696     // Fill the second provided ldomMarkedRangeList with marks in absolute
4697     // coordinates.
4698     for ( int i=0; i<marks->length(); i++ ) {
4699         ldomMarkedRange * mark = marks->get(i);
4700         ldomMarkedRange * newmark = new ldomMarkedRange( *mark );
4701         newmark->start.y += final_node_rect.top;
4702         newmark->end.y += final_node_rect.top;
4703         newmark->start.x += final_node_rect.left;
4704         newmark->end.x += final_node_rect.left;
4705             // (Note: early when developping this, NOT updating x gave the
4706             // expected results, although logically it should be updated...
4707             // But now, it seems to work, and is needed to correctly shift
4708             // highlight marks in inlineBox by the containing final block's
4709             // left margin...)
4710         absmarks->add(newmark);
4711     }
4712 }
4713 
Draw(LVDrawBuf * buf,int x,int y,ldomMarkedRangeList * marks,ldomMarkedRangeList * bookmarks)4714 void LFormattedText::Draw( LVDrawBuf * buf, int x, int y, ldomMarkedRangeList * marks, ldomMarkedRangeList *bookmarks )
4715 {
4716     int i, j;
4717     formatted_line_t * frmline;
4718     src_text_fragment_t * srcline;
4719     formatted_word_t * word;
4720     LVFont * font;
4721     lvRect clip;
4722     buf->GetClipRect( &clip );
4723     const lChar32 * str;
4724     int line_y = y;
4725 
4726     // We might need to translate "marks" (native highlights) from relative
4727     // coordinates to absolute coordinates if we have to draw floats or
4728     // inlineBoxes: we'll do that when dealing with the first of these if any.
4729     ldomMarkedRangeList * absmarks = new ldomMarkedRangeList();
4730     bool absmarks_update_needed = marks!=NULL && marks->length()>0;
4731 
4732     // printf("x/y: %d/%d clip.top/bottom: %d %d\n", x, y, clip.top, clip.bottom);
4733     // When drawing a paragraph that spans 3 pages, we may get:
4734     //   x/y: 9/407 clip.top/bottom: 13 559
4735     //   x/y: 9/-139 clip.top/bottom: 13 583
4736     //   x/y: 9/-709 clip.top/bottom: 13 545
4737 
4738     for (i=0; i<m_pbuffer->frmlinecount; i++)
4739     {
4740         if (line_y >= clip.bottom)
4741             break;
4742         frmline = m_pbuffer->frmlines[i];
4743         if (line_y + frmline->height > clip.top)
4744         {
4745             // process background
4746 
4747             //lUInt32 bgcl = buf->GetBackgroundColor();
4748             //buf->FillRect( x+frmline->x, y + frmline->y, x+frmline->x + frmline->width, y + frmline->y + frmline->height, bgcl );
4749 
4750             // draw background for each word
4751             // (if multiple consecutive words share the same bgcolor, this will
4752             // actually fill a single rect encompassing these words)
4753             // todo: the way background color (not inherited in lvrend.cpp) is
4754             // handled here (only looking at the style of the inline node
4755             // that contains the word, and not at its other inline parents),
4756             // some words may not get their proper bgcolor
4757             lUInt32 lastWordColor = 0xFFFFFFFF;
4758             int lastWordStart = -1;
4759             int lastWordEnd = -1;
4760             for (j=0; j<frmline->word_count; j++)
4761             {
4762                 word = &frmline->words[j];
4763                 srcline = &m_pbuffer->srctext[word->src_text_index];
4764                 if (word->flags & LTEXT_WORD_IS_OBJECT)
4765                 {
4766                     // no background, TODO
4767                 }
4768                 else if (word->flags & LTEXT_WORD_IS_INLINE_BOX)
4769                 {
4770                     // background if any will be drawn when drawing the box below
4771                 }
4772                 else
4773                 {
4774                     lUInt32 bgcl = srcline->bgcolor;
4775                     if ( lastWordColor!=bgcl || lastWordStart==-1 ) {
4776                         if ( lastWordStart!=-1 )
4777                             if ( ((lastWordColor>>24) & 0xFF) < 128 )
4778                                 buf->FillRect( lastWordStart, y + frmline->y, lastWordEnd, y + frmline->y + frmline->height, lastWordColor );
4779                         lastWordColor=bgcl;
4780                         lastWordStart = x+frmline->x+word->x;
4781                     }
4782                     lastWordEnd = x+frmline->x+word->x+word->width;
4783                 }
4784             }
4785             if ( lastWordStart!=-1 )
4786                 if ( ((lastWordColor>>24) & 0xFF) < 128 )
4787                     buf->FillRect( lastWordStart, y + frmline->y, lastWordEnd, y + frmline->y + frmline->height, lastWordColor );
4788 
4789             // process marks
4790 #ifndef CR_USE_INVERT_FOR_SELECTION_MARKS
4791             if ( marks!=NULL && marks->length()>0 ) {
4792                 // Here is drawn the "native highlighting" of a selection in progress
4793                 lvRect lineRect( frmline->x, frmline->y, frmline->x + frmline->width, frmline->y + frmline->height );
4794                 for ( int i=0; i<marks->length(); i++ ) {
4795                     lvRect mark;
4796                     ldomMarkedRange * range = marks->get(i);
4797                     // printf("marks #%d %d %d > %d %d\n", i, range->start.x, range->start.y, range->end.x, range->end.y);
4798                     if ( range->intersects( lineRect, mark ) ) {
4799                         //
4800                         buf->FillRect(mark.left + x, mark.top + y, mark.right + x, mark.bottom + y, m_pbuffer->highlight_options.selectionColor);
4801                     }
4802                 }
4803             }
4804             if (bookmarks!=NULL && bookmarks->length()>0) {
4805                 lvRect lineRect( frmline->x, frmline->y, frmline->x + frmline->width, frmline->y + frmline->height );
4806                 for ( int i=0; i<bookmarks->length(); i++ ) {
4807                     lvRect mark;
4808                     ldomMarkedRange * range = bookmarks->get(i);
4809                     if ( range->intersects( lineRect, mark ) ) {
4810                         //
4811                         DrawBookmarkTextUnderline(*buf, mark.left + x, mark.top + y, mark.right + x, mark.bottom + y, mark.bottom + y - 2, range->flags,
4812                                                   &m_pbuffer->highlight_options);
4813                     }
4814                 }
4815             }
4816 #endif
4817 #ifdef CR_USE_INVERT_FOR_SELECTION_MARKS
4818             // process bookmarks
4819             if ( bookmarks != NULL && bookmarks->length() > 0 ) {
4820                 lvRect lineRect( frmline->x, frmline->y, frmline->x + frmline->width, frmline->y + frmline->height );
4821                 for ( int i=0; i<bookmarks->length(); i++ ) {
4822                     lvRect bookmark_rc;
4823                     ldomMarkedRange * range = bookmarks->get(i);
4824                     if ( range->intersects( lineRect, bookmark_rc ) ) {
4825                         buf->FillRect( bookmark_rc.left + x, bookmark_rc.top + y, bookmark_rc.right + x, bookmark_rc.bottom + y, 0xAAAAAA );
4826                     }
4827                 }
4828             }
4829 #endif
4830 
4831             int text_decoration_back_gap;
4832             lUInt16 lastWordSrcIndex;
4833             for (j=0; j<frmline->word_count; j++)
4834             {
4835                 word = &frmline->words[j];
4836                 if (word->flags & LTEXT_WORD_IS_OBJECT)
4837                 {
4838                     srcline = &m_pbuffer->srctext[word->src_text_index];
4839                     ldomNode * node = (ldomNode *) srcline->object;
4840                     if (node) {
4841                         LVImageSourceRef img = node->getObjectImageSource();
4842                         if ( img.isNull() )
4843                             img = LVCreateDummyImageSource( node, word->width, word->o.height );
4844                         int xx = x + frmline->x + word->x;
4845                         int yy = line_y + frmline->baseline - word->o.height + word->y;
4846                         buf->Draw( img, xx, yy, word->width, word->o.height );
4847                         //buf->FillRect( xx, yy, xx+word->width, yy+word->height, 1 );
4848                     }
4849                 }
4850                 else if (word->flags & LTEXT_WORD_IS_INLINE_BOX)
4851                 {
4852                     srcline = &m_pbuffer->srctext[word->src_text_index];
4853                     ldomNode * node = (ldomNode *) srcline->object;
4854                     // Logically, the coordinates of the top left of the box are:
4855                     // int x0 = x + frmline->x + word->x;
4856                     // int y0 = line_y + frmline->baseline - word->o.baseline + word->y;
4857                     // But we have updated the node's RenderRectAccesor x/y in alignLine(),
4858                     // ahd DrawDocument() will by default fetch them to shift the block
4859                     // it has to draw. So, we can use the provided x/y as-is, with
4860                     // the offsets from the RenderRectAccesor.
4861                     RenderRectAccessor fmt( node );
4862                     int x0 = x + fmt.getX();
4863                     int y0 = y + fmt.getY();
4864                     int doc_x = 0 - fmt.getX();
4865                     int doc_y = 0 - fmt.getY();
4866                     int dx = m_pbuffer->width;
4867                     int dy = frmline->height; // can be > m_pbuffer->page_height
4868                             // A frmline can be bigger than page_height, if
4869                             // this inlineBox contains many long paragraphs
4870                     int page_height = m_pbuffer->page_height;
4871                     if ( absmarks_update_needed ) {
4872                         getAbsMarksFromMarks(marks, absmarks, node);
4873                         absmarks_update_needed = false;
4874                     }
4875                     if ( node->isEmbeddedBlockBoxingInlineBox() ) {
4876                         // With embedded blocks, we shouldn't drop the clip (as we do next
4877                         // for regular inline-block boxes)
4878                         DrawDocument( *buf, node, x0, y0, dx, dy, doc_x, doc_y, page_height, absmarks, bookmarks );
4879                     }
4880                     else {
4881                         // inline-block boxes with negative margins can overflow the
4882                         // line height, and so possibly the page when that line is
4883                         // at top or bottom of page.
4884                         // When witnessed, that overflow was very small, and probably
4885                         // aimed at vertically aligning the box vs the text, but enough
4886                         // to have their glyphs truncated when clipped to the page rect.
4887                         // So, to avoid that, we just drop that clip when drawing the
4888                         // box, and restore it when done.
4889                         lvRect curclip;
4890                         buf->GetClipRect( &curclip ); // backup clip
4891                         buf->SetClipRect(NULL); // no clipping
4892                         DrawDocument( *buf, node, x0, y0, dx, dy, doc_x, doc_y, page_height, absmarks, bookmarks );
4893                         buf->SetClipRect(&curclip); // restore original page clip
4894                     }
4895                 }
4896                 else
4897                 {
4898                     bool flgHyphen = false;
4899                     if ( word->flags&LTEXT_WORD_CAN_HYPH_BREAK_LINE_AFTER) {
4900                         if (j==frmline->word_count-1)
4901                             flgHyphen = true;
4902                         // Also do that even if it's not the last word in the line
4903                         // AND the line is bidi: the hyphen may be in the middle of
4904                         // the text, but it's fine for some people with bidi, see
4905                         // conversation "Bidi reordering of soft hyphen" at:
4906                         //   https://unicode.org/pipermail/unicode/2014-April/thread.html#348
4907                         // If that's not desirable, just disable hyphenation lookup
4908                         // in processParagraph() if m_has_bidi or if chars found in
                        // line span multiple bidi levels (so that we don't get
4910                         // a blank space for a hyphen not drawn after this word).
4911                         else if (frmline->flags & LTEXT_LINE_IS_BIDI)
4912                             flgHyphen = true;
4913                     }
4914                     srcline = &m_pbuffer->srctext[word->src_text_index];
4915                     font = (LVFont *) srcline->t.font;
4916                     str = srcline->t.text + word->t.start;
4917                     /*
4918                     lUInt32 srcFlags = srcline->flags;
4919                     if ( srcFlags & LTEXT_BACKGROUND_MARK_FLAGS ) {
4920                         lvRect rc;
4921                         rc.left = x + frmline->x + word->x;
4922                         rc.top = line_y + (frmline->baseline - font->getBaseline()) + word->y;
4923                         rc.right = rc.left + word->width;
4924                         rc.bottom = rc.top + font->getHeight();
4925                         buf->FillRect( rc.left, rc.top, rc.right, rc.bottom, 0xAAAAAA );
4926                     }
4927                     */
4928                     // Check if we need to continue the text decoration from previous word.
4929                     // For now, we only ensure it if this word and previous one are in the
4930                     // same text node. We wrongly won't when one of these is in a sub <SPAN>
4931                     // because we can't detect that rightly at this point anymore...
4932                     text_decoration_back_gap = 0;
4933                     if (j > 0 && word->src_text_index == lastWordSrcIndex) {
4934                         text_decoration_back_gap = word->x - lastWordEnd;
4935                     }
4936                     lUInt32 oldColor = buf->GetTextColor();
4937                     lUInt32 oldBgColor = buf->GetBackgroundColor();
4938                     lUInt32 cl = srcline->color;
4939                     lUInt32 bgcl = srcline->bgcolor;
4940                     if ( cl!=0xFFFFFFFF )
4941                         buf->SetTextColor( cl );
4942                     if ( bgcl!=0xFFFFFFFF )
4943                         buf->SetBackgroundColor( bgcl );
4944                     // Add drawing flags: text decoration (underline...)
4945                     lUInt32 drawFlags = srcline->flags & LTEXT_TD_MASK;
4946                     // and chars direction, and if word begins or ends paragraph (for Harfbuzz)
4947                     drawFlags |= WORD_FLAGS_TO_FNT_FLAGS(word->flags);
4948                     font->DrawTextString(
4949                         buf,
4950                         x + frmline->x + word->x,
4951                         line_y + (frmline->baseline - font->getBaseline()) + word->y,
4952                         str,
4953                         word->t.len,
4954                         '?',
4955                         NULL,
4956                         flgHyphen,
4957                         srcline->lang_cfg,
4958                         drawFlags,
4959                         srcline->letter_spacing + word->added_letter_spacing,
4960                         word->width,
4961                         text_decoration_back_gap);
4962                     /* To display the added letter spacing % at end of line
4963                     if (j == frmline->word_count-1 && word->added_letter_spacing ) {
4964                         // lString32 val = lString32::itoa(word->added_letter_spacing);
4965                         lString32 val = lString32::itoa(100*word->added_letter_spacing / font->getSize());
4966                         font->DrawTextString( buf, x + frmline->x + word->x + word->width + 10,
4967                             line_y + (frmline->baseline - font->getBaseline()) + word->y,
4968                             val.c_str(), val.length(), '?', NULL, false);
4969                     }
4970                     */
4971                     if ( cl!=0xFFFFFFFF )
4972                         buf->SetTextColor( oldColor );
4973                     if ( bgcl!=0xFFFFFFFF )
4974                         buf->SetBackgroundColor( oldBgColor );
4975                 }
4976                 lastWordSrcIndex = word->src_text_index;
4977                 lastWordEnd = word->x + word->width;
4978             }
4979 
4980 #ifdef CR_USE_INVERT_FOR_SELECTION_MARKS
4981             // process marks
4982             if ( marks!=NULL && marks->length()>0 ) {
4983                 lvRect lineRect( frmline->x, frmline->y, frmline->x + frmline->width, frmline->y + frmline->height );
4984                 for ( int i=0; i<marks->length(); i++ ) {
4985                     lvRect mark;
4986                     ldomMarkedRange * range = marks->get(i);
4987                     if ( range->intersects( lineRect, mark ) ) {
4988                         buf->InvertRect( mark.left + x, mark.top + y, mark.right + x, mark.bottom + y);
4989                     }
4990                 }
4991             }
4992 #endif
4993         }
4994         line_y += frmline->height;
4995     }
4996 
4997     // Draw floats if any
4998     for (i=0; i<m_pbuffer->floatcount; i++) {
4999         embedded_float_t * flt = m_pbuffer->floats[i];
5000         if (flt->srctext == NULL) {
5001             // Ignore outer floats (they are either fake footprint floats,
5002             // or real outer floats not to be drawn by us)
5003             continue;
5004         }
5005         ldomNode * node = (ldomNode *) flt->srctext->object;
5006 
5007         // Only some part of this float needs to be in the clip area.
5008         // Also account for the overflows, so we can render fully
5009         // floats with negative margins.
5010         RenderRectAccessor fmt( node );
5011         int top_overflow = fmt.getTopOverflow();
5012         int bottom_overflow = fmt.getBottomOverflow();
        // Note: some dropcaps may still not be drawn in spite of this
5014         // because of the checks with _hidePartialGlyphs in lvdrawbuf.cpp
5015         // (todo: get rid of these _hidePartialGlyphs checks ?)
5016 
5017         if (y + flt->y - top_overflow < clip.bottom && y + flt->y + flt->height + bottom_overflow > clip.top) {
5018             // DrawDocument() parameters (y0 + doc_y must be equal to our y,
5019             // doc_y just shift the viewport, so anything outside is not drawn).
5020             int x0 = x + flt->x;
5021             int y0 = y + flt->y;
5022             int doc_x = 0 - flt->x;
5023             int doc_y = 0 - flt->y;
5024             int dx = m_pbuffer->width;
5025             int dy = m_pbuffer->page_height;
5026             int page_height = m_pbuffer->page_height;
5027             if ( absmarks_update_needed ) {
5028                 getAbsMarksFromMarks(marks, absmarks, node);
5029                 absmarks_update_needed = false;
5030             }
5031             DrawDocument( *buf, node, x0, y0, dx, dy, doc_x, doc_y, page_height, absmarks, bookmarks );
5032         }
5033     }
5034     delete absmarks;
5035 }
5036 
5037 #endif
5038