1 #include "mupdf/fitz.h"
2 #include "mupdf/ucdn.h"
3
4 #include <math.h>
5 #include <float.h>
6 #include <string.h>
7
8 /* Simple layout structure */
9
fz_new_layout(fz_context * ctx)10 fz_layout_block *fz_new_layout(fz_context *ctx)
11 {
12 fz_pool *pool = fz_new_pool(ctx);
13 fz_layout_block *block;
14 fz_try(ctx)
15 {
16 block = fz_pool_alloc(ctx, pool, sizeof (fz_layout_block));
17 block->pool = pool;
18 block->head = NULL;
19 block->tailp = &block->head;
20 }
21 fz_catch(ctx)
22 {
23 fz_drop_pool(ctx, pool);
24 fz_rethrow(ctx);
25 }
26 return block;
27 }
28
/*
	Release a layout block by dropping the pool it was created with.
	Passing NULL is allowed and does nothing.
*/
void fz_drop_layout(fz_context *ctx, fz_layout_block *block)
{
	if (block == NULL)
		return;

	fz_drop_pool(ctx, block->pool);
}
34
/*
	Append a new, initially empty, line to the end of a layout block.

	x, y, h and p are stored on the line unchanged. The pointer p is
	stored as given; the string it points to is not copied.
	Characters added afterwards with fz_add_layout_char are attached
	to this line.
*/
void fz_add_layout_line(fz_context *ctx, fz_layout_block *block, float x, float y, float h, const char *p)
{
	fz_layout_line *entry = fz_pool_alloc(ctx, block->pool, sizeof (fz_layout_line));

	entry->next = NULL;
	entry->text = NULL;
	entry->p = p;
	entry->h = h;
	entry->y = y;
	entry->x = x;

	/* Link the line in at the tail, then move the tail past it. */
	*block->tailp = entry;
	block->tailp = &entry->next;

	/* Subsequent characters are appended to this line's text list. */
	block->text_tailp = &entry->text;
}
48
/*
	Append a character to the line most recently added to the block.

	x, w and p are stored on the character unchanged (p is not copied).

	NOTE(review): block->text_tailp is only assigned by
	fz_add_layout_line, so this appears to require that at least one
	line has been added first - confirm against callers.
*/
void fz_add_layout_char(fz_context *ctx, fz_layout_block *block, float x, float w, const char *p)
{
	fz_layout_char *entry = fz_pool_alloc(ctx, block->pool, sizeof (fz_layout_char));

	entry->next = NULL;
	entry->p = p;
	entry->w = w;
	entry->x = x;

	/* Link the character in at the tail of the current line's text. */
	*block->text_tailp = entry;
	block->text_tailp = &entry->next;
}
59
60 /* Extract text into blocks and lines. */
61
/*
	Thresholds for the line/paragraph heuristics. Each one is a
	multiple of the text size (the expansion of the text matrix).
*/

/* Largest offset from the previous baseline that is still only a line break. */
#define PARAGRAPH_DIST 1.5f

/* Movement along the baseline smaller than this is ignored. */
#define SPACE_DIST 0.15f

/* Movement along the baseline larger than this starts a new line. */
#define SPACE_MAX_DIST 0.8f
65
66 typedef struct
67 {
68 fz_device super;
69 fz_stext_page *page;
70 fz_point pen, start;
71 fz_matrix trm;
72 int new_obj;
73 int curdir;
74 int lastchar;
75 int flags;
76 int color;
77 const fz_text *lasttext;
78 } fz_stext_device;
79
/*
	Help text listing the option names that fz_parse_stext_options
	recognises.
*/
const char *fz_stext_options_usage =
	"Text output options:\n"
	"\tinhibit-spaces: don't add spaces between gaps in the text\n"
	"\tpreserve-images: keep images in output\n"
	"\tpreserve-ligatures: do not expand ligatures into constituent characters\n"
	"\tpreserve-whitespace: do not convert all whitespace into space characters\n"
	"\tpreserve-spans: do not merge spans on the same line\n"
	"\tdehyphenate: attempt to join up hyphenated words\n"
	"\n";
89
90 fz_stext_page *
fz_new_stext_page(fz_context * ctx,fz_rect mediabox)91 fz_new_stext_page(fz_context *ctx, fz_rect mediabox)
92 {
93 fz_pool *pool = fz_new_pool(ctx);
94 fz_stext_page *page = NULL;
95 fz_try(ctx)
96 {
97 page = fz_pool_alloc(ctx, pool, sizeof(*page));
98 page->pool = pool;
99 page->mediabox = mediabox;
100 page->first_block = NULL;
101 page->last_block = NULL;
102 }
103 fz_catch(ctx)
104 {
105 fz_drop_pool(ctx, pool);
106 fz_rethrow(ctx);
107 }
108 return page;
109 }
110
111 void
fz_drop_stext_page(fz_context * ctx,fz_stext_page * page)112 fz_drop_stext_page(fz_context *ctx, fz_stext_page *page)
113 {
114 if (page)
115 {
116 fz_stext_block *block;
117 for (block = page->first_block; block; block = block->next)
118 if (block->type == FZ_STEXT_BLOCK_IMAGE)
119 fz_drop_image(ctx, block->u.i.image);
120 fz_drop_pool(ctx, page->pool);
121 }
122 }
123
124 static fz_stext_block *
add_block_to_page(fz_context * ctx,fz_stext_page * page)125 add_block_to_page(fz_context *ctx, fz_stext_page *page)
126 {
127 fz_stext_block *block = fz_pool_alloc(ctx, page->pool, sizeof *page->first_block);
128 block->prev = page->last_block;
129 if (!page->first_block)
130 page->first_block = page->last_block = block;
131 else
132 {
133 page->last_block->next = block;
134 page->last_block = block;
135 }
136 return block;
137 }
138
139 static fz_stext_block *
add_text_block_to_page(fz_context * ctx,fz_stext_page * page)140 add_text_block_to_page(fz_context *ctx, fz_stext_page *page)
141 {
142 fz_stext_block *block = add_block_to_page(ctx, page);
143 block->type = FZ_STEXT_BLOCK_TEXT;
144 return block;
145 }
146
147 static fz_stext_block *
add_image_block_to_page(fz_context * ctx,fz_stext_page * page,fz_matrix ctm,fz_image * image)148 add_image_block_to_page(fz_context *ctx, fz_stext_page *page, fz_matrix ctm, fz_image *image)
149 {
150 fz_stext_block *block = add_block_to_page(ctx, page);
151 block->type = FZ_STEXT_BLOCK_IMAGE;
152 block->u.i.transform = ctm;
153 block->u.i.image = fz_keep_image(ctx, image);
154 block->bbox = fz_transform_rect(fz_unit_rect, ctm);
155 return block;
156 }
157
158 static fz_stext_line *
add_line_to_block(fz_context * ctx,fz_stext_page * page,fz_stext_block * block,const fz_point * dir,int wmode)159 add_line_to_block(fz_context *ctx, fz_stext_page *page, fz_stext_block *block, const fz_point *dir, int wmode)
160 {
161 fz_stext_line *line = fz_pool_alloc(ctx, page->pool, sizeof *block->u.t.first_line);
162 line->prev = block->u.t.last_line;
163 if (!block->u.t.first_line)
164 block->u.t.first_line = block->u.t.last_line = line;
165 else
166 {
167 block->u.t.last_line->next = line;
168 block->u.t.last_line = line;
169 }
170
171 line->dir = *dir;
172 line->wmode = wmode;
173
174 return line;
175 }
176
177 static fz_stext_char *
add_char_to_line(fz_context * ctx,fz_stext_page * page,fz_stext_line * line,fz_matrix trm,fz_font * font,float size,int c,fz_point * p,fz_point * q,int color)178 add_char_to_line(fz_context *ctx, fz_stext_page *page, fz_stext_line *line, fz_matrix trm, fz_font *font, float size, int c, fz_point *p, fz_point *q, int color)
179 {
180 fz_stext_char *ch = fz_pool_alloc(ctx, page->pool, sizeof *line->first_char);
181 fz_point a, d;
182
183 if (!line->first_char)
184 line->first_char = line->last_char = ch;
185 else
186 {
187 line->last_char->next = ch;
188 line->last_char = ch;
189 }
190
191 ch->c = c;
192 ch->color = color;
193 ch->origin = *p;
194 ch->size = size;
195 ch->font = font; /* TODO: keep and drop */
196
197 if (line->wmode == 0)
198 {
199 a.x = 0;
200 d.x = 0;
201 a.y = fz_font_ascender(ctx, font);
202 d.y = fz_font_descender(ctx, font);
203 }
204 else
205 {
206 fz_rect bbox = fz_font_bbox(ctx, font);
207 a.x = bbox.x1;
208 d.x = bbox.x0;
209 a.y = 0;
210 d.y = 0;
211 }
212 a = fz_transform_vector(a, trm);
213 d = fz_transform_vector(d, trm);
214
215 ch->quad.ll = fz_make_point(p->x + d.x, p->y + d.y);
216 ch->quad.ul = fz_make_point(p->x + a.x, p->y + a.y);
217 ch->quad.lr = fz_make_point(q->x + d.x, q->y + d.y);
218 ch->quad.ur = fz_make_point(q->x + a.x, q->y + a.y);
219
220 return ch;
221 }
222
223 static void
remove_last_char(fz_context * ctx,fz_stext_line * line)224 remove_last_char(fz_context *ctx, fz_stext_line *line)
225 {
226 if (line && line->first_char)
227 {
228 fz_stext_char *prev = NULL;
229 fz_stext_char *ch = line->first_char;
230 while (ch->next)
231 {
232 prev = ch;
233 ch = ch->next;
234 }
235 if (prev)
236 {
237 /* the characters are pool allocated, so we don't actually leak the removed node */
238 line->last_char = prev;
239 line->last_char->next = NULL;
240 }
241 }
242 }
243
244 static int
direction_from_bidi_class(int bidiclass,int curdir)245 direction_from_bidi_class(int bidiclass, int curdir)
246 {
247 switch (bidiclass)
248 {
249 /* strong */
250 case UCDN_BIDI_CLASS_L: return 1;
251 case UCDN_BIDI_CLASS_R: return -1;
252 case UCDN_BIDI_CLASS_AL: return -1;
253
254 /* weak */
255 case UCDN_BIDI_CLASS_EN:
256 case UCDN_BIDI_CLASS_ES:
257 case UCDN_BIDI_CLASS_ET:
258 case UCDN_BIDI_CLASS_AN:
259 case UCDN_BIDI_CLASS_CS:
260 case UCDN_BIDI_CLASS_NSM:
261 case UCDN_BIDI_CLASS_BN:
262 return curdir;
263
264 /* neutral */
265 case UCDN_BIDI_CLASS_B:
266 case UCDN_BIDI_CLASS_S:
267 case UCDN_BIDI_CLASS_WS:
268 case UCDN_BIDI_CLASS_ON:
269 return curdir;
270
271 /* embedding, override, pop ... we don't support them */
272 default:
273 return 0;
274 }
275 }
276
/* Return 1 if c is one of the hyphen characters listed below, else 0. */
static int is_hyphen(int c)
{
	switch (c)
	{
	case '-':	/* hyphen-minus */
	case 0xAD:	/* soft hyphen */
	case 0x2010:	/* hyphen */
	case 0x2011:	/* non-breaking hyphen */
		return 1;
	default:
		return 0;
	}
}
282
283 static float
vec_dot(const fz_point * a,const fz_point * b)284 vec_dot(const fz_point *a, const fz_point *b)
285 {
286 return a->x * b->x + a->y * b->y;
287 }
288
289 static void
fz_add_stext_char_imp(fz_context * ctx,fz_stext_device * dev,fz_font * font,int c,int glyph,fz_matrix trm,float adv,int wmode,int force_new_line)290 fz_add_stext_char_imp(fz_context *ctx, fz_stext_device *dev, fz_font *font, int c, int glyph, fz_matrix trm, float adv, int wmode, int force_new_line)
291 {
292 fz_stext_page *page = dev->page;
293 fz_stext_block *cur_block;
294 fz_stext_line *cur_line;
295
296 int new_para = 0;
297 int new_line = 1;
298 int add_space = 0;
299 fz_point dir, ndir, p, q;
300 float size;
301 fz_point delta;
302 float spacing = 0;
303 float base_offset = 0;
304 int rtl = 0;
305
306 dev->curdir = direction_from_bidi_class(ucdn_get_bidi_class(c), dev->curdir);
307
308 /* dir = direction vector for motion. ndir = normalised(dir) */
309 if (wmode == 0)
310 {
311 dir.x = 1;
312 dir.y = 0;
313 }
314 else
315 {
316 dir.x = 0;
317 dir.y = -1;
318 }
319 dir = fz_transform_vector(dir, trm);
320 ndir = fz_normalize_vector(dir);
321
322 size = fz_matrix_expansion(trm);
323
324 /* We need to identify where glyphs 'start' (p) and 'stop' (q).
325 * Each glyph holds its 'start' position, and the next glyph in the
326 * span (or span->max if there is no next glyph) holds its 'end'
327 * position.
328 *
329 * For both horizontal and vertical motion, trm->{e,f} gives the
330 * origin (usually the bottom left) of the glyph.
331 *
332 * In horizontal mode:
333 * + p is bottom left.
334 * + q is the bottom right
335 * In vertical mode:
336 * + p is top left (where it advanced from)
337 * + q is bottom left
338 */
339 if (wmode == 0)
340 {
341 p.x = trm.e;
342 p.y = trm.f;
343 q.x = trm.e + adv * dir.x;
344 q.y = trm.f + adv * dir.y;
345 }
346 else
347 {
348 p.x = trm.e - adv * dir.x;
349 p.y = trm.f - adv * dir.y;
350 q.x = trm.e;
351 q.y = trm.f;
352 }
353
354 /* Find current position to enter new text. */
355 cur_block = page->last_block;
356 if (cur_block && cur_block->type != FZ_STEXT_BLOCK_TEXT)
357 cur_block = NULL;
358 cur_line = cur_block ? cur_block->u.t.last_line : NULL;
359
360 if (cur_line && glyph < 0)
361 {
362 /* Don't advance pen or break lines for no-glyph characters in a cluster */
363 add_char_to_line(ctx, page, cur_line, trm, font, size, c, &dev->pen, &dev->pen, dev->color);
364 dev->lastchar = c;
365 return;
366 }
367
368 if (cur_line == NULL || cur_line->wmode != wmode || vec_dot(&ndir, &cur_line->dir) < 0.999f)
369 {
370 /* If the matrix has changed rotation, or the wmode is different (or if we don't have a line at all),
371 * then we can't append to the current block/line. */
372 new_para = 1;
373 new_line = 1;
374 }
375 else
376 {
377 /* Detect fake bold where text is printed twice in the same place. */
378 delta.x = fabsf(q.x - dev->pen.x);
379 delta.y = fabsf(q.y - dev->pen.y);
380 if (delta.x < FLT_EPSILON && delta.y < FLT_EPSILON && c == dev->lastchar)
381 return;
382
383 /* Calculate how far we've moved since the last character. */
384 delta.x = p.x - dev->pen.x;
385 delta.y = p.y - dev->pen.y;
386
387 /* The transform has not changed, so we know we're in the same
388 * direction. Calculate 2 distances; how far off the previous
389 * baseline we are, together with how far along the baseline
390 * we are from the expected position. */
391 spacing = ndir.x * delta.x + ndir.y * delta.y;
392 base_offset = -ndir.y * delta.x + ndir.x * delta.y;
393
394 /* Only a small amount off the baseline - we'll take this */
395 if (fabsf(base_offset) < size * 0.8f)
396 {
397 /* LTR or neutral character */
398 if (dev->curdir >= 0)
399 {
400 if (fabsf(spacing) < size * SPACE_DIST)
401 {
402 /* Motion is in line and small enough to ignore. */
403 new_line = 0;
404 }
405 else if (fabsf(spacing) > size * SPACE_MAX_DIST)
406 {
407 /* Motion is in line and large enough to warrant splitting to a new line */
408 new_line = 1;
409 }
410 else if (spacing < 0)
411 {
412 /* Motion is backward in line! Ignore this odd spacing. */
413 new_line = 0;
414 }
415 else
416 {
417 /* Motion is forward in line and large enough to warrant us adding a space. */
418 if (dev->lastchar != ' ' && wmode == 0)
419 add_space = 1;
420 new_line = 0;
421 }
422 }
423
424 /* RTL character -- disable space character and column detection heuristics */
425 else
426 {
427 new_line = 0;
428 if (spacing > size * SPACE_DIST || spacing < 0)
429 rtl = 0; /* backward (or big jump to 'right' side) means logical order */
430 else
431 rtl = 1; /* visual order, we need to reverse in a post process pass */
432 }
433 }
434
435 /* Enough for a new line, but not enough for a new paragraph */
436 else if (fabsf(base_offset) <= size * PARAGRAPH_DIST)
437 {
438 /* Check indent to spot text-indent style paragraphs */
439 if (wmode == 0 && cur_line && dev->new_obj)
440 if (fabsf(p.x - dev->start.x) > size * 0.5f)
441 new_para = 1;
442 new_line = 1;
443 }
444
445 /* Way off the baseline - open a new paragraph */
446 else
447 {
448 new_para = 1;
449 new_line = 1;
450 }
451 }
452
453 /* Start a new block (but only at the beginning of a text object) */
454 if (new_para || !cur_block)
455 {
456 cur_block = add_text_block_to_page(ctx, page);
457 cur_line = cur_block->u.t.last_line;
458 }
459
460 if (new_line && (dev->flags & FZ_STEXT_DEHYPHENATE) && is_hyphen(dev->lastchar))
461 {
462 remove_last_char(ctx, cur_line);
463 new_line = 0;
464 }
465
466 /* Start a new line */
467 if (new_line || !cur_line || force_new_line)
468 {
469 cur_line = add_line_to_block(ctx, page, cur_block, &ndir, wmode);
470 dev->start = p;
471 }
472
473 /* Add synthetic space */
474 if (add_space && !(dev->flags & FZ_STEXT_INHIBIT_SPACES))
475 add_char_to_line(ctx, page, cur_line, trm, font, size, ' ', &dev->pen, &p, dev->color);
476
477 add_char_to_line(ctx, page, cur_line, trm, font, size, c, &p, &q, dev->color);
478 dev->lastchar = c;
479 dev->pen = q;
480
481 dev->new_obj = 0;
482 dev->trm = trm;
483 }
484
485 static void
fz_add_stext_char(fz_context * ctx,fz_stext_device * dev,fz_font * font,int c,int glyph,fz_matrix trm,float adv,int wmode,int force_new_line)486 fz_add_stext_char(fz_context *ctx, fz_stext_device *dev, fz_font *font, int c, int glyph, fz_matrix trm, float adv, int wmode, int force_new_line)
487 {
488 /* ignore when one unicode character maps to multiple glyphs */
489 if (c == -1)
490 return;
491
492 if (!(dev->flags & FZ_STEXT_PRESERVE_LIGATURES))
493 {
494 switch (c)
495 {
496 case 0xFB00: /* ff */
497 fz_add_stext_char_imp(ctx, dev, font, 'f', glyph, trm, adv, wmode, force_new_line);
498 fz_add_stext_char_imp(ctx, dev, font, 'f', -1, trm, 0, wmode, 0);
499 return;
500 case 0xFB01: /* fi */
501 fz_add_stext_char_imp(ctx, dev, font, 'f', glyph, trm, adv, wmode, force_new_line);
502 fz_add_stext_char_imp(ctx, dev, font, 'i', -1, trm, 0, wmode, 0);
503 return;
504 case 0xFB02: /* fl */
505 fz_add_stext_char_imp(ctx, dev, font, 'f', glyph, trm, adv, wmode, force_new_line);
506 fz_add_stext_char_imp(ctx, dev, font, 'l', -1, trm, 0, wmode, 0);
507 return;
508 case 0xFB03: /* ffi */
509 fz_add_stext_char_imp(ctx, dev, font, 'f', glyph, trm, adv, wmode, force_new_line);
510 fz_add_stext_char_imp(ctx, dev, font, 'f', -1, trm, 0, wmode, 0);
511 fz_add_stext_char_imp(ctx, dev, font, 'i', -1, trm, 0, wmode, 0);
512 return;
513 case 0xFB04: /* ffl */
514 fz_add_stext_char_imp(ctx, dev, font, 'f', glyph, trm, adv, wmode, force_new_line);
515 fz_add_stext_char_imp(ctx, dev, font, 'f', -1, trm, 0, wmode, 0);
516 fz_add_stext_char_imp(ctx, dev, font, 'l', -1, trm, 0, wmode, 0);
517 return;
518 case 0xFB05: /* long st */
519 case 0xFB06: /* st */
520 fz_add_stext_char_imp(ctx, dev, font, 's', glyph, trm, adv, wmode, force_new_line);
521 fz_add_stext_char_imp(ctx, dev, font, 't', -1, trm, 0, wmode, 0);
522 return;
523 }
524 }
525
526 if (!(dev->flags & FZ_STEXT_PRESERVE_WHITESPACE))
527 {
528 switch (c)
529 {
530 case 0x0009: /* tab */
531 case 0x0020: /* space */
532 case 0x00A0: /* no-break space */
533 case 0x1680: /* ogham space mark */
534 case 0x180E: /* mongolian vowel separator */
535 case 0x2000: /* en quad */
536 case 0x2001: /* em quad */
537 case 0x2002: /* en space */
538 case 0x2003: /* em space */
539 case 0x2004: /* three-per-em space */
540 case 0x2005: /* four-per-em space */
541 case 0x2006: /* six-per-em space */
542 case 0x2007: /* figure space */
543 case 0x2008: /* punctuation space */
544 case 0x2009: /* thin space */
545 case 0x200A: /* hair space */
546 case 0x202F: /* narrow no-break space */
547 case 0x205F: /* medium mathematical space */
548 case 0x3000: /* ideographic space */
549 c = ' ';
550 }
551 }
552
553 fz_add_stext_char_imp(ctx, dev, font, c, glyph, trm, adv, wmode, force_new_line);
554 }
555
556 static void
fz_stext_extract(fz_context * ctx,fz_stext_device * dev,fz_text_span * span,fz_matrix ctm)557 fz_stext_extract(fz_context *ctx, fz_stext_device *dev, fz_text_span *span, fz_matrix ctm)
558 {
559 fz_font *font = span->font;
560 fz_matrix tm = span->trm;
561 fz_matrix trm;
562 float adv;
563 int i;
564
565 if (span->len == 0)
566 return;
567
568 for (i = 0; i < span->len; i++)
569 {
570 /* Calculate new pen location and delta */
571 tm.e = span->items[i].x;
572 tm.f = span->items[i].y;
573 trm = fz_concat(tm, ctm);
574
575 /* Calculate bounding box and new pen position based on font metrics */
576 if (span->items[i].gid >= 0)
577 adv = fz_advance_glyph(ctx, font, span->items[i].gid, span->wmode);
578 else
579 adv = 0;
580
581 fz_add_stext_char(ctx, dev, font,
582 span->items[i].ucs,
583 span->items[i].gid,
584 trm,
585 adv,
586 span->wmode,
587 (i == 0) && (dev->flags & FZ_STEXT_PRESERVE_SPANS));
588 }
589 }
590
/*
	Convert a colour to device RGB and pack it into an int as
	0xRRGGBB, each channel scaled by 255 and clamped to 0..255.
*/
static int hexrgb_from_color(fz_context *ctx, fz_colorspace *colorspace, const float *color)
{
	float rgb[3];
	int red, green, blue;

	fz_convert_color(ctx, colorspace, color, fz_device_rgb(ctx), rgb, NULL, fz_default_color_params);

	red = fz_clampi(rgb[0] * 255, 0, 255);
	green = fz_clampi(rgb[1] * 255, 0, 255);
	blue = fz_clampi(rgb[2] * 255, 0, 255);

	return (red << 16) | (green << 8) | blue;
}
600
601 static void
fz_stext_fill_text(fz_context * ctx,fz_device * dev,const fz_text * text,fz_matrix ctm,fz_colorspace * colorspace,const float * color,float alpha,fz_color_params color_params)602 fz_stext_fill_text(fz_context *ctx, fz_device *dev, const fz_text *text, fz_matrix ctm,
603 fz_colorspace *colorspace, const float *color, float alpha, fz_color_params color_params)
604 {
605 fz_stext_device *tdev = (fz_stext_device*)dev;
606 fz_text_span *span;
607 if (text == tdev->lasttext)
608 return;
609 tdev->color = hexrgb_from_color(ctx, colorspace, color);
610 tdev->new_obj = 1;
611 for (span = text->head; span; span = span->next)
612 fz_stext_extract(ctx, tdev, span, ctm);
613 fz_drop_text(ctx, tdev->lasttext);
614 tdev->lasttext = fz_keep_text(ctx, text);
615 }
616
617 static void
fz_stext_stroke_text(fz_context * ctx,fz_device * dev,const fz_text * text,const fz_stroke_state * stroke,fz_matrix ctm,fz_colorspace * colorspace,const float * color,float alpha,fz_color_params color_params)618 fz_stext_stroke_text(fz_context *ctx, fz_device *dev, const fz_text *text, const fz_stroke_state *stroke, fz_matrix ctm,
619 fz_colorspace *colorspace, const float *color, float alpha, fz_color_params color_params)
620 {
621 fz_stext_device *tdev = (fz_stext_device*)dev;
622 fz_text_span *span;
623 if (text == tdev->lasttext)
624 return;
625 tdev->color = hexrgb_from_color(ctx, colorspace, color);
626 tdev->new_obj = 1;
627 for (span = text->head; span; span = span->next)
628 fz_stext_extract(ctx, tdev, span, ctm);
629 fz_drop_text(ctx, tdev->lasttext);
630 tdev->lasttext = fz_keep_text(ctx, text);
631 }
632
633 static void
fz_stext_clip_text(fz_context * ctx,fz_device * dev,const fz_text * text,fz_matrix ctm,fz_rect scissor)634 fz_stext_clip_text(fz_context *ctx, fz_device *dev, const fz_text *text, fz_matrix ctm, fz_rect scissor)
635 {
636 fz_stext_device *tdev = (fz_stext_device*)dev;
637 fz_text_span *span;
638 if (text == tdev->lasttext)
639 return;
640 tdev->color = 0;
641 tdev->new_obj = 1;
642 for (span = text->head; span; span = span->next)
643 fz_stext_extract(ctx, tdev, span, ctm);
644 fz_drop_text(ctx, tdev->lasttext);
645 tdev->lasttext = fz_keep_text(ctx, text);
646 }
647
648 static void
fz_stext_clip_stroke_text(fz_context * ctx,fz_device * dev,const fz_text * text,const fz_stroke_state * stroke,fz_matrix ctm,fz_rect scissor)649 fz_stext_clip_stroke_text(fz_context *ctx, fz_device *dev, const fz_text *text, const fz_stroke_state *stroke, fz_matrix ctm, fz_rect scissor)
650 {
651 fz_stext_device *tdev = (fz_stext_device*)dev;
652 fz_text_span *span;
653 if (text == tdev->lasttext)
654 return;
655 tdev->color = 0;
656 tdev->new_obj = 1;
657 for (span = text->head; span; span = span->next)
658 fz_stext_extract(ctx, tdev, span, ctm);
659 fz_drop_text(ctx, tdev->lasttext);
660 tdev->lasttext = fz_keep_text(ctx, text);
661 }
662
663 static void
fz_stext_ignore_text(fz_context * ctx,fz_device * dev,const fz_text * text,fz_matrix ctm)664 fz_stext_ignore_text(fz_context *ctx, fz_device *dev, const fz_text *text, fz_matrix ctm)
665 {
666 fz_stext_device *tdev = (fz_stext_device*)dev;
667 fz_text_span *span;
668 if (text == tdev->lasttext)
669 return;
670 tdev->color = 0;
671 tdev->new_obj = 1;
672 for (span = text->head; span; span = span->next)
673 fz_stext_extract(ctx, tdev, span, ctm);
674 fz_drop_text(ctx, tdev->lasttext);
675 tdev->lasttext = fz_keep_text(ctx, text);
676 }
677
678 /* Images and shadings */
679
680 static void
fz_stext_fill_image(fz_context * ctx,fz_device * dev,fz_image * img,fz_matrix ctm,float alpha,fz_color_params color_params)681 fz_stext_fill_image(fz_context *ctx, fz_device *dev, fz_image *img, fz_matrix ctm, float alpha, fz_color_params color_params)
682 {
683 fz_stext_device *tdev = (fz_stext_device*)dev;
684
685 /* If the alpha is less than 50% then it's probably a watermark or effect or something. Skip it. */
686 if (alpha < 0.5f)
687 return;
688
689 add_image_block_to_page(ctx, tdev->page, ctm, img);
690 }
691
692 static void
fz_stext_fill_image_mask(fz_context * ctx,fz_device * dev,fz_image * img,fz_matrix ctm,fz_colorspace * cspace,const float * color,float alpha,fz_color_params color_params)693 fz_stext_fill_image_mask(fz_context *ctx, fz_device *dev, fz_image *img, fz_matrix ctm,
694 fz_colorspace *cspace, const float *color, float alpha, fz_color_params color_params)
695 {
696 fz_stext_fill_image(ctx, dev, img, ctm, alpha, color_params);
697 }
698
699 static fz_image *
fz_new_image_from_shade(fz_context * ctx,fz_shade * shade,fz_matrix * in_out_ctm,fz_color_params color_params,fz_rect scissor)700 fz_new_image_from_shade(fz_context *ctx, fz_shade *shade, fz_matrix *in_out_ctm, fz_color_params color_params, fz_rect scissor)
701 {
702 fz_matrix ctm = *in_out_ctm;
703 fz_pixmap *pix;
704 fz_image *img = NULL;
705 fz_rect bounds;
706 fz_irect bbox;
707
708 bounds = fz_bound_shade(ctx, shade, ctm);
709 bounds = fz_intersect_rect(bounds, scissor);
710 bbox = fz_irect_from_rect(bounds);
711
712 pix = fz_new_pixmap_with_bbox(ctx, fz_device_rgb(ctx), bbox, NULL, !shade->use_background);
713 fz_try(ctx)
714 {
715 if (shade->use_background)
716 fz_fill_pixmap_with_color(ctx, pix, shade->colorspace, shade->background, color_params);
717 else
718 fz_clear_pixmap(ctx, pix);
719 fz_paint_shade(ctx, shade, NULL, ctm, pix, color_params, bbox, NULL);
720 img = fz_new_image_from_pixmap(ctx, pix, NULL);
721 }
722 fz_always(ctx)
723 fz_drop_pixmap(ctx, pix);
724 fz_catch(ctx)
725 fz_rethrow(ctx);
726
727 in_out_ctm->a = pix->w;
728 in_out_ctm->b = 0;
729 in_out_ctm->c = 0;
730 in_out_ctm->d = pix->h;
731 in_out_ctm->e = pix->x;
732 in_out_ctm->f = pix->y;
733 return img;
734 }
735
736 static void
fz_stext_fill_shade(fz_context * ctx,fz_device * dev,fz_shade * shade,fz_matrix ctm,float alpha,fz_color_params color_params)737 fz_stext_fill_shade(fz_context *ctx, fz_device *dev, fz_shade *shade, fz_matrix ctm, float alpha, fz_color_params color_params)
738 {
739 fz_matrix local_ctm = ctm;
740 fz_rect scissor = fz_device_current_scissor(ctx, dev);
741 fz_image *image = fz_new_image_from_shade(ctx, shade, &local_ctm, color_params, scissor);
742 fz_try(ctx)
743 fz_stext_fill_image(ctx, dev, image, local_ctm, alpha, color_params);
744 fz_always(ctx)
745 fz_drop_image(ctx, image);
746 fz_catch(ctx)
747 fz_rethrow(ctx);
748 }
749
750 static void
fz_stext_close_device(fz_context * ctx,fz_device * dev)751 fz_stext_close_device(fz_context *ctx, fz_device *dev)
752 {
753 fz_stext_device *tdev = (fz_stext_device*)dev;
754 fz_stext_page *page = tdev->page;
755 fz_stext_block *block;
756 fz_stext_line *line;
757 fz_stext_char *ch;
758
759 for (block = page->first_block; block; block = block->next)
760 {
761 if (block->type != FZ_STEXT_BLOCK_TEXT)
762 continue;
763 for (line = block->u.t.first_line; line; line = line->next)
764 {
765 for (ch = line->first_char; ch; ch = ch->next)
766 {
767 fz_rect ch_box = fz_rect_from_quad(ch->quad);
768 if (ch == line->first_char)
769 line->bbox = ch_box;
770 else
771 line->bbox = fz_union_rect(line->bbox, ch_box);
772 }
773 block->bbox = fz_union_rect(block->bbox, line->bbox);
774 }
775 }
776
777 /* TODO: smart sorting of blocks and lines in reading order */
778 /* TODO: unicode NFC normalization */
779 }
780
781 static void
fz_stext_drop_device(fz_context * ctx,fz_device * dev)782 fz_stext_drop_device(fz_context *ctx, fz_device *dev)
783 {
784 fz_stext_device *tdev = (fz_stext_device*)dev;
785 fz_drop_text(ctx, tdev->lasttext);
786 }
787
788 fz_stext_options *
fz_parse_stext_options(fz_context * ctx,fz_stext_options * opts,const char * string)789 fz_parse_stext_options(fz_context *ctx, fz_stext_options *opts, const char *string)
790 {
791 const char *val;
792
793 memset(opts, 0, sizeof *opts);
794
795 if (fz_has_option(ctx, string, "preserve-ligatures", &val) && fz_option_eq(val, "yes"))
796 opts->flags |= FZ_STEXT_PRESERVE_LIGATURES;
797 if (fz_has_option(ctx, string, "preserve-whitespace", &val) && fz_option_eq(val, "yes"))
798 opts->flags |= FZ_STEXT_PRESERVE_WHITESPACE;
799 if (fz_has_option(ctx, string, "preserve-images", &val) && fz_option_eq(val, "yes"))
800 opts->flags |= FZ_STEXT_PRESERVE_IMAGES;
801 if (fz_has_option(ctx, string, "inhibit-spaces", &val) && fz_option_eq(val, "yes"))
802 opts->flags |= FZ_STEXT_INHIBIT_SPACES;
803 if (fz_has_option(ctx, string, "dehyphenate", &val) && fz_option_eq(val, "yes"))
804 opts->flags |= FZ_STEXT_DEHYPHENATE;
805 if (fz_has_option(ctx, string, "preserve-spans", &val) && fz_option_eq(val, "yes"))
806 opts->flags |= FZ_STEXT_PRESERVE_SPANS;
807
808 return opts;
809 }
810
811 fz_device *
fz_new_stext_device(fz_context * ctx,fz_stext_page * page,const fz_stext_options * opts)812 fz_new_stext_device(fz_context *ctx, fz_stext_page *page, const fz_stext_options *opts)
813 {
814 fz_stext_device *dev = fz_new_derived_device(ctx, fz_stext_device);
815
816 dev->super.close_device = fz_stext_close_device;
817 dev->super.drop_device = fz_stext_drop_device;
818
819 dev->super.fill_text = fz_stext_fill_text;
820 dev->super.stroke_text = fz_stext_stroke_text;
821 dev->super.clip_text = fz_stext_clip_text;
822 dev->super.clip_stroke_text = fz_stext_clip_stroke_text;
823 dev->super.ignore_text = fz_stext_ignore_text;
824
825 if (opts && (opts->flags & FZ_STEXT_PRESERVE_IMAGES))
826 {
827 dev->super.fill_shade = fz_stext_fill_shade;
828 dev->super.fill_image = fz_stext_fill_image;
829 dev->super.fill_image_mask = fz_stext_fill_image_mask;
830 }
831
832 if (opts)
833 dev->flags = opts->flags;
834 dev->page = page;
835 dev->pen.x = 0;
836 dev->pen.y = 0;
837 dev->trm = fz_identity;
838 dev->lastchar = ' ';
839 dev->curdir = 1;
840 dev->lasttext = NULL;
841
842 return (fz_device*)dev;
843 }
844