1 /* catdvi - get text from DVI files
2 Copyright (C) 1999 Antti-Juhani Kaijanaho <gaia@iki.fi>
3 Copyright (C) 2000-02 Bjoern Brill <brill@fs.math.uni-frankfurt.de>
4
5 This program is free software; you can redistribute it and/or modify
6 it under the terms of the GNU General Public License as published by
7 the Free Software Foundation; either version 2 of the License, or
8 (at your option) any later version.
9
10 This program is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 GNU General Public License for more details.
14
15 You should have received a copy of the GNU General Public License
16 along with this program; if not, write to the Free Software
17 Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
18 */
19
20 #include <assert.h>
21 #include <stdlib.h>
22 #include "page2.h"
23 #include "util.h"
24 #include "glyphops.h"
25 #include "layout.h"
26
27 /* Return value like with strcmp and friends. */
compare_boxes(struct box_t a,struct box_t b)28 static int compare_boxes(struct box_t a, struct box_t b)
29 {
30 if (a.y > b.y) return 1;
31 if (a.y < b.y) return -1;
32 assert(a.y == b.y);
33 if (a.x > b.x) return 1;
34 if (a.x < b.x) return -1;
35 assert(a.x == b.x);
36 return 0;
37 }
38
39 struct point_t {
40 sint32 x;
41 sint32 y;
42 };
43
point_in_box(const struct point_t * point,const struct box_t * box)44 static int point_in_box(const struct point_t * point, const struct box_t * box)
45 {
46 assert(box);
47 assert(point);
48 return(
49 box->x <= point->x &&
50 (box->x + box->width) >= point->x &&
51 (box->y - box->height) <= point->y &&
52 box->y >= point->y
53 );
54 }
55
56 /* Set to 1 when catdvi is invoked with the --sequential option */
57 int page_sequential = 0;
58
59 /* Set to 1 when catdvi is invoked with the --list-page-numbers option */
60 int page_list_numbers = 0;
61
62 /* List support variables. Note that most insertions happen near the
63 previous insertion, so it's beneficial to keep a pointer to the
64 last inserted node. */
65 struct list_node_t * list_head;
66 struct list_node_t * list_tail;
67 struct list_node_t * list_latest; /* Node that was inserted last. */
68
69 /* flags to avoid the page_adjust_*() loops over the whole page when
70 * there's nothing to adjust.
71 */
72 static int page_has_diacritics;
73 static int page_has_texmext;
74 static int page_has_radicals;
75
swap_nodes(struct list_node_t * one,struct list_node_t * other)76 static void swap_nodes(struct list_node_t * one, struct list_node_t * other)
77 {
78 assert(one != 0);
79 assert(other != 0);
80 assert(one->next == other);
81 assert(one == other->prev);
82
83 one->next = other->next;
84 other->prev = one->prev;
85 other->next = one;
86 one->prev = other;
87 if (one->next == 0) {
88 list_tail = one;
89 } else {
90 one->next->prev = one;
91 }
92 if (other->prev == 0) {
93 list_head = other;
94 } else {
95 other->prev->next = other;
96 }
97
98 assert(other->next == one);
99 assert(other == one->prev);
100 }
101
102 /* move list_latest into its correct position in the list */
bubble(void)103 static void bubble(void)
104 {
105 while (1) {
106 assert(list_latest != 0);
107
108 /* If it is in correct position now, end here. */
109 if ((list_latest->prev == 0
110 || compare_boxes(list_latest->prev->b, list_latest->b) <= 0)
111 && (list_latest->next == 0
112 || compare_boxes(list_latest->b, list_latest->next->b) <= 0)) {
113 return;
114 }
115
116 /* If it is too left, we must bubble it one position to the right. */
117 if (list_latest->next != 0
118 && compare_boxes(list_latest->b, list_latest->next->b) > 0) {
119 swap_nodes(list_latest, list_latest->next);
120 continue;
121 }
122
123 /* If it is too right, we must bubble it one position to the left. */
124 if (list_latest->prev != 0
125 && compare_boxes(list_latest->prev->b, list_latest->b) > 0) {
126 assert(list_latest->prev->next == list_latest);
127 swap_nodes(list_latest->prev, list_latest);
128 continue;
129 }
130 NOTREACHED;
131 }
132 NOTREACHED;
133 }
134
135
136 /* move some random node into its correct position in the list */
bubble_node(struct list_node_t * p)137 static void bubble_node(struct list_node_t * p)
138 {
139 list_latest = p;
140 bubble();
141 }
142
143
insert_list(struct box_t box)144 static void insert_list(struct box_t box)
145 {
146 struct list_node_t * new;
147
148 new = malloc(sizeof(struct list_node_t));
149 if (new == 0) enomem();
150 new->b = box;
151
152 if (list_head == 0 || list_tail == 0 || list_latest == 0) {
153 assert(list_head == 0);
154 assert(list_tail == 0);
155 assert(list_latest == 0);
156
157 new->next = new->prev = 0;
158
159 list_head = list_tail = list_latest = new;
160 return;
161 }
162
163 /* The list is nonempty. */
164
165 assert(list_head != 0);
166 assert(list_tail != 0);
167 assert(list_latest != 0);
168
169 /* The insertion algorithm is this: insert the node after the
170 latest insertion, and then bubble it into its correct
171 position. */
172
173 new->next = list_latest->next;
174 list_latest->next = new;
175 new->prev = list_latest;
176 list_latest = new;
177 if (new->next == 0) {
178 list_tail = new;
179 } else {
180 new->next->prev = new;
181 }
182
183 assert(list_latest->prev->next == list_latest);
184 assert(list_latest->next == 0 || list_latest->next->prev == list_latest);
185
186 /* sort by position except in sequential mode */
187 if (!page_sequential) bubble();
188 }
189
190
191 /* These two are set by command line options */
192 struct pageref_t page_start_output = {0, 0, 0, PRF_PHYSICAL};
193 struct pageref_t page_last_output = {SINT32_MAX, 0, 0, PRF_PHYSICAL};
194
195 static struct pageref_t current_page = {0, 0, 0, PRF_INVALID};
196
197
page_begin(sint32 count0)198 void page_begin(sint32 count0)
199 {
200 /* If necessary, delete the previous page's data structure. */
201 if (list_head != 0 || list_tail != 0 || list_latest != 0) {
202 struct list_node_t * p;
203
204 assert(list_head != 0);
205 assert(list_tail != 0);
206 assert(list_latest != 0);
207
208 p = list_head;
209 while (p != 0) {
210 struct list_node_t * next;
211
212 if (p == list_head) list_head = 0;
213 if (p == list_latest) list_latest = 0;
214 if (p == list_tail) list_tail = 0;
215
216 next = p->next;
217 free(p);
218 p = next;
219 }
220 }
221 assert(list_head == 0);
222 assert(list_tail == 0);
223 assert(list_latest == 0);
224
225 /* reset per page flags */
226 page_has_diacritics = 0;
227 page_has_texmext = 0;
228 page_has_radicals = 0;
229
230 /* keep track of page numbering */
231 if(current_page.flavour == PRF_INVALID) {
232 /* here comes the very first page of the document */
233 current_page.physical = 1;
234 current_page.count0 = count0;
235 if (count0 < 0) current_page.chapter = 1;
236 else current_page.chapter = 0;
237 /* number chapters from 0 to accomodate frontmatter */
238 current_page.flavour = PRF_COMPLETE;
239 }
240 else {
241 /* just another ordinary page */
242 current_page.physical += 1;
243 if (pageref_count0_cmp(current_page.count0, count0) >= 0) {
244 /* count0 has not increased, so start new chapter */
245 current_page.chapter += 1;
246 }
247 current_page.count0 = count0;
248 }
249 }
250
251
page_set_glyph(font_t font,glyph_t glyph,sint32 width,sint32 height,sint32 depth,sint32 axis_height,sint32 x,sint32 y)252 void page_set_glyph(
253 font_t font, glyph_t glyph,
254 sint32 width, sint32 height, sint32 depth, sint32 axis_height,
255 sint32 x, sint32 y
256 )
257 {
258 struct box_t b;
259 enum glyph_hint_t hint;
260
261 b.x = x;
262 b.y = y;
263 b.width = width;
264 b.height = height;
265 b.depth = depth;
266 b.glyph = glyph;
267 b.font = font;
268 b.axis_height = axis_height;
269 b.flags = 0;
270 b.axis = 0;
271
272 hint = glyph_get_hint(glyph);
273
274 if(hint & GH_DIACRITIC) {
275 b.flags |= BF_SWIMMING | BF_DIACRITIC ;
276 page_has_diacritics = 1;
277 }
278
279 if(hint & GH_EXTENSIBLE_RECIPE) {
280 b.flags |= BF_SWIMMING;
281 page_has_texmext = 1;
282 }
283 else if(hint & GH_ON_AXIS) {
284 b.flags |= BF_SWIMMING | BF_ON_AXIS | BF_HAS_AXIS ;
285 b.axis = y + (-height + depth)/2;
286 page_has_texmext = 1;
287 }
288 else if(hint & GH_RADICAL) {
289 b.flags |= BF_SWIMMING | BF_RADICAL ;
290 page_has_radicals = 1;
291 }
292 else if(axis_height > 0) {
293 b.flags |= BF_HAS_AXIS;
294 b.axis = y - axis_height;
295 }
296
297 insert_list(b);
298 }
299
300
301 static void page_pair_accenting(
302 struct list_node_t * pbase,
303 struct list_node_t * pdiacritic,
304 int direction
305 );
306
page_end(void)307 void page_end(void)
308 {
309 pmesg(50, "BEGIN page_end\n");
310
311 if (
312 pageref_cmp(&page_start_output, ¤t_page) > 0 ||
313 pageref_cmp(&page_last_output, ¤t_page) < 0
314 ) {
315 if(msglevel >= 80) {
316 pmesg(80, "skipping page by user request:\n ");
317 pageref_print(¤t_page, stderr);
318 }
319 pmesg(50, "END page_end\n");
320 return;
321 }
322
323 if(page_list_numbers) {
324 pageref_print(¤t_page, stdout);
325 pmesg(50, "END page_end\n");
326 return;
327 }
328
329 if(page_sequential) page_print_sequential();
330 else page_print_formatted();
331
332 pmesg(50, "END page_end\n");
333 return;
334
335 }
336
page_adjust_diacritics(void)337 void page_adjust_diacritics(void)
338 {
339 int trouble = 0;
340 int direction = 0;
341
342 struct list_node_t *p, *s, *q, *pb;
343 struct box_t dia_box;
344 struct point_t dia_center;
345
346 pmesg(50, "BEGIN page_adjust_diacritics\n");
347
348 if(!page_has_diacritics) {
349 pmesg(80, "nothing to do\n");
350 pmesg(50, "END page_adjust_diacritics\n");
351 return;
352 }
353
354 for (p = list_head; p != 0; p = s) {
355 s = p->next;
356 /* p-> is a moving target */
357
358 if (!(p->b.flags & BF_DIACRITIC)) continue;
359
360 trouble = 0;
361 pb = 0;
362
363 dia_box = p->b;
364 dia_center.x = dia_box.x + dia_box.width / 2;
365 dia_center.y = dia_box.y - dia_box.height / 2;
366
367 /* Search corresponding base box. There are complex overlayed
368 * constructions we can't handle. Therefore, we try to find all
369 * possible candidates and abort when more than one is found.
370 */
371
372 /* search backwards */
373 for(q = p->prev; q != 0; q = q->prev) {
374 if (abs(dia_box.y - q->b.y) > 3*(dia_box.height + q->b.height))
375 /* too far off to find anything useful */
376 break;
377
378 if (point_in_box(&dia_center, &q->b)) {
379 /* I got you babe */
380 if(pb != 0) {
381 /* we are not alone...too bad */
382 trouble = 1;
383 break;
384 }
385 pb = q;
386 direction = -1;
387 }
388 }
389
390 if(trouble) {
391 pmesg(
392 80,
393 "page_adjust_diacritics: trouble with diacritic %#lx\n",
394 dia_box.glyph
395 );
396 continue;
397 }
398
399 /* search forward */
400 for(q = p->next; q != 0; q = q->next) {
401 if (abs(dia_box.y - q->b.y) > 3*(dia_box.height + q->b.height))
402 /* too far off to find anything useful */
403 break;
404
405 if (point_in_box(&dia_center, &q->b)) {
406 /* I got you babe */
407 if(pb != 0) {
408 /* we are not alone...too bad */
409 trouble = 1;
410 break;
411 }
412 pb = q;
413 direction = 1;
414 }
415 }
416
417 if(trouble) {
418 pmesg(
419 80,
420 "page_adjust_diacritics: trouble with diacritic %#lx\n",
421 dia_box.glyph
422 );
423 continue;
424 }
425 if(pb == 0) {
426 pmesg(
427 80,
428 "page_adjust_diacritics: no base glyph for diacritic %#lx\n",
429 dia_box.glyph
430 );
431 /* a lone diacritic. assume it just belongs where it is */
432 p->b.flags &= ~(BF_SWIMMING | BF_DIACRITIC);
433 continue;
434 }
435
436 /* Found the one and only one. Let's get closer. */
437 page_pair_accenting(pb, p, direction);
438 }
439
440 pmesg(50, "END page_adjust_diacritics\n");
441 }
442
443
page_pair_accenting(struct list_node_t * pbase,struct list_node_t * pdiacritic,int direction)444 static void page_pair_accenting(
445 struct list_node_t * pbase,
446 struct list_node_t * pdiacritic,
447 int direction
448 )
449 {
450 glyph_t dcv;
451
452 assert(pdiacritic->b.flags & BF_DIACRITIC);
453 assert(abs(direction) == 1);
454
455 pmesg(
456 80,
457 "detected accenting: base=%#lx, diacritic=%#lx\n",
458 pbase->b.glyph,
459 pdiacritic->b.glyph
460 );
461
462 dcv = diacritic_combining_variant(pdiacritic->b.glyph);
463
464 if(dcv) {
465 /* OK this one can be handeled within the unicode framework */
466
467 /* move diacritic right after base glyph */
468 if(direction == -1) {
469 /* base before diacritic */
470 while(pdiacritic->prev != pbase) {
471 swap_nodes(pdiacritic->prev, pdiacritic);
472 }
473 }
474 else {
475 /* base after diacritic */
476 while(pdiacritic->prev != pbase) {
477 swap_nodes(pdiacritic, pdiacritic->next);
478 /* note swap_nodes() is order sensitive */
479 }
480 }
481
482 /* Make the combining diacritic occupy the same space as the
483 * accented glyph would. That's the only way to statisfy all
484 * ordering and spacing assumptions in this program. And nearly
485 * consequent.
486 */
487 pdiacritic->b.width = pbase->b.width;
488 pdiacritic->b.height = max(
489 pbase->b.height,
490 pdiacritic->b.height + pbase->b.y - pdiacritic->b.y
491 );
492 pdiacritic->b.x = pbase->b.x;
493 pdiacritic->b.y = pbase->b.y;
494 pdiacritic->b.glyph = dcv;
495 pdiacritic->b.flags &= ~(BF_SWIMMING | BF_DIACRITIC);
496 }
497 else {
498 /* A strange guy. Won't combine. Just put him in line so he can't
499 * clobber up things. Like in real life.
500 */
501 pdiacritic->b.height =
502 pdiacritic->b.height + pbase->b.y - pdiacritic->b.y;
503 pdiacritic->b.y = pbase->b.y;
504 pdiacritic->b.flags &= ~(BF_SWIMMING | BF_DIACRITIC);
505
506 /* keep the list ordered */
507 if(!page_sequential) bubble_node(pdiacritic);
508 }
509
510 }
511
512
513 /************************************************************************
514 * the hairy texmext stuff
515 ************************************************************************/
516
517 typedef struct interval32_t interval32_t;
518 struct interval32_t {
519 sint32 a;
520 sint32 b;
521 };
522
interval32_contains(const interval32_t * this,sint32 x)523 static int interval32_contains(const interval32_t * this, sint32 x)
524 {
525 int res;
526
527 res = (this->a <= x) && (x <= this->b);
528
529 pmesg(
530 150,
531 "interval32_contains: %ld %s [%ld, %ld]\n",
532 x,
533 res ? "in" : "not in",
534 this->a,
535 this->b
536 );
537
538 return res;
539 }
540
541
542 typedef struct searchinfo_t searchinfo_t;
543 struct searchinfo_t {
544 interval32_t searched_y;
545 interval32_t x;
546 interval32_t y;
547 interval32_t top;
548 interval32_t axis;
549 int require_axis;
550 int test_start;
551 struct list_node_t * start;
552 struct list_node_t * first_found;
553 };
554
page_match_box(const struct searchinfo_t * quest,const struct list_node_t * candidate)555 static int page_match_box(
556 const struct searchinfo_t * quest,
557 const struct list_node_t * candidate
558 )
559 {
560 int matches;
561 const struct box_t * cb;
562
563 cb = &(candidate->b);
564
565 /* The candidate's x, y and top ( == y - height) values must match the ones
566 * we're searching for. If candidate has a known math axis, it must match.
567 * require_axis says whether we require it to have a known math axis.
568 */
569 matches =
570 interval32_contains(&(quest->x), cb->x) &&
571 interval32_contains(&(quest->y), cb->y) &&
572 interval32_contains(&(quest->top), cb->y - cb->height) &&
573 (
574 (cb->flags & BF_HAS_AXIS) ?
575 interval32_contains(&(quest->axis), cb->axis) :
576 !quest->require_axis
577 ); /* yes these parentheses _are_ neccessary */
578
579 return(matches);
580 }
581
582 /* Return values:
583 * 0 - no box matches the given criteria
584 * 1 - some boxes match the given criteria, and all of them have the same y
585 * 2 - some boxes match the given criteria, but different y values occur.
586 *
587 * If some boxes match, quest->first_found will point to guess what.
588 */
page_grb_unique_y(struct searchinfo_t * quest)589 static int page_grb_unique_y(
590 struct searchinfo_t * quest
591 )
592 {
593 interval32_t * range;
594 struct list_node_t * p;
595 struct list_node_t * start_up, * start_down;
596 sint32 y;
597 int have_y = 0;
598
599
600 range = &(quest->searched_y);
601 assert(interval32_contains(range, quest->start->b.y));
602 start_up = quest->test_start ? quest->start : quest->start->prev;
603 start_down = quest->start->next;
604
605 quest->first_found = NULL;
606
607 for(p = start_up; p != NULL; p = p->prev) {
608 if(!interval32_contains(range, p->b.y)) break;
609 if(p->b.flags & BF_SWIMMING) continue;
610 if(page_match_box(quest, p)) {
611 if(have_y) {
612 if(p->b.y != y) {
613 return 2;
614 }
615 }
616 else {
617 quest->first_found = p;
618 y = p->b.y;
619 have_y = 1;
620 }
621 }
622 }
623
624 for(p = start_down; p != NULL; p = p->next) {
625 if(!interval32_contains(range, p->b.y)) break;
626 if(p->b.flags & BF_SWIMMING) continue;
627 if(page_match_box(quest, p)) {
628 if(have_y) {
629 if(p->b.y != y) {
630 return 2;
631 }
632 }
633 else {
634 quest->first_found = p;
635 y = p->b.y;
636 have_y = 1;
637 }
638 }
639 }
640
641 return have_y;
642 }
643
644
645 enum srestrict_t {
646 SRS_MIN_VAL = -1, /* always first */
647 SRS_REQUIRE_AXIS,
648 SRS_NARROW,
649 SRS_VERY_NARROW,
650 SRS_MOREMATH_SIDE,
651 SRS_LESSMATH_SIDE,
652 SRS_MAX_VAL /* always last */
653 };
654
655 static int srestrict_conflicts[SRS_MAX_VAL] = {
656 0, /* SRS_REQUIRE_AXIS */
657 1 << SRS_VERY_NARROW, /* SRS_NARROW */
658 1 << SRS_NARROW, /* SRS_VERY_NARROW */
659 1 << SRS_MOREMATH_SIDE, /* SRS_MOREMATH_SIDE */
660 1 << SRS_LESSMATH_SIDE /* SRS_LESSMATH_SIDE */
661 };
662
663
664 /* Recurse through all subsets of the set of search restrictions.
665 * Return values:
666 * 0 - No box conforms to the given set of restrictions. Cut that branch.
667 * 1 - The given restrictions or a further refinement have lead to a unique
668 * y value.
669 * 2 - There were boxes conforming to the given set of restrictions, but
670 * the refinements I've tried haven't lead to a unique y value.
671 */
page_grb_recursion(struct searchinfo_t * quest,struct list_node_t * swimmer,enum srestrict_t try_restrict,int srestricts)672 static int page_grb_recursion(
673 struct searchinfo_t * quest,
674 struct list_node_t * swimmer,
675 enum srestrict_t try_restrict,
676 int srestricts
677 )
678 {
679 struct searchinfo_t newquest;
680 int res;
681 struct box_t * pb;
682 sint32 d;
683
684 static char indents[] = " ";
685 char * indent;
686
687 indent = indents + lengthof(indents) + try_restrict - SRS_MAX_VAL;
688
689 pmesg(130, "%sBEGIN page_grb_recursion\n", indent);
690 pmesg(150, "%spage_grb_recursion: try_restrict=%d\n", indent, try_restrict);
691
692 if(try_restrict < 0) {
693 /* The set of applicable search restrictions is already settled.
694 * Really DO something.
695 */
696 res = page_grb_unique_y(quest);
697 pmesg(
698 150,
699 "%spage_grb_recursion: srestricts=%#4x, unique_y result %d\n",
700 indent,
701 srestricts,
702 res
703 );
704 pmesg(130, "%sEND page_grb_recursion\n", indent);
705 return res;
706 }
707
708 /* Try without imposing a new restriction first */
709 res = page_grb_recursion(quest, swimmer, try_restrict - 1, srestricts);
710 if(res <= 1) {
711 pmesg(130, "%sEND page_grb_recursion\n", indent);
712 return res;
713 }
714
715 /* This hasn't been restrictive enough, try adding "our" restriction. */
716
717 if(srestrict_conflicts[try_restrict] & srestricts) {
718 /* We can't add "our" restriction - conflict. But our caller could
719 * try others.
720 */
721 pmesg(150, "%spage_grb_recursion: tried conflicting restrictions\n", indent);
722 pmesg(130, "%sEND page_grb_recursion\n", indent);
723 return(2);
724 }
725
726 pb = &(swimmer->b);
727 d = ((pb->height + pb->depth) + pb->width) / 20;
728 /* (arithmetic mean of total height and width of swimmer)/10 */
729 newquest = *quest;
730 switch(try_restrict) {
731 case SRS_REQUIRE_AXIS:
732 newquest.require_axis = 1;
733 break;
734 case SRS_NARROW:
735 newquest.x.a = max(newquest.x.a, pb->x - 30*d);
736 newquest.x.b = min(newquest.x.b, pb->x + 40*d);
737 /* 10 for the swimmer itself */
738 break;
739 case SRS_MOREMATH_SIDE:
740 if(glyph_get_hint(pb->glyph) & GH_MOREMATH_LEFT) {
741 newquest.x.b = pb->x + pb->width;
742 }
743 else {
744 newquest.x.a = pb->x;
745 }
746 break;
747 case SRS_LESSMATH_SIDE:
748 if(glyph_get_hint(pb->glyph) & GH_MOREMATH_LEFT) {
749 newquest.x.a = pb->x;
750 }
751 else {
752 newquest.x.b = pb->x + pb->width;
753 }
754 break;
755 case SRS_VERY_NARROW:
756 newquest.x.a = max(newquest.x.a, pb->x - 15*d);
757 newquest.x.b = min(newquest.x.b, pb->x + 25*d);
758 /* 10 for the swimmer itself */
759 break;
760 default:
761 NOTREACHED;
762 }
763
764 res = page_grb_recursion(
765 &newquest,
766 swimmer,
767 try_restrict - 1,
768 srestricts | (1 << try_restrict)
769 );
770
771 quest->first_found = newquest.first_found;
772 pmesg(130, "%sEND page_grb_recursion\n", indent);
773 if(res == 1) return 1;
774 else return 2;
775
776 }
777
778
779 /* The fonts in "TeX math extension" encoding are somewhat different from
780 * the others. The glyphs are centered on the "math axis" (about the height
781 * of a minus sign above the baseline of the surrounding text/formula). The
782 * y coordinate of the glyphs reference point is mostly meaningless (and in
783 * Knuths cmex fonts the glyphs are "hanging down" from the reference
784 * point). We know the math axis: its the arithmethic mean of (y - height)
785 * and (y + depth). But we have to move the glyph to the _baseline_ for
786 * proper text formatting, hence we need to deduce the baseline somehow.
787 *
788 * TeX gets the height of the math axis above the baseline from a parameter
789 * named axis_height in the font metrics of the currently used math symbol
790 * font. This parameter is only present in TFM files for fonts with "TeX math
791 * symbols" encoding and it is not easy to be sure which axis height was
792 * in effect when the math extension glyph was set, given only the DVI file.
793 *
794 * Therefore, we try another approach first: we look at the surrounding
795 * boxes. If every box in some reasonable neighbourhood that is dissected
796 * by the math axis has the same baseline, this must be the baseline of
797 * the formula. Only if this approach fails we resort to guessing
798 * axis_height by looking at the loaded math symbol fonts.
799 */
page_guess_reference_box(struct list_node_t * swimmer)800 static struct list_node_t * page_guess_reference_box(
801 struct list_node_t * swimmer
802 )
803 {
804 struct searchinfo_t quest;
805 struct box_t * pb;
806
807 pmesg(50, "BEGIN page_guess_reference_box\n");
808 pmesg(
809 80,
810 "page_guess_reference_box: y=%ld, axis=%ld\n",
811 swimmer->b.y,
812 swimmer->b.axis
813 );
814
815 pb = &(swimmer->b);
816
817 assert(pb->flags & BF_SWIMMING);
818 assert(pb->flags & BF_HAS_AXIS);
819
820 quest.x.a = SINT32_MIN;
821 quest.x.b = SINT32_MAX;
822 quest.y.a = pb->axis;
823 quest.y.b = pb->y + pb->depth;
824 quest.top.a = SINT32_MIN;
825 quest.top.b = pb->axis;
826 quest.axis.a = pb->axis - 2;
827 quest.axis.b = pb->axis + 2; /* allow small rounding errors */
828 quest.require_axis = 0;
829 quest.test_start = 0;
830 quest.start = swimmer;
831
832
833 if(page_sequential) {
834 sint32 h;
835
836 h = pb->height + pb->depth;
837 quest.searched_y.a = pb->axis - 3*h;
838 quest.searched_y.b = pb->axis + 3*h;
839 }
840 else {
841 quest.searched_y.a = min(pb->axis, pb->y);
842 quest.searched_y.b = pb->y + pb->depth;
843 }
844
845 if(page_grb_recursion(&quest, swimmer, SRS_MAX_VAL - 1, 0) == 1) {
846 pmesg(50, "END page_guess_reference_box\n");
847 return quest.first_found;
848 }
849 else {
850 pmesg(50, "END page_guess_reference_box\n");
851 return NULL;
852 }
853
854 }
855
856
page_adjust_texmext(void)857 void page_adjust_texmext(void)
858 {
859 struct list_node_t * p, * s, * ref;
860 sint32 new_y, delta;
861
862 pmesg(50, "BEGIN page_adjust_texmext\n");
863
864 if(!page_has_texmext) {
865 pmesg(80, "nothing to do\n");
866 pmesg(50, "END page_adjust_texmext\n");
867 return;
868 }
869
870 for (p = list_head; p != 0; p = s) {
871 s = p->next;
872 /* p-> is a moving target */
873
874 if (!(p->b.flags & BF_ON_AXIS)) continue;
875 assert(p->b.flags & BF_HAS_AXIS);
876 assert(p->b.flags & BF_SWIMMING);
877
878 pmesg(
879 80,
880 "page_adjust_texmext: glyph=%#6lx, y=%ld\n",
881 p->b.glyph,
882 p->b.y
883 );
884
885 if(p->b.axis_height > 0) {
886 ref = NULL;
887 pmesg(
888 80,
889 "page_adjust_texmext: known axis_height=%ld\n",
890 p->b.axis_height
891 );
892 }
893 else {
894 ref = page_guess_reference_box(p);
895 pmesg(
896 80,
897 "page_adjust_texmext:%s reference box found\n",
898 ref ? "" : " no"
899 );
900 }
901
902 if(ref != NULL) new_y = ref->b.y;
903 else new_y = p->b.axis + abs(p->b.axis_height);
904 pmesg(80, "page_adjust_texmext: new_y=%ld\n", new_y);
905
906 delta = new_y - p->b.y;
907 p->b.y = new_y;
908 p->b.height += delta;
909 p->b.depth -= delta;
910 p->b.axis_height = p->b.y - p->b.axis;
911 p->b.flags &= ~(BF_SWIMMING | BF_ON_AXIS);
912
913 if(!page_sequential) bubble_node(p);
914
915 }
916
917 pmesg(50, "END page_adjust_texmext\n");
918 }
919
920
page_find_lowest_box(struct searchinfo_t * quest)921 static struct list_node_t * page_find_lowest_box(struct searchinfo_t * quest)
922 {
923 interval32_t * range;
924 struct list_node_t * p, * low;
925 struct list_node_t * start_up, * start_down;
926
927 range = &(quest->searched_y);
928 assert(interval32_contains(range, quest->start->b.y));
929 start_up = quest->test_start ? quest->start : quest->start->prev;
930 start_down = quest->start->next;
931
932 quest->first_found = NULL;
933 low = NULL;
934
935 for(p = start_up; p != NULL; p = p->prev) {
936 if(!interval32_contains(range, p->b.y)) break;
937 if(p->b.flags & BF_SWIMMING) continue;
938 if(page_match_box(quest, p)) {
939 if(low == NULL || p->b.y > low->b.y) low = p;
940 }
941 }
942
943 for(p = start_down; p != NULL; p = p->next) {
944 if(!interval32_contains(range, p->b.y)) break;
945 if(p->b.flags & BF_SWIMMING) continue;
946 if(page_match_box(quest, p)) {
947 if(low == NULL || p->b.y > low->b.y) low = p;
948 }
949 }
950
951 quest->first_found = low;
952 return low;
953 }
954
955
956 /* TeX's radical signs are hanging down from their reference point. We move
957 * them to the baseline of the lowest glyph in the radicant (that's where
958 * the bottom tip ought to be) or, if there's no radicant, to the y position
959 * of the bottom tip.
960 */
page_adjust_radicals(void)961 void page_adjust_radicals(void)
962 {
963 struct list_node_t * p, * s, * lowest;
964 sint32 new_y, delta;
965 struct searchinfo_t quest;
966
967 pmesg(50, "BEGIN page_adjust_radicals\n");
968
969 if(!page_has_radicals) {
970 pmesg(80, "nothing to do\n");
971 pmesg(50, "END page_adjust_radicals\n");
972 return;
973 }
974
975 for (p = list_head; p != 0; p = s) {
976 s = p->next;
977 /* p-> is a moving target */
978
979 if (!(p->b.flags & BF_RADICAL)) continue;
980 assert(p->b.flags & BF_SWIMMING);
981
982 pmesg(
983 80,
984 "page_adjust_radicals: glyph=%#6lx, y=%ld\n",
985 p->b.glyph,
986 p->b.y
987 );
988
989 /* put the radical on the baseline of the lowest box in the radicant */
990 quest.x.a = p->b.x;
991 quest.x.b = SINT32_MAX;
992 /* FIXME: Once we have rudimentary rules support, we can search
993 * for the rule forming the radicals top and take its length as
994 * the width of the radicant.
995 */
996 quest.y.a = p->b.y - p->b.height;
997 quest.y.b = p->b.y + p->b.depth;
998 quest.top = quest.y;
999 quest.axis.a = SINT32_MIN;
1000 quest.axis.b = SINT32_MAX;
1001 quest.require_axis = 0;
1002 quest.test_start = 0;
1003 quest.start = p;
1004
1005 if(page_sequential) {
1006 sint32 h;
1007
1008 h = p->b.height + p->b.depth;
1009 quest.searched_y.a = p->b.y - h;
1010 quest.searched_y.b = p->b.y + 2*h;
1011 }
1012 else {
1013 quest.searched_y = quest.y;
1014 }
1015
1016 lowest = page_find_lowest_box(&quest);
1017 pmesg(
1018 80,
1019 "page_adjust_radicals:%s lowest box found\n",
1020 lowest ? "" : " no"
1021 );
1022
1023 if(lowest != NULL) new_y = lowest->b.y;
1024 else new_y = p->b.y + p->b.depth;
1025 pmesg(80, "page_adjust_radicals: new_y=%ld\n", new_y);
1026
1027 delta = new_y - p->b.y;
1028 p->b.y = new_y;
1029 p->b.height += delta;
1030 p->b.depth -= delta;
1031 p->b.flags &= ~(BF_SWIMMING | BF_RADICAL);
1032
1033 if(!page_sequential) bubble_node(p);
1034
1035 }
1036
1037 pmesg(50, "END page_adjust_radicals\n");
1038 }
1039