1 /* catdvi - get text from DVI files
2    Copyright (C) 1999 Antti-Juhani Kaijanaho <gaia@iki.fi>
3    Copyright (C) 2000-02 Bjoern Brill <brill@fs.math.uni-frankfurt.de>
4 
5    This program is free software; you can redistribute it and/or modify
6    it under the terms of the GNU General Public License as published by
7    the Free Software Foundation; either version 2 of the License, or
8    (at your option) any later version.
9 
10    This program is distributed in the hope that it will be useful,
11    but WITHOUT ANY WARRANTY; without even the implied warranty of
12    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13    GNU General Public License for more details.
14 
15    You should have received a copy of the GNU General Public License
16    along with this program; if not, write to the Free Software
17    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
18 */
19 
20 #include <assert.h>
21 #include <stdlib.h>
22 #include "page2.h"
23 #include "util.h"
24 #include "glyphops.h"
25 #include "layout.h"
26 
27 /* Return value like with strcmp and friends. */
compare_boxes(struct box_t a,struct box_t b)28 static int compare_boxes(struct box_t a, struct box_t b)
29 {
30         if (a.y > b.y) return 1;
31         if (a.y < b.y) return -1;
32         assert(a.y == b.y);
33         if (a.x > b.x) return 1;
34         if (a.x < b.x) return -1;
35         assert(a.x == b.x);
36         return 0;
37 }
38 
39 struct point_t {
40     sint32 x;
41     sint32 y;
42 };
43 
point_in_box(const struct point_t * point,const struct box_t * box)44 static int point_in_box(const struct point_t * point, const struct box_t * box)
45 {
46     assert(box);
47     assert(point);
48     return(
49     	box->x      	    	<= point->x &&
50 	(box->x + box->width) 	>= point->x &&
51 	(box->y - box->height) 	<= point->y &&
52 	box->y      	    	>= point->y
53     );
54 }
55 
56 /* Set to 1 when catdvi is invoked with the --sequential option */
57 int page_sequential = 0;
58 
59 /* Set to 1 when catdvi is invoked with the --list-page-numbers option */
60 int page_list_numbers = 0;
61 
62 /* List support variables.  Note that most insertions happen near the
63    previous insertion, so it's beneficial to keep a pointer to the
64    last inserted node. */
65 struct list_node_t * list_head;
66 struct list_node_t * list_tail;
67 struct list_node_t * list_latest; /* Node that was inserted last. */
68 
69 /* flags to avoid the page_adjust_*() loops over the whole page when
70  * there's nothing to adjust.
71  */
72 static int page_has_diacritics;
73 static int page_has_texmext;
74 static int page_has_radicals;
75 
swap_nodes(struct list_node_t * one,struct list_node_t * other)76 static void swap_nodes(struct list_node_t * one, struct list_node_t * other)
77 {
78         assert(one != 0);
79         assert(other != 0);
80         assert(one->next == other);
81         assert(one == other->prev);
82 
83         one->next = other->next;
84         other->prev = one->prev;
85         other->next = one;
86         one->prev = other;
87         if (one->next == 0) {
88                 list_tail = one;
89         } else {
90                 one->next->prev = one;
91         }
92         if (other->prev == 0) {
93                 list_head = other;
94         } else {
95                 other->prev->next = other;
96         }
97 
98         assert(other->next == one);
99         assert(other == one->prev);
100 }
101 
102 /* move list_latest into its correct position in the list */
bubble(void)103 static void bubble(void)
104 {
105         while (1) {
106                 assert(list_latest != 0);
107 
108                 /* If it is in correct position now, end here. */
109                 if ((list_latest->prev == 0
110                      || compare_boxes(list_latest->prev->b, list_latest->b) <= 0)
111                     && (list_latest->next == 0
112                         || compare_boxes(list_latest->b, list_latest->next->b) <= 0)) {
113                         return;
114                 }
115 
116                 /* If it is too left, we must bubble it one position to the right. */
117                 if (list_latest->next != 0
118                     && compare_boxes(list_latest->b, list_latest->next->b) > 0) {
119                         swap_nodes(list_latest, list_latest->next);
120                         continue;
121                 }
122 
123                 /* If it is too right, we must bubble it one position to the left. */
124                 if (list_latest->prev != 0
125                     && compare_boxes(list_latest->prev->b, list_latest->b) > 0) {
126                         assert(list_latest->prev->next == list_latest);
127                         swap_nodes(list_latest->prev, list_latest);
128                         continue;
129                 }
130                 NOTREACHED;
131         }
132         NOTREACHED;
133 }
134 
135 
136 /* move some random node into its correct position in the list */
bubble_node(struct list_node_t * p)137 static void bubble_node(struct list_node_t * p)
138 {
139     list_latest = p;
140     bubble();
141 }
142 
143 
insert_list(struct box_t box)144 static void insert_list(struct box_t box)
145 {
146         struct list_node_t * new;
147 
148         new = malloc(sizeof(struct list_node_t));
149         if (new == 0) enomem();
150         new->b = box;
151 
152         if (list_head == 0 || list_tail == 0 || list_latest == 0) {
153                 assert(list_head == 0);
154                 assert(list_tail == 0);
155                 assert(list_latest == 0);
156 
157                 new->next = new->prev = 0;
158 
159                 list_head = list_tail = list_latest = new;
160                 return;
161         }
162 
163         /* The list is nonempty. */
164 
165         assert(list_head != 0);
166         assert(list_tail != 0);
167         assert(list_latest != 0);
168 
169         /* The insertion algorithm is this: insert the node after the
170            latest insertion, and then bubble it into its correct
171            position.  */
172 
173         new->next = list_latest->next;
174         list_latest->next = new;
175         new->prev = list_latest;
176         list_latest = new;
177         if (new->next == 0) {
178                 list_tail = new;
179         } else {
180                 new->next->prev = new;
181         }
182 
183         assert(list_latest->prev->next == list_latest);
184         assert(list_latest->next == 0 || list_latest->next->prev == list_latest);
185 
186     	/* sort by position except in sequential mode */
187         if (!page_sequential) bubble();
188 }
189 
190 
191 /* These two are set by command line options */
192 struct pageref_t page_start_output = {0, 0, 0, PRF_PHYSICAL};
193 struct pageref_t page_last_output = {SINT32_MAX, 0, 0, PRF_PHYSICAL};
194 
195 static struct pageref_t current_page = {0, 0, 0, PRF_INVALID};
196 
197 
page_begin(sint32 count0)198 void page_begin(sint32 count0)
199 {
200         /* If necessary, delete the previous page's data structure. */
201         if (list_head != 0 || list_tail != 0 || list_latest != 0) {
202                 struct list_node_t * p;
203 
204                 assert(list_head != 0);
205                 assert(list_tail != 0);
206                 assert(list_latest != 0);
207 
208                 p = list_head;
209                 while (p != 0) {
210                         struct list_node_t * next;
211 
212                         if (p == list_head)   list_head = 0;
213                         if (p == list_latest) list_latest = 0;
214                         if (p == list_tail)   list_tail = 0;
215 
216                         next = p->next;
217                         free(p);
218                         p = next;
219                 }
220         }
221         assert(list_head == 0);
222         assert(list_tail == 0);
223         assert(list_latest == 0);
224 
225     	/* reset per page flags */
226 	page_has_diacritics = 0;
227 	page_has_texmext = 0;
228 	page_has_radicals = 0;
229 
230 	/* keep track of page numbering */
231 	if(current_page.flavour == PRF_INVALID) {
232 	    /* here comes the very first page of the document */
233 	    current_page.physical = 1;
234 	    current_page.count0 = count0;
235 	    if (count0 < 0) current_page.chapter = 1;
236 	    else current_page.chapter = 0;
237 	    	/* number chapters from 0 to accomodate frontmatter */
238 	    current_page.flavour = PRF_COMPLETE;
239 	}
240 	else {
241 	    /* just another ordinary page */
242     	    current_page.physical += 1;
243 	    if (pageref_count0_cmp(current_page.count0, count0) >= 0) {
244 		/* count0 has not increased, so start new chapter */
245 		current_page.chapter += 1;
246 	    }
247 	    current_page.count0 = count0;
248 	}
249 }
250 
251 
page_set_glyph(font_t font,glyph_t glyph,sint32 width,sint32 height,sint32 depth,sint32 axis_height,sint32 x,sint32 y)252 void page_set_glyph(
253     font_t font, glyph_t glyph,
254     sint32 width, sint32 height, sint32 depth, sint32 axis_height,
255     sint32 x, sint32 y
256 )
257 {
258     struct box_t b;
259     enum glyph_hint_t hint;
260 
261     b.x = x;
262     b.y = y;
263     b.width = width;
264     b.height = height;
265     b.depth = depth;
266     b.glyph = glyph;
267     b.font = font;
268     b.axis_height = axis_height;
269     b.flags = 0;
270     b.axis = 0;
271 
272     hint = glyph_get_hint(glyph);
273 
274     if(hint & GH_DIACRITIC) {
275 	b.flags |= BF_SWIMMING | BF_DIACRITIC ;
276 	page_has_diacritics = 1;
277     }
278 
279     if(hint & GH_EXTENSIBLE_RECIPE) {
280 	b.flags |= BF_SWIMMING;
281 	page_has_texmext = 1;
282     }
283     else if(hint & GH_ON_AXIS) {
284 	b.flags |= BF_SWIMMING | BF_ON_AXIS | BF_HAS_AXIS ;
285 	b.axis = y + (-height + depth)/2;
286 	page_has_texmext = 1;
287     }
288     else if(hint & GH_RADICAL) {
289 	b.flags |= BF_SWIMMING | BF_RADICAL ;
290 	page_has_radicals = 1;
291     }
292     else if(axis_height > 0) {
293 	b.flags |= BF_HAS_AXIS;
294 	b.axis = y - axis_height;
295     }
296 
297     insert_list(b);
298 }
299 
300 
301 static void page_pair_accenting(
302     struct list_node_t * pbase,
303     struct list_node_t * pdiacritic,
304     int direction
305 );
306 
page_end(void)307 void page_end(void)
308 {
309     pmesg(50, "BEGIN page_end\n");
310 
311     if (
312 	pageref_cmp(&page_start_output, &current_page) > 0 ||
313 	pageref_cmp(&page_last_output, &current_page) < 0
314     ) {
315 	if(msglevel >= 80) {
316 	    pmesg(80, "skipping page by user request:\n  ");
317 	    pageref_print(&current_page, stderr);
318 	}
319 	pmesg(50, "END page_end\n");
320 	return;
321     }
322 
323     if(page_list_numbers) {
324 	pageref_print(&current_page, stdout);
325 	pmesg(50, "END page_end\n");
326 	return;
327     }
328 
329     if(page_sequential) page_print_sequential();
330     else page_print_formatted();
331 
332     pmesg(50, "END page_end\n");
333     return;
334 
335 }
336 
page_adjust_diacritics(void)337 void page_adjust_diacritics(void)
338 {
339     int trouble = 0;
340     int direction = 0;
341 
342     struct list_node_t *p, *s, *q, *pb;
343     struct box_t dia_box;
344     struct point_t dia_center;
345 
346     pmesg(50, "BEGIN page_adjust_diacritics\n");
347 
348     if(!page_has_diacritics) {
349     	pmesg(80, "nothing to do\n");
350     	pmesg(50, "END page_adjust_diacritics\n");
351 	return;
352     }
353 
354     for (p = list_head; p != 0; p = s) {
355     	s = p->next;
356 	    /* p-> is a moving target */
357 
358 	if (!(p->b.flags & BF_DIACRITIC)) continue;
359 
360 	trouble = 0;
361 	pb = 0;
362 
363     	dia_box = p->b;
364 	dia_center.x = dia_box.x + dia_box.width / 2;
365 	dia_center.y = dia_box.y - dia_box.height / 2;
366 
367 	/* Search corresponding base box. There are complex overlayed
368 	 * constructions we can't handle. Therefore, we try to find all
369 	 * possible candidates and abort when more than one is found.
370 	 */
371 
372 	/* search backwards */
373 	for(q = p->prev; q != 0; q = q->prev) {
374 	    if (abs(dia_box.y - q->b.y) > 3*(dia_box.height + q->b.height))
375 	    	/* too far off to find anything useful */
376 	    	break;
377 
378 	    if (point_in_box(&dia_center, &q->b)) {
379 	    	/* I got you babe */
380     	    	if(pb != 0) {
381 		    /* we are not alone...too bad */
382 		    trouble = 1;
383 		    break;
384 		}
385 		pb = q;
386 		direction = -1;
387 	    }
388 	}
389 
390 	if(trouble) {
391 	    pmesg(
392 	    	80,
393 		"page_adjust_diacritics: trouble with diacritic %#lx\n",
394 		dia_box.glyph
395 	    );
396 	    continue;
397 	}
398 
399 	/* search forward */
400 	for(q = p->next; q != 0; q = q->next) {
401 	    if (abs(dia_box.y - q->b.y) > 3*(dia_box.height + q->b.height))
402 	    	/* too far off to find anything useful */
403 	    	break;
404 
405 	    if (point_in_box(&dia_center, &q->b)) {
406 	    	/* I got you babe */
407     	    	if(pb != 0) {
408 		    /* we are not alone...too bad */
409 		    trouble = 1;
410 		    break;
411 		}
412 		pb = q;
413 		direction = 1;
414 	    }
415 	}
416 
417 	if(trouble) {
418 	    pmesg(
419 	    	80,
420 		"page_adjust_diacritics: trouble with diacritic %#lx\n",
421 		dia_box.glyph
422 	    );
423 	    continue;
424 	}
425 	if(pb == 0) {
426 	    pmesg(
427 	    	80,
428 		"page_adjust_diacritics: no base glyph for diacritic %#lx\n",
429 		dia_box.glyph
430 	    );
431 	    /* a lone diacritic. assume it just belongs where it is */
432 	    p->b.flags &= ~(BF_SWIMMING | BF_DIACRITIC);
433 	    continue;
434 	}
435 
436 	/* Found the one and only one. Let's get closer. */
437 	page_pair_accenting(pb, p, direction);
438     }
439 
440     pmesg(50, "END page_adjust_diacritics\n");
441 }
442 
443 
page_pair_accenting(struct list_node_t * pbase,struct list_node_t * pdiacritic,int direction)444 static void page_pair_accenting(
445     struct list_node_t * pbase,
446     struct list_node_t * pdiacritic,
447     int direction
448 )
449 {
450     glyph_t dcv;
451 
452     assert(pdiacritic->b.flags & BF_DIACRITIC);
453     assert(abs(direction) == 1);
454 
455     pmesg(
456 	80,
457 	"detected accenting: base=%#lx, diacritic=%#lx\n",
458 	pbase->b.glyph,
459 	pdiacritic->b.glyph
460     );
461 
462     dcv = diacritic_combining_variant(pdiacritic->b.glyph);
463 
464     if(dcv) {
465     	/* OK this one can be handeled within the unicode framework */
466 
467 	/* move diacritic right after base glyph */
468 	if(direction == -1) {
469 	    /* base before diacritic */
470 	    while(pdiacritic->prev != pbase) {
471 		swap_nodes(pdiacritic->prev, pdiacritic);
472 	    }
473 	}
474 	else {
475 	    /* base after diacritic */
476 	    while(pdiacritic->prev != pbase) {
477 		swap_nodes(pdiacritic, pdiacritic->next);
478 		    /* note swap_nodes() is order sensitive */
479 	    }
480 	}
481 
482     	/* Make the combining diacritic occupy the same space as the
483 	 * accented glyph would. That's the only way to statisfy all
484 	 * ordering and spacing assumptions in this program. And nearly
485 	 * consequent.
486 	 */
487 	pdiacritic->b.width = pbase->b.width;
488 	pdiacritic->b.height = max(
489 	    pbase->b.height,
490 	    pdiacritic->b.height + pbase->b.y - pdiacritic->b.y
491 	);
492 	pdiacritic->b.x = pbase->b.x;
493 	pdiacritic->b.y = pbase->b.y;
494 	pdiacritic->b.glyph = dcv;
495 	pdiacritic->b.flags &= ~(BF_SWIMMING | BF_DIACRITIC);
496     }
497     else {
498     	/* A strange guy. Won't combine. Just put him in line so he can't
499 	 * clobber up things. Like in real life.
500 	 */
501 	pdiacritic->b.height =
502 	    pdiacritic->b.height + pbase->b.y - pdiacritic->b.y;
503 	pdiacritic->b.y = pbase->b.y;
504 	pdiacritic->b.flags &= ~(BF_SWIMMING | BF_DIACRITIC);
505 
506 	/* keep the list ordered */
507 	if(!page_sequential) bubble_node(pdiacritic);
508     }
509 
510 }
511 
512 
513 /************************************************************************
514  * the hairy texmext stuff
515  ************************************************************************/
516 
517 typedef struct interval32_t interval32_t;
518 struct interval32_t {
519     sint32 a;
520     sint32 b;
521 };
522 
interval32_contains(const interval32_t * this,sint32 x)523 static int interval32_contains(const interval32_t * this, sint32 x)
524 {
525     int res;
526 
527     res = (this->a <= x) && (x <= this->b);
528 
529     pmesg(
530     	150,
531 	"interval32_contains: %ld %s [%ld, %ld]\n",
532 	x,
533 	res ? "in" : "not in",
534 	this->a,
535 	this->b
536     );
537 
538     return res;
539 }
540 
541 
542 typedef struct searchinfo_t searchinfo_t;
543 struct searchinfo_t {
544     interval32_t searched_y;
545     interval32_t x;
546     interval32_t y;
547     interval32_t top;
548     interval32_t axis;
549     int require_axis;
550     int test_start;
551     struct list_node_t * start;
552     struct list_node_t * first_found;
553 };
554 
page_match_box(const struct searchinfo_t * quest,const struct list_node_t * candidate)555 static int page_match_box(
556     const struct searchinfo_t * quest,
557     const struct list_node_t * candidate
558 )
559 {
560     int matches;
561     const struct box_t * cb;
562 
563     cb = &(candidate->b);
564 
565     /* The candidate's x, y and top ( == y - height) values must match the ones
566      * we're searching for. If candidate has a known math axis, it must match.
567      * require_axis says whether we require it to have a known math axis.
568      */
569     matches =
570     	interval32_contains(&(quest->x), cb->x) &&
571     	interval32_contains(&(quest->y), cb->y) &&
572 	interval32_contains(&(quest->top), cb->y - cb->height) &&
573     	(
574 	    (cb->flags & BF_HAS_AXIS) ?
575 	    	interval32_contains(&(quest->axis), cb->axis) :
576 	    	!quest->require_axis
577 	);  /* yes these parentheses _are_ neccessary */
578 
579     return(matches);
580 }
581 
582 /* Return values:
583  * 0 - no box matches the given criteria
584  * 1 - some boxes match the given criteria, and all of them have the same y
585  * 2 - some boxes match the given criteria, but different y values occur.
586  *
587  * If some boxes match, quest->first_found will point to guess what.
588  */
page_grb_unique_y(struct searchinfo_t * quest)589 static int page_grb_unique_y(
590     struct searchinfo_t * quest
591 )
592 {
593     interval32_t * range;
594     struct list_node_t * p;
595     struct list_node_t * start_up, * start_down;
596     sint32 y;
597     int have_y = 0;
598 
599 
600     range = &(quest->searched_y);
601     assert(interval32_contains(range, quest->start->b.y));
602     start_up = quest->test_start ? quest->start : quest->start->prev;
603     start_down = quest->start->next;
604 
605     quest->first_found = NULL;
606 
607     for(p = start_up; p != NULL; p = p->prev) {
608     	if(!interval32_contains(range, p->b.y)) break;
609 	if(p->b.flags & BF_SWIMMING) continue;
610 	if(page_match_box(quest, p)) {
611 	    if(have_y) {
612 	    	if(p->b.y != y) {
613 		    return 2;
614 		}
615 	    }
616 	    else {
617 	    	quest->first_found = p;
618 		y = p->b.y;
619 		have_y = 1;
620 	    }
621 	}
622     }
623 
624     for(p = start_down; p != NULL; p = p->next) {
625     	if(!interval32_contains(range, p->b.y)) break;
626 	if(p->b.flags & BF_SWIMMING) continue;
627 	if(page_match_box(quest, p)) {
628 	    if(have_y) {
629 	    	if(p->b.y != y) {
630 		    return 2;
631 		}
632 	    }
633 	    else {
634 	    	quest->first_found = p;
635 		y = p->b.y;
636 		have_y = 1;
637 	    }
638 	}
639     }
640 
641     return have_y;
642 }
643 
644 
645 enum srestrict_t {
646     SRS_MIN_VAL = -1,	/* always first */
647     SRS_REQUIRE_AXIS,
648     SRS_NARROW,
649     SRS_VERY_NARROW,
650     SRS_MOREMATH_SIDE,
651     SRS_LESSMATH_SIDE,
652     SRS_MAX_VAL     	/* always last */
653 };
654 
655 static int srestrict_conflicts[SRS_MAX_VAL] = {
656     0,      	    	    /* SRS_REQUIRE_AXIS */
657     1 << SRS_VERY_NARROW,   /* SRS_NARROW */
658     1 << SRS_NARROW,	    /* SRS_VERY_NARROW */
659     1 << SRS_MOREMATH_SIDE, /* SRS_MOREMATH_SIDE */
660     1 << SRS_LESSMATH_SIDE  /* SRS_LESSMATH_SIDE */
661 };
662 
663 
664 /* Recurse through all subsets of the set of search restrictions.
665  * Return values:
666  * 0 - No box conforms to the given set of restrictions. Cut that branch.
667  * 1 - The given restrictions or a further refinement have lead to a unique
668  *     y value.
669  * 2 - There were boxes conforming to the given set of restrictions, but
670  *     the refinements I've tried haven't lead to a unique y value.
671  */
page_grb_recursion(struct searchinfo_t * quest,struct list_node_t * swimmer,enum srestrict_t try_restrict,int srestricts)672 static int page_grb_recursion(
673     struct searchinfo_t * quest,
674     struct list_node_t * swimmer,
675     enum srestrict_t try_restrict,
676     int srestricts
677 )
678 {
679     struct searchinfo_t newquest;
680     int res;
681     struct box_t * pb;
682     sint32 d;
683 
684     static char indents[] = "                 ";
685     char * indent;
686 
687     indent = indents + lengthof(indents) + try_restrict - SRS_MAX_VAL;
688 
689     pmesg(130, "%sBEGIN page_grb_recursion\n", indent);
690     pmesg(150, "%spage_grb_recursion: try_restrict=%d\n", indent, try_restrict);
691 
692     if(try_restrict < 0) {
693     	/* The set of applicable search restrictions is already settled.
694 	 * Really DO something.
695 	 */
696 	res = page_grb_unique_y(quest);
697         pmesg(
698 	    150,
699 	    "%spage_grb_recursion: srestricts=%#4x, unique_y result %d\n",
700 	    indent,
701 	    srestricts,
702 	    res
703 	);
704     	pmesg(130, "%sEND page_grb_recursion\n", indent);
705 	return res;
706     }
707 
708     /* Try without imposing a new restriction first */
709     res = page_grb_recursion(quest, swimmer, try_restrict - 1, srestricts);
710     if(res <= 1) {
711     	pmesg(130, "%sEND page_grb_recursion\n", indent);
712     	return res;
713     }
714 
715     /* This hasn't been restrictive enough, try adding "our" restriction. */
716 
717     if(srestrict_conflicts[try_restrict] & srestricts) {
718     	/* We can't add "our" restriction - conflict. But our caller could
719 	 * try others.
720 	 */
721     	pmesg(150, "%spage_grb_recursion: tried conflicting restrictions\n", indent);
722     	pmesg(130, "%sEND page_grb_recursion\n", indent);
723 	return(2);
724     }
725 
726     pb = &(swimmer->b);
727     d = ((pb->height + pb->depth) + pb->width) / 20;
728     	/* (arithmetic mean of total height and width of swimmer)/10 */
729     newquest = *quest;
730     switch(try_restrict) {
731     	case SRS_REQUIRE_AXIS:
732 	    newquest.require_axis = 1;
733 	    break;
734 	case SRS_NARROW:
735 	    newquest.x.a = max(newquest.x.a, pb->x - 30*d);
736 	    newquest.x.b = min(newquest.x.b, pb->x + 40*d);
737 	    	/* 10 for the swimmer itself */
738 	    break;
739 	case SRS_MOREMATH_SIDE:
740 	    if(glyph_get_hint(pb->glyph) & GH_MOREMATH_LEFT) {
741 	    	newquest.x.b = pb->x + pb->width;
742 	    }
743 	    else {
744 	    	newquest.x.a = pb->x;
745 	    }
746 	    break;
747 	case SRS_LESSMATH_SIDE:
748 	    if(glyph_get_hint(pb->glyph) & GH_MOREMATH_LEFT) {
749 	    	newquest.x.a = pb->x;
750 	    }
751 	    else {
752 	    	newquest.x.b = pb->x + pb->width;
753 	    }
754 	    break;
755 	case SRS_VERY_NARROW:
756 	    newquest.x.a = max(newquest.x.a, pb->x - 15*d);
757 	    newquest.x.b = min(newquest.x.b, pb->x + 25*d);
758 	    	/* 10 for the swimmer itself */
759 	    break;
760 	default:
761 	    NOTREACHED;
762     }
763 
764     res = page_grb_recursion(
765     	&newquest,
766 	swimmer,
767 	try_restrict - 1,
768 	srestricts | (1 << try_restrict)
769     );
770 
771     quest->first_found = newquest.first_found;
772     pmesg(130, "%sEND page_grb_recursion\n", indent);
773     if(res == 1) return 1;
774     else return 2;
775 
776 }
777 
778 
779 /* The fonts in "TeX math extension" encoding are somewhat different from
780  * the others. The glyphs are centered on the "math axis" (about the height
781  * of a minus sign above the baseline of the surrounding text/formula). The
782  * y coordinate of the glyphs reference point is mostly meaningless (and in
783  * Knuths cmex fonts the glyphs are "hanging down" from the reference
784  * point). We know the math axis: its the arithmethic mean of (y - height)
785  * and (y + depth). But we have to move the glyph to the _baseline_ for
786  * proper text formatting, hence we need to deduce the baseline somehow.
787  *
788  * TeX gets the height of the math axis above the baseline from a parameter
789  * named axis_height in the font metrics of the currently used math symbol
790  * font. This parameter is only present in TFM files for fonts with "TeX math
791  * symbols" encoding and it is not easy to be sure which axis height was
792  * in effect when the math extension glyph was set, given only the DVI file.
793  *
794  * Therefore, we try another approach first: we look at the surrounding
795  * boxes. If every box in some reasonable neighbourhood that is dissected
796  * by the math axis has the same baseline, this must be the baseline of
797  * the formula. Only if this approach fails we resort to guessing
798  * axis_height by looking at the loaded math symbol fonts.
799  */
page_guess_reference_box(struct list_node_t * swimmer)800 static struct list_node_t * page_guess_reference_box(
801     struct list_node_t * swimmer
802 )
803 {
804     struct searchinfo_t quest;
805     struct box_t * pb;
806 
807     pmesg(50, "BEGIN page_guess_reference_box\n");
808     pmesg(
809     	80,
810 	"page_guess_reference_box: y=%ld, axis=%ld\n",
811 	swimmer->b.y,
812 	swimmer->b.axis
813     );
814 
815     pb = &(swimmer->b);
816 
817     assert(pb->flags & BF_SWIMMING);
818     assert(pb->flags & BF_HAS_AXIS);
819 
820     quest.x.a = SINT32_MIN;
821     quest.x.b =  SINT32_MAX;
822     quest.y.a = pb->axis;
823     quest.y.b = pb->y + pb->depth;
824     quest.top.a = SINT32_MIN;
825     quest.top.b = pb->axis;
826     quest.axis.a = pb->axis - 2;
827     quest.axis.b = pb->axis + 2; /* allow small rounding errors */
828     quest.require_axis = 0;
829     quest.test_start = 0;
830     quest.start = swimmer;
831 
832 
833     if(page_sequential) {
834     	sint32 h;
835 
836     	h = pb->height + pb->depth;
837 	quest.searched_y.a = pb->axis - 3*h;
838 	quest.searched_y.b = pb->axis + 3*h;
839     }
840     else {
841     	quest.searched_y.a = min(pb->axis, pb->y);
842     	quest.searched_y.b = pb->y + pb->depth;
843     }
844 
845     if(page_grb_recursion(&quest, swimmer, SRS_MAX_VAL - 1, 0) == 1) {
846     	pmesg(50, "END page_guess_reference_box\n");
847     	return quest.first_found;
848     }
849     else {
850     	pmesg(50, "END page_guess_reference_box\n");
851     	return NULL;
852     }
853 
854 }
855 
856 
page_adjust_texmext(void)857 void page_adjust_texmext(void)
858 {
859     struct list_node_t * p, * s, * ref;
860     sint32 new_y, delta;
861 
862     pmesg(50, "BEGIN page_adjust_texmext\n");
863 
864     if(!page_has_texmext) {
865     	pmesg(80, "nothing to do\n");
866     	pmesg(50, "END page_adjust_texmext\n");
867 	return;
868     }
869 
870     for (p = list_head; p != 0; p = s) {
871     	s = p->next;
872 	    /* p-> is a moving target */
873 
874 	if (!(p->b.flags & BF_ON_AXIS)) continue;
875 	assert(p->b.flags & BF_HAS_AXIS);
876 	assert(p->b.flags & BF_SWIMMING);
877 
878     	pmesg(
879 	    80,
880 	    "page_adjust_texmext: glyph=%#6lx, y=%ld\n",
881 	    p->b.glyph,
882 	    p->b.y
883 	);
884 
885 	if(p->b.axis_height > 0) {
886 	    ref = NULL;
887 	    pmesg(
888 	    	80,
889 		"page_adjust_texmext: known axis_height=%ld\n",
890 		p->b.axis_height
891 	    );
892 	}
893 	else {
894     	    ref = page_guess_reference_box(p);
895 	    pmesg(
896 	    	80,
897 		"page_adjust_texmext:%s reference box found\n",
898 		ref ? "" : " no"
899 	    );
900 	}
901 
902 	if(ref != NULL)  new_y = ref->b.y;
903 	else new_y = p->b.axis + abs(p->b.axis_height);
904 	pmesg(80, "page_adjust_texmext: new_y=%ld\n", new_y);
905 
906 	delta = new_y - p->b.y;
907 	p->b.y = new_y;
908 	p->b.height += delta;
909 	p->b.depth -= delta;
910 	p->b.axis_height = p->b.y - p->b.axis;
911 	p->b.flags &= ~(BF_SWIMMING | BF_ON_AXIS);
912 
913 	if(!page_sequential) bubble_node(p);
914 
915     }
916 
917     pmesg(50, "END page_adjust_texmext\n");
918 }
919 
920 
page_find_lowest_box(struct searchinfo_t * quest)921 static struct list_node_t * page_find_lowest_box(struct searchinfo_t * quest)
922 {
923     interval32_t * range;
924     struct list_node_t * p, * low;
925     struct list_node_t * start_up, * start_down;
926 
927     range = &(quest->searched_y);
928     assert(interval32_contains(range, quest->start->b.y));
929     start_up = quest->test_start ? quest->start : quest->start->prev;
930     start_down = quest->start->next;
931 
932     quest->first_found = NULL;
933     low = NULL;
934 
935     for(p = start_up; p != NULL; p = p->prev) {
936     	if(!interval32_contains(range, p->b.y)) break;
937 	if(p->b.flags & BF_SWIMMING) continue;
938 	if(page_match_box(quest, p)) {
939 	    if(low == NULL || p->b.y > low->b.y) low = p;
940 	}
941     }
942 
943     for(p = start_down; p != NULL; p = p->next) {
944     	if(!interval32_contains(range, p->b.y)) break;
945 	if(p->b.flags & BF_SWIMMING) continue;
946 	if(page_match_box(quest, p)) {
947 	    if(low == NULL || p->b.y > low->b.y) low = p;
948 	}
949     }
950 
951     quest->first_found = low;
952     return low;
953 }
954 
955 
956 /* TeX's radical signs are hanging down from their reference point. We move
957  * them to the baseline of the lowest glyph in the radicant (that's where
958  * the bottom tip ought to be) or, if there's no radicant, to the y position
959  * of the bottom tip.
960  */
page_adjust_radicals(void)961 void page_adjust_radicals(void)
962 {
963     struct list_node_t * p, * s, * lowest;
964     sint32 new_y, delta;
965     struct searchinfo_t quest;
966 
967     pmesg(50, "BEGIN page_adjust_radicals\n");
968 
969     if(!page_has_radicals) {
970     	pmesg(80, "nothing to do\n");
971     	pmesg(50, "END page_adjust_radicals\n");
972 	return;
973     }
974 
975     for (p = list_head; p != 0; p = s) {
976     	s = p->next;
977 	    /* p-> is a moving target */
978 
979 	if (!(p->b.flags & BF_RADICAL)) continue;
980 	assert(p->b.flags & BF_SWIMMING);
981 
982     	pmesg(
983 	    80,
984 	    "page_adjust_radicals: glyph=%#6lx, y=%ld\n",
985 	    p->b.glyph,
986 	    p->b.y
987 	);
988 
989     	/* put the radical on the baseline of the lowest box in the radicant */
990 	quest.x.a = p->b.x;
991 	quest.x.b =  SINT32_MAX;
992 	    /* FIXME: Once we have rudimentary rules support, we can search
993 	     * for the rule forming the radicals top and take its length as
994 	     * the width of the radicant.
995 	     */
996 	quest.y.a = p->b.y - p->b.height;
997 	quest.y.b = p->b.y + p->b.depth;
998 	quest.top = quest.y;
999 	quest.axis.a = SINT32_MIN;
1000 	quest.axis.b = SINT32_MAX;
1001 	quest.require_axis = 0;
1002 	quest.test_start = 0;
1003 	quest.start = p;
1004 
1005 	if(page_sequential) {
1006 	    sint32 h;
1007 
1008     	    h = p->b.height + p->b.depth;
1009 	    quest.searched_y.a = p->b.y - h;
1010 	    quest.searched_y.b = p->b.y + 2*h;
1011 	}
1012 	else {
1013     	    quest.searched_y = quest.y;
1014 	}
1015 
1016     	lowest = page_find_lowest_box(&quest);
1017 	pmesg(
1018 	    80,
1019 	    "page_adjust_radicals:%s lowest box found\n",
1020 	    lowest ? "" : " no"
1021 	);
1022 
1023 	if(lowest != NULL)  new_y = lowest->b.y;
1024 	else new_y = p->b.y + p->b.depth;
1025 	pmesg(80, "page_adjust_radicals: new_y=%ld\n", new_y);
1026 
1027 	delta = new_y - p->b.y;
1028 	p->b.y = new_y;
1029 	p->b.height += delta;
1030 	p->b.depth -= delta;
1031 	p->b.flags &= ~(BF_SWIMMING | BF_RADICAL);
1032 
1033 	if(!page_sequential) bubble_node(p);
1034 
1035     }
1036 
1037     pmesg(50, "END page_adjust_radicals\n");
1038 }
1039