1 /* HTML renderer */
2 
3 #ifdef HAVE_CONFIG_H
4 #include "config.h"
5 #endif
6 
7 #include <ctype.h>
8 #include <stdarg.h>
9 #include <string.h>
10 
11 #include "elinks.h"
12 
13 #include "cache/cache.h"
14 #include "config/options.h"
15 #include "document/docdata.h"
16 #include "document/document.h"
17 #include "document/html/frames.h"
18 #include "document/html/parser.h"
19 #include "document/html/parser/parse.h"
20 #include "document/html/renderer.h"
21 #include "document/html/tables.h"
22 #include "document/options.h"
23 #include "document/refresh.h"
24 #include "document/renderer.h"
25 #include "intl/charsets.h"
26 #include "protocol/uri.h"
27 #include "session/session.h"
28 #include "terminal/color.h"
29 #include "terminal/draw.h"
30 #include "util/color.h"
31 #include "util/conv.h"
32 #include "util/error.h"
33 #include "util/hash.h"
34 #include "util/lists.h"
35 #include "util/memory.h"
36 #include "util/string.h"
37 #include "util/time.h"
38 #include "viewer/text/form.h"
39 #include "viewer/text/view.h"
40 #include "viewer/text/vs.h"
41 
42 /* Unsafe macros */
43 #include "document/html/internal.h"
44 
45 /* Types and structs */
46 
/* Tags are used for ``id''s or anchors in the document referenced by the
 * fragment part of the URI. */
/* FIXME: This and find_tag() should be part of the general infrastructure
 * in document/document.*. --pasky */
struct tag {
	LIST_HEAD(struct tag);

	/* Position of the anchor. move_links() compares these against
	 * coordinates that already include the part's box origin. */
	int x, y;
	/* NUL-terminated anchor name. Only one element is declared, but
	 * html_special_tag() allocates the struct with enough extra room for
	 * the whole string. */
	unsigned char name[1]; /* must be last of struct. --Zas */
};
57 
/* Link state handed to set_hline() and get_format_screen_char(). The latter
 * only distinguishes LINK_STATE_NONE from the other two values. */
enum link_state {
	LINK_STATE_NONE,
	/* NOTE(review): presumably "a new link starts here" -- confirm. */
	LINK_STATE_NEW,
	/* NOTE(review): presumably "still inside the previous link" -- confirm. */
	LINK_STATE_SAME,
};
63 
/* NOTE(review): presumably a snapshot of the link, target, image and form
 * attributes of the most recently rendered link, used to decide whether the
 * next chars continue it -- the code using it is not visible here; confirm. */
struct link_state_info {
	unsigned char *link;
	unsigned char *target;
	unsigned char *image;
	struct form_control *form;
};
70 
/* Key identifying one cached table rendering.
 * NOTE(review): presumably @start and @end delimit the HTML source of the
 * table and the remaining members are the layout parameters it was
 * formatted with -- the code filling it in is not visible here; confirm. */
struct table_cache_entry_key {
	unsigned char *start;
	unsigned char *end;
	int align;
	int margin;
	int width;
	int x;
	int link_num;
};
80 
/* A table cache entry: the lookup key together with a struct part.
 * NOTE(review): presumably the part is the result of formatting the table
 * described by the key -- confirm. */
struct table_cache_entry {
	LIST_HEAD(struct table_cache_entry);

	struct table_cache_entry_key key;
	struct part part;
};
87 
/* Max. entries in table cache used for nested tables. */
#define MAX_TABLE_CACHE_ENTRIES 16384

/* Global variables */
/* NOTE(review): presumably the number of entries currently held in
 * table_cache, kept below MAX_TABLE_CACHE_ENTRIES -- the code maintaining
 * both is not visible here; confirm. */
static int table_cache_entries;
static struct hash *table_cache;
94 
95 
/* State of the renderer that is shared by the functions in this file. */
struct renderer_context {
	/* Index of the first link move_links() starts scanning from. */
	int last_link_to_move;
	/* Tag after which move_links() starts scanning the tags list. */
	struct tag *last_tag_to_move;
	/* All tags between document->tags and this tag (inclusive) should
	 * be aligned to the next line break, unless some real content follows
	 * the tag. Therefore, this virtual tags list accumulates new tags as
	 * they arrive and empties when some real content is written; if a line
	 * break is inserted in the meanwhile, the tags follow it (ie. imagine
	 * <a name="x"> <p>, then the "x" tag follows the line breaks inserted
	 * by the <p> tag). */
	struct tag *last_tag_for_newline;

	struct link_state_info link_state_info;

	struct conv_table *convert_table;

	/* Used for setting cache info from HTTP-EQUIV meta tags. */
	struct cache_entry *cached;

	int g_ctrl_num;
	int subscript;	/* Count stacked subscripts */
	int supscript;	/* Count stacked superscripts */

	unsigned int empty_format:1;
	unsigned int nobreak:1;
	/* When set, get_format_screen_char() marks the chars it hands out
	 * with SCREEN_ATTR_UNSEARCHABLE. */
	unsigned int nosearchable:1;
	unsigned int nowrap:1; /* Activated/deactivated by SP_NOWRAP. */
};

/* The single renderer state instance used throughout this file. */
static struct renderer_context renderer_context;
126 
127 
/* Prototypes */
void line_break(struct html_context *);
void put_chars(struct html_context *, unsigned char *, int);

/* Translate part-relative coordinates into document coordinates by adding
 * the origin of the part's box. Both expect a `part' variable in scope. */
#define X(x_)	(part->box.x + (x_))
#define Y(y_)	(part->box.y + (y_))

/* Granularity passed to mem_align_alloc() by ALIGN_SPACES(). */
#define SPACES_GRANULARITY	0x7F

/* Used by realloc_spaces() to grow the part->spaces array. */
#define ALIGN_SPACES(x, o, n) mem_align_alloc(x, o, n, unsigned char, SPACES_GRANULARITY)
138 
139 static inline void
set_screen_char_color(struct screen_char * schar,color_T bgcolor,color_T fgcolor,enum color_flags color_flags,enum color_mode color_mode)140 set_screen_char_color(struct screen_char *schar,
141 		      color_T bgcolor, color_T fgcolor,
142 		      enum color_flags color_flags,
143 		      enum color_mode color_mode)
144 {
145 	struct color_pair colors = INIT_COLOR_PAIR(bgcolor, fgcolor);
146 
147 	set_term_color(schar, &colors, color_flags, color_mode);
148 }
149 
150 static int
realloc_line(struct html_context * html_context,struct document * document,int y,int length)151 realloc_line(struct html_context *html_context, struct document *document,
152              int y, int length)
153 {
154 	struct screen_char *pos, *end;
155 	struct line *line;
156 
157 	if (!realloc_lines(document, y))
158 		return -1;
159 
160 	line = &document->data[y];
161 
162 	if (length < line->length)
163 		return 0;
164 
165 	if (!ALIGN_LINE(&line->chars, line->length, length + 1))
166 		return -1;
167 
168 	/* We cannot rely on the aligned allocation to clear the members for us
169 	 * since for line splitting we simply trim the length. Question is if
170 	 * it is better to to clear the line after the splitting or here. */
171 	end = &line->chars[length];
172 	end->data = ' ';
173 	end->attr = 0;
174 	set_screen_char_color(end, par_format.bgcolor, 0x0,
175 			      COLOR_ENSURE_CONTRAST, /* for bug 461 */
176 			      document->options.color_mode);
177 
178 	for (pos = &line->chars[line->length]; pos < end; pos++) {
179 		copy_screen_chars(pos, end, 1);
180 	}
181 
182 	line->length = length + 1;
183 
184 	return 0;
185 }
186 
187 void
expand_lines(struct html_context * html_context,struct part * part,int x,int y,int lines,color_T bgcolor)188 expand_lines(struct html_context *html_context, struct part *part,
189              int x, int y, int lines, color_T bgcolor)
190 {
191 	int line;
192 
193 	assert(part && part->document);
194 	if_assert_failed return;
195 
196 	if (!use_document_bg_colors(&part->document->options))
197 		return;
198 
199 	par_format.bgcolor = bgcolor;
200 
201 	for (line = 0; line < lines; line++)
202 		realloc_line(html_context, part->document, Y(y + line), X(x));
203 }
204 
205 static inline int
realloc_spaces(struct part * part,int length)206 realloc_spaces(struct part *part, int length)
207 {
208 	if (length < part->spaces_len)
209 		return 0;
210 
211 	if (!ALIGN_SPACES(&part->spaces, part->spaces_len, length))
212 		return -1;
213 
214 	part->spaces_len = length;
215 
216 	return 0;
217 }
218 
219 
/* LINE() is the document line at part-relative line @y_, POS() the screen
 * char at part-relative (@x_, @y_) and LEN() the length of the line counted
 * from the left edge of the part's box (never negative). All three expect a
 * `part' variable in scope and do no bounds checking. */
#define LINE(y_)	part->document->data[Y(y_)]
#define POS(x_, y_)	LINE(y_).chars[X(x_)]
#define LEN(y_)		int_max(LINE(y_).length - part->box.x, 0)
223 
224 
225 /* When we clear chars we want to preserve and use the background colors
226  * already in place else we could end up ``staining'' the background especial
227  * when drawing table cells. So make the cleared chars share the colors in
228  * place. */
229 static inline void
clear_hchars(struct html_context * html_context,int x,int y,int width)230 clear_hchars(struct html_context *html_context, int x, int y, int width)
231 {
232 	struct part *part;
233 	struct screen_char *pos, *end;
234 
235 	assert(html_context);
236 	if_assert_failed return;
237 
238 	part = html_context->part;
239 
240 	assert(part && part->document && width > 0);
241 	if_assert_failed return;
242 
243 	if (realloc_line(html_context, part->document, Y(y), X(x) + width - 1))
244 		return;
245 
246 	assert(part->document->data);
247 	if_assert_failed return;
248 
249 	pos = &POS(x, y);
250 	end = pos + width - 1;
251 	end->data = ' ';
252 	end->attr = 0;
253 	set_screen_char_color(end, par_format.bgcolor, 0x0,
254 			      COLOR_ENSURE_CONTRAST, /* for bug 461 */
255 			      part->document->options.color_mode);
256 
257 	while (pos < end)
258 		copy_screen_chars(pos++, end, 1);
259 }
260 
261 /* TODO: Merge parts with get_format_screen_char(). --jonas */
262 /* Allocates the required chars on the given line and returns the char at
263  * position (x, y) ready to be used as a template char.  */
264 static inline struct screen_char *
get_frame_char(struct html_context * html_context,struct part * part,int x,int y,unsigned char data,color_T bgcolor,color_T fgcolor)265 get_frame_char(struct html_context *html_context, struct part *part,
266 	       int x, int y, unsigned char data,
267                color_T bgcolor, color_T fgcolor)
268 {
269 	struct screen_char *template;
270 
271 	assert(html_context);
272 	if_assert_failed return NULL;
273 
274 	assert(part && part->document && x >= 0 && y >= 0);
275 	if_assert_failed return NULL;
276 
277 	if (realloc_line(html_context, part->document, Y(y), X(x)))
278 		return NULL;
279 
280 	assert(part->document->data);
281 	if_assert_failed return NULL;
282 
283 	template = &POS(x, y);
284 	template->data = data;
285 	template->attr = SCREEN_ATTR_FRAME;
286 	set_screen_char_color(template, bgcolor, fgcolor,
287 			      part->document->options.color_flags,
288 			      part->document->options.color_mode);
289 
290 	return template;
291 }
292 
293 void
draw_frame_hchars(struct part * part,int x,int y,int width,unsigned char data,color_T bgcolor,color_T fgcolor,struct html_context * html_context)294 draw_frame_hchars(struct part *part, int x, int y, int width,
295 		  unsigned char data, color_T bgcolor, color_T fgcolor,
296 		  struct html_context *html_context)
297 {
298 	struct screen_char *template;
299 
300 	assert(width > 0);
301 	if_assert_failed return;
302 
303 	template = get_frame_char(html_context, part, x + width - 1, y, data, bgcolor, fgcolor);
304 	if (!template) return;
305 
306 	/* The template char is the last we need to draw so only decrease @width. */
307 	for (width -= 1; width; width--, x++) {
308 		copy_screen_chars(&POS(x, y), template, 1);
309 	}
310 }
311 
312 void
draw_frame_vchars(struct part * part,int x,int y,int height,unsigned char data,color_T bgcolor,color_T fgcolor,struct html_context * html_context)313 draw_frame_vchars(struct part *part, int x, int y, int height,
314 		  unsigned char data, color_T bgcolor, color_T fgcolor,
315 		  struct html_context *html_context)
316 {
317 	struct screen_char *template = get_frame_char(html_context, part, x, y,
318 	                                              data, bgcolor, fgcolor);
319 
320 	if (!template) return;
321 
322 	/* The template char is the first vertical char to be drawn. So
323 	 * copy it to the rest. */
324 	for (height -= 1, y += 1; height; height--, y++) {
325 	    	if (realloc_line(html_context, part->document, Y(y), X(x)))
326 			return;
327 
328 		copy_screen_chars(&POS(x, y), template, 1);
329 	}
330 }
331 
/* Returns the template screen char matching the current text format.
 *
 * The result lives in a static buffer which is only recomputed when
 * format.style differs from the style seen by the previous call; note that
 * @link_state is therefore only consulted when the style changed. While
 * recomputing, the sub- and superscript markers ("[", "]" and "^") are
 * emitted through put_chars() if the respective options are enabled. The
 * unsearchable attribute is synchronized with renderer_context.nosearchable
 * on every call. */
static inline struct screen_char *
get_format_screen_char(struct html_context *html_context,
                       enum link_state link_state)
{
	/* Style the cached char below was built for. */
	static struct text_attrib_style ta_cache = { -1, 0x0, 0x0 };
	static struct screen_char schar_cache;

	if (memcmp(&ta_cache, &format.style, sizeof(ta_cache))) {
		copy_struct(&ta_cache, &format.style);

		schar_cache.attr = 0;
		/* Translate the text attributes, if any is set besides the
		 * two update flags, to screen attributes. */
		if (format.style.attr & ~(AT_UPDATE_SUB|AT_UPDATE_SUP)) {
			if (format.style.attr & AT_UNDERLINE) {
				schar_cache.attr |= SCREEN_ATTR_UNDERLINE;
			}

			if (format.style.attr & AT_BOLD) {
				schar_cache.attr |= SCREEN_ATTR_BOLD;
			}

			if (format.style.attr & AT_ITALIC) {
				schar_cache.attr |= SCREEN_ATTR_ITALIC;
			}

			if (format.style.attr & AT_GRAPHICS) {
				schar_cache.attr |= SCREEN_ATTR_FRAME;
			}
		}

		if (link_state != LINK_STATE_NONE
		    && html_context->options->underline_links) {
			schar_cache.attr |= SCREEN_ATTR_UNDERLINE;
		}

		set_screen_char_color(&schar_cache, format.style.bg, format.style.fg,
				      html_context->options->color_flags,
				      html_context->options->color_mode);

		if (html_context->options->display_subs) {
			if (format.style.attr & AT_SUBSCRIPT) {
				/* Entering one more subscript level: count it,
				 * clear the update flag and open a bracket. */
				if (format.style.attr & AT_UPDATE_SUB) {
					renderer_context.subscript++;
					format.style.attr &= ~AT_UPDATE_SUB;
					put_chars(html_context, "[", 1);
				}
			} else {
				/* Left subscript mode: close every bracket
				 * that is still open. */
				while (renderer_context.subscript) {
					renderer_context.subscript--;
					put_chars(html_context, "]", 1);
				}
			}
		}

		if (html_context->options->display_sups) {
			if (format.style.attr & AT_SUPERSCRIPT) {
				if (format.style.attr & AT_UPDATE_SUP) {
					renderer_context.supscript++;
					format.style.attr &= ~AT_UPDATE_SUP;
					put_chars(html_context, "^", 1);
				}
			} else {
				/* Superscripts get no closing marker, so this
				 * merely resets the counter to zero. */
				while (renderer_context.supscript)
					renderer_context.supscript--;
			}
		}
	}

	/* Flip the unsearchable attribute whenever it disagrees with the
	 * renderer state. */
	if (!!(schar_cache.attr & SCREEN_ATTR_UNSEARCHABLE)
	    ^ !!renderer_context.nosearchable) {
		schar_cache.attr ^= SCREEN_ATTR_UNSEARCHABLE;
	}

	return &schar_cache;
}
406 
407 /* First possibly do the format change and then find out what coordinates
408  * to use since sub- or superscript might change them */
409 static inline void
set_hline(struct html_context * html_context,unsigned char * chars,int charslen,enum link_state link_state)410 set_hline(struct html_context *html_context, unsigned char *chars, int charslen,
411 	  enum link_state link_state)
412 {
413 	struct part *part = html_context->part;
414 	struct screen_char *schar = get_format_screen_char(html_context,
415 	                                                   link_state);
416 	int x = part->cx;
417 	int y = part->cy;
418 
419 	assert(part);
420 	if_assert_failed return;
421 
422 	if (realloc_spaces(part, x + charslen))
423 		return;
424 
425 	if (part->document) {
426 		if (realloc_line(html_context, part->document,
427 		                 Y(y), X(x) + charslen - 1))
428 			return;
429 
430 		for (; charslen > 0; charslen--, x++, chars++) {
431 			if (*chars == NBSP_CHAR) {
432 				schar->data = ' ';
433 				part->spaces[x] = html_context->options->wrap_nbsp;
434 			} else {
435 				part->spaces[x] = (*chars == ' ');
436 				schar->data = *chars;
437 			}
438 
439 			copy_screen_chars(&POS(x, y), schar, 1);
440 		}
441 	} else {
442 		for (; charslen > 0; charslen--, x++, chars++) {
443 			if (*chars == NBSP_CHAR) {
444 				part->spaces[x] = html_context->options->wrap_nbsp;
445 			} else {
446 				part->spaces[x] = (*chars == ' ');
447 			}
448 		}
449 	}
450 }
451 
/* Relocate the link points and tags that lie on part-relative line @yf at or
 * to the right of column @xf: they are put on line @yt and shifted
 * horizontally by (@xt - @xf). A negative @yt deletes the matching link
 * points instead and leaves the tags alone.
 *
 * Scanning starts at renderer_context.last_link_to_move and after
 * renderer_context.last_tag_to_move; both are advanced for as long as no
 * entry on or below line @yf has been seen. */
static void
move_links(struct html_context *html_context, int xf, int yf, int xt, int yt)
{
	struct part *part;
	struct tag *tag;
	int nlink = renderer_context.last_link_to_move;
	int matched = 0;

	assert(html_context);
	if_assert_failed return;

	part = html_context->part;

	assert(part && part->document);
	if_assert_failed return;

	if (!realloc_lines(part->document, Y(yt)))
		return;

	for (; nlink < part->document->nlinks; nlink++) {
		struct link *link = &part->document->links[nlink];
		int i;

		for (i = 0; i < link->npoints; i++) {
			/* Fix for bug 479 (part one) */
			/* The scenario that triggered it:
			 *
			 * Imagine a centered element containing a really long
			 * word (over half of the screen width long) followed
			 * by a few links with no spaces between them where all
			 * the link text combined with the really long word
			 * will force the line to be wrapped. When rendering
			 * the line first words (including link text words) are
			 * put on one line. Then wrapping is performed moving
			 * all links from current line to the one below. Then
			 * the current line (now only containing the really
			 * long word) is centered. This will trigger a call to
			 * move_links() which will increment.
			 *
			 * Without the fix below the centering of the current
			 * line will increment last_link_to_move to that of the
			 * last link which means centering of the next line
			 * with all the links will only move the last link
			 * leaving all the other links' points dangling and
			 * causing buggy link highlighting.
			 *
			 * Even links like textareas will be correctly handled
			 * because @last_link_to_move is a way to optimize how
			 * many links move_links() will have to iterate and
			 * this little fix will only decrease the effect of the
			 * optimization by always ensuring it is never
			 * incremented too far. */
			if (!matched && link->points[i].y > Y(yf)) {
				matched = 1;
				continue;
			}

			if (link->points[i].y != Y(yf))
				continue;

			matched = 1;

			if (link->points[i].x < X(xf))
				continue;

			if (yt >= 0) {
				link->points[i].y = Y(yt);
				link->points[i].x += -xf + xt;
			} else {
				/* Delete the point by closing the gap it
				 * leaves in the points array. */
				int to_move = link->npoints - (i + 1);

				assert(to_move >= 0);

				if (to_move > 0) {
					memmove(&link->points[i],
						&link->points[i + 1],
						to_move *
						sizeof(*link->points));
					/* Look at index @i again, it now holds
					 * the point that followed. */
					i--;
				}

				link->npoints--;
			}
		}

		if (!matched) {
			renderer_context.last_link_to_move = nlink;
		}
	}

	/* Don't move tags when removing links. */
	if (yt < 0) return;

	matched = 0;
	tag = renderer_context.last_tag_to_move;

	while (list_has_next(part->document->tags, tag)) {
		tag = tag->next;

		if (tag->y == Y(yf)) {
			matched = 1;
			if (tag->x >= X(xf)) {
				tag->y = Y(yt);
				tag->x += -xf + xt;
			}

		} else if (!matched && tag->y > Y(yf)) {
			/* Fix for bug 479 (part two) */
			matched = 1;
		}

		if (!matched) renderer_context.last_tag_to_move = tag;
	}
}
566 
567 static inline void
copy_chars(struct html_context * html_context,int x,int y,int width,struct screen_char * d)568 copy_chars(struct html_context *html_context, int x, int y, int width, struct screen_char *d)
569 {
570 	struct part *part;
571 
572 	assert(html_context);
573 	if_assert_failed return;
574 
575 	part = html_context->part;
576 
577 	assert(width > 0 && part && part->document && part->document->data);
578 	if_assert_failed return;
579 
580 	if (realloc_line(html_context, part->document, Y(y), X(x) + width - 1))
581 		return;
582 
583 	copy_screen_chars(&POS(x, y), d, width);
584 }
585 
586 static inline void
move_chars(struct html_context * html_context,int x,int y,int nx,int ny)587 move_chars(struct html_context *html_context, int x, int y, int nx, int ny)
588 {
589 	struct part *part;
590 
591 	assert(html_context);
592 	if_assert_failed return;
593 
594 	part = html_context->part;
595 
596 	assert(part && part->document && part->document->data);
597 	if_assert_failed return;
598 
599 	if (LEN(y) - x <= 0) return;
600 	copy_chars(html_context, nx, ny, LEN(y) - x, &POS(x, y));
601 
602 	LINE(y).length = X(x);
603 	move_links(html_context, x, y, nx, ny);
604 }
605 
606 static inline void
shift_chars(struct html_context * html_context,int y,int shift)607 shift_chars(struct html_context *html_context, int y, int shift)
608 {
609 	struct part *part;
610 	struct screen_char *a;
611 	int len;
612 
613 	assert(html_context);
614 	if_assert_failed return;
615 
616 	part = html_context->part;
617 
618 	assert(part && part->document && part->document->data);
619 	if_assert_failed return;
620 
621 	len = LEN(y);
622 
623 	a = fmem_alloc(len * sizeof(*a));
624 	if (!a) return;
625 
626 	copy_screen_chars(a, &POS(0, y), len);
627 
628 	clear_hchars(html_context, 0, y, shift);
629 	copy_chars(html_context, shift, y, len, a);
630 	fmem_free(a);
631 
632 	move_links(html_context, 0, y, shift, y);
633 }
634 
635 static inline void
del_chars(struct html_context * html_context,int x,int y)636 del_chars(struct html_context *html_context, int x, int y)
637 {
638 	struct part *part;
639 
640 	assert(html_context);
641 	if_assert_failed return;
642 
643 	part = html_context->part;
644 
645 	assert(part && part->document && part->document->data);
646 	if_assert_failed return;
647 
648 	LINE(y).length = X(x);
649 	move_links(html_context, x, y, -1, -1);
650 }
651 
/* overlap(x) evaluates to the width of the paragraph format @x minus its
 * right margin, but never less than zero. Unless TABLE_LINE_PADDING is
 * negative the width is first capped at the width of the options box minus
 * TABLE_LINE_PADDING; that variant expects `html_context' in scope. */
#if TABLE_LINE_PADDING < 0
# define overlap_width(x) (x).width
#else
# define overlap_width(x) int_min((x).width, \
	html_context->options->box.width - TABLE_LINE_PADDING)
#endif
#define overlap(x) int_max(overlap_width(x) - (x).rightmargin, 0)
659 
/* Break the current line of the part at the space in column @width: the
 * chars following the space are moved to the next line, starting at the
 * left margin, and the space itself is deleted. The part->spaces array and
 * the cursor are adjusted to describe the new line.
 * Returns 2 if the new line is empty (the cursor was right behind the
 * space), 1 otherwise, and 0 if an assertion fails. */
static int inline
split_line_at(struct html_context *html_context, int width)
{
	struct part *part;
	int tmp;
	int new_width = width + par_format.rightmargin;

	assert(html_context);
	if_assert_failed return 0;

	part = html_context->part;

	assert(part);
	if_assert_failed return 0;

	/* Make sure that we count the right margin to the total
	 * actual box width. */
	int_lower_bound(&part->box.width, new_width);

	if (part->document) {
		assert(part->document->data);
		if_assert_failed return 0;
		assertm(POS(width, part->cy).data == ' ',
			"bad split: %c", POS(width, part->cy).data);
		move_chars(html_context, width + 1, part->cy, par_format.leftmargin, part->cy + 1);
		del_chars(html_context, width, part->cy);
	}

	width++; /* Since we were using (x + 1) only later... */

	/* Drop the entries of the chars staying on the old line (and of the
	 * space) from the front of part->spaces ... */
	tmp = part->spaces_len - width;
	if (tmp > 0) {
		/* 0 is possible and I'm paranoid ... --Zas */
		memmove(part->spaces, part->spaces + width, tmp);
	}

	/* ... and clear the entries that became free at its end. */
	assert(tmp >= 0);
	if_assert_failed tmp = 0;
	memset(part->spaces + tmp, 0, width);

	/* The moved chars start at the left margin of the new line, so
	 * shift their entries there as well. */
	if (par_format.leftmargin > 0) {
		tmp = part->spaces_len - par_format.leftmargin;
		assertm(tmp > 0, "part->spaces_len - par_format.leftmargin == %d", tmp);
		/* So tmp is zero, memmove() should survive that. Don't recover. */
		memmove(part->spaces + par_format.leftmargin, part->spaces, tmp);
	}

	part->cy++;

	if (part->cx == width) {
		/* Nothing followed the space, the new line is blank. */
		part->cx = -1;
		int_lower_bound(&part->box.height, part->cy);
		return 2;
	} else {
		part->cx -= width - par_format.leftmargin;
		int_lower_bound(&part->box.height, part->cy + 1);
		return 1;
	}
}
719 
720 /* Here, we scan the line for a possible place where we could split it into two
721  * (breaking it, because it is too long), if it is overlapping from the maximal
722  * box width. */
723 /* Returns 0 if there was found no spot suitable for breaking the line.
724  *         1 if the line was split into two.
725  *         2 if the (second) splitted line is blank (that is useful to determine
726  *           ie. if the next line_break() should really break the line; we don't
727  *           want to see any blank lines to pop up, do we?). */
728 static int
split_line(struct html_context * html_context)729 split_line(struct html_context *html_context)
730 {
731 	struct part *part;
732 	int x;
733 
734 	assert(html_context);
735 	if_assert_failed return 0;
736 
737 	part = html_context->part;
738 
739 	assert(part);
740 	if_assert_failed return 0;
741 
742 	for (x = overlap(par_format); x >= par_format.leftmargin; x--)
743 		if (x < part->spaces_len && part->spaces[x])
744 			return split_line_at(html_context, x);
745 
746 	for (x = par_format.leftmargin; x < part->cx ; x++)
747 		if (x < part->spaces_len && part->spaces[x])
748 			return split_line_at(html_context, x);
749 
750 	/* Make sure that we count the right margin to the total
751 	 * actual box width. */
752 	int_lower_bound(&part->box.width, part->cx + par_format.rightmargin);
753 
754 	return 0;
755 }
756 
757 /* Insert @new_spaces spaces before the coordinates @x and @y,
758  * adding those spaces to whatever link is at those coordinates. */
759 /* TODO: Integrate with move_links. */
760 static void
insert_spaces_in_link(struct part * part,int x,int y,int new_spaces)761 insert_spaces_in_link(struct part *part, int x, int y, int new_spaces)
762 {
763 	int i = part->document->nlinks;
764 
765 	x = X(x);
766 	y = Y(y);
767 
768 	while (i--) {
769 		struct link *link = &part->document->links[i];
770 		int j = link->npoints;
771 
772 		while (j-- > 1) {
773 			struct point *point = &link->points[j];
774 
775 			if (point->x != x || point->y != y)
776 				continue;
777 
778 			if (!realloc_points(link, link->npoints + new_spaces))
779 				return;
780 
781 			link->npoints += new_spaces;
782 			point = &link->points[link->npoints - 1];
783 
784 			while (new_spaces--) {
785 				point->x = --x;
786 				point->y = y;
787 				point--;
788 			}
789 
790 			return;
791 		}
792 	}
793 }
794 
795 /* This function is very rare exemplary of clean and beautyful code here.
796  * Please handle with care. --pasky */
797 static void
justify_line(struct html_context * html_context,int y)798 justify_line(struct html_context *html_context, int y)
799 {
800 	struct part *part;
801 	struct screen_char *line; /* we save original line here */
802 	int len;
803 	int pos;
804 	int *space_list;
805 	int spaces;
806 	int diff;
807 
808 	assert(html_context);
809 	if_assert_failed return;
810 
811 	part = html_context->part;
812 
813 	assert(part && part->document && part->document->data);
814 	if_assert_failed return;
815 
816 	len = LEN(y);
817 	assert(len > 0);
818 	if_assert_failed return;
819 
820 	line = fmem_alloc(len * sizeof(*line));
821 	if (!line) return;
822 
823 	/* It may sometimes happen that the line is only one char long and that
824 	 * char is space - then we're going to write to both [0] and [1], but
825 	 * we allocated only one field. Thus, we've to do (len + 1). --pasky */
826 	space_list = fmem_alloc((len + 1) * sizeof(*space_list));
827 	if (!space_list) {
828 		fmem_free(line);
829 		return;
830 	}
831 
832 	copy_screen_chars(line, &POS(0, y), len);
833 
834 	/* Skip leading spaces */
835 
836 	spaces = 0;
837 	pos = 0;
838 
839 	while (line[pos].data == ' ')
840 		pos++;
841 
842 	/* Yes, this can be negative, we know. But we add one to it always
843 	 * anyway, so it's ok. */
844 	space_list[spaces++] = pos - 1;
845 
846 	/* Count spaces */
847 
848 	for (; pos < len; pos++)
849 		if (line[pos].data == ' ')
850 			space_list[spaces++] = pos;
851 
852 	space_list[spaces] = len;
853 
854 	/* Realign line */
855 
856 	/* Diff is the difference between the width of the paragraph
857 	 * and the current length of the line. */
858 	diff = overlap(par_format) - len;
859 
860 	/* We check diff > 0 because diff can be negative (i.e., we have
861 	 * an unbroken line of length > overlap(par_format))
862 	 * even when spaces > 1 if the line has only non-breaking spaces. */
863 	if (spaces > 1 && diff > 0) {
864 		int prev_end = 0;
865 		int word;
866 
867 		clear_hchars(html_context, 0, y, overlap(par_format));
868 
869 		for (word = 0; word < spaces; word++) {
870 			/* We have to increase line length by 'diff' num. of
871 			 * characters, so we move 'word'th word 'word_shift'
872 			 * characters right. */
873 			int word_start = space_list[word] + 1;
874 			int word_len = space_list[word + 1] - word_start;
875 			int word_shift;
876 			int new_start;
877 			int new_spaces;
878 
879 			assert(word_len >= 0);
880 			if_assert_failed continue;
881 			if (!word_len) continue;
882 
883 			word_shift = (word * diff) / (spaces - 1);
884 			new_start = word_start + word_shift;
885 
886 			copy_chars(html_context, new_start, y, word_len,
887 				   &line[word_start]);
888 
889 			new_spaces = new_start - prev_end - 1;
890 			if (word && new_spaces) {
891 				move_links(html_context, prev_end + 1, y, new_start, y);
892 				insert_spaces_in_link(part,
893 						      new_start, y, new_spaces);
894 			}
895 
896 			prev_end = new_start + word_len;
897 		}
898 	}
899 
900 	fmem_free(space_list);
901 	fmem_free(line);
902 }
903 
904 static void
align_line(struct html_context * html_context,int y,int last)905 align_line(struct html_context *html_context, int y, int last)
906 {
907 	struct part *part;
908 	int shift;
909 	int len;
910 
911 	assert(html_context);
912 	if_assert_failed return;
913 
914 	part = html_context->part;
915 
916 	assert(part && part->document && part->document->data);
917 	if_assert_failed return;
918 
919 	len = LEN(y);
920 
921 	if (!len || par_format.align == ALIGN_LEFT)
922 		return;
923 
924 	if (par_format.align == ALIGN_JUSTIFY) {
925 		if (!last)
926 			justify_line(html_context, y);
927 		return;
928 	}
929 
930 	shift = overlap(par_format) - len;
931 	if (par_format.align == ALIGN_CENTER)
932 		shift /= 2;
933 	if (shift > 0)
934 		shift_chars(html_context, y, shift);
935 }
936 
937 static inline void
init_link_event_hooks(struct html_context * html_context,struct link * link)938 init_link_event_hooks(struct html_context *html_context, struct link *link)
939 {
940 	link->event_hooks = mem_calloc(1, sizeof(*link->event_hooks));
941 	if (!link->event_hooks) return;
942 
943 #define add_evhook(list_, type_, src_)						\
944 	do {									\
945 		struct script_event_hook *evhook;				\
946 										\
947 		if (!src_) break;						\
948 										\
949 		evhook = mem_calloc(1, sizeof(*evhook));			\
950 		if (!evhook) break;						\
951 										\
952 		evhook->type = type_;						\
953 		evhook->src  = stracpy(src_);					\
954 		add_to_list(*(list_), evhook);					\
955 	} while (0)
956 
957 	init_list(*link->event_hooks);
958 	add_evhook(link->event_hooks, SEVHOOK_ONCLICK, format.onclick);
959 	add_evhook(link->event_hooks, SEVHOOK_ONDBLCLICK, format.ondblclick);
960 	add_evhook(link->event_hooks, SEVHOOK_ONMOUSEOVER, format.onmouseover);
961 	add_evhook(link->event_hooks, SEVHOOK_ONHOVER, format.onhover);
962 	add_evhook(link->event_hooks, SEVHOOK_ONFOCUS, format.onfocus);
963 	add_evhook(link->event_hooks, SEVHOOK_ONMOUSEOUT, format.onmouseout);
964 	add_evhook(link->event_hooks, SEVHOOK_ONBLUR, format.onblur);
965 
966 #undef add_evhook
967 }
968 
969 static struct link *
new_link(struct html_context * html_context,unsigned char * name,int namelen)970 new_link(struct html_context *html_context, unsigned char *name, int namelen)
971 {
972 	struct document *document;
973 	struct part *part;
974 	int link_number;
975 	struct link *link;
976 
977 	assert(html_context);
978 	if_assert_failed return NULL;
979 
980 	part = html_context->part;
981 
982 	assert(part);
983 	if_assert_failed return NULL;
984 
985 	document = part->document;
986 
987 	assert(document);
988 	if_assert_failed return NULL;
989 
990 	link_number = part->link_num;
991 
992 	if (!ALIGN_LINK(&document->links, document->nlinks, document->nlinks + 1))
993 		return NULL;
994 
995 	link = &document->links[document->nlinks++];
996 	link->number = link_number - 1;
997 	if (document->options.use_tabindex) link->number += format.tabindex;
998 	link->accesskey = format.accesskey;
999 	link->title = null_or_stracpy(format.title);
1000 	link->where_img = null_or_stracpy(format.image);
1001 
1002 	if (!format.form) {
1003 		link->target = null_or_stracpy(format.target);
1004 		link->data.name = memacpy(name, namelen);
1005 		/* if (strlen(url) > 4 && !c_strncasecmp(url, "MAP@", 4)) { */
1006 		if (format.link
1007 		    && ((format.link[0]|32) == 'm')
1008 		    && ((format.link[1]|32) == 'a')
1009 		    && ((format.link[2]|32) == 'p')
1010 		    && 	(format.link[3]     == '@')
1011 		    &&   format.link[4]) {
1012 			link->type = LINK_MAP;
1013 			link->where = stracpy(format.link + 4);
1014 		} else {
1015 			link->type = LINK_HYPERTEXT;
1016 			link->where = null_or_stracpy(format.link);
1017 		}
1018 
1019 	} else {
1020 		struct form_control *fc = format.form;
1021 		struct form *form;
1022 
1023 		switch (fc->type) {
1024 		case FC_TEXT:
1025 		case FC_PASSWORD:
1026 		case FC_FILE:
1027 			link->type = LINK_FIELD;
1028 			break;
1029 		case FC_TEXTAREA:
1030 			link->type = LINK_AREA;
1031 			break;
1032 		case FC_CHECKBOX:
1033 		case FC_RADIO:
1034 			link->type = LINK_CHECKBOX;
1035 			break;
1036 		case FC_SELECT:
1037 			link->type = LINK_SELECT;
1038 			break;
1039 		case FC_SUBMIT:
1040 		case FC_IMAGE:
1041 		case FC_RESET:
1042 		case FC_BUTTON:
1043 		case FC_HIDDEN:
1044 			link->type = LINK_BUTTON;
1045 		}
1046 		link->data.form_control = fc;
1047 		/* At this point, format.form might already be set but
1048 		 * the form_control not registered through SP_CONTROL
1049 		 * yet, therefore without fc->form set. It is always
1050 		 * after the "good" last form was already processed,
1051 		 * though, so we can safely just take that. */
1052 		form = fc->form;
1053 		if (!form && !list_empty(document->forms))
1054 			form = document->forms.next;
1055 		link->target = null_or_stracpy(form ? form->target : NULL);
1056 	}
1057 
1058 	link->color.background = format.style.bg;
1059 	link->color.foreground = link_is_textinput(link)
1060 				? format.style.fg : format.clink;
1061 
1062 	init_link_event_hooks(html_context, link);
1063 
1064 	return link;
1065 }
1066 
1067 static void
html_special_tag(struct document * document,unsigned char * t,int x,int y)1068 html_special_tag(struct document *document, unsigned char *t, int x, int y)
1069 {
1070 	struct tag *tag;
1071 	int tag_len;
1072 
1073 	assert(document);
1074 	if_assert_failed return;
1075 
1076 	tag_len = strlen(t);
1077 	/* One byte is reserved for name in struct tag. */
1078 	tag = mem_alloc(sizeof(*tag) + tag_len);
1079 	if (!tag) return;
1080 
1081 	tag->x = x;
1082 	tag->y = y;
1083 	memcpy(tag->name, t, tag_len + 1);
1084 	add_to_list(document->tags, tag);
1085 	if (renderer_context.last_tag_for_newline == (struct tag *) &document->tags)
1086 		renderer_context.last_tag_for_newline = tag;
1087 }
1088 
1089 
1090 static void
put_chars_conv(struct html_context * html_context,unsigned char * chars,int charslen)1091 put_chars_conv(struct html_context *html_context,
1092                unsigned char *chars, int charslen)
1093 {
1094 	struct part *part;
1095 
1096 	assert(html_context);
1097 	if_assert_failed return;
1098 
1099 	part = html_context->part;
1100 
1101 	assert(part && chars && charslen);
1102 	if_assert_failed return;
1103 
1104 	if (format.style.attr & AT_GRAPHICS) {
1105 		put_chars(html_context, chars, charslen);
1106 		return;
1107 	}
1108 
1109 	convert_string(renderer_context.convert_table, chars, charslen,
1110 	               html_context->options->cp,
1111 	               CSM_DEFAULT, NULL, (void (*)(void *, unsigned char *, int)) put_chars, html_context);
1112 }
1113 
1114 static inline void
put_link_number(struct html_context * html_context)1115 put_link_number(struct html_context *html_context)
1116 {
1117 	struct part *part = html_context->part;
1118 	unsigned char s[64];
1119 	unsigned char *fl = format.link;
1120 	unsigned char *ft = format.target;
1121 	unsigned char *fi = format.image;
1122 	struct form_control *ff = format.form;
1123 	int slen = 0;
1124 
1125 	format.link = format.target = format.image = NULL;
1126 	format.form = NULL;
1127 
1128 	s[slen++] = '[';
1129 	ulongcat(s, &slen, part->link_num, sizeof(s) - 3, 0);
1130 	s[slen++] = ']';
1131 	s[slen] = '\0';
1132 
1133 	renderer_context.nosearchable = 1;
1134 	put_chars(html_context, s, slen);
1135 	renderer_context.nosearchable = 0;
1136 
1137 	if (ff && ff->type == FC_TEXTAREA) line_break(html_context);
1138 
1139 	/* We might have ended up on a new line after the line breaking
1140 	 * or putting the link number chars. */
1141 	if (part->cx == -1) part->cx = par_format.leftmargin;
1142 
1143 	format.link = fl;
1144 	format.target = ft;
1145 	format.image = fi;
1146 	format.form = ff;
1147 }
1148 
1149 #define assert_link_variable(old, new) \
1150 	assertm(!(old), "Old link value [%s]. New value [%s]", old, new);
1151 
1152 static inline void
init_link_state_info(unsigned char * link,unsigned char * target,unsigned char * image,struct form_control * form)1153 init_link_state_info(unsigned char *link, unsigned char *target,
1154 		     unsigned char *image, struct form_control *form)
1155 {
1156 	assert_link_variable(renderer_context.link_state_info.image, image);
1157 	assert_link_variable(renderer_context.link_state_info.target, target);
1158 	assert_link_variable(renderer_context.link_state_info.link, link);
1159 
1160 	renderer_context.link_state_info.link = null_or_stracpy(link);
1161 	renderer_context.link_state_info.target = null_or_stracpy(target);
1162 	renderer_context.link_state_info.image = null_or_stracpy(image);
1163 	renderer_context.link_state_info.form = form;
1164 }
1165 
1166 static inline void
done_link_state_info(void)1167 done_link_state_info(void)
1168 {
1169 	mem_free_if(renderer_context.link_state_info.link);
1170 	mem_free_if(renderer_context.link_state_info.target);
1171 	mem_free_if(renderer_context.link_state_info.image);
1172 	memset(&renderer_context.link_state_info, 0,
1173 	       sizeof(renderer_context.link_state_info));
1174 }
1175 
1176 static inline void
process_link(struct html_context * html_context,enum link_state link_state,unsigned char * chars,int charslen)1177 process_link(struct html_context *html_context, enum link_state link_state,
1178 	     unsigned char *chars, int charslen)
1179 {
1180 	struct part *part = html_context->part;
1181 	struct link *link;
1182 	int x_offset = 0;
1183 
1184 	switch (link_state) {
1185 	case LINK_STATE_SAME: {
1186 		unsigned char *name;
1187 
1188 		if (!part->document) return;
1189 
1190 		assertm(part->document->nlinks > 0, "no link");
1191 		if_assert_failed return;
1192 
1193 		link = &part->document->links[part->document->nlinks - 1];
1194 
1195 		name = get_link_name(link);
1196 		if (name) {
1197 			unsigned char *new_name;
1198 
1199 			new_name = straconcat(name, chars, NULL);
1200 			if (new_name) {
1201 				mem_free(name);
1202 				link->data.name = new_name;
1203 			}
1204 		}
1205 
1206 		/* FIXME: Concatenating two adjectent <a> elements to a single
1207 		 * link is broken since we lose the event handlers for the
1208 		 * second one.  OTOH simply appending them here won't fly since
1209 		 * we may get here multiple times for even a single link. We
1210 		 * will probably need some SP_ for creating a new link or so.
1211 		 * --pasky */
1212 
1213 		break;
1214 	}
1215 
1216 	case LINK_STATE_NEW:
1217 		part->link_num++;
1218 
1219 		init_link_state_info(format.link, format.target,
1220 				     format.image, format.form);
1221 		if (!part->document) return;
1222 
1223 		/* Trim leading space from the link text */
1224 		while (x_offset < charslen && chars[x_offset] <= ' ')
1225 			x_offset++;
1226 
1227 		if (x_offset) {
1228 			charslen -= x_offset;
1229 			chars += x_offset;
1230 		}
1231 
1232 		link = new_link(html_context, chars, charslen);
1233 		if (!link) return;
1234 
1235 		break;
1236 
1237 	case LINK_STATE_NONE:
1238 	default:
1239 		INTERNAL("bad link_state %i", (int) link_state);
1240 		return;
1241 	}
1242 
1243 	/* Add new canvas positions to the link. */
1244 	if (realloc_points(link, link->npoints + charslen)) {
1245 		struct point *point = &link->points[link->npoints];
1246 		int x = X(part->cx) + x_offset;
1247 		int y = Y(part->cy);
1248 
1249 		link->npoints += charslen;
1250 
1251 		for (; charslen > 0; charslen--, point++, x++) {
1252 			point->x = x;
1253 			point->y = y;
1254 		}
1255 	}
1256 }
1257 
1258 static inline enum link_state
get_link_state(struct html_context * html_context)1259 get_link_state(struct html_context *html_context)
1260 {
1261 	enum link_state state;
1262 
1263 	if (!(format.link || format.image || format.form)) {
1264 		state = LINK_STATE_NONE;
1265 
1266 	} else if ((renderer_context.link_state_info.link
1267 		    || renderer_context.link_state_info.image
1268 		    || renderer_context.link_state_info.form)
1269 		   && !xstrcmp(format.link, renderer_context.link_state_info.link)
1270 		   && !xstrcmp(format.target, renderer_context.link_state_info.target)
1271 		   && !xstrcmp(format.image, renderer_context.link_state_info.image)
1272 		   && format.form == renderer_context.link_state_info.form) {
1273 
1274 		return LINK_STATE_SAME;
1275 
1276 	} else {
1277 		state = LINK_STATE_NEW;
1278 	}
1279 
1280 	done_link_state_info();
1281 
1282 	return state;
1283 }
1284 
/* Return 1 if any of the first @charslen bytes of @chars is not whitespace
 * according to isspace(), 0 otherwise (including for an empty range). */
static inline int
html_has_non_space_chars(unsigned char *chars, int charslen)
{
	int pos;

	for (pos = 0; pos < charslen; pos++) {
		if (!isspace(chars[pos]))
			return 1;
	}

	return 0;
}
1296 
1297 void
put_chars(struct html_context * html_context,unsigned char * chars,int charslen)1298 put_chars(struct html_context *html_context, unsigned char *chars, int charslen)
1299 {
1300 	enum link_state link_state;
1301 	int update_after_subscript = renderer_context.subscript;
1302 	struct part *part;
1303 
1304 	assert(html_context);
1305 	if_assert_failed return;
1306 
1307 	part = html_context->part;
1308 
1309 	assert(part);
1310 	if_assert_failed return;
1311 
1312 	assert(chars && charslen);
1313 	if_assert_failed return;
1314 
1315 	/* If we are not handling verbatim aligning and we are at the begining
1316 	 * of a line trim whitespace. */
1317 	if (part->cx == -1) {
1318 		/* If we are not handling verbatim aligning trim leading
1319 		 * whitespaces. */
1320 		if (!html_is_preformatted()) {
1321 			while (charslen && *chars == ' ') {
1322 				chars++;
1323 				charslen--;
1324 			}
1325 
1326 			if (charslen < 1) return;
1327 		}
1328 
1329 		part->cx = par_format.leftmargin;
1330 	}
1331 
1332 	/* For preformatted html always update 'the last tag' so we never end
1333 	 * up moving tags to the wrong line (Fixes bug 324). For all other html
1334 	 * it is moved only when the line being rendered carry some real
1335 	 * non-whitespace content. */
1336 	if (html_is_preformatted()
1337 	    || html_has_non_space_chars(chars, charslen)) {
1338 		renderer_context.last_tag_for_newline = (struct tag *) &part->document->tags;
1339 	}
1340 
1341 	int_lower_bound(&part->box.height, part->cy + 1);
1342 
1343 	link_state = get_link_state(html_context);
1344 
1345 	if (link_state == LINK_STATE_NEW) {
1346 		int x_offset = 0;
1347 
1348 		/* Don't add inaccessible links. It seems to be caused
1349 		 * by the parser putting a space char after stuff like
1350 		 * <img>-tags or comments wrapped in <a>-tags. See bug
1351 		 * 30 for test case. */
1352 		while (x_offset < charslen && chars[x_offset] <= ' ')
1353 			x_offset++;
1354 
1355 		/* For pure spaces reset the link state */
1356 		if (x_offset == charslen)
1357 			link_state = LINK_STATE_NONE;
1358 		else if (html_context->options->links_numbering)
1359 			put_link_number(html_context);
1360 	}
1361 
1362 	set_hline(html_context, chars, charslen, link_state);
1363 
1364 	if (link_state != LINK_STATE_NONE) {
1365 
1366 #define is_drawing_subs_or_sups() \
1367 		((format.style.attr & AT_SUBSCRIPT \
1368 		  && html_context->options->display_subs) \
1369 		 || (format.style.attr & AT_SUPERSCRIPT \
1370 		     && html_context->options->display_sups))
1371 
1372 		/* We need to update the current @link_state because <sub> and
1373 		 * <sup> tags will output to the canvas using an inner
1374 		 * put_chars() call which results in their process_link() call
1375 		 * will ``update'' the @link_state. */
1376 		if (link_state == LINK_STATE_NEW
1377 		    && (is_drawing_subs_or_sups()
1378 			|| update_after_subscript != renderer_context.subscript)) {
1379 			link_state = get_link_state(html_context);
1380 		}
1381 
1382 #undef is_drawing_subs_or_sups
1383 
1384 		process_link(html_context, link_state, chars, charslen);
1385 	}
1386 
1387 	if (renderer_context.nowrap
1388 	    && part->cx + charslen > overlap(par_format))
1389 		return;
1390 
1391 	part->cx += charslen;
1392 	renderer_context.nobreak = 0;
1393 
1394 	if (!(html_context->options->wrap || html_is_preformatted())) {
1395 		while (part->cx > overlap(par_format)
1396 		       && part->cx > par_format.leftmargin) {
1397 			int x = split_line(html_context);
1398 
1399 			if (!x) break;
1400 			if (part->document)
1401 				align_line(html_context, part->cy - 1, 0);
1402 			renderer_context.nobreak = !!(x - 1);
1403 		}
1404 	}
1405 
1406 	assert(charslen > 0);
1407 	part->xa += charslen;
1408 	int_lower_bound(&part->max_width, part->xa
1409 			+ par_format.leftmargin + par_format.rightmargin
1410 			- (chars[charslen - 1] == ' '
1411 			   && !html_is_preformatted()));
1412 	return;
1413 
1414 }
1415 
1416 #undef overlap
1417 
1418 void
line_break(struct html_context * html_context)1419 line_break(struct html_context *html_context)
1420 {
1421 	struct part *part;
1422 	struct tag *tag;
1423 
1424 	assert(html_context);
1425 	if_assert_failed return;
1426 
1427 	part = html_context->part;
1428 
1429 	assert(part);
1430 	if_assert_failed return;
1431 
1432 	int_lower_bound(&part->box.width, part->cx + par_format.rightmargin);
1433 
1434 	if (renderer_context.nobreak) {
1435 		renderer_context.nobreak = 0;
1436 		part->cx = -1;
1437 		part->xa = 0;
1438 		return;
1439 	}
1440 
1441 	if (!part->document || !part->document->data) goto end;
1442 
1443 	if (!realloc_lines(part->document, part->box.height + part->cy + 1))
1444 		return;
1445 
1446 	if (part->cx > par_format.leftmargin && LEN(part->cy) > part->cx - 1
1447 	    && POS(part->cx - 1, part->cy).data == ' ') {
1448 		del_chars(html_context, part->cx - 1, part->cy);
1449 		part->cx--;
1450 	}
1451 
1452 	if (part->cx > 0) align_line(html_context, part->cy, 1);
1453 
1454 	for (tag = renderer_context.last_tag_for_newline;
1455 	     tag && tag != (struct tag *) &part->document->tags;
1456 	     tag = tag->prev) {
1457 		tag->x = X(0);
1458 		tag->y = Y(part->cy + 1);
1459 	}
1460 
1461 end:
1462 	part->cy++;
1463 	part->cx = -1;
1464 	part->xa = 0;
1465    	memset(part->spaces, 0, part->spaces_len);
1466 }
1467 
1468 static void
html_special_form(struct part * part,struct form * form)1469 html_special_form(struct part *part, struct form *form)
1470 {
1471 	struct form *nform;
1472 
1473 	assert(part && form);
1474 	assert(form->form_num > 0);
1475 	assert(form->form_end == INT_MAX);
1476 	if_assert_failed return;
1477 
1478 	if (!part->document) {
1479 		done_form(form);
1480 		return;
1481 	}
1482 
1483 	/* Make a fake form with form_num == 0 so that there is
1484 	 * something to use if form controls appear above the first
1485 	 * actual FORM element.  There can never be a real form with
1486 	 * form_num == 0 because the form_num is the position after the
1487 	 * "<form" characters and that's already five characters.  The
1488 	 * fake form does not have a name, and it gets a form_view and
1489 	 * becomes visible to ECMAScript only if it actually has
1490 	 * controls in it.  */
1491 	if (list_empty(part->document->forms)) {
1492 		nform = init_form();
1493 		if (!nform) {
1494 			done_form(form);
1495 			return;
1496 		}
1497 		nform->form_num = 0;
1498 		add_to_list(part->document->forms, nform);
1499 	}
1500 
1501 	/* Make sure the new form ``claims'' its slice of the form range
1502 	 * maintained in the form_num and form_end variables. */
1503 	foreach (nform, part->document->forms) {
1504 		if (form->form_num < nform->form_num
1505 		    || nform->form_end < form->form_num)
1506 			continue;
1507 
1508 		/* First check if the form has identical form numbers.
1509 		 * That should only be the case when the form being
1510 		 * added is in fact the same form in which case it
1511 		 * should be dropped. The fact that this can happen
1512 		 * suggests that the table renderering can be confused.
1513 		 * See bug 647 for a test case.
1514 		 * Do not compare form->form_end here because it is
1515 		 * normally set by this function and that has obviously
1516 		 * not yet been done.  */
1517 		if (nform->form_num == form->form_num) {
1518 			done_form(form);
1519 			return;
1520 		}
1521 
1522 		/* The form start is inside an already added form, so
1523 		 * partition the space of the existing form and get
1524 		 * |old|new|. */
1525 		form->form_end = nform->form_end;
1526 		nform->form_end = form->form_num - 1;
1527 		assertm(nform->form_num <= nform->form_end,
1528 			"[%d:%d] [%d:%d]", nform->form_num, nform->form_end,
1529 			form->form_num, form->form_end);
1530 		add_to_list(part->document->forms, form);
1531 		return;
1532 	}
1533 
1534 	ERROR("hole between forms");
1535 	done_form(form);
1536 	return;
1537 }
1538 
1539 static void
html_special_form_control(struct part * part,struct form_control * fc)1540 html_special_form_control(struct part *part, struct form_control *fc)
1541 {
1542 	struct form *form;
1543 
1544 	assert(part && fc);
1545 	if_assert_failed return;
1546 
1547 	if (!part->document) {
1548 		done_form_control(fc);
1549 		mem_free(fc);
1550 		return;
1551 	}
1552 
1553 	fc->g_ctrl_num = renderer_context.g_ctrl_num++;
1554 
1555 	/* We don't want to recode hidden fields. */
1556 	if (fc->type == FC_TEXT || fc->type == FC_PASSWORD ||
1557 	    fc->type == FC_TEXTAREA) {
1558 		unsigned char *dv = convert_string(renderer_context.convert_table,
1559 						   fc->default_value,
1560 						   strlen(fc->default_value),
1561 						   part->document->options.cp,
1562 						   CSM_QUERY, NULL, NULL, NULL);
1563 
1564 		if (dv) mem_free_set(&fc->default_value, dv);
1565 	}
1566 
1567 	if (list_empty(part->document->forms)) {
1568 		/* No forms encountered yet, that means a homeless form
1569 		 * control. Generate a dummy form for those Flying
1570 		 * Dutchmans. */
1571 		form = init_form();
1572 		form->form_num = 0;
1573 		add_to_list(part->document->forms, form);
1574 	}
1575 	/* Attach this form control to the last form encountered. */
1576 	form = part->document->forms.next;
1577 	fc->form = form;
1578 	add_to_list(form->items, fc);
1579 }
1580 
#ifdef CONFIG_DEBUG
/** Debugging aid: assert that the form.form_num ... form.form_end ranges
 * of the forms in @forms do not overlap, that every range except the one
 * ending at INT_MAX is directly followed by exactly one other range, and
 * that exactly one range starts at 0.  An empty list is accepted as is.
 * This function can be called from a debugger, or automatically from some
 * places.
 *
 * This function may leave assert_failed = 1; the caller must use
 * if_assert_failed.  */
static void
assert_forms_list_ok(struct list_head *forms)
{
	struct form *outer;
	int saw_form_num_0 = 0;

	if (list_empty(*forms)) return;

	/* Quadratic in the number of forms, which is fine for a debugging
	 * check. */
	foreach (outer, *forms) {
		struct form *inner;
		int followers = 0;

		if (!outer->form_num)
			saw_form_num_0 += 1;

		foreach (inner, *forms) {
			/* Two different forms must not share any number. */
			assert(inner == outer
			       || inner->form_num > outer->form_end
			       || outer->form_num > inner->form_end);

			/* Count the ranges that start right after @outer. */
			if (inner->form_num - 1 == outer->form_end)
				followers += 1;
		}

		if (outer->form_end == INT_MAX) {
			assert(followers == 0);
		} else {
			assert(followers == 1);
		}
	}

	assert(saw_form_num_0 == 1);
}
#else  /* !CONFIG_DEBUG */
# define assert_forms_list_ok(forms) ((void) 0)
#endif /* !CONFIG_DEBUG */
1625 
1626 /* Reparents form items based on position in the source. */
1627 void
check_html_form_hierarchy(struct part * part)1628 check_html_form_hierarchy(struct part *part)
1629 {
1630 	struct document *document = part->document;
1631 	INIT_LIST_HEAD(form_controls);
1632 	struct form *form;
1633 	struct form_control *fc, *next;
1634 
1635 	if (list_empty(document->forms))
1636 		return;
1637 
1638 	assert_forms_list_ok(&document->forms);
1639 	if_assert_failed {}
1640 
1641 	/* Take out all badly placed form items. */
1642 
1643 	foreach (form, document->forms) {
1644 
1645 		assertm(form->form_num <= form->form_end,
1646 			"%p [%d : %d]", form, form->form_num, form->form_end);
1647 
1648 		foreachsafe (fc, next, form->items) {
1649 			if (form->form_num <= fc->position
1650 			    && fc->position <= form->form_end)
1651 				continue;
1652 
1653 			move_to_top_of_list(form_controls, fc);
1654 		}
1655 	}
1656 
1657 	/* Re-insert the form items the correct places. */
1658 
1659 	foreachsafe (fc, next, form_controls) {
1660 
1661 		foreach (form, document->forms) {
1662 			if (fc->position < form->form_num
1663 			    || form->form_end < fc->position)
1664 				continue;
1665 
1666 			fc->form = form;
1667 			move_to_top_of_list(form->items, fc);
1668 			break;
1669 		}
1670 	}
1671 
1672 	assert(list_empty(form_controls));
1673 }
1674 
1675 static inline void
color_link_lines(struct html_context * html_context)1676 color_link_lines(struct html_context *html_context)
1677 {
1678 	struct document *document = html_context->part->document;
1679 	struct color_pair colors = INIT_COLOR_PAIR(par_format.bgcolor, 0x0);
1680 	enum color_mode color_mode = document->options.color_mode;
1681 	enum color_flags color_flags = document->options.color_flags;
1682 	int y;
1683 
1684 	for (y = 0; y < document->height; y++) {
1685 		int x;
1686 
1687 		for (x = 0; x < document->data[y].length; x++) {
1688 			struct screen_char *schar = &document->data[y].chars[x];
1689 
1690 			set_term_color(schar, &colors, color_flags, color_mode);
1691 
1692 			/* XXX: Entering hack zone! Change to clink color after
1693 			 * link text has been recolored. */
1694 			if (schar->data == ':' && colors.foreground == 0x0)
1695 				colors.foreground = format.clink;
1696 		}
1697 
1698 		colors.foreground = 0x0;
1699 	}
1700 }
1701 
1702 static void *
html_special(struct html_context * html_context,enum html_special_type c,...)1703 html_special(struct html_context *html_context, enum html_special_type c, ...)
1704 {
1705 	va_list l;
1706 	struct part *part;
1707 	struct document *document;
1708 	void *ret_val = NULL;
1709 
1710 	assert(html_context);
1711 	if_assert_failed return NULL;
1712 
1713 	part = html_context->part;
1714 
1715 	assert(part);
1716 	if_assert_failed return NULL;
1717 
1718 	document = part->document;
1719 
1720 	va_start(l, c);
1721 	switch (c) {
1722 		case SP_TAG:
1723 			if (document) {
1724 				unsigned char *t = va_arg(l, unsigned char *);
1725 
1726 				html_special_tag(document, t, X(part->cx), Y(part->cy));
1727 			}
1728 			break;
1729 		case SP_FORM:
1730 		{
1731 			struct form *form = va_arg(l, struct form *);
1732 
1733 			html_special_form(part, form);
1734 			break;
1735 		}
1736 		case SP_CONTROL:
1737 		{
1738 			struct form_control *fc = va_arg(l, struct form_control *);
1739 
1740 			html_special_form_control(part, fc);
1741 			break;
1742 		}
1743 		case SP_TABLE:
1744 			ret_val = renderer_context.convert_table;
1745 			break;
1746 		case SP_USED:
1747 			ret_val = (void *) (long) !!document;
1748 			break;
1749 		case SP_CACHE_CONTROL:
1750 		{
1751 			struct cache_entry *cached = renderer_context.cached;
1752 
1753 			cached->cache_mode = CACHE_MODE_NEVER;
1754 			cached->expire = 0;
1755 			break;
1756 		}
1757 		case SP_CACHE_EXPIRES:
1758 		{
1759 			time_t expires = va_arg(l, time_t);
1760 			struct cache_entry *cached = renderer_context.cached;
1761 
1762 			if (!expires || cached->cache_mode == CACHE_MODE_NEVER)
1763 				break;
1764 
1765 			timeval_from_seconds(&cached->max_age, expires);
1766 			cached->expire = 1;
1767 			break;
1768 		}
1769 		case SP_FRAMESET:
1770 		{
1771 			struct frameset_param *fsp = va_arg(l, struct frameset_param *);
1772 			struct frameset_desc *frameset_desc;
1773 
1774 			if (!fsp->parent && document->frame_desc)
1775 				break;
1776 
1777 			frameset_desc = create_frameset(fsp);
1778 			if (!fsp->parent && !document->frame_desc)
1779 				document->frame_desc = frameset_desc;
1780 
1781 			ret_val = frameset_desc;
1782 			break;
1783 		}
1784 		case SP_FRAME:
1785 		{
1786 			struct frameset_desc *parent = va_arg(l, struct frameset_desc *);
1787 			unsigned char *name = va_arg(l, unsigned char *);
1788 			unsigned char *url = va_arg(l, unsigned char *);
1789 
1790 			add_frameset_entry(parent, NULL, name, url);
1791 			break;
1792 		}
1793 		case SP_NOWRAP:
1794 			renderer_context.nowrap = !!va_arg(l, int);
1795 			break;
1796 		case SP_REFRESH:
1797 		{
1798 			unsigned long seconds = va_arg(l, unsigned long);
1799 			unsigned char *t = va_arg(l, unsigned char *);
1800 
1801 			if (document) {
1802 				if (document->refresh)
1803 					done_document_refresh(document->refresh);
1804 				document->refresh = init_document_refresh(t, seconds);
1805 			}
1806 			break;
1807 		}
1808 		case SP_COLOR_LINK_LINES:
1809 			if (document && use_document_bg_colors(&document->options))
1810 				color_link_lines(html_context);
1811 			break;
1812 		case SP_STYLESHEET:
1813 #ifdef CONFIG_CSS
1814 			if (document) {
1815 				struct uri *uri = va_arg(l, struct uri *);
1816 
1817 				add_to_uri_list(&document->css_imports, uri);
1818 			}
1819 #endif
1820 			break;
1821 		case SP_SCRIPT:
1822 #ifdef CONFIG_ECMASCRIPT
1823 			if (document) {
1824 				struct uri *uri = va_arg(l, struct uri *);
1825 
1826 				add_to_uri_list(&document->ecmascript_imports, uri);
1827 			}
1828 #endif
1829 			break;
1830 	}
1831 
1832 	va_end(l);
1833 
1834 	return ret_val;
1835 }
1836 
1837 void
free_table_cache(void)1838 free_table_cache(void)
1839 {
1840 	if (table_cache) {
1841 		struct hash_item *item;
1842 		int i;
1843 
1844 		/* We do not free key here. */
1845 		foreach_hash_item (item, *table_cache, i) {
1846 			mem_free_if(item->value);
1847 		}
1848 
1849 		free_hash(table_cache);
1850 	}
1851 
1852 	table_cache = NULL;
1853 	table_cache_entries = 0;
1854 }
1855 
1856 struct part *
format_html_part(struct html_context * html_context,unsigned char * start,unsigned char * end,int align,int margin,int width,struct document * document,int x,int y,unsigned char * head,int link_num)1857 format_html_part(struct html_context *html_context,
1858 		 unsigned char *start, unsigned char *end,
1859 		 int align, int margin, int width, struct document *document,
1860 		 int x, int y, unsigned char *head,
1861 		 int link_num)
1862 {
1863 	struct part *part;
1864 	struct html_element *html_state;
1865 	int llm = renderer_context.last_link_to_move;
1866 	struct tag *ltm = renderer_context.last_tag_to_move;
1867 	int ef = renderer_context.empty_format;
1868 	int lm = html_context->margin;
1869 
1870 	/* Hash creation if needed. */
1871 	if (!table_cache) {
1872 		table_cache = init_hash(8, &strhash);
1873 	} else if (!document) {
1874 		/* Search for cached entry. */
1875 		struct table_cache_entry_key key;
1876 		struct hash_item *item;
1877 
1878 		/* Clear key to prevent potential alignment problem
1879 		 * when keys are compared. */
1880 		memset(&key, 0, sizeof(key));
1881 
1882 		key.start = start;
1883 		key.end = end;
1884 		key.align = align;
1885 		key.margin = margin;
1886 		key.width = width;
1887 		key.x = x;
1888 		key.link_num = link_num;
1889 
1890 		item = get_hash_item(table_cache,
1891 				     (unsigned char *) &key,
1892 				     sizeof(key));
1893 		if (item) { /* We found it in cache, so just copy and return. */
1894 			part = mem_alloc(sizeof(*part));
1895 			if (part)  {
1896 				copy_struct(part, &((struct table_cache_entry *)
1897 						    item->value)->part);
1898 				return part;
1899 			}
1900 		}
1901 	}
1902 
1903 	assertm(y >= 0, "format_html_part: y == %d", y);
1904 	if_assert_failed return NULL;
1905 
1906 	if (document) {
1907 		struct node *node = mem_alloc(sizeof(*node));
1908 
1909 		if (node) {
1910 			int node_width = !html_context->table_level ? INT_MAX : width;
1911 
1912 			set_box(&node->box, x, y, node_width, 1);
1913 			add_to_list(document->nodes, node);
1914 		}
1915 
1916 		renderer_context.last_link_to_move = document->nlinks;
1917 		renderer_context.last_tag_to_move = (struct tag *) &document->tags;
1918 		renderer_context.last_tag_for_newline = (struct tag *) &document->tags;
1919 	} else {
1920 		renderer_context.last_link_to_move = 0;
1921 		renderer_context.last_tag_to_move = (struct tag *) NULL;
1922 		renderer_context.last_tag_for_newline = (struct tag *) NULL;
1923 	}
1924 
1925 	html_context->margin = margin;
1926 	renderer_context.empty_format = !document;
1927 
1928 	done_link_state_info();
1929 	renderer_context.nobreak = 1;
1930 
1931 	part = mem_calloc(1, sizeof(*part));
1932 	if (!part) goto ret;
1933 
1934 	part->document = document;
1935 	part->box.x = x;
1936 	part->box.y = y;
1937 	part->cx = -1;
1938 	part->cy = 0;
1939 	part->link_num = link_num;
1940 
1941 	html_state = init_html_parser_state(html_context, ELEMENT_IMMORTAL, align, margin, width);
1942 
1943 	parse_html(start, end, part, head, html_context);
1944 
1945 	done_html_parser_state(html_context, html_state);
1946 
1947 	int_lower_bound(&part->max_width, part->box.width);
1948 
1949 	renderer_context.nobreak = 0;
1950 
1951 	done_link_state_info();
1952 	mem_free_if(part->spaces);
1953 
1954 	if (document) {
1955 		struct node *node = document->nodes.next;
1956 
1957 		node->box.height = y - node->box.y + part->box.height;
1958 	}
1959 
1960 ret:
1961 	renderer_context.last_link_to_move = llm;
1962 	renderer_context.last_tag_to_move = ltm;
1963 	renderer_context.empty_format = ef;
1964 
1965 	html_context->margin = lm;
1966 
1967 	if (html_context->table_level > 1 && !document
1968 	    && table_cache
1969 	    && table_cache_entries < MAX_TABLE_CACHE_ENTRIES) {
1970 		/* Create a new entry. */
1971 		/* Clear memory to prevent bad key comparaison due to alignment
1972 		 * of key fields. */
1973 		struct table_cache_entry *tce = mem_calloc(1, sizeof(*tce));
1974 		/* A goto is used here to prevent a test or code
1975 		 * redundancy. */
1976 		if (!tce) goto end;
1977 
1978 		tce->key.start = start;
1979 		tce->key.end = end;
1980 		tce->key.align = align;
1981 		tce->key.margin = margin;
1982 		tce->key.width = width;
1983 		tce->key.x = x;
1984 		tce->key.link_num = link_num;
1985 		copy_struct(&tce->part, part);
1986 
1987 		if (!add_hash_item(table_cache,
1988 				   (unsigned char *) &tce->key,
1989 				   sizeof(tce->key), tce)) {
1990 			mem_free(tce);
1991 		} else {
1992 			table_cache_entries++;
1993 		}
1994 	}
1995 
1996 end:
1997 
1998 	return part;
1999 }
2000 
2001 void
render_html_document(struct cache_entry * cached,struct document * document,struct string * buffer)2002 render_html_document(struct cache_entry *cached, struct document *document,
2003 		     struct string *buffer)
2004 {
2005 	struct html_context *html_context;
2006 	struct part *part;
2007 	unsigned char *start;
2008 	unsigned char *end;
2009 	struct string title;
2010 	struct string head;
2011 	int i;
2012 
2013 	assert(cached && document);
2014 	if_assert_failed return;
2015 
2016 	if (!init_string(&head)) return;
2017 
2018 	if (cached->head) add_to_string(&head, cached->head);
2019 
2020 	start = buffer->source;
2021 	end = buffer->source + buffer->length;
2022 
2023 	html_context = init_html_parser(cached->uri, &document->options,
2024 	                                start, end, &head, &title,
2025 	                                put_chars_conv, line_break,
2026 	                                html_special);
2027 	if (!html_context) return;
2028 
2029 	renderer_context.g_ctrl_num = 0;
2030 	renderer_context.cached = cached;
2031 	renderer_context.convert_table = get_convert_table(head.source,
2032 							   document->options.cp,
2033 							   document->options.assume_cp,
2034 							   &document->cp,
2035 							   &document->cp_status,
2036 							   document->options.hard_assume);
2037 
2038 	if (title.length) {
2039 		document->title = convert_string(renderer_context.convert_table,
2040 						 title.source, title.length,
2041 						 document->options.cp,
2042 						 CSM_DEFAULT, NULL, NULL, NULL);
2043 	}
2044 	done_string(&title);
2045 
2046 	part = format_html_part(html_context, start, end, par_format.align,
2047 			        par_format.leftmargin,
2048 				document->options.box.width, document,
2049 			        0, 0, head.source, 1);
2050 
2051 	/* Drop empty allocated lines at end of document if any
2052 	 * and adjust document height. */
2053 	for (i = document->height - 1; i >= 0 ; i--) {
2054 		if (!document->data[i].length) {
2055 			mem_free_if(document->data[i].chars);
2056 			document->height--;
2057 		} else break;
2058 	}
2059 
2060 	/* Calculate document width. */
2061 	document->width = 0;
2062 	for (i = 0; i < document->height; i++)
2063 		int_lower_bound(&document->width, document->data[i].length);
2064 
2065 #if 1
2066 	document->options.needs_width = 1;
2067 #else
2068 	/* FIXME: This needs more tuning since if we are centering stuff it
2069 	 * does not work. */
2070 	document->options.needs_width =
2071 				(document->width + (document->options.margin
2072 				 >= document->options.width));
2073 #endif
2074 
2075 	document->bgcolor = par_format.bgcolor;
2076 
2077 	done_html_parser(html_context);
2078 
2079 	/* Drop forms which has been serving as a placeholder for form items
2080 	 * added in the wrong order due to the ordering of table rendering. */
2081 	{
2082 		struct form *form;
2083 
2084 		foreach (form, document->forms) {
2085 			if (form->form_num)
2086 				continue;
2087 
2088 			if (list_empty(form->items))
2089 				done_form(form);
2090 
2091 			break;
2092 		}
2093 	}
2094 
2095 	/* @part was residing in html_context so it has to stay alive until
2096 	 * done_html_parser(). */
2097 	done_string(&head);
2098 	mem_free_if(part);
2099 
2100 #if 0 /* debug purpose */
2101 	{
2102 		FILE *f = fopen("forms", "ab");
2103 		struct form_control *form;
2104 		unsigned char *qq;
2105 		fprintf(f,"FORM:\n");
2106 		foreach (form, document->forms) {
2107 			fprintf(f, "g=%d f=%d c=%d t:%d\n",
2108 				form->g_ctrl_num, form->form_num,
2109 				form->ctrl_num, form->type);
2110 		}
2111 		fprintf(f,"fragment: \n");
2112 		for (qq = start; qq < end; qq++) fprintf(f, "%c", *qq);
2113 		fprintf(f,"----------\n\n");
2114 		fclose(f);
2115 	}
2116 #endif
2117 }
2118 
2119 int
find_tag(struct document * document,unsigned char * name,int namelen)2120 find_tag(struct document *document, unsigned char *name, int namelen)
2121 {
2122 	struct tag *tag;
2123 
2124 	foreach (tag, document->tags)
2125 		if (!c_strlcasecmp(tag->name, -1, name, namelen))
2126 			return tag->y;
2127 
2128 	return -1;
2129 }
2130