1 /* retawq/renderer.c - plaintext/HTML renderer
2    This file is part of retawq (<http://retawq.sourceforge.net/>), a network
3    client created by Arne Thomassen; retawq is basically released under certain
4    versions of the GNU General Public License and WITHOUT ANY WARRANTY.
5    Read the file COPYING for license details, README for program information.
6    Copyright (C) 2001-2005 Arne Thomassen <arne@arne-thomassen.de>
7 */
8 
9 /* This code is roughly part of the user interface; it's taken out for the only
10    reason that source files are smaller this way. Look how it's #include'd
11    from main.c; maybe dirty, but simple and functional...
12 */
13 
14 /* Basic operation of this renderer: the caller sets up some data (in a
15    tRendererData structure); the renderer scans/parses the given content and
16    builds text lines from that; each single line is fed to the caller's "line
17    callback" function which "consumes" the lines and tells the renderer
18    whether further lines should be generated. This is, so far, the most
19    correct/clean/fast rendering concept I was able to render. :-) */
20 
21 #define renderer_interface static /* (currently) */
22 
23 #define HTML_FORMS_SLOPPY 1
24 /* Grumble... Some web page authors write rubbish like
25      <tr><form action="/search.php3"><td> or
26      <table><form action="...."><tr>
27    which is explicitly forbidden by htmlspec, e.g.
28      <!ELEMENT TR       - O (TH|TD)+        -- table row -->
29    ...and then users will blame retawq because they can't submit that form.
30 */
31 
/* What the caller wants from the renderer: for raLayout the renderer sets up
   the "element" field of tRendererData before each line callback; for
   raCalcWidth it stores the width of the line in "resulting_line_width"
   instead. */
my_enum1 enum
{ raUnknown = 0, raLayout = 1, raCalcWidth = 2
} my_enum2(unsigned char) tRendererAction;
35 
/* Flags for tRendererData. As far as visible in this file: rdfCallerDone is
   tested after line callbacks to find out whether further lines are wanted;
   rdfOutOfContent and rdfRendererDone are set by the renderer when the
   content is used up; rdfVirtual suppresses the building of line elements;
   rdfHtml selects the HTML-specific data (see is_html()); rdfAe,
   rdfAttributes and rdfColors enable active-element tracking, text
   attributes and colors in the HTML renderer. */
my_enum1 enum
{ rdfNone = 0, rdfCallerDone = 0x01, rdfRendererDone = 0x02,
  rdfOutOfContent = 0x04, rdfVirtual = 0x08, rdfHtml = 0x10,
  rdfCalledParserHtmlStart = 0x20, rdfAe = 0x40,
#if TGC_IS_GRAPHICS
  rdfGraphical = 0x80, /* create graphical widgets for active elements */
#endif
#if TGC_IS_PIXELING
  rdfPixeling = 0x100, /* measure width/height in pixels, not in characters */
#endif
#if CONFIG_HTML & HTML_FRAMES
  rdfFrames = 0x200, /* build "real" frames */
#endif
  rdfFinal = 0x400, rdfAttributes = 0x800,
#if MIGHT_USE_COLORS
  rdfColors = 0x1000,
#endif
  rdfAlignment = 0x2000
} my_enum2(unsigned short) tRendererDataFlags;
/* either side has declared the rendering finished */
#define rdfAnyDone (rdfCallerDone | rdfRendererDone)
56 
57 typedef unsigned char tRendererText;
58 typedef attr_t tRendererAttr;
59 
/* One piece of a rendered line as presented to the line callback; the pieces
   of a line are chained via "next". */
typedef struct tRendererElement
{ struct tRendererElement* next; /* following piece of the same line */
  tRendererText* text; /* the characters (left cleared for spacers) */
  tRendererAttr* attr; /* per-character attributes; only set by the HTML
                          renderer when attributes are enabled */
  size_t textcount; /* number of characters; for spacers: their width */
  tBoolean is_spacer; /* piece only represents indentation/alignment space */
} tRendererElement;
67 
68 struct tRendererData;
69 typedef void (*tRendererLineCallback)(struct tRendererData*);
70 
/* The interface structure between the caller and the renderer. */
typedef struct tRendererData
{ void* rsd; /* "renderer-specific data", e.g. "tRendererHtmlData*" */
  tRendererLineCallback line_callback; /*CS*/
  void* line_callback_data; /*CS*/
  tBrowserDocument* document; /*CS*/
  const tRendererElement* element; /*CR*/
  /* the HTML renderer's line buffer: characters, and (if enabled) the
     attribute and active-element number of each character, in parallel */
  tRendererText* inttext;
  tRendererAttr* intattr;
  tActiveElementNumber* intaenum;
  size_t inttextlen; /* number of characters currently in the line buffer */
  size_t line_width; /*CS*/
  tCoordinate resulting_line_width; /* (CR for raCalcWidth) */
  tRendererDataFlags flags; /*CS*/
  tRendererAction ra; /*CS*/
} tRendererData;
/* Fields marked "CS" must/may be set by the caller; fields marked "CR" may
   be read by the caller; all other fields are private to the renderer. */
88 
89 #define is_html(data) ((data)->flags & rdfHtml)
90 
/* Description of one HTML list.
   NOTE(review): not referenced in the visible part of this file - confirm
   whether it is still used. */
typedef struct
{ size_t count; /* for ordered lists */
  tRendererText symbol; /* for unordered lists */
  unsigned char depth;
  tBoolean is_ordered;
} tHtmlListBase;
97 
98 typedef signed int tHtmlNestingNumber; /* ("signed" for simplicity only) */
99 
/* Flags of one nesting entry: hnestfBold, hnestfUnderlined and hnestfAeStyle
   are translated into text attributes by html_recalc_currattr();
   hnestfInsidePre is tested by html_inside_pre_tag(); html_nest() sets
   hnestfForbidPre (and clears hnestfInsidePre) for tags which forbid
   preformatted text. */
my_enum1 enum
{ hnestfNone = 0, hnestfBold = 0x01, hnestfUnderlined = 0x02,
  hnestfAeStyle = 0x04, hnestfForbidPre = 0x08, hnestfInsidePre = 0x10
} my_enum2(unsigned char) tHtmlNestingFlags;
/* ("hnestf" to disambiguate from "hnf" (tHtmlNodeFlags) clearly) */
105 
/* One entry of the stack of currently open HTML tags; html_nest() initializes
   a new entry as a copy of its parent entry. */
typedef struct
{ const char* unknown_tagname; /* for htkInvalid */
  tHtmlNestingNumber li_recalc; /* if > 0: nesting index from which on
    li_offset shall be recalculated after the next delivered line */
  size_t li_offset; /* indentation; reduces the usable line width */
  tRendererAttr currattr; /* attribute derived from hnestf */
  tHtmlNodeFlags align; /* contains only hnfAlignAny flags */
  tHtmlTagKind htk; /* the tag which opened this entry */
  tHtmlNestingFlags hnestf;
  unsigned char listdepth; /* incremented (up to 255) for each <li> */
} tHtmlNesting;
116 
/* rhdfAtParStart: the most recently delivered line was empty;
   rhdfIsFirstThTd: no table cell has been seen yet in the current table row
   (set for <table>/<tr>, cleared by the first <th>/<td>) */
my_enum1 enum
{ rhdfNone = 0, rhdfAtParStart = 0x01, rhdfIsFirstThTd = 0x02
} my_enum2(unsigned char) tRendererHtmlDataFlags;
120 
/* The renderer-specific data of the HTML renderer. */
typedef struct
{ tHtmlNesting* nesting; /* stack of open tags; grown by html_nest() */
  /* numnest/maxnest: used/allocated entries of "nesting"; the "..._level"
     fields hold the nesting index of the respective open construct and are
     reset to 0 when it gets closed */
  tHtmlNestingNumber numnest, maxnest, p_level, table_level, ae_level,
    form_level;
  /* _ae: number of the active element currently being rendered (or
     INVALID_AE); __ae: counter, incremented for each node with an active
     element base */
  tActiveElementNumber _ae, __ae;
  /* _hfn: number of the current form (or INVALID_HTML_FORM_NUMBER); __hfn:
     counter, incremented for each good <form> */
  tHtmlFormNumber _hfn, __hfn;
  tLinenumber currline; /* incremented for each delivered line */
  tRendererHtmlDataFlags flags;
} tRendererHtmlData;
130 
131 
132 /* Helper functions */
133 
renderer_deallocate_rsd(tRendererData * data)134 static one_caller void renderer_deallocate_rsd(tRendererData* data)
135 { const void* _rsd = data->rsd;
136   if (_rsd == NULL) return; /* nothing to do */
137   if (is_html(data))
138   { const tRendererHtmlData* rsd = (const tRendererHtmlData*) _rsd;
139     __dealloc(rsd->nesting);
140   }
141   memory_deallocate(_rsd); data->rsd = NULL;
142 }
143 
renderer_deliver_line(tRendererData * data)144 static __my_inline void renderer_deliver_line(tRendererData* data)
145 /* delivers one line of text to the callback */
146 { (data->line_callback)(data);
147 }
148 
find_attribute(const tHtmlNode * node,tAttributeName name)149 static const tAttribute* find_attribute(const tHtmlNode* node,
150   tAttributeName name)
151 { const tAttribute* retval = NULL;
152   if (node->kind != htkText)
153   { const tAttribute* a = (const tAttribute*) (node->data);
154     while (a != NULL)
155     { if (a->name == name) { retval = a; break; }
156       a = a->next;
157     }
158   }
159   return(retval);
160 }
161 
162 
163 /* Plaintext renderer */
164 
plaintext_deliver_line(tRendererData * data,tRendererText * text,size_t * _len)165 static tBoolean plaintext_deliver_line(tRendererData* data,
166   tRendererText* text, size_t* _len)
167 { size_t len = *_len, line_width = data->line_width, using;
168   if (len < line_width) using = len; /* use the whole text */
169   else /* try to break the line at a space character */
170   { size_t si = line_width - 1; /* "split-index" */
171     while ( (si > 0) && (text[si] != ' ') ) si--;
172     if (si > 0) using = si;
173     else using = line_width - 1; /* a whole line of unbreakable text */
174   }
175   switch (data->ra)
176   { case raLayout:
177       if (data->flags & rdfVirtual) data->element = NULL;
178       else
179       { static tBoolean did_init = falsE;
180         static tRendererElement element;
181         if (!did_init) { my_memclr_var(element); did_init = truE; }
182         element.text = text; element.textcount = using;
183         data->element = &element;
184       }
185       break;
186     case raCalcWidth: data->resulting_line_width = using; break;
187   }
188   renderer_deliver_line(data);
189   while ( (using < len) && (text[using] == ' ') ) using++;
190   if (using >= len) len = 0; /* all was used */
191   else
192   { size_t count;
193     for (count = 0; count < len - using; count++)
194       text[count] = text[count + using];
195     len -= using;
196   }
197   *_len = len;
198   return(cond2boolean(!(data->flags & rdfCallerDone)));
199 }
200 
201 #define plaintext_append_char(ch) \
202   do \
203   { inttext[inttextlen++] = (tRendererText) ch; \
204     if (inttextlen > line_width) \
205     { if (!plaintext_deliver_line(data, inttext, &inttextlen)) goto out; } \
206   } while (0)
207 
renderer_plaintext(tRendererData * data)208 static one_caller void renderer_plaintext(tRendererData* data)
209 { const tContentblock* content = data->document->cantent->content;
210   const char* contentdata;
211   const size_t line_width = data->line_width;
212   size_t contentsize, pos = 0, inttextlen = 0,
213     maxinttextlen = line_width + 8 + 4;
214   tRendererText* inttext = __memory_allocate(maxinttextlen *
215     sizeof(tRendererText), mapRendering);
216   unsigned char c;
217   char ch;
218 
219   content_recalc: contentdata = content->data; contentsize = content->used;
220   loop:
221   if (pos >= contentsize)
222   { content = content->next; pos = 0;
223     if (content != NULL) goto content_recalc;
224     else
225     { data->flags |= rdfOutOfContent;
226       while ( (inttextlen > 0) &&
227               (plaintext_deliver_line(data, inttext, &inttextlen)) )
228       { /* deliver any remaining text */ }
229       data->flags |= rdfRendererDone; goto out;
230     }
231   }
232   ch = contentdata[pos++]; c = (unsigned char) ch;
233   if (is_bad_uchar(c))
234   { if (ch == '\n')
235     { if (!plaintext_deliver_line(data, inttext, &inttextlen)) goto out; }
236     else if (ch == '\t')
237     { unsigned char count = 8;
238       while (count-- > 0) plaintext_append_char(' ');
239     }
240     else if (ch != '\r') { ch = '?'; goto do_append; }
241   }
242   else { do_append: plaintext_append_char(ch); }
243   goto loop;
244 
245   out:
246   memory_deallocate(inttext);
247 }
248 
249 #undef plaintext_append_char
250 
251 
252 /* HTML renderer */
253 
/* index of the innermost (current) nesting entry */
#define html_currnest(rsd) ((rsd)->numnest - 1)
/* the field "what" of the current nesting entry; needs a local variable
   named "rsd" */
#define html_nestdata(what) (rsd->nesting[html_currnest(rsd)].what)
/* sets nesting flags in the current entry and recalculates its attribute;
   needs local variables named "rsd" and "data" */
#define html_hnestf_or(value) \
  do { html_nestdata(hnestf) |= (value); html_recalc_currattr(data); } while(0)

/* (this one actually uses the local variable "rsd" too, not its argument,
   because it goes through html_nestdata()) */
#define html_inside_pre_tag(rsd) (html_nestdata(hnestf) & hnestfInsidePre)
/* which optional features are enabled for this rendering? */
#define html_do_ae(data) ((data)->flags & rdfAe)
#define html_do_attr(data) ((data)->flags & rdfAttributes)
#if MIGHT_USE_COLORS
#define html_do_colors(data) ((data)->flags & rdfColors)
#endif
#define set_form_data /* (currently nothing to do) */
266 
html_line_width(tRendererData * data)267 static my_inline size_t html_line_width(tRendererData* data)
268 { const tRendererHtmlData* rsd = (const tRendererHtmlData*) data->rsd;
269   return(data->line_width - html_nestdata(li_offset));
270 }
271 
/* Recalculates the text attribute of the current nesting entry from its
   nesting flags; does nothing if attributes aren't enabled. */
static void html_recalc_currattr(tRendererData* data)
{ tRendererHtmlData* rsd;
  tHtmlNestingFlags active;
  tRendererAttr result = 0;
  if (!html_do_attr(data)) return; /* nobody is interested */
  rsd = (tRendererHtmlData*) data->rsd;
  active = html_nestdata(hnestf);
  if (active & hnestfBold) result |= A_BOLD;
  if (active & (hnestfUnderlined | hnestfAeStyle)) result |= A_UNDERLINE;
#if (TGC_IS_CURSES) && (MIGHT_USE_COLORS)
  if ( (active & hnestfAeStyle) && (html_do_colors(data)) )
    result |= my_color_attr(cpnBlue);
#endif
  html_nestdata(currattr) = result;
}
288 
html_form_off(tRendererHtmlData * rsd)289 static my_inline void html_form_off(tRendererHtmlData* rsd)
290 { rsd->form_level = 0; rsd->_hfn = INVALID_HTML_FORM_NUMBER; }
291 
__html_finish_ae(tRendererHtmlData * rsd)292 static my_inline void __html_finish_ae(tRendererHtmlData* rsd)
293 { rsd->ae_level = 0; rsd->_ae = INVALID_AE; }
294 
/* Ends the current active element and removes the active-element style from
   the attribute of the current nesting entry. */
static void html_finish_ae(tRendererData* data)
{ tRendererHtmlData* rsd = (tRendererHtmlData*) data->rsd;
  html_nestdata(hnestf) &= ~hnestfAeStyle;
  __html_finish_ae(rsd);
  html_recalc_currattr(data);
}
301 
/* Delivers one line from the HTML line buffer (data->inttext) to the line
   callback and keeps the unused rest of the buffer for the next line. On the
   way it records the screen coordinates of active elements (curses only),
   applies the indentation/alignment given by "align" and the current list
   offset, advances the line counter, updates rhdfAtParStart and performs a
   pending recalculation of list offsets. */
static void __html_deliver_line(tRendererData* data, tHtmlNodeFlags align)
{ tRendererHtmlData* rsd = (tRendererHtmlData*) data->rsd;
  size_t inttextlen = data->inttextlen, using, alignoff,
    line_width = html_line_width(data);
  tRendererText* inttext = data->inttext;
  tBoolean do_attr = cond2boolean(html_do_attr(data)), is_line_empty,
    do_ae = cond2boolean(html_do_ae(data));
  tRendererElement element, spacer;
    /* (these live on the stack, so data->element is only valid while the
       line callback runs) */
  tHtmlNestingNumber li_recalc;

  /* decide how many characters ("using") go onto this line */
  if (inttextlen < line_width) using = inttextlen; /* use whole text */
  else /* try to break the line at a space character */
  { size_t si = line_width - 1; /* "split-index" */
    while ( (si > 0) && (inttext[si] != ' ') ) si--;
    if (si > 0) using = si;
    else using = line_width - 1; /* a whole line of unbreakable text */
  }
  while ( (using > 0) && (inttext[using - 1] == ' ') ) using--;
    /* remove trailing whitespace; e.g. necessary for empty-line decision and
       also nice when dumping into a file */
  is_line_empty = cond2boolean(!(using > 0));

  /* the line starts at the list offset unless it is centered or
     right-aligned */
  alignoff = html_nestdata(li_offset);
  if (align)
  { if (align & hnfAlignCenter) alignoff = (line_width - 1 - using) / 2;
    else if (align & hnfAlignRight) alignoff = line_width - 1 - using;
  }

  if ( (do_ae) && (using > 0) )
  { /* split the line into runs of characters which have the same active
       element number; for each run which belongs to an active element,
       append the coordinates of the run to that element's list */
    tActiveElementNumber* intaenum = data->intaenum;
    size_t start, count, end;
    tActiveElementNumber _ae;
    start = count = end = 0;
    aeloop:
    _ae = intaenum[count];
    while (count < using)
    { if (_ae != intaenum[count]) break;
      end = count++;
    }
    /* now the run covers the indices start..end */
    if (_ae != INVALID_AE)
    {
#if TGC_IS_CURSES
      const tBrowserDocument* document = data->document;
      tActiveElement* ae = &(document->active_element[_ae]);
      tActiveElementCoordinates *ex = ae->aec, *aec =
        memory_allocate(sizeof(tActiveElementCoordinates), mapRendering);
      aec->y = rsd->currline;
      aec->x1 = start + alignoff; aec->x2 = end + alignoff;
      if (ex == NULL) ae->aec = aec;
      else { while (ex->next != NULL) { ex = ex->next; } ex->next = aec; }
#endif
    }
    count = end + 1;
    if (count < using) { start = count; goto aeloop; }
  }
  /* describe the line for the callback */
  switch (data->ra)
  { case raLayout:
      if ( (using > 0) && (!(data->flags & rdfVirtual)) )
      { if (alignoff > 0)
        { /* put a spacer element in front of the text element */
          my_memclr_var(spacer); spacer.textcount = alignoff;
          spacer.is_spacer = truE; spacer.next = &element;
          data->element = &spacer;
        }
        else data->element = &element;
        my_memclr_var(element);
        element.text = inttext; element.textcount = using;
        if (do_attr) element.attr = data->intattr;
      }
      else data->element = NULL;
      break;
    case raCalcWidth: data->resulting_line_width = using; break;
  }
  if (!(data->flags & rdfCallerDone)) renderer_deliver_line(data);

  rsd->currline++;
  if (is_line_empty) rsd->flags |= rhdfAtParStart;
  else rsd->flags &= ~rhdfAtParStart;

  /* A pending list offset recalculation (requested for <li>) takes effect
     only now, i.e. after the line with the list marker was delivered; the
     new offset is two columns per list depth, limited so that at least ten
     columns of the full line width stay usable. */
  li_recalc = html_nestdata(li_recalc);
  if (li_recalc > 0)
  { size_t depth = (size_t) rsd->nesting[li_recalc].listdepth,
      offset = depth * 2, full_line_width = data->line_width;
    tHtmlNestingNumber hnn = li_recalc;
    if (offset + 10 > full_line_width) offset = full_line_width - 10;
    while (hnn <= html_currnest(rsd))
    { rsd->nesting[hnn].li_recalc = 0; rsd->nesting[hnn].li_offset = offset;
      hnn++;
    }
  }

  /* skip the space characters at the line break and move the rest of the
     buffers (text, attributes, active element numbers) to the beginning */
  while ( (using < inttextlen) && (inttext[using] == ' ') ) using++;
  if (using >= inttextlen) inttextlen = 0; /* all was used */
  else
  { size_t count;
    for (count = 0; count < inttextlen - using; count++) /* IMPROVEME! */
    { inttext[count] = inttext[count + using];
      if (do_attr) data->intattr[count] = data->intattr[count + using];
      if (do_ae) data->intaenum[count] = data->intaenum[count + using];
    }
    inttextlen -= using;
  }
  data->inttextlen = inttextlen;
}
405 
html_deliver_line(tRendererData * data)406 static my_inline void html_deliver_line(tRendererData* data)
407 { const tRendererHtmlData* rsd = (const tRendererHtmlData*) data->rsd;
408   __html_deliver_line(data, html_nestdata(align));
409 }
410 
/* Appends the character "ch" with the attribute "attr" to the line buffer
   and delivers a line if the buffer got longer than the usable line width.
   Outside of preformatted text, a space character is dropped if it would
   begin the line or follow another space character (htmlspec 9.1: "user
   agents should collapse input white space sequences when producing output
   inter-word space."). */
static void html_append_charattr(tRendererData* data, tRendererText ch,
  tRendererAttr attr)
{ tRendererText* const buffer = data->inttext;
  const size_t pos = data->inttextlen;
  if ( (ch == ' ') && ( (pos <= 0) || (buffer[pos - 1] == ' ') ) )
  { const tRendererHtmlData* rsd = (const tRendererHtmlData*) data->rsd;
    if (!html_inside_pre_tag(rsd)) return; /* collapse the whitespace */
  }
  buffer[pos] = ch;
  if (html_do_attr(data)) data->intattr[pos] = attr;
  if (html_do_ae(data))
    data->intaenum[pos] = ((tRendererHtmlData*)(data->rsd))->_ae;
  data->inttextlen = pos + 1;
  if (data->inttextlen > html_line_width(data)) html_deliver_line(data);
}
429 
html_append_char(tRendererData * data,tRendererText ch)430 static my_inline void html_append_char(tRendererData* data, tRendererText ch)
431 { const tRendererHtmlData* rsd = (const tRendererHtmlData*) data->rsd;
432   html_append_charattr(data, ch, html_nestdata(currattr));
433 }
434 
html_append_str(tRendererData * data,const char * str)435 static my_inline void html_append_str(tRendererData* data, const char* str)
436 { tRendererText ch;
437   while ( (ch = *str++) != '\0' ) html_append_char(data, ch);
438 }
439 
html_append_hline_char(tRendererData * data)440 static one_caller void html_append_hline_char(tRendererData* data)
441 {
442 #if (!TGC_IS_CURSES)
443   html_append_charattr(data, '-', 0);
444 #else
445   if ( (!html_do_attr(data)) || (__MY_HLINE == '-') ) /* hrm... */
446     html_append_charattr(data, '-', 0);
447   else html_append_charattr(data, __MY_HLINE, __MY_HLINE);
448 #endif
449 }
450 
/* lets hnn run from the current nesting index down to 1 (the entry with
   index 0 is never visited); needs a local variable named "rsd" */
#define for_each_hnn(hnn) for (hnn = html_currnest(rsd); hnn > 0; hnn--)
452 
/* Pushes a new entry for the tag "htk" onto the nesting stack, enlarging the
   stack if necessary. The new entry starts as a copy of its parent entry
   (or zeroed if there is no parent); alignment flags contained in "hnf"
   override the inherited alignment. "utn" is the tag name for unknown tags.
   For tags which forbid preformatted text, the entry is marked so. */
static void html_nest(tRendererData* data, tHtmlTagKind htk, const char* utn,
  tHtmlNodeFlags hnf)
{ tRendererHtmlData* rsd = (tRendererHtmlData*) data->rsd;
  const tHtmlNestingNumber idx = rsd->numnest;
  tHtmlNesting* entry;
  if (idx >= rsd->maxnest) /* need more room */
  { rsd->maxnest += 20;
    rsd->nesting = (tHtmlNesting*) memory_reallocate(rsd->nesting,
      rsd->maxnest * sizeof(tHtmlNesting), mapRendering);
  }
  entry = &(rsd->nesting[idx]);
  if (idx <= 0) my_memclr_var(rsd->nesting[idx]);
  else
  { const tHtmlNodeFlags new_align = hnf & hnfAlignAny;
    *entry = rsd->nesting[idx - 1]; /* inherit from the parent */
    if (new_align) entry->align = new_align;
  }
  entry->unknown_tagname = utn; entry->htk = htk;
  rsd->numnest = idx + 1;
  if (htk_forbids_pre(htk))
  { html_nestdata(hnestf) &= ~hnestfInsidePre;
    html_hnestf_or(hnestfForbidPre);
  }
}
477 
/* Produces the line breaks which the tag "htk" requires: for paragraph-like
   and block-like tags, all text which is still in the line buffer is
   delivered; for paragraph-like tags, an empty line follows unless the most
   recently delivered line was empty already. Other tags cause nothing.

   The buffer is flushed in a loop because a single delivery may wrap the
   line and leave a rest in the buffer; such a rest would otherwise be glued
   to the text of the following block resp. would take the place of the
   empty line after a paragraph. The loop stops if a delivery doesn't shrink
   the buffer, so it can't run forever. */
static void html_linebreaks(tRendererData* data, tHtmlTagKind htk)
{ const tBoolean is_par = cond2boolean(htk_is_par(htk));
  if ( (is_par) || (htk_is_block(htk)) )
  { size_t pending = data->inttextlen;
    while (pending > 0)
    { html_deliver_line(data);
      if (data->inttextlen >= pending) break; /* no progress */
      pending = data->inttextlen;
    }
  }
  if (is_par)
  { const tRendererHtmlData* rsd = (const tRendererHtmlData*) data->rsd;
    if (!(rsd->flags & rhdfAtParStart)) html_deliver_line(data);
  }
}
488 
/* Closes all nesting entries above the index "hnn": for each of them, the
   closing text of the tag (if any) is appended, the line breaks for the tag
   are produced and the entry is removed. Afterwards, the paragraph, table
   and active-element state is reset if the respective construct was among
   the closed entries. */
static void __html_denest(tRendererData* data, tHtmlNestingNumber hnn)
{ tRendererHtmlData* rsd = (tRendererHtmlData*) data->rsd;
  tHtmlNestingNumber top;
  while (html_currnest(rsd) > hnn)
  { const tHtmlTagKind closing = html_nestdata(htk);
    if (closing == htkQ) html_append_char(data, '"');
    else if ( (closing == htkSub) || (closing == htkSup) )
      html_append_char(data, ')');
    else if ( (closing == htkStrike) || (closing == htkS) ||
              (closing == htkDel) )
      html_append_str(data, "]]");
    html_linebreaks(data, closing);
    rsd->numnest--;
  }
  top = html_currnest(rsd);
  if (rsd->ae_level > top) __html_finish_ae(rsd);
  if (rsd->p_level > top) rsd->p_level = 0;
  if (rsd->table_level > top) rsd->table_level = 0;
#if !HTML_FORMS_SLOPPY
  if (rsd->form_level > top) html_form_off(rsd);
#endif
}
509 
#define html_denest(hnn) __html_denest(data, hnn) /* abbr. */

/* Searches the nesting stack from the innermost entry outwards for an entry
   whose tag kind "htk" fulfills "condition" (the condition may also refer to
   the nesting index "hnn"); on the first match, everything above index
   "hnn - offset" is closed and the surrounding function RETURNS. Thus, code
   after this macro is only reached if nothing matched. Needs the local
   variables "rsd" and "data". */
#define __try_denest(condition, offset) \
  do \
  { tHtmlNestingNumber hnn; \
    for_each_hnn(hnn) \
    { const tHtmlTagKind htk = rsd->nesting[hnn].htk; \
      if (condition) { html_denest(hnn - offset); return; } \
    } \
  } while (0)

/* try_denest() leaves the matching entry open; try_other_denest() closes the
   matching entry too */
#define try_denest(condition) __try_denest((condition), 0)
#define try_other_denest(condition) __try_denest((condition), 1)
523 
static one_caller void html_opening_denest(tRendererData* data,
  const tHtmlTagKind this_htk)
/* Certain opening tags cause auto-closing of certain former tags (mostly
   related to htfAllowEndtag tags). Note that try_denest() and
   try_other_denest() return from this function as soon as they found a
   matching entry, so in each case below only the first successful search
   takes effect. */
{ tRendererHtmlData* rsd = (tRendererHtmlData*) data->rsd;
  tHtmlNestingNumber p_level = rsd->p_level;
  if ( (p_level > 0) && (htk_is_block(this_htk)) )
  { /* htmlspec 9.3.1 says: "The P element [...] cannot contain block-level
       elements [...]." */
    html_denest(p_level - 1); rsd->p_level = 0;
  }
  switch (this_htk)
  { case htkHtml: /* close "everything" */
      denest_everything: html_denest(0); break;
    case htkHead: case htkBody: /* close everything but a htkHtml */
      try_denest(htk == htkHtml);
      /* didn't find a former htkHtml, thus: */
      goto denest_everything; /*@notreached@*/ break;
    case htkLi: /* return to the enclosing list or close a former item */
      try_denest((htk == htkUl) || (htk == htkOl));
      try_other_denest(htk == htkLi); break;
    case htkTd: case htkTh: /* return to the row/table or close a cell */
      try_denest((htk == htkTr) || (htk == htkTable));
      try_other_denest((htk == htkTd) || (htk == htkTh)); break;
    case htkTr: /* return to the table or close a former row */
      try_denest(htk == htkTable); try_other_denest(htk == htkTr); break;
    case htkOption: /* return to the select/form or close a former option */
      try_denest((htk == htkSelect) || (htk == htkForm));
      try_other_denest(htk == htkOption); break;
    case htkDd: case htkDt: /* return to the list or close a former item */
      try_denest(htk == htkDl);
      try_other_denest((htk == htkDd) || (htk == htkDt)); break;
  }
}
558 
/* Handles the nesting-related effects of an opening tag: auto-closes former
   tags, produces the necessary line breaks and (unless the tag forbids an
   end tag) pushes a nesting entry for the tag. If active elements are
   enabled and the node has an active element base, the next active element
   number is assigned; the document's active element is created if it
   doesn't exist yet, and form elements are registered with the current
   form. */
static one_caller void html_nesting_opener(tRendererData* data,
  const tHtmlNode* node)
{ tRendererHtmlData* rsd = (tRendererHtmlData*) data->rsd;
  const tHtmlTagKind htk = node->kind;
  if (!htk_forbids_endtag(htk))
  { const char* utn = NULL;
    if (htk == htkInvalid)
    { /* unknown tag; remember its name for matching the end tag later */
      const tAttribute* a = find_attribute(node, anInternalTagname);
      if (a != NULL) utn = a->value;
    }
    html_opening_denest(data, htk);
    html_linebreaks(data, htk);
    html_nest(data, htk, utn, node->flags);
    if (htk == htkP) rsd->p_level = html_currnest(rsd);
  }
  else html_linebreaks(data, htk); /* nothing to nest */
  if ( (html_do_ae(data)) && (node->flags & hnfHasAeBase) )
  { tBrowserDocument* document = data->document;
    tActiveElement* aes = document->active_element;
    const tCantent* cantent = document->cantent;
    const tActiveElementBase* aebase = cantent->aebase;
    tActiveElementNumber aenum = document->aenum, aemax = document->aemax, _ae;
    tActiveElementKind aek;
    rsd->__ae++; _ae = rsd->__ae; aek = aebase[_ae].kind;
    if (aek == aekFormHidden) html_finish_ae(data); /* nothing to show */
    else
    { rsd->_ae = _ae; rsd->ae_level = html_currnest(rsd);
      html_hnestf_or(hnestfAeStyle);
    }
    if (_ae >= aenum)
    { /* the document has no active element with this number yet */
      tHtmlFormNumber hfn;
      if (aenum >= aemax) /* need more room */
      { aemax += aenum_incvalue(aemax); document->aemax = aemax;
        aes = document->active_element = memory_reallocate(aes, aemax *
          sizeof(tActiveElement), mapRendering);
      }
      aenum++; document->aenum = aenum;
      init_ae(&(aebase[_ae]), &(aes[_ae]), truE, document);
      if ( (is_form_aek(aek)) && ((hfn=rsd->_hfn) != INVALID_HTML_FORM_NUMBER))
      { /* extend the range of active elements of the current form */
        tHtmlForm* f = &(cantent->form[hfn]);
        tActiveElementNumber last = f->last_ae;
        if (f->first_ae == INVALID_AE) f->first_ae = _ae;
        if ( (last == INVALID_AE) || (last < _ae) ) f->last_ae = _ae;
      }
    }
  }
}
606 
/* Handles an end tag: searches the nesting stack from the innermost entry
   outwards for the matching opening tag and closes that entry together with
   everything nested inside it. Unknown tags are matched by their tag name.
   If no matching entry is found, nothing is closed. */
static one_caller void html_nesting_closer(tRendererData* data,
  const tHtmlNode* node)
{ tRendererHtmlData* rsd = (tRendererHtmlData*) (data->rsd);
  const tHtmlTagKind this_htk = node->kind;
  if (this_htk != htkInvalid)
  {
#if HTML_FORMS_SLOPPY
    if (this_htk == htkForm) html_form_off(rsd); /* found explicit </form> */
#endif
    try_other_denest(htk == this_htk);
  }
  else
  { const tAttribute* attr = find_attribute(node, anInternalTagname);
    const char *utn, *utn2;
    if ( (attr != NULL) && ( (utn = attr->value) != NULL ) )
    { const tHtmlNesting* nesting = rsd->nesting;
      /* ("htk" and "hnn" are variables declared inside the macro) */
      try_other_denest( (htk == htkInvalid) &&
        ( (utn2 = nesting[hnn].unknown_tagname) != NULL ) &&
        (!strcmp(utn, utn2)) );
    }
  }
}
629 
renderer_html(tRendererData * data)630 static one_caller void renderer_html(tRendererData* data)
631 { tRendererHtmlData* rsd = (tRendererHtmlData*) data->rsd;
632   const tBoolean do_ae = cond2boolean(html_do_ae(data));
633   const size_t maxinttextlen = data->line_width + 5;
634   const tHtmlNode* node;
635 
636   data->inttext = __memory_allocate(maxinttextlen * sizeof(tRendererText),
637     mapRendering);
638   if (html_do_attr(data))
639   { data->intattr = __memory_allocate(maxinttextlen * sizeof(tRendererAttr),
640       mapRendering);
641   }
642   if (do_ae)
643   { data->intaenum = __memory_allocate(maxinttextlen *
644       sizeof(tActiveElementNumber), mapRendering);
645   }
646 
647   while ( (!(data->flags & rdfCallerDone)) &&
648           ( (node = parser_html_next(cond2boolean(html_inside_pre_tag(rsd))))
649             != NULL ) )
650   { static const char listmarkers[] = "*+#o";
651     const tHtmlTagKind htk = node->kind;
652     const tHtmlNodeFlags hnf = node->flags;
653     const tBoolean is_endtag = cond2boolean(hnf & hnfIsEndtag);
654     const char* s;
655     char listbuf[3];
656     tBoolean finish_ae = falsE;
657 
658     if (htk == htkText) /* the most likely/special case first */
659     { char ch;
660       s = (const char*) (node->data);
661       if (s == NULL) goto next_tag;
662       put_text:
663       while ( (ch = *s++) != '\0' )
664       { const unsigned char c = (unsigned char) ch;
665         if (is_bad_uchar(c))
666         { if (ch == '\n')
667           { if (!html_inside_pre_tag(rsd)) ch = ' ';
668             else { html_deliver_line(data); continue; }
669           }
670           else if (ch == '\t')
671           { if (!html_inside_pre_tag(rsd)) ch = ' ';
672             else { html_append_str(data, "        "); continue; }
673           }
674           else if (ch == '\r') continue;
675           else ch = '?';
676         }
677         html_append_char(data, ch);
678       }
679       if (finish_ae) html_finish_ae(data);
680       goto next_tag;
681     }
682 
683     if (is_endtag) { html_nesting_closer(data, node); goto next_tag; }
684     html_nesting_opener(data, node);
685 
686     switch (htk)
687     { case htkBr: html_deliver_line(data); break;
688       case htkH1: case htkH2: case htkH3: case htkH4: case htkH5: case htkH6:
689       case htkB: case htkStrong: case htkBig:
690         html_hnestf_or(hnestfBold); break;
691       case htkBlockquote: case htkU: case htkI: case htkAddress: case htkEm:
692       case htkDfn: case htkCite:
693         html_hnestf_or(hnestfUnderlined); break;
694 
695       /* lists */
696       case htkLi:
697         listbuf[0] = listmarkers[(html_nestdata(listdepth)) & 3];
698         listbuf[1] = ' '; listbuf[2] = '\0';
699         if (html_nestdata(listdepth) < 255)
700         { html_nestdata(listdepth)++;
701           html_nestdata(li_recalc) = html_currnest(rsd);
702         }
703         s = listbuf; goto put_text; /*@notreached@*/ break;
704       case htkDd: s = "-> "; goto put_text; /*@notreached@*/ break;
705 
706       /* some active-element stuff */
707       case htkForm:
708         if (hnf & hnfGoodForm)
709         { rsd->__hfn++; rsd->_hfn = rsd->__hfn;
710           rsd->form_level = html_currnest(rsd);
711         }
712         else html_form_off(rsd);
713         break;
714       case htkInput: case htkTextarea:
715         if ( /* (do_ae) && */ (rsd->_ae != INVALID_AE) )
716         { const tBrowserDocument* document = data->document;
717           const tCantent* cantent = document->cantent;
718           const tActiveElementBase* aeb = &(cantent->aebase[rsd->_ae]);
719           const tActiveElement* ae = &(document->active_element[rsd->_ae]);
720           const tActiveElementFlags aeflags = ae->flags;
721           const tActiveElementKind aek = aeb->kind;
722           tHtmlInputLength l, len, deslen;
723           const char* temp;
724           set_form_data
725           switch (aek)
726           {case aekFormCheckbox:
727             s = ( (aeflags & aefCheckedSelected) ? "[X]" : "[_]" );
728             good_ae_text: finish_ae = truE; goto put_text;
729             /*@notreached@*/ break;
730            case aekFormRadio:
731             s = ( (aeflags & aefCheckedSelected) ? "(*)" : "(_)" );
732             goto good_ae_text; /*@notreached@*/ break;
733            case aekFormText: case aekFormPassword: case aekFormFile:
734             deslen = ((aek == aekFormFile) ? 20 : aeb->size);
735             if (deslen + 10 > html_line_width(data))
736               deslen = html_line_width(data) - 10; /* make layout possible */
737             temp = ae->current_text;
738             if (temp == NULL) { len = l = 0; }
739             else
740             { len = strlen(temp); l = 0;
741               if (len > deslen) len = deslen; /* obey web page author :-) */
742               if (aek == aekFormPassword) { while (l < len) strbuf3[l++]='*'; }
743               else
744               { while (l < len)
745                 { char ch = temp[l];
746                   if ( (ch <= 32) || (ch == 127) ) ch = '_';
747                   strbuf3[l++] = ch;
748                 }
749               }
750             }
751             while (l < deslen) strbuf3[l++] = '_';
752             strbuf3[l] = '\0'; s = strbuf3;
753             goto good_ae_text; /*@notreached@*/ break;
754            case aekFormSubmit: case aekFormReset:
755            case aekFormButton: case aekFormImage:
756             s = aeb->render; goto good_ae_text; /*@notreached@*/ break;
757           }
758         }
759         break;
760       case htkButton:
761         if ( /* (do_ae) && */ (rsd->_ae != INVALID_AE) ) { set_form_data }
762         break;
763       case htkArea:
764         if ( /* (do_ae) && */ (rsd->_ae != INVALID_AE) )
765         { const tAttribute* a = find_attribute(node, anAlt);
766           set_form_data
767           if ( (a != NULL) && ( (s = a->value) != NULL ) && (*s != '\0') )
768           { /* fine */ }
769           else s = _("[an image-map hotspot]");
770           goto good_ae_text;
771         }
772         break;
773       case htkSelect:
774         if ( /* (do_ae) && */ (rsd->_ae != INVALID_AE) )
775         { const char* bitfield =
776             data->document->active_element[rsd->_ae].current_text;
777           const tHtmlOption *o, *o0;
778           tHtmlOptionNumber num;
779           set_form_data
780           if (bitfield == NULL) goto empty_selection;
781           o = o0 = (const tHtmlOption*)
782             (data->document->cantent->aebase[rsd->_ae].render);
783           num = 0;
784           while (o != NULL)
785           { if (my_bit_test(bitfield, num)) /* found a selected option */
786             { s = o->render;
787               if (s != NULL) goto good_ae_text;
788             }
789             o = o->next; num++;
790           }
791           if (o0 != NULL) { s = o0->render; if (s != NULL) goto good_ae_text; }
792           empty_selection: s = _("[empty selection list]"); goto good_ae_text;
793         }
794         break;
795 
796       /* tables */
797       case htkTable:
798         rsd->table_level = html_currnest(rsd); rsd->flags |= rhdfIsFirstThTd;
799         break;
800       case htkTr: rsd->flags |= rhdfIsFirstThTd; break;
801 /*G*/ case htkTh: html_hnestf_or(hnestfBold); /*@fallthrough@*/
802 /*L*/ case htkTd:
803 /*U*/   if (rsd->flags & rhdfIsFirstThTd) rsd->flags &= ~rhdfIsFirstThTd;
804 /*E*/   else { s = " | "; goto put_text; }
805         break;
806 
807       /* other stuff */
808       case htkImg:
809         { const tAttribute* a = find_attribute(node, anAlt);
810           if ( (a != NULL) && ( (s = a->value) != NULL ) && (*s != '\0') )
811             goto put_text;
812         }
813         break;
814       case htkHr:
815         { const tAttribute* w = find_attribute(node, anWidth);
816           const char* wv;
817           size_t line_width = html_line_width(data), dashcount = line_width;
818           if ( (w != NULL) && ( (wv = w->value) != NULL ) && (my_isdigit(*wv)))
819           { int x;
820             my_atoi(wv, &x, &wv, 100);
821             if ( (*wv == '%') && (*(wv + 1) == '\0') && (x >= 0) && (x <= 100))
822             { const size_t x2 = (size_t) x;
823               dashcount = ( (line_width * x2) / 100 ); /* CHECKME: rounding? */
824             }
825           }
826           if (data->inttextlen > 0) html_deliver_line(data);
827           if (dashcount > 1)
828           { while (dashcount-- > 1) html_append_hline_char(data);
829             __html_deliver_line(data, hnf & hnfAlignAny);
830           }
831         }
832         break;
833       case htkPre:
834         if (!(html_nestdata(hnestf) & hnestfForbidPre))
835           html_hnestf_or(hnestfInsidePre);
836         break;
837       case htkSub: s = "_("; goto put_text; /*@notreached@*/ break;
838       case htkSup: s = "^("; goto put_text; /*@notreached@*/ break;
839       case htkStrike: case htkS: case htkDel:
840         s = "[["; goto put_text; /*@notreached@*/ break;
841       case htkQ:
842         html_hnestf_or(hnestfUnderlined); s = strDoubleQuote;
843         goto put_text; /*@notreached@*/ break;
844       case htkFrame: case htkIframe:
845         if ( /* (do_ae) && */ (rsd->_ae != INVALID_AE) )
846         { s = data->document->cantent->aebase[rsd->_ae].render;
847           goto good_ae_text;
848         }
849         break;
850       case htkObject:
851         if (find_attribute(node, anDeclare) == NULL)
852         { const tAttribute* t = find_attribute(node, anType);
853           const char* tv;
854           if ( (t != NULL) && ( (tv = t->value) != NULL ) && (*tv != '\0') &&
855                (strlen(tv) <= 1024) )
856           { sprint_safe(strbuf3, _("[an embedded object of type \"%s\"]"), tv);
857             s = strbuf3;
858           }
859           else s = _("[an embedded object]");
860           goto put_text;
861         }
862         break;
863     }
864 
865     next_tag: {}
866     if (!(hnf & hnfStoredInTree)) deallocate_html_node(node);
867   }
868   data->flags |= rdfOutOfContent;
869   html_denest(0);
870   while ( (data->inttextlen > 0) && (!(data->flags & rdfCallerDone)) )
871     html_deliver_line(data); /* deliver any remaining text */
872   data->flags |= rdfRendererDone;
873   memory_deallocate(data->inttext); __dealloc(data->intattr);
874   __dealloc(data->intaenum);
875 }
876 
877 
878 /* Generic renderer */
879 
renderer_start(tRendererData * data)880 static one_caller void renderer_start(tRendererData* data)
881 { tBrowserDocument* document = data->document;
882   tCantent* cantent = document->cantent;
883 #if TGC_IS_CURSES
884   tActiveElementNumber count, num;
885   tActiveElement* aes;
886 #endif
887   if ( (cantent == NULL) || ( (cantent->content) == NULL ) )
888   { data->flags |= rdfRendererDone | rdfOutOfContent; return; }
889 #if TGC_IS_CURSES
890   if (!(data->flags & rdfAttributes))
891   { /* In curses mode, we can't deliver colors etc. when attributes are off. */
892 #if MIGHT_USE_COLORS
893     data->flags &= ~rdfColors;
894 #endif
895   }
896 #endif
897 #if MIGHT_USE_COLORS
898   if (!use_colors) data->flags &= ~rdfColors;
899 #endif
900 #if TGC_IS_CURSES
901   if ( ( (num = document->aenum) > 0 ) &&
902        ( (aes = document->active_element) != NULL ) )
903   { for (count = 0; count < num; count++) deallocate_aec(&(aes[count].aec)); }
904 #endif
905   if (is_html(data))
906   { tRendererHtmlData* rsd;
907     parser_html_start(cantent); data->flags |= rdfCalledParserHtmlStart;
908     data->rsd = rsd = (tRendererHtmlData*)
909       memory_allocate(sizeof(tRendererHtmlData), mapRendering);
910     rsd->flags = rhdfAtParStart;
911     rsd->_ae = INVALID_AE; rsd->__ae = -1;
912     rsd->_hfn = INVALID_HTML_FORM_NUMBER; rsd->__hfn = -1;
913     html_nest(data, htkInvalid, NULL, hnfNone); /* (for simplicity) */
914   }
915 }
916 
renderer_finish(tRendererData * data)917 static one_caller void renderer_finish(tRendererData* data)
918 { if (data->flags & rdfCalledParserHtmlStart) parser_html_finish();
919   renderer_deallocate_rsd(data);
920 }
921 
renderer_run(tRendererData * data)922 renderer_interface void renderer_run(tRendererData* data)
923 /* sole entry point for "external" callers */
924 { renderer_start(data);
925   if (!(data->flags & rdfAnyDone))
926   { if (is_html(data)) renderer_html(data);
927     else renderer_plaintext(data);
928   }
929   renderer_finish(data);
930 }
931