1 #include <stdlib.h>
2 #include <stdio.h>
3 #include <string.h>
4 #include <assert.h>
5 #include "cmark_ctype.h"
6 #include "config.h"
7 #include "cmark-gfm.h"
8 #include "houdini.h"
9 #include "scanners.h"
10 #include "syntax_extension.h"
11 #include "html.h"
12 #include "render.h"
13 
14 // Functions to convert cmark_nodes to HTML strings.
15 
/*
 * Appends an HTML-escaped copy of `length` bytes starting at `source` to
 * `dest`. This is a thin wrapper that forwards to houdini_escape_html0 with
 * its last argument fixed to 0.
 * NOTE(review): the 0 presumably selects the non-"secure" escaping mode --
 * confirm against houdini.h.
 */
static void escape_html(cmark_strbuf *dest,
                        const unsigned char *source,
                        bufsize_t length) {
  houdini_escape_html0(dest, source, length, 0);
}
20 
/*
 * Copies `len` bytes of raw HTML from `data` into renderer->html, consulting
 * the extensions in renderer->filter_extensions at every '<' byte.
 *
 * For each '<', the extensions' html_filter_func callbacks are called in list
 * order with the remaining input (starting at that '<'). As soon as one of
 * them returns 0 the '<' is emitted as "&lt;"; if none does, the '<' is
 * emitted unchanged. Every other byte is copied verbatim.
 */
static void filter_html_block(cmark_html_renderer *renderer, uint8_t *data, size_t len) {
  cmark_strbuf *out = renderer->html;

  while (len > 0) {
    uint8_t *lt = (uint8_t *)memchr(data, '<', len);
    if (lt == NULL)
      break;

    /* Flush the run of ordinary bytes that precedes the '<'. */
    if (lt > data) {
      size_t run = (size_t)(lt - data);
      cmark_strbuf_put(out, data, (bufsize_t)run);
      data = lt;
      len -= run;
    }

    /* Ask each filter in turn; stop at the first one that rejects. */
    bool rejected = false;
    cmark_llist *cell;
    for (cell = renderer->filter_extensions; cell != NULL && !rejected;
         cell = cell->next) {
      cmark_syntax_extension *filter = (cmark_syntax_extension *)cell->data;
      rejected = !filter->html_filter_func(filter, data, len);
    }

    if (rejected)
      cmark_strbuf_puts(out, "&lt;");
    else
      cmark_strbuf_putc(out, '<');

    /* Step past the '<' that was just handled. */
    data++;
    len--;
  }

  /* Whatever is left contains no further '<'. */
  if (len > 0)
    cmark_strbuf_put(out, data, (bufsize_t)len);
}
61 
/*
 * Emits the back-reference link for the footnote currently being rendered,
 * at most once per footnote.
 *
 * renderer->written_footnote_ix records the index of the last footnote for
 * which a back-reference was written. If it has already caught up with
 * renderer->footnote_ix nothing is written and false is returned. Otherwise
 * the marker is advanced, an <a href="#fnrefN" class="footnote-backref">
 * link (N = renderer->footnote_ix) is appended to `html`, and true is
 * returned.
 */
static bool S_put_footnote_backref(cmark_html_renderer *renderer, cmark_strbuf *html) {
  char index_text[32];

  if (renderer->footnote_ix <= renderer->written_footnote_ix) {
    return false;
  }
  renderer->written_footnote_ix = renderer->footnote_ix;

  snprintf(index_text, sizeof(index_text), "%d", renderer->footnote_ix);

  cmark_strbuf_puts(html, "<a href=\"#fnref");
  cmark_strbuf_puts(html, index_text);
  cmark_strbuf_puts(html, "\" class=\"footnote-backref\">↩</a>");

  return true;
}
75 
/*
 * Renders one iterator event for `node` into renderer->html.
 *
 * renderer: rendering state (output buffer, "plain text" marker, footnote
 *           counters, list of HTML filter extensions).
 * node:     the node the iterator is currently visiting.
 * ev_type:  CMARK_EVENT_ENTER when the node is being entered; any other
 *           value is treated as leaving the node.
 * options:  bit mask of CMARK_OPT_* flags.
 *
 * Always returns 1.
 *
 * Three stages are applied in order:
 *   1. "Plain" mode: while renderer->plain is non-NULL (set when an image is
 *      entered, cleared when the iterator comes back to that same node) only
 *      escaped literal text and spaces for line breaks are emitted, so the
 *      image's children become the contents of its alt attribute.
 *   2. If the node belongs to an extension that supplies html_render_func,
 *      rendering is delegated to it.
 *   3. Otherwise the node is rendered by the built-in switch on node->type;
 *      an unknown type trips assert(false).
 */
static int S_render_node(cmark_html_renderer *renderer, cmark_node *node,
                         cmark_event_type ev_type, int options) {
  cmark_node *parent;
  cmark_node *grandparent;
  cmark_strbuf *html = renderer->html;
  cmark_llist *it;
  cmark_syntax_extension *ext;
  char start_heading[] = "<h0";
  char end_heading[] = "</h0";
  bool tight;
  bool filtered;
  char buffer[BUFFER_SIZE];

  bool entering = (ev_type == CMARK_EVENT_ENTER);

  if (renderer->plain == node) { // back at original node
    renderer->plain = NULL;
  }

  if (renderer->plain != NULL) {
    // Inside an image: emit only text content, no markup.
    switch (node->type) {
    case CMARK_NODE_TEXT:
    case CMARK_NODE_CODE:
    case CMARK_NODE_HTML_INLINE:
      escape_html(html, node->as.literal.data, node->as.literal.len);
      break;

    case CMARK_NODE_LINEBREAK:
    case CMARK_NODE_SOFTBREAK:
      cmark_strbuf_putc(html, ' ');
      break;

    default:
      break;
    }
    return 1;
  }

  // Extension-owned nodes render themselves.
  if (node->extension && node->extension->html_render_func) {
    node->extension->html_render_func(node->extension, renderer, node, ev_type, options);
    return 1;
  }

  switch (node->type) {
  case CMARK_NODE_DOCUMENT:
    break;

  case CMARK_NODE_BLOCK_QUOTE:
    if (entering) {
      cmark_html_render_cr(html);
      cmark_strbuf_puts(html, "<blockquote");
      cmark_html_render_sourcepos(node, html, options);
      cmark_strbuf_puts(html, ">\n");
    } else {
      cmark_html_render_cr(html);
      cmark_strbuf_puts(html, "</blockquote>\n");
    }
    break;

  case CMARK_NODE_LIST: {
    cmark_list_type list_type = node->as.list.list_type;
    int start = node->as.list.start;

    if (entering) {
      cmark_html_render_cr(html);
      if (list_type == CMARK_BULLET_LIST) {
        cmark_strbuf_puts(html, "<ul");
        cmark_html_render_sourcepos(node, html, options);
        cmark_strbuf_puts(html, ">\n");
      } else if (start == 1) {
        // A non-bullet list starting at 1 needs no start attribute.
        cmark_strbuf_puts(html, "<ol");
        cmark_html_render_sourcepos(node, html, options);
        cmark_strbuf_puts(html, ">\n");
      } else {
        snprintf(buffer, BUFFER_SIZE, "<ol start=\"%d\"", start);
        cmark_strbuf_puts(html, buffer);
        cmark_html_render_sourcepos(node, html, options);
        cmark_strbuf_puts(html, ">\n");
      }
    } else {
      cmark_strbuf_puts(html,
                        list_type == CMARK_BULLET_LIST ? "</ul>\n" : "</ol>\n");
    }
    break;
  }

  case CMARK_NODE_ITEM:
    if (entering) {
      cmark_html_render_cr(html);
      cmark_strbuf_puts(html, "<li");
      cmark_html_render_sourcepos(node, html, options);
      cmark_strbuf_putc(html, '>');
    } else {
      cmark_strbuf_puts(html, "</li>\n");
    }
    break;

  case CMARK_NODE_HEADING:
    // The '0' placeholder in the tag templates is overwritten with the
    // heading level digit.
    if (entering) {
      cmark_html_render_cr(html);
      start_heading[2] = (char)('0' + node->as.heading.level);
      cmark_strbuf_puts(html, start_heading);
      cmark_html_render_sourcepos(node, html, options);
      cmark_strbuf_putc(html, '>');
    } else {
      end_heading[3] = (char)('0' + node->as.heading.level);
      cmark_strbuf_puts(html, end_heading);
      cmark_strbuf_puts(html, ">\n");
    }
    break;

  case CMARK_NODE_CODE_BLOCK:
    cmark_html_render_cr(html);

    if (node->as.code.info.len == 0) {
      cmark_strbuf_puts(html, "<pre");
      cmark_html_render_sourcepos(node, html, options);
      cmark_strbuf_puts(html, "><code>");
    } else {
      // first_tag = length of the info string's first word, i.e. the number
      // of bytes before the first whitespace character.
      bufsize_t first_tag = 0;
      while (first_tag < node->as.code.info.len &&
             !cmark_isspace(node->as.code.info.data[first_tag])) {
        first_tag += 1;
      }

      if (options & CMARK_OPT_GITHUB_PRE_LANG) {
        cmark_strbuf_puts(html, "<pre");
        cmark_html_render_sourcepos(node, html, options);
        cmark_strbuf_puts(html, " lang=\"");
        escape_html(html, node->as.code.info.data, first_tag);
        if (first_tag < node->as.code.info.len && (options & CMARK_OPT_FULL_INFO_STRING)) {
          // Everything after the separating whitespace byte goes to data-meta.
          cmark_strbuf_puts(html, "\" data-meta=\"");
          escape_html(html, node->as.code.info.data + first_tag + 1, node->as.code.info.len - first_tag - 1);
        }
        cmark_strbuf_puts(html, "\"><code>");
      } else {
        cmark_strbuf_puts(html, "<pre");
        cmark_html_render_sourcepos(node, html, options);
        cmark_strbuf_puts(html, "><code class=\"language-");
        escape_html(html, node->as.code.info.data, first_tag);
        if (first_tag < node->as.code.info.len && (options & CMARK_OPT_FULL_INFO_STRING)) {
          cmark_strbuf_puts(html, "\" data-meta=\"");
          escape_html(html, node->as.code.info.data + first_tag + 1, node->as.code.info.len - first_tag - 1);
        }
        cmark_strbuf_puts(html, "\">");
      }
    }

    escape_html(html, node->as.code.literal.data, node->as.code.literal.len);
    cmark_strbuf_puts(html, "</code></pre>\n");
    break;

  case CMARK_NODE_HTML_BLOCK:
    cmark_html_render_cr(html);
    if (!(options & CMARK_OPT_UNSAFE)) {
      cmark_strbuf_puts(html, "<!-- raw HTML omitted -->");
    } else if (renderer->filter_extensions) {
      filter_html_block(renderer, node->as.literal.data, node->as.literal.len);
    } else {
      cmark_strbuf_put(html, node->as.literal.data, node->as.literal.len);
    }
    cmark_html_render_cr(html);
    break;

  case CMARK_NODE_CUSTOM_BLOCK:
    cmark_html_render_cr(html);
    if (entering) {
      cmark_strbuf_put(html, node->as.custom.on_enter.data,
                       node->as.custom.on_enter.len);
    } else {
      cmark_strbuf_put(html, node->as.custom.on_exit.data,
                       node->as.custom.on_exit.len);
    }
    cmark_html_render_cr(html);
    break;

  case CMARK_NODE_THEMATIC_BREAK:
    cmark_html_render_cr(html);
    cmark_strbuf_puts(html, "<hr");
    cmark_html_render_sourcepos(node, html, options);
    cmark_strbuf_puts(html, " />\n");
    break;

  case CMARK_NODE_PARAGRAPH:
    // A paragraph whose grandparent is a tight list is rendered without
    // <p> tags. `parent` can be NULL when the paragraph itself is the root
    // being rendered; the call below already passed a possibly-NULL parent
    // to cmark_node_parent in the original code.
    parent = cmark_node_parent(node);
    grandparent = cmark_node_parent(parent);
    if (grandparent != NULL && grandparent->type == CMARK_NODE_LIST) {
      tight = grandparent->as.list.tight;
    } else {
      tight = false;
    }
    if (!tight) {
      if (entering) {
        cmark_html_render_cr(html);
        cmark_strbuf_puts(html, "<p");
        cmark_html_render_sourcepos(node, html, options);
        cmark_strbuf_putc(html, '>');
      } else {
        // The last paragraph of a footnote definition carries the
        // back-reference inside its <p>. Guard against a parentless
        // paragraph so that parent->type is never read through NULL.
        if (parent != NULL && parent->type == CMARK_NODE_FOOTNOTE_DEFINITION &&
            node->next == NULL) {
          cmark_strbuf_putc(html, ' ');
          S_put_footnote_backref(renderer, html);
        }
        cmark_strbuf_puts(html, "</p>\n");
      }
    }
    break;

  case CMARK_NODE_TEXT:
    escape_html(html, node->as.literal.data, node->as.literal.len);
    break;

  case CMARK_NODE_LINEBREAK:
    cmark_strbuf_puts(html, "<br />\n");
    break;

  case CMARK_NODE_SOFTBREAK:
    if (options & CMARK_OPT_HARDBREAKS) {
      cmark_strbuf_puts(html, "<br />\n");
    } else if (options & CMARK_OPT_NOBREAKS) {
      cmark_strbuf_putc(html, ' ');
    } else {
      cmark_strbuf_putc(html, '\n');
    }
    break;

  case CMARK_NODE_CODE:
    cmark_strbuf_puts(html, "<code>");
    escape_html(html, node->as.literal.data, node->as.literal.len);
    cmark_strbuf_puts(html, "</code>");
    break;

  case CMARK_NODE_HTML_INLINE:
    if (!(options & CMARK_OPT_UNSAFE)) {
      cmark_strbuf_puts(html, "<!-- raw HTML omitted -->");
    } else {
      // An extension filter returning 0 marks this inline HTML as filtered.
      filtered = false;
      for (it = renderer->filter_extensions; it; it = it->next) {
        ext = (cmark_syntax_extension *) it->data;
        if (!ext->html_filter_func(ext, node->as.literal.data, node->as.literal.len)) {
          filtered = true;
          break;
        }
      }
      if (!filtered) {
        cmark_strbuf_put(html, node->as.literal.data, node->as.literal.len);
      } else {
        // Replace the first byte with "&lt;" and copy the rest verbatim.
        // NOTE(review): assumes the literal is non-empty and starts with
        // '<' -- confirm against the inline parser.
        cmark_strbuf_puts(html, "&lt;");
        cmark_strbuf_put(html, node->as.literal.data + 1, node->as.literal.len - 1);
      }
    }
    break;

  case CMARK_NODE_CUSTOM_INLINE:
    if (entering) {
      cmark_strbuf_put(html, node->as.custom.on_enter.data,
                       node->as.custom.on_enter.len);
    } else {
      cmark_strbuf_put(html, node->as.custom.on_exit.data,
                       node->as.custom.on_exit.len);
    }
    break;

  case CMARK_NODE_STRONG:
    if (entering) {
      cmark_strbuf_puts(html, "<strong>");
    } else {
      cmark_strbuf_puts(html, "</strong>");
    }
    break;

  case CMARK_NODE_EMPH:
    if (entering) {
      cmark_strbuf_puts(html, "<em>");
    } else {
      cmark_strbuf_puts(html, "</em>");
    }
    break;

  case CMARK_NODE_LINK:
    if (entering) {
      cmark_strbuf_puts(html, "<a href=\"");
      // The URL is left empty when CMARK_OPT_UNSAFE is off and
      // scan_dangerous_url matches it.
      if (!(!(options & CMARK_OPT_UNSAFE) &&
            scan_dangerous_url(&node->as.link.url, 0))) {
        houdini_escape_href(html, node->as.link.url.data,
                            node->as.link.url.len);
      }
      if (node->as.link.title.len) {
        cmark_strbuf_puts(html, "\" title=\"");
        escape_html(html, node->as.link.title.data, node->as.link.title.len);
      }
      cmark_strbuf_puts(html, "\">");
    } else {
      cmark_strbuf_puts(html, "</a>");
    }
    break;

  case CMARK_NODE_IMAGE:
    if (entering) {
      cmark_strbuf_puts(html, "<img src=\"");
      if (!(!(options & CMARK_OPT_UNSAFE) &&
            scan_dangerous_url(&node->as.link.url, 0))) {
        houdini_escape_href(html, node->as.link.url.data,
                            node->as.link.url.len);
      }
      cmark_strbuf_puts(html, "\" alt=\"");
      // Children are rendered in plain mode until this node is revisited.
      renderer->plain = node;
    } else {
      if (node->as.link.title.len) {
        cmark_strbuf_puts(html, "\" title=\"");
        escape_html(html, node->as.link.title.data, node->as.link.title.len);
      }

      cmark_strbuf_puts(html, "\" />");
    }
    break;

  case CMARK_NODE_FOOTNOTE_DEFINITION:
    if (entering) {
      // The first footnote definition opens the footnotes section; the
      // caller closes it once rendering is finished.
      if (renderer->footnote_ix == 0) {
        cmark_strbuf_puts(html, "<section class=\"footnotes\">\n<ol>\n");
      }
      ++renderer->footnote_ix;
      cmark_strbuf_puts(html, "<li id=\"fn");
      char n[32];
      snprintf(n, sizeof(n), "%d", renderer->footnote_ix);
      cmark_strbuf_puts(html, n);
      cmark_strbuf_puts(html, "\">\n");
    } else {
      // Emits the back-reference here only if no paragraph already did.
      if (S_put_footnote_backref(renderer, html)) {
        cmark_strbuf_putc(html, '\n');
      }
      cmark_strbuf_puts(html, "</li>\n");
    }
    break;

  case CMARK_NODE_FOOTNOTE_REFERENCE:
    if (entering) {
      // The node literal is written unescaped as the link target suffix,
      // the id suffix and the visible label.
      cmark_strbuf_puts(html, "<sup class=\"footnote-ref\"><a href=\"#fn");
      cmark_strbuf_put(html, node->as.literal.data, node->as.literal.len);
      cmark_strbuf_puts(html, "\" id=\"fnref");
      cmark_strbuf_put(html, node->as.literal.data, node->as.literal.len);
      cmark_strbuf_puts(html, "\">");
      cmark_strbuf_put(html, node->as.literal.data, node->as.literal.len);
      cmark_strbuf_puts(html, "</a></sup>");
    }
    break;

  default:
    assert(false);
    break;
  }

  return 1;
}
430 
/*
 * Renders the tree rooted at `root` to HTML, using the memory allocator
 * obtained from cmark_node_mem(root). See cmark_render_html_with_mem for the
 * meaning of `options` and `extensions` and for the returned string.
 */
char *cmark_render_html(cmark_node *root, int options, cmark_llist *extensions) {
  cmark_mem *mem = cmark_node_mem(root);

  return cmark_render_html_with_mem(root, options, extensions, mem);
}
434 
/*
 * Renders the tree rooted at `root` to an HTML string.
 *
 * root:       node at which iteration starts.
 * options:    bit mask of CMARK_OPT_* flags, passed to S_render_node.
 * extensions: list of cmark_syntax_extension pointers; those that provide an
 *             html_filter_func are collected into a temporary list used to
 *             filter raw HTML during rendering.
 * mem:        allocator used for the output buffer and the temporary list.
 *
 * Returns the buffer detached from the output cmark_strbuf.
 * NOTE(review): the caller presumably owns the result and must release it
 * with `mem` -- confirm against the public header.
 */
char *cmark_render_html_with_mem(cmark_node *root, int options, cmark_llist *extensions, cmark_mem *mem) {
  cmark_strbuf html = CMARK_BUF_INIT(mem);
  cmark_html_renderer renderer = {&html, NULL, NULL, 0, 0, NULL};
  cmark_iter *iter = cmark_iter_new(root);
  cmark_llist *item;
  char *result;

  /* Keep only the extensions that can filter raw HTML. */
  for (item = extensions; item != NULL; item = item->next) {
    cmark_syntax_extension *candidate = (cmark_syntax_extension *)item->data;
    if (candidate->html_filter_func) {
      renderer.filter_extensions =
          cmark_llist_append(mem, renderer.filter_extensions, candidate);
    }
  }

  /* Walk the tree and render each enter/exit event. */
  for (;;) {
    cmark_event_type event = cmark_iter_next(iter);
    if (event == CMARK_EVENT_DONE) {
      break;
    }
    S_render_node(&renderer, cmark_iter_get_node(iter), event, options);
  }

  /* Close the footnotes section opened by the first footnote definition. */
  if (renderer.footnote_ix != 0) {
    cmark_strbuf_puts(&html, "</ol>\n</section>\n");
  }

  result = (char *)cmark_strbuf_detach(&html);

  cmark_llist_free(mem, renderer.filter_extensions);
  cmark_iter_free(iter);

  return result;
}
466