1 #include <stdlib.h>
2 #include <stdio.h>
3 #include <string.h>
4 #include <stdint.h>
5 #include <assert.h>
6 
7 #include "config.h"
8 #include "cmark-gfm.h"
9 #include "node.h"
10 #include "buffer.h"
11 #include "utf8.h"
12 #include "scanners.h"
13 #include "render.h"
14 #include "syntax_extension.h"
15 
// Shorthands for the renderer callbacks used throughout this file.
// OUT and LIT expand to code that names `renderer` and `node`; CR and
// BLANKLINE name `renderer`. Those identifiers must be in scope wherever
// the macros are used.

// Write string s via renderer->out with the given wrap flag and escaping mode.
#define OUT(s, wrap, escaping) renderer->out(renderer, node, s, wrap, escaping)
// Write string s via renderer->out with wrapping off and LITERAL escaping.
#define LIT(s) renderer->out(renderer, node, s, false, LITERAL)
// Invoke the renderer's cr callback.
#define CR() renderer->cr(renderer)
// Invoke the renderer's blankline callback.
#define BLANKLINE() renderer->blankline(renderer)
// Size of the scratch buffer outc() uses for a percent-encoded character.
#define ENCODED_SIZE 20
// Size of the buffer S_render_node() uses to format an ordered-list marker.
#define LISTMARKER_SIZE 20
22 
23 // Functions to convert cmark_nodes to commonmark strings.
24 
outc(cmark_renderer * renderer,cmark_node * node,cmark_escaping escape,int32_t c,unsigned char nextc)25 static CMARK_INLINE void outc(cmark_renderer *renderer, cmark_node *node,
26                               cmark_escaping escape,
27                               int32_t c, unsigned char nextc) {
28   bool needs_escaping = false;
29   bool follows_digit =
30       renderer->buffer->size > 0 &&
31       cmark_isdigit(renderer->buffer->ptr[renderer->buffer->size - 1]);
32   char encoded[ENCODED_SIZE];
33 
34   needs_escaping =
35       c < 0x80 && escape != LITERAL &&
36       ((escape == NORMAL &&
37         (c == '*' || c == '_' || c == '[' || c == ']' || c == '#' || c == '<' ||
38          c == '>' || c == '\\' || c == '`' || c == '~' || c == '!' ||
39          (c == '&' && cmark_isalpha(nextc)) || (c == '!' && nextc == '[') ||
40          (renderer->begin_content && (c == '-' || c == '+' || c == '=') &&
41           // begin_content doesn't get set to false til we've passed digits
42           // at the beginning of line, so...
43           !follows_digit) ||
44          (renderer->begin_content && (c == '.' || c == ')') && follows_digit &&
45           (nextc == 0 || cmark_isspace(nextc))))) ||
46        (escape == URL &&
47         (c == '`' || c == '<' || c == '>' || cmark_isspace((char)c) || c == '\\' ||
48          c == ')' || c == '(')) ||
49        (escape == TITLE &&
50         (c == '`' || c == '<' || c == '>' || c == '"' || c == '\\')));
51 
52   if (needs_escaping) {
53     if (cmark_isspace((char)c)) {
54       // use percent encoding for spaces
55       snprintf(encoded, ENCODED_SIZE, "%%%2x", c);
56       cmark_strbuf_puts(renderer->buffer, encoded);
57       renderer->column += 3;
58     } else {
59       cmark_render_ascii(renderer, "\\");
60       cmark_render_code_point(renderer, c);
61     }
62   } else {
63     cmark_render_code_point(renderer, c);
64   }
65 }
66 
// Returns the length of the longest run of consecutive backticks in the
// NUL-terminated string `code` (0 if it contains none).
static int longest_backtick_sequence(const char *code) {
  int best = 0;
  int run = 0;
  const char *p;

  // The terminating NUL is visited as well so that a run ending at the very
  // end of the string is still accounted for.
  for (p = code;; p++) {
    if (*p == '`') {
      run++;
      continue;
    }
    if (run > best) {
      best = run;
    }
    run = 0;
    if (*p == '\0') {
      break;
    }
  }
  return best;
}
85 
// Returns the smallest n >= 1 such that `code` contains no run of exactly n
// consecutive backticks.
// Note: run lengths are tracked in a 32-bit set, so runs of 32 or more are
// not recorded and the result is capped at 32.
static int shortest_unused_backtick_sequence(const char *code) {
  // Bit k set means "a run of length k occurs". Bit 0 starts out set so the
  // answer can never be 0.
  uint32_t seen = 1;
  int run = 0;
  int n = 0;
  const char *p;

  // Visit the terminating NUL too, so a trailing run gets recorded.
  for (p = code;; p++) {
    if (*p == '`') {
      run++;
      continue;
    }
    if (run > 0 && run < 32) {
      seen |= (1U << run);
    }
    run = 0;
    if (*p == '\0') {
      break;
    }
  }

  // Index of the lowest clear bit.
  while (n < 32 && (seen & 1)) {
    seen >>= 1;
    n++;
  }
  return n;
}
112 
// Reports whether `node` can be rendered in autolink form, i.e. "<url>".
//
// This is the case only when all of the following hold:
//   - node is a link whose URL is non-empty and begins with a scheme,
//   - the link has no title,
//   - after merging adjacent text nodes, the link has exactly one child and
//     that child is a text node,
//   - the text equals the URL (ignoring a leading "mailto:" on the URL).
//
// Side effect: adjacent text nodes at the start of the link are merged via
// cmark_consolidate_text_nodes().
static bool is_autolink(cmark_node *node) {
  cmark_chunk *title;
  cmark_chunk *url;
  cmark_node *link_text;
  const char *realurl;
  int realurllen;

  if (node->type != CMARK_NODE_LINK) {
    return false;
  }

  url = &node->as.link.url;
  if (url->len == 0 || scan_scheme(url, 0) == 0) {
    return false;
  }

  title = &node->as.link.title;
  // if it has a title, we can't treat it as an autolink:
  if (title->len > 0) {
    return false;
  }

  link_text = node->first_child;
  if (link_text == NULL) {
    return false;
  }
  cmark_consolidate_text_nodes(link_text);

  // Only a lone text child may be compared against the URL. For any other
  // node type `as.literal` is not the node's text, and the caller drops all
  // children of an autolink, so extra siblings (emphasis, images, ...) would
  // silently disappear from the output.
  if (link_text->type != CMARK_NODE_TEXT || link_text->next != NULL) {
    return false;
  }

  realurl = (const char *)url->data;
  realurllen = url->len;
  // Length is checked first so the comparison never looks past the chunk.
  if (realurllen >= 7 && strncmp(realurl, "mailto:", 7) == 0) {
    realurl += 7;
    realurllen -= 7;
  }

  // An empty remainder ("mailto:" alone) cannot form an autolink; bailing out
  // here also avoids calling strncmp with a possibly NULL data pointer.
  if (realurllen <= 0) {
    return false;
  }

  return (realurllen == link_text->as.literal.len &&
          strncmp(realurl, (const char *)link_text->as.literal.data,
                  (size_t)realurllen) == 0);
}
150 
151 // if node is a block node, returns node.
152 // otherwise returns first block-level node that is an ancestor of node.
153 // if there is no block-level ancestor, returns NULL.
get_containing_block(cmark_node * node)154 static cmark_node *get_containing_block(cmark_node *node) {
155   while (node) {
156     if (CMARK_NODE_BLOCK_P(node)) {
157       return node;
158     } else {
159       node = node->parent;
160     }
161   }
162   return NULL;
163 }
164 
// Renders one enter/exit event for `node` as CommonMark text. This is the
// per-node callback handed to cmark_render().
//
//   renderer - output state; written through the OUT/LIT/CR/BLANKLINE macros
//              and through its prefix buffer and flags.
//   node     - node being visited.
//   ev_type  - CMARK_EVENT_ENTER when entering the node; any other value is
//              handled as leaving it.
//   options  - CMARK_OPT_* bit flags; HARDBREAKS and NOBREAKS are consulted.
//
// Returns 1 normally, or 0 after writing an autolink to signal that the
// node's contents should be skipped.
static int S_render_node(cmark_renderer *renderer, cmark_node *node,
                         cmark_event_type ev_type, int options) {
  const bool entering = (ev_type == CMARK_EVENT_ENTER);
  const bool hardbreaks = (options & CMARK_OPT_HARDBREAKS) != 0;
  const bool nobreaks = (options & CMARK_OPT_NOBREAKS) != 0;
  const bool allow_wrap = renderer->width > 0 && !nobreaks && !hardbreaks;
  int i;

  // Don't adjust tight list status until we've started the list.
  // Otherwise we lose the blank line between a paragraph and
  // a following list.
  if (!(entering && node->type == CMARK_NODE_ITEM && node->prev == NULL)) {
    cmark_node *block = get_containing_block(node);
    bool tight = false;

    // block is NULL when there is no containing block.
    if (block != NULL) {
      if (block->type == CMARK_NODE_ITEM &&
          cmark_node_get_list_tight(block->parent)) {
        tight = true;
      } else if (block->parent != NULL &&
                 block->parent->type == CMARK_NODE_ITEM &&
                 cmark_node_get_list_tight(block->parent->parent)) {
        tight = true;
      }
    }
    renderer->in_tight_list_item = tight;
  }

  // Nodes owned by an extension that supplies its own renderer are
  // delegated entirely to it.
  if (node->extension && node->extension->commonmark_render_func) {
    node->extension->commonmark_render_func(node->extension, renderer, node,
                                            ev_type, options);
    return 1;
  }

  switch (node->type) {
  case CMARK_NODE_DOCUMENT:
    break;

  case CMARK_NODE_BLOCK_QUOTE:
    if (!entering) {
      // drop the "> " that was pushed on entry
      cmark_strbuf_truncate(renderer->prefix, renderer->prefix->size - 2);
      BLANKLINE();
      break;
    }
    LIT("> ");
    renderer->begin_content = true;
    cmark_strbuf_puts(renderer->prefix, "> ");
    break;

  case CMARK_NODE_LIST:
    if (entering || node->next == NULL) {
      break;
    }
    if (node->next->type == CMARK_NODE_CODE_BLOCK ||
        node->next->type == CMARK_NODE_LIST) {
      // this ensures that a following indented code block or list will be
      // interpreted correctly.
      CR();
      LIT("<!-- end list -->");
      BLANKLINE();
    }
    break;

  case CMARK_NODE_ITEM: {
    const bool bullet =
        cmark_node_get_list_type(node->parent) == CMARK_BULLET_LIST;
    char listmarker[LISTMARKER_SIZE];
    bufsize_t marker_width;

    if (bullet) {
      marker_width = 4;
    } else {
      int list_number = cmark_node_get_list_start(node->parent);
      cmark_delim_type list_delim = cmark_node_get_list_delim(node->parent);
      cmark_node *sibling;

      // the item's number is the list start plus the count of earlier items
      for (sibling = node->prev; sibling != NULL; sibling = sibling->prev) {
        list_number += 1;
      }
      // we ensure a width of at least 4 so
      // we get nice transition from single digits
      // to double
      snprintf(listmarker, LISTMARKER_SIZE, "%d%s%s", list_number,
               list_delim == CMARK_PAREN_DELIM ? ")" : ".",
               list_number < 10 ? "  " : " ");
      marker_width = (bufsize_t)strlen(listmarker);
    }

    if (!entering) {
      cmark_strbuf_truncate(renderer->prefix,
                            renderer->prefix->size - marker_width);
      CR();
      break;
    }

    LIT(bullet ? "  - " : listmarker);
    renderer->begin_content = true;
    // continuation lines are indented by the width of the marker
    for (i = 0; i < marker_width; i++) {
      cmark_strbuf_putc(renderer->prefix, ' ');
    }
    break;
  }

  case CMARK_NODE_HEADING:
    if (entering) {
      const int level = cmark_node_get_heading_level(node);

      for (i = 0; i < level; i++) {
        LIT("#");
      }
      LIT(" ");
      renderer->begin_content = true;
      renderer->no_linebreaks = true;
    } else {
      renderer->no_linebreaks = false;
      BLANKLINE();
    }
    break;

  case CMARK_NODE_CODE_BLOCK: {
    const bool first_in_list_item = node->prev == NULL && node->parent &&
                                    node->parent->type == CMARK_NODE_ITEM;
    const char *info;
    const char *code;
    size_t info_len, code_len;
    bool use_indented;

    if (!first_in_list_item) {
      BLANKLINE();
    }
    info = cmark_node_get_fence_info(node);
    info_len = strlen(info);
    code = cmark_node_get_literal(node);
    code_len = strlen(code);

    // use indented form if no info, and code doesn't
    // begin or end with a blank line, and code isn't
    // first thing in a list item
    use_indented = info_len == 0 && code_len > 2 &&
                   !cmark_isspace(code[0]) &&
                   !(cmark_isspace(code[code_len - 1]) &&
                     cmark_isspace(code[code_len - 2])) &&
                   !first_in_list_item;

    if (use_indented) {
      LIT("    ");
      cmark_strbuf_puts(renderer->prefix, "    ");
      OUT(code, false, LITERAL);
      cmark_strbuf_truncate(renderer->prefix, renderer->prefix->size - 4);
    } else {
      // the fence must be longer than any backtick run in the code,
      // and at least three backticks long
      int numticks = longest_backtick_sequence(code) + 1;

      if (numticks < 3) {
        numticks = 3;
      }
      for (i = 0; i < numticks; i++) {
        LIT("`");
      }
      LIT(" ");
      OUT(info, false, LITERAL);
      CR();
      OUT(code, false, LITERAL);
      CR();
      for (i = 0; i < numticks; i++) {
        LIT("`");
      }
    }
    BLANKLINE();
    break;
  }

  case CMARK_NODE_HTML_BLOCK:
    BLANKLINE();
    OUT(cmark_node_get_literal(node), false, LITERAL);
    BLANKLINE();
    break;

  case CMARK_NODE_CUSTOM_BLOCK:
    BLANKLINE();
    OUT(entering ? cmark_node_get_on_enter(node) : cmark_node_get_on_exit(node),
        false, LITERAL);
    BLANKLINE();
    break;

  case CMARK_NODE_THEMATIC_BREAK:
    BLANKLINE();
    LIT("-----");
    BLANKLINE();
    break;

  case CMARK_NODE_PARAGRAPH:
    if (!entering) {
      BLANKLINE();
    }
    break;

  case CMARK_NODE_TEXT:
    OUT(cmark_node_get_literal(node), allow_wrap, NORMAL);
    break;

  case CMARK_NODE_LINEBREAK:
    if (!hardbreaks) {
      LIT("  ");
    }
    CR();
    break;

  case CMARK_NODE_SOFTBREAK:
    if (hardbreaks) {
      LIT("  ");
      CR();
    } else if (!renderer->no_linebreaks && renderer->width == 0 && !nobreaks) {
      CR();
    } else {
      OUT(" ", allow_wrap, LITERAL);
    }
    break;

  case CMARK_NODE_CODE: {
    const char *code = cmark_node_get_literal(node);
    const size_t code_len = strlen(code);
    const int numticks = shortest_unused_backtick_sequence(code);
    // pad with spaces when the content is empty or starts/ends with a
    // backtick or a space
    const bool extra_spaces = code_len == 0 || code[0] == '`' ||
                              code[code_len - 1] == '`' || code[0] == ' ' ||
                              code[code_len - 1] == ' ';

    for (i = 0; i < numticks; i++) {
      LIT("`");
    }
    if (extra_spaces) {
      LIT(" ");
    }
    OUT(code, allow_wrap, LITERAL);
    if (extra_spaces) {
      LIT(" ");
    }
    for (i = 0; i < numticks; i++) {
      LIT("`");
    }
    break;
  }

  case CMARK_NODE_HTML_INLINE:
    OUT(cmark_node_get_literal(node), false, LITERAL);
    break;

  case CMARK_NODE_CUSTOM_INLINE:
    OUT(entering ? cmark_node_get_on_enter(node) : cmark_node_get_on_exit(node),
        false, LITERAL);
    break;

  case CMARK_NODE_STRONG:
    // same delimiter on the way in and on the way out
    LIT("**");
    break;

  case CMARK_NODE_EMPH: {
    // If we have EMPH(EMPH(x)), we need to use *_x_*
    // because **x** is STRONG(x):
    const bool sole_child_of_emph =
        node->parent && node->parent->type == CMARK_NODE_EMPH &&
        node->next == NULL && node->prev == NULL;
    const char *emph_delim = sole_child_of_emph ? "_" : "*";

    LIT(emph_delim);
    break;
  }

  case CMARK_NODE_LINK: {
    const char *title;

    if (is_autolink(node)) {
      const char *url;

      if (!entering) {
        break;
      }
      url = cmark_node_get_url(node);
      LIT("<");
      if (strncmp(url, "mailto:", 7) == 0) {
        LIT(url + 7);
      } else {
        LIT(url);
      }
      LIT(">");
      // return signal to skip contents of node...
      return 0;
    }

    if (entering) {
      LIT("[");
      break;
    }
    LIT("](");
    OUT(cmark_node_get_url(node), false, URL);
    title = cmark_node_get_title(node);
    if (title[0] != '\0') {
      LIT(" \"");
      OUT(title, false, TITLE);
      LIT("\"");
    }
    LIT(")");
    break;
  }

  case CMARK_NODE_IMAGE: {
    const char *title;

    if (entering) {
      LIT("![");
      break;
    }
    LIT("](");
    OUT(cmark_node_get_url(node), false, URL);
    title = cmark_node_get_title(node);
    if (title[0] != '\0') {
      OUT(" \"", allow_wrap, LITERAL);
      OUT(title, false, TITLE);
      LIT("\"");
    }
    LIT(")");
    break;
  }

  case CMARK_NODE_FOOTNOTE_REFERENCE:
    if (entering) {
      LIT("[^");
      OUT(cmark_chunk_to_cstr(renderer->mem, &node->as.literal), false, LITERAL);
      LIT("]");
    }
    break;

  case CMARK_NODE_FOOTNOTE_DEFINITION:
    if (entering) {
      char label[32];

      // definitions are labelled with a running counter kept on the renderer
      renderer->footnote_ix += 1;
      LIT("[^");
      snprintf(label, sizeof(label), "%d", renderer->footnote_ix);
      OUT(label, false, LITERAL);
      LIT("]:\n");

      cmark_strbuf_puts(renderer->prefix, "    ");
    } else {
      cmark_strbuf_truncate(renderer->prefix, renderer->prefix->size - 4);
    }
    break;

  default:
    assert(false);
    break;
  }

  return 1;
}
500 
// Renders the tree rooted at `root` as CommonMark, using the memory
// allocator obtained from cmark_node_mem(root). `options` and `width` are
// passed through unchanged to cmark_render_commonmark_with_mem().
char *cmark_render_commonmark(cmark_node *root, int options, int width) {
  cmark_mem *mem = cmark_node_mem(root);

  return cmark_render_commonmark_with_mem(root, options, width, mem);
}
504 
// Renders the tree rooted at `root` as CommonMark through cmark_render(),
// using `mem` as the allocator and outc / S_render_node as the character
// and node callbacks.
// When CMARK_OPT_HARDBREAKS is set, `width` is forced to 0: breaking on
// width is disabled because it has a different meaning with that option.
char *cmark_render_commonmark_with_mem(cmark_node *root, int options, int width, cmark_mem *mem) {
  const int effective_width = (options & CMARK_OPT_HARDBREAKS) ? 0 : width;

  return cmark_render(mem, root, options, effective_width, outc, S_render_node);
}
513