1 #include <stdlib.h>
2 #include <stdio.h>
3 #include <string.h>
4 #include <stdint.h>
5 #include <assert.h>
6
7 #include "config.h"
8 #include "cmark-gfm.h"
9 #include "node.h"
10 #include "buffer.h"
11 #include "utf8.h"
12 #include "scanners.h"
13 #include "render.h"
14 #include "syntax_extension.h"
15
// Convenience wrappers around the renderer callbacks. OUT, LIT, CR and
// BLANKLINE expect a variable named `renderer` to be in scope at the point
// of use; OUT and LIT additionally expect a variable named `node`.

// Emit string s via renderer->out, forwarding the wrap flag and escaping mode.
#define OUT(s, wrap, escaping) renderer->out(renderer, node, s, wrap, escaping)
// Emit string s via renderer->out with wrap == false and LITERAL escaping.
#define LIT(s) renderer->out(renderer, node, s, false, LITERAL)
// Invoke the renderer's cr callback.
#define CR() renderer->cr(renderer)
// Invoke the renderer's blankline callback.
#define BLANKLINE() renderer->blankline(renderer)
// Size of the scratch buffer outc() uses for a percent-encoded character.
#define ENCODED_SIZE 20
// Size of the scratch buffer S_render_node() uses for an ordered list marker.
#define LISTMARKER_SIZE 20
22
23 // Functions to convert cmark_nodes to commonmark strings.
24
outc(cmark_renderer * renderer,cmark_node * node,cmark_escaping escape,int32_t c,unsigned char nextc)25 static CMARK_INLINE void outc(cmark_renderer *renderer, cmark_node *node,
26 cmark_escaping escape,
27 int32_t c, unsigned char nextc) {
28 bool needs_escaping = false;
29 bool follows_digit =
30 renderer->buffer->size > 0 &&
31 cmark_isdigit(renderer->buffer->ptr[renderer->buffer->size - 1]);
32 char encoded[ENCODED_SIZE];
33
34 needs_escaping =
35 c < 0x80 && escape != LITERAL &&
36 ((escape == NORMAL &&
37 (c == '*' || c == '_' || c == '[' || c == ']' || c == '#' || c == '<' ||
38 c == '>' || c == '\\' || c == '`' || c == '~' || c == '!' ||
39 (c == '&' && cmark_isalpha(nextc)) || (c == '!' && nextc == '[') ||
40 (renderer->begin_content && (c == '-' || c == '+' || c == '=') &&
41 // begin_content doesn't get set to false til we've passed digits
42 // at the beginning of line, so...
43 !follows_digit) ||
44 (renderer->begin_content && (c == '.' || c == ')') && follows_digit &&
45 (nextc == 0 || cmark_isspace(nextc))))) ||
46 (escape == URL &&
47 (c == '`' || c == '<' || c == '>' || cmark_isspace((char)c) || c == '\\' ||
48 c == ')' || c == '(')) ||
49 (escape == TITLE &&
50 (c == '`' || c == '<' || c == '>' || c == '"' || c == '\\')));
51
52 if (needs_escaping) {
53 if (cmark_isspace((char)c)) {
54 // use percent encoding for spaces
55 snprintf(encoded, ENCODED_SIZE, "%%%2x", c);
56 cmark_strbuf_puts(renderer->buffer, encoded);
57 renderer->column += 3;
58 } else {
59 cmark_render_ascii(renderer, "\\");
60 cmark_render_code_point(renderer, c);
61 }
62 } else {
63 cmark_render_code_point(renderer, c);
64 }
65 }
66
// Returns the length of the longest run of consecutive backticks in the
// NUL-terminated string code (0 if it contains none).
static int longest_backtick_sequence(const char *code) {
  int best = 0;
  int run = 0;
  const char *p = code;

  for (;;) {
    const char ch = *p;
    if (ch == '`') {
      run++;
    } else {
      // Any non-backtick, including the terminating NUL, closes the run.
      if (run > best) {
        best = run;
      }
      run = 0;
      if (ch == '\0') {
        break;
      }
    }
    p++;
  }
  return best;
}
85
// Returns the smallest n >= 1 such that code contains no run of exactly n
// consecutive backticks. Only run lengths 1..31 are tracked (one bit each
// in a 32-bit mask), so the result is capped at 32.
static int shortest_unused_backtick_sequence(const char *code) {
  // Bit k set means "a run of length k occurs". Bit 0 starts out set so
  // that 0 is never returned.
  uint32_t seen = 1;
  int run = 0;
  const size_t len = strlen(code);

  // pos == len visits the terminating NUL, which closes a trailing run.
  for (size_t pos = 0; pos <= len; pos++) {
    if (code[pos] == '`') {
      run++;
      continue;
    }
    if (run > 0 && run < 32) {
      seen |= (1U << run);
    }
    run = 0;
  }

  // Find the index of the lowest clear bit.
  int candidate = 0;
  while (candidate < 32 && (seen & (1U << candidate))) {
    candidate++;
  }
  return candidate;
}
112
// Decides whether node can be written in autolink form (<url>).
//
// Returns true only if all of the following hold:
//   - node is a link with a non-empty URL for which scan_scheme() matches
//   - the link has no title
//   - the link's only child is a text node whose literal equals the URL
//     (with a leading "mailto:" stripped from the URL before comparing)
//
// Side effect: calls cmark_consolidate_text_nodes() on the first child.
static bool is_autolink(cmark_node *node) {
  cmark_chunk *title;
  cmark_chunk *url;
  cmark_node *link_text;
  char *realurl;
  int realurllen;

  if (node->type != CMARK_NODE_LINK) {
    return false;
  }

  url = &node->as.link.url;
  if (url->len == 0 || scan_scheme(url, 0) == 0) {
    return false;
  }

  title = &node->as.link.title;
  // if it has a title, we can't treat it as an autolink:
  if (title->len > 0) {
    return false;
  }

  link_text = node->first_child;
  if (link_text == NULL) {
    return false;
  }
  cmark_consolidate_text_nodes(link_text);

  // The autolink form replaces the whole link content with the URL, so it
  // is only valid when the content is exactly one text node. Checking the
  // type also avoids reading as.literal from a node of another kind.
  // NOTE(review): this relies on the consolidation call above having merged
  // adjacent text siblings into link_text -- confirm. If it has not, the
  // function merely falls back to the always-valid [text](url) form.
  if (link_text->type != CMARK_NODE_TEXT || link_text->next != NULL) {
    return false;
  }

  realurl = (char *)url->data;
  realurllen = url->len;
  // Length-checked so the prefix comparison stays within the URL bytes.
  if (realurllen >= 7 && strncmp(realurl, "mailto:", 7) == 0) {
    realurl += 7;
    realurllen -= 7;
  }
  return (realurllen == link_text->as.literal.len &&
          strncmp(realurl, (char *)link_text->as.literal.data,
                  link_text->as.literal.len) == 0);
}
150
151 // if node is a block node, returns node.
152 // otherwise returns first block-level node that is an ancestor of node.
153 // if there is no block-level ancestor, returns NULL.
get_containing_block(cmark_node * node)154 static cmark_node *get_containing_block(cmark_node *node) {
155 while (node) {
156 if (CMARK_NODE_BLOCK_P(node)) {
157 return node;
158 } else {
159 node = node->parent;
160 }
161 }
162 return NULL;
163 }
164
// Renders a single enter/exit event for node as CommonMark text.
//
//   renderer - output state (buffer, prefix, width, begin_content, ...)
//   node     - node the event refers to
//   ev_type  - CMARK_EVENT_ENTER when entering the node, otherwise exiting
//   options  - CMARK_OPT_* bit flags; NOBREAKS and HARDBREAKS are consulted
//
// Returns 1 normally. Returns 0 when entering a link that is rendered in
// autolink form, as a signal to skip the contents of the node.
//
// Nodes owned by a syntax extension that supplies commonmark_render_func
// are delegated to that function.
//
// Invariant maintained throughout: whatever is appended to renderer->prefix
// on entering a container is removed again, with the same width, on exit.
static int S_render_node(cmark_renderer *renderer, cmark_node *node,
                         cmark_event_type ev_type, int options) {
  cmark_node *tmp;
  int list_number;
  cmark_delim_type list_delim;
  int numticks;
  bool extra_spaces;
  int i;
  bool entering = (ev_type == CMARK_EVENT_ENTER);
  const char *info, *code, *title;
  size_t info_len, code_len;
  char listmarker[LISTMARKER_SIZE];
  const char *emph_delim;
  bool first_in_list_item;
  bufsize_t marker_width;
  bool allow_wrap = renderer->width > 0 && !(CMARK_OPT_NOBREAKS & options) &&
                    !(CMARK_OPT_HARDBREAKS & options);

  // Don't adjust tight list status til we've started the list.
  // Otherwise we lose the blank line between a paragraph and
  // a following list.
  if (!(node->type == CMARK_NODE_ITEM && node->prev == NULL && entering)) {
    tmp = get_containing_block(node);
    renderer->in_tight_list_item =
        tmp && // tmp might be NULL if there is no containing block
        ((tmp->type == CMARK_NODE_ITEM &&
          cmark_node_get_list_tight(tmp->parent)) ||
         (tmp->parent && tmp->parent->type == CMARK_NODE_ITEM &&
          cmark_node_get_list_tight(tmp->parent->parent)));
  }

  if (node->extension && node->extension->commonmark_render_func) {
    node->extension->commonmark_render_func(node->extension, renderer, node, ev_type, options);
    return 1;
  }

  switch (node->type) {
  case CMARK_NODE_DOCUMENT:
    break;

  case CMARK_NODE_BLOCK_QUOTE:
    if (entering) {
      LIT("> ");
      renderer->begin_content = true;
      cmark_strbuf_puts(renderer->prefix, "> ");
    } else {
      // remove the 2-character "> " prefix added on entry
      cmark_strbuf_truncate(renderer->prefix, renderer->prefix->size - 2);
      BLANKLINE();
    }
    break;

  case CMARK_NODE_LIST:
    if (!entering && node->next && (node->next->type == CMARK_NODE_CODE_BLOCK ||
                                    node->next->type == CMARK_NODE_LIST)) {
      // this ensures that a following indented code block or list will be
      // interpreted correctly.
      CR();
      LIT("<!-- end list -->");
      BLANKLINE();
    }
    break;

  case CMARK_NODE_ITEM:
    // marker_width is computed on both enter and exit so that the prefix
    // added on entry can be removed with the same width on exit.
    if (cmark_node_get_list_type(node->parent) == CMARK_BULLET_LIST) {
      marker_width = 4; // width of the "  - " marker emitted below
    } else {
      list_number = cmark_node_get_list_start(node->parent);
      list_delim = cmark_node_get_list_delim(node->parent);
      tmp = node;
      while (tmp->prev) {
        tmp = tmp->prev;
        list_number += 1;
      }
      // we ensure a width of at least 4 so
      // we get nice transition from single digits
      // to double: "9.  " and "10. " are both 4 wide.
      snprintf(listmarker, LISTMARKER_SIZE, "%d%s%s", list_number,
               list_delim == CMARK_PAREN_DELIM ? ")" : ".",
               list_number < 10 ? "  " : " ");
      marker_width = (bufsize_t)strlen(listmarker);
    }
    if (entering) {
      if (cmark_node_get_list_type(node->parent) == CMARK_BULLET_LIST) {
        LIT("  - ");
      } else {
        LIT(listmarker);
      }
      renderer->begin_content = true;
      // continuation lines of the item are indented by the marker width
      for (i = 0; i < marker_width; i++) {
        cmark_strbuf_putc(renderer->prefix, ' ');
      }
    } else {
      cmark_strbuf_truncate(renderer->prefix,
                            renderer->prefix->size - marker_width);
      CR();
    }
    break;

  case CMARK_NODE_HEADING:
    if (entering) {
      for (i = cmark_node_get_heading_level(node); i > 0; i--) {
        LIT("#");
      }
      LIT(" ");
      renderer->begin_content = true;
      renderer->no_linebreaks = true;
    } else {
      renderer->no_linebreaks = false;
      BLANKLINE();
    }
    break;

  case CMARK_NODE_CODE_BLOCK:
    first_in_list_item = node->prev == NULL && node->parent &&
                         node->parent->type == CMARK_NODE_ITEM;

    if (!first_in_list_item) {
      BLANKLINE();
    }
    info = cmark_node_get_fence_info(node);
    info_len = strlen(info);
    code = cmark_node_get_literal(node);
    code_len = strlen(code);
    // use indented form if no info, and code doesn't
    // begin or end with a blank line, and code isn't
    // first thing in a list item
    if (info_len == 0 && (code_len > 2 && !cmark_isspace(code[0]) &&
                          !(cmark_isspace(code[code_len - 1]) &&
                            cmark_isspace(code[code_len - 2]))) &&
        !first_in_list_item) {
      // 4 spaces of indentation on the first line and, via the prefix, on
      // every following line; the truncate below removes exactly those 4.
      LIT("    ");
      cmark_strbuf_puts(renderer->prefix, "    ");
      OUT(cmark_node_get_literal(node), false, LITERAL);
      cmark_strbuf_truncate(renderer->prefix, renderer->prefix->size - 4);
    } else {
      // the fence must be longer than any backtick run inside the code,
      // and at least 3 backticks long
      numticks = longest_backtick_sequence(code) + 1;
      if (numticks < 3) {
        numticks = 3;
      }
      for (i = 0; i < numticks; i++) {
        LIT("`");
      }
      LIT(" ");
      OUT(info, false, LITERAL);
      CR();
      OUT(cmark_node_get_literal(node), false, LITERAL);
      CR();
      for (i = 0; i < numticks; i++) {
        LIT("`");
      }
    }
    BLANKLINE();
    break;

  case CMARK_NODE_HTML_BLOCK:
    BLANKLINE();
    OUT(cmark_node_get_literal(node), false, LITERAL);
    BLANKLINE();
    break;

  case CMARK_NODE_CUSTOM_BLOCK:
    BLANKLINE();
    OUT(entering ? cmark_node_get_on_enter(node) : cmark_node_get_on_exit(node),
        false, LITERAL);
    BLANKLINE();
    break;

  case CMARK_NODE_THEMATIC_BREAK:
    BLANKLINE();
    LIT("-----");
    BLANKLINE();
    break;

  case CMARK_NODE_PARAGRAPH:
    if (!entering) {
      BLANKLINE();
    }
    break;

  case CMARK_NODE_TEXT:
    OUT(cmark_node_get_literal(node), allow_wrap, NORMAL);
    break;

  case CMARK_NODE_LINEBREAK:
    // A hard line break is written as two trailing spaces before the
    // newline. With OPT_HARDBREAKS the bare newline is emitted instead.
    if (!(CMARK_OPT_HARDBREAKS & options)) {
      LIT("  ");
    }
    CR();
    break;

  case CMARK_NODE_SOFTBREAK:
    if (CMARK_OPT_HARDBREAKS & options) {
      // soft breaks are rendered as hard breaks: two spaces + newline
      LIT("  ");
      CR();
    } else if (!renderer->no_linebreaks && renderer->width == 0 &&
               !(CMARK_OPT_HARDBREAKS & options) &&
               !(CMARK_OPT_NOBREAKS & options)) {
      CR();
    } else {
      OUT(" ", allow_wrap, LITERAL);
    }
    break;

  case CMARK_NODE_CODE:
    code = cmark_node_get_literal(node);
    code_len = strlen(code);
    numticks = shortest_unused_backtick_sequence(code);
    // pad with a space when the code is empty or starts/ends with a
    // backtick or a space
    extra_spaces = code_len == 0 ||
                   code[0] == '`' || code[code_len - 1] == '`' ||
                   code[0] == ' ' || code[code_len - 1] == ' ';
    for (i = 0; i < numticks; i++) {
      LIT("`");
    }
    if (extra_spaces) {
      LIT(" ");
    }
    OUT(cmark_node_get_literal(node), allow_wrap, LITERAL);
    if (extra_spaces) {
      LIT(" ");
    }
    for (i = 0; i < numticks; i++) {
      LIT("`");
    }
    break;

  case CMARK_NODE_HTML_INLINE:
    OUT(cmark_node_get_literal(node), false, LITERAL);
    break;

  case CMARK_NODE_CUSTOM_INLINE:
    OUT(entering ? cmark_node_get_on_enter(node) : cmark_node_get_on_exit(node),
        false, LITERAL);
    break;

  case CMARK_NODE_STRONG:
    // same delimiter on enter and exit
    LIT("**");
    break;

  case CMARK_NODE_EMPH:
    // If we have EMPH(EMPH(x)), we need to use *_x_*
    // because **x** is STRONG(x):
    if (node->parent && node->parent->type == CMARK_NODE_EMPH &&
        node->next == NULL && node->prev == NULL) {
      emph_delim = "_";
    } else {
      emph_delim = "*";
    }
    // same delimiter on enter and exit
    LIT(emph_delim);
    break;

  case CMARK_NODE_LINK:
    if (is_autolink(node)) {
      if (entering) {
        LIT("<");
        if (strncmp(cmark_node_get_url(node), "mailto:", 7) == 0) {
          LIT((const char *)cmark_node_get_url(node) + 7);
        } else {
          LIT((const char *)cmark_node_get_url(node));
        }
        LIT(">");
        // return signal to skip contents of node...
        return 0;
      }
    } else {
      if (entering) {
        LIT("[");
      } else {
        LIT("](");
        OUT(cmark_node_get_url(node), false, URL);
        title = cmark_node_get_title(node);
        if (strlen(title) > 0) {
          LIT(" \"");
          OUT(title, false, TITLE);
          LIT("\"");
        }
        LIT(")");
      }
    }
    break;

  case CMARK_NODE_IMAGE:
    if (entering) {
      LIT("![");
    } else {
      LIT("](");
      OUT(cmark_node_get_url(node), false, URL);
      title = cmark_node_get_title(node);
      if (strlen(title) > 0) {
        OUT(" \"", allow_wrap, LITERAL);
        OUT(title, false, TITLE);
        LIT("\"");
      }
      LIT(")");
    }
    break;

  case CMARK_NODE_FOOTNOTE_REFERENCE:
    if (entering) {
      LIT("[^");
      OUT(cmark_chunk_to_cstr(renderer->mem, &node->as.literal), false, LITERAL);
      LIT("]");
    }
    break;

  case CMARK_NODE_FOOTNOTE_DEFINITION:
    if (entering) {
      renderer->footnote_ix += 1;
      LIT("[^");
      char n[32];
      snprintf(n, sizeof(n), "%d", renderer->footnote_ix);
      OUT(n, false, LITERAL);
      LIT("]:\n");

      // footnote body is indented by 4 spaces; removed again on exit
      cmark_strbuf_puts(renderer->prefix, "    ");
    } else {
      cmark_strbuf_truncate(renderer->prefix, renderer->prefix->size - 4);
    }
    break;

  default:
    assert(false);
    break;
  }

  return 1;
}
500
// Renders the tree rooted at root as CommonMark, using the allocator that
// cmark_node_mem(root) returns. options and width are passed through
// unchanged to cmark_render_commonmark_with_mem(), whose result is returned.
char *cmark_render_commonmark(cmark_node *root, int options, int width) {
  cmark_mem *mem = cmark_node_mem(root);

  return cmark_render_commonmark_with_mem(root, options, width, mem);
}
504
// Renders the tree rooted at root as CommonMark via cmark_render(), using
// mem as the allocator, outc as the character writer and S_render_node as
// the per-node renderer. Returns whatever cmark_render() returns.
char *cmark_render_commonmark_with_mem(cmark_node *root, int options, int width, cmark_mem *mem) {
  // Breaking on width is disabled under OPT_HARDBREAKS, since width has a
  // different meaning with that option.
  const int effective_width = (options & CMARK_OPT_HARDBREAKS) ? 0 : width;

  return cmark_render(mem, root, options, effective_width, outc, S_render_node);
}
513