1 #include "document.h"
2
3 #include <assert.h>
4 #include <string.h>
5 #include <ctype.h>
6 #include <stdio.h>
7 #include <sys/types.h>
8 #include <sys/stat.h>
9 #include <unistd.h>
10
11 #include "stack.h"
12
13 #ifndef _MSC_VER
14 #include <strings.h>
15 #else
16 #define strncasecmp _strnicmp
17 #endif
18
/* number of buckets in the link reference hash table (hoedown_document.refs) */
#define REF_TABLE_SIZE 8

/* indices into hoedown_document.work_bufs: one pool of work buffers each */
#define BUFFER_BLOCK 0
#define BUFFER_SPAN 1

#define HOEDOWN_LI_END 8 /* internal list flag */

/* prototypes for functions that are not defined in this part of the file */
const char *hoedown_find_block_tag(const char *str, unsigned int len);
int find_ref(reference * refs, char*id, int *counter);
28
29 /***************
30 * LOCAL TYPES *
31 ***************/
32
/* link_ref: reference to a link; one node of a hash-bucket chain */
struct link_ref {
	unsigned int id;	/* hash of the reference name, see hash_link_ref() */

	hoedown_buffer *link;	/* released by free_link_refs() */
	hoedown_buffer *title;	/* released by free_link_refs() */

	struct link_ref *next;	/* next reference stored in the same bucket */
};
42
/* footnote_ref: reference to a footnote */
struct footnote_ref {
	unsigned int id;	/* hash of the footnote name, see hash_link_ref() */

	int is_used;
	unsigned int num;

	hoedown_buffer *contents;	/* released by free_footnote_ref() */
};
52
/* footnote_item: an item in a footnote_list */
struct footnote_item {
	struct footnote_ref *ref;	/* the footnote this item points at */
	struct footnote_item *next;	/* following item, NULL at the tail */
};
58
/* footnote_list: linked list of footnote_item */
struct footnote_list {
	unsigned int count;		/* incremented by add_footnote_ref() */
	struct footnote_item *head;	/* first item, NULL while the list is empty */
	struct footnote_item *tail;	/* last item; new items are appended here */
};
65
/* char_trigger: function pointer to render active chars */
/* returns the number of chars taken care of; */
/* 0 means no action was taken (parse_inline then steps over one char) */
/* data is the pointer of the beginning of the span */
/* offset is the number of valid chars before data */
typedef size_t
(*char_trigger)(hoedown_buffer *ob, hoedown_document *doc, uint8_t *data, size_t offset, size_t size);

/* forward declarations of the handlers stored in markdown_char_ptrs */
static size_t char_emphasis(hoedown_buffer *ob, hoedown_document *doc, uint8_t *data, size_t offset, size_t size);
static size_t char_quote(hoedown_buffer *ob, hoedown_document *doc, uint8_t *data, size_t offset, size_t size);
static size_t char_linebreak(hoedown_buffer *ob, hoedown_document *doc, uint8_t *data, size_t offset, size_t size);
static size_t char_codespan(hoedown_buffer *ob, hoedown_document *doc, uint8_t *data, size_t offset, size_t size);
static size_t char_escape(hoedown_buffer *ob, hoedown_document *doc, uint8_t *data, size_t offset, size_t size);
static size_t char_entity(hoedown_buffer *ob, hoedown_document *doc, uint8_t *data, size_t offset, size_t size);
static size_t char_langle_tag(hoedown_buffer *ob, hoedown_document *doc, uint8_t *data, size_t offset, size_t size);
static size_t char_autolink_url(hoedown_buffer *ob, hoedown_document *doc, uint8_t *data, size_t offset, size_t size);
static size_t char_autolink_email(hoedown_buffer *ob, hoedown_document *doc, uint8_t *data, size_t offset, size_t size);
static size_t char_autolink_www(hoedown_buffer *ob, hoedown_document *doc, uint8_t *data, size_t offset, size_t size);
static size_t char_link(hoedown_buffer *ob, hoedown_document *doc, uint8_t *data, size_t offset, size_t size);
static size_t char_image(hoedown_buffer *ob, hoedown_document *doc, uint8_t *data, size_t offset, size_t size);
static size_t char_superscript(hoedown_buffer *ob, hoedown_document *doc, uint8_t *data, size_t offset, size_t size);
static size_t char_math(hoedown_buffer *ob, hoedown_document *doc, uint8_t *data, size_t offset, size_t size);
static size_t char_ref(hoedown_buffer *ob, hoedown_document *doc, uint8_t *data, size_t offset, size_t size);

/* used by parse_include to render the contents of an included file */
void sub_render(hoedown_document *doc, hoedown_buffer *ob, const uint8_t *data, size_t size, int position);
90
/*
 * markdown_char_t: kinds of active characters.
 * The values are used as indices into markdown_char_ptrs, so the order of
 * the enumerators must stay in step with the order of that table.
 */
enum markdown_char_t {
	MD_CHAR_NONE = 0,
	MD_CHAR_EMPHASIS,
	MD_CHAR_CODESPAN,
	MD_CHAR_LINEBREAK,
	MD_CHAR_LINK,
	MD_CHAR_IMAGE,
	MD_CHAR_LANGLE,
	MD_CHAR_ESCAPE,
	MD_CHAR_ENTITY,
	MD_CHAR_AUTOLINK_URL,
	MD_CHAR_AUTOLINK_EMAIL,
	MD_CHAR_AUTOLINK_WWW,
	MD_CHAR_SUPERSCRIPT,
	MD_CHAR_QUOTE,
	MD_CHAR_MATH,
	MD_CHAR_REF
};

/*
 * markdown_char_ptrs: handler for each markdown_char_t value.
 * parse_inline indexes this table with doc->active_char[byte]; slot 0
 * (MD_CHAR_NONE) is NULL and is never called because parse_inline copies
 * bytes whose active_char entry is 0 straight to the output.
 */
static char_trigger markdown_char_ptrs[] = {
	NULL,
	&char_emphasis,
	&char_codespan,
	&char_linebreak,
	&char_link,
	&char_image,
	&char_langle_tag,
	&char_escape,
	&char_entity,
	&char_autolink_url,
	&char_autolink_email,
	&char_autolink_www,
	&char_superscript,
	&char_quote,
	&char_math,
	&char_ref
};
128
/* hoedown_document: state of one parser instance */
struct hoedown_document {
	hoedown_renderer md;		/* rendering callbacks (md.emphasis, md.link, ...) */
	hoedown_renderer_data data;	/* passed by address to every rendering callback */
	metadata * document_metadata;
	reference * floating_references;
	ext_definition * extensions;
	toc * table_of_contents;
	h_counter counter;

	char * base_folder;		/* folder relative include paths are resolved against; may be NULL */

	struct link_ref *refs[REF_TABLE_SIZE];	/* hash table of link references */
	struct footnote_list footnotes_found;
	struct footnote_list footnotes_used;
	uint8_t active_char[256];	/* byte value -> markdown_char_t index, 0 = inactive */
	hoedown_stack work_bufs[2];	/* work buffer pools, indexed by BUFFER_BLOCK / BUFFER_SPAN */
	hoedown_extensions ext_flags;	/* enabled HOEDOWN_EXT_* extensions */
	size_t max_nesting;		/* parse_inline stops once more work buffers than this are in use */
	int in_link_body;		/* when non-zero, char_autolink_www does nothing */
};
149
150 /***************************
151 * HELPER FUNCTIONS *
152 ***************************/
153
/*
 * startsWith: tells whether `str` begins with the NUL-terminated prefix `pre`.
 *
 * Returns 1 when the first strlen(pre) bytes of `str` equal `pre`, and 0
 * otherwise (including when either pointer is NULL).
 *
 * Only the prefix length is measured. strncmp stops at the first difference
 * or at a NUL in `str`, so a `str` shorter than `pre` can never compare
 * equal and there is no need to walk the whole of `str` with strlen first.
 * That matters because callers hand in a pointer into the document text,
 * where measuring `str` would scan everything that follows on every call.
 */
static int
startsWith(char *pre, char *str)
{
	if (!pre || !str)
		return 0;

	return strncmp(pre, str, strlen(pre)) == 0;
}
163
/*
 * is_separator: returns 1 when chr is a space, an opening parenthesis,
 * a tab or a newline, and 0 for every other byte.
 */
int
is_separator(uint8_t chr)
{
	switch (chr) {
	case ' ':
	case '(':
	case '\t':
	case '\n':
		return 1;
	default:
		return 0;
	}
}
169
/*
 * is_regular_file: tells whether `path` names an existing regular file.
 *
 * path        - file name; absolute when it starts with '/'
 * base_folder - folder a relative `path` is resolved against; when NULL a
 *               relative `path` is resolved against the current working
 *               directory, which is what stat() does on its own
 *
 * Returns non-zero for a regular file, and 0 when the file is missing, is
 * not a regular file, `path` is NULL, or the joined path cannot be
 * allocated.
 */
static int
is_regular_file(const char *path, char * base_folder)
{
	struct stat path_stat;
	const char *target = path;
	char *joined = NULL;
	int rc;

	if (path == NULL)
		return 0;

	if (path[0] != '/' && base_folder != NULL) {
		/* room for base_folder, the '/' separator, path and the NUL */
		size_t len = strlen(base_folder) + strlen(path) + 2;

		joined = malloc(len);
		if (joined == NULL)
			return 0;

		snprintf(joined, len, "%s/%s", base_folder, path);
		target = joined;
	}

	rc = stat(target, &path_stat);
	free(joined);

	/* path_stat is only meaningful when stat() succeeded */
	if (rc != 0)
		return 0;

	return S_ISREG(path_stat.st_mode);
}
202
203 static hoedown_buffer *
newbuf(hoedown_document * doc,int type)204 newbuf(hoedown_document *doc, int type)
205 {
206 static const size_t buf_size[2] = {256, 64};
207 hoedown_buffer *work = NULL;
208 hoedown_stack *pool = &doc->work_bufs[type];
209
210 if (pool->size < pool->asize &&
211 pool->item[pool->size] != NULL) {
212 work = pool->item[pool->size++];
213 work->size = 0;
214 } else {
215 work = hoedown_buffer_new(buf_size[type]);
216 hoedown_stack_push(pool, work);
217 }
218
219 return work;
220 }
221
222 static void
popbuf(hoedown_document * doc,int type)223 popbuf(hoedown_document *doc, int type)
224 {
225 doc->work_bufs[type].size--;
226 }
227
228 static void
unscape_text(hoedown_buffer * ob,hoedown_buffer * src)229 unscape_text(hoedown_buffer *ob, hoedown_buffer *src)
230 {
231 size_t i = 0, org;
232 while (i < src->size) {
233 org = i;
234 while (i < src->size && src->data[i] != '\\')
235 i++;
236
237 if (i > org)
238 hoedown_buffer_put(ob, src->data + org, i - org);
239
240 if (i + 1 >= src->size)
241 break;
242
243 hoedown_buffer_putc(ob, src->data[i + 1]);
244 i += 2;
245 }
246 }
247
/*
 * hash_link_ref: case-insensitive hash of the `length` bytes at `link_ref`.
 * Every byte is lower-cased with tolower() and folded in as
 * hash = hash * 65599 + byte, with unsigned wrap-around.
 */
static unsigned int
hash_link_ref(const uint8_t *link_ref, size_t length)
{
	unsigned int h = 0;

	for (size_t k = 0; k != length; k++) {
		/* 65599 == (1 << 6) + (1 << 16) - 1 */
		h = h * 65599u + (unsigned int)tolower(link_ref[k]);
	}

	return h;
}
259
260 static struct link_ref *
add_link_ref(struct link_ref ** references,const uint8_t * name,size_t name_size)261 add_link_ref(
262 struct link_ref **references,
263 const uint8_t *name, size_t name_size)
264 {
265 struct link_ref *ref = hoedown_calloc(1, sizeof(struct link_ref));
266
267 ref->id = hash_link_ref(name, name_size);
268 ref->next = references[ref->id % REF_TABLE_SIZE];
269
270 references[ref->id % REF_TABLE_SIZE] = ref;
271 return ref;
272 }
273
274 static struct link_ref *
find_link_ref(struct link_ref ** references,uint8_t * name,size_t length)275 find_link_ref(struct link_ref **references, uint8_t *name, size_t length)
276 {
277 unsigned int hash = hash_link_ref(name, length);
278 struct link_ref *ref = NULL;
279
280 ref = references[hash % REF_TABLE_SIZE];
281
282 while (ref != NULL) {
283 if (ref->id == hash)
284 return ref;
285
286 ref = ref->next;
287 }
288
289 return NULL;
290 }
291
292 static void
free_link_refs(struct link_ref ** references)293 free_link_refs(struct link_ref **references)
294 {
295 size_t i;
296
297 for (i = 0; i < REF_TABLE_SIZE; ++i) {
298 struct link_ref *r = references[i];
299 struct link_ref *next;
300
301 while (r) {
302 next = r->next;
303 hoedown_buffer_free(r->link);
304 hoedown_buffer_free(r->title);
305 free(r);
306 r = next;
307 }
308 }
309 }
310
311 static struct footnote_ref *
create_footnote_ref(struct footnote_list * list,const uint8_t * name,size_t name_size)312 create_footnote_ref(struct footnote_list *list, const uint8_t *name, size_t name_size)
313 {
314 struct footnote_ref *ref = hoedown_calloc(1, sizeof(struct footnote_ref));
315 ref->id = hash_link_ref(name, name_size);
316
317 return ref;
318 }
319
320 static int
add_footnote_ref(struct footnote_list * list,struct footnote_ref * ref)321 add_footnote_ref(struct footnote_list *list, struct footnote_ref *ref)
322 {
323 struct footnote_item *item = hoedown_calloc(1, sizeof(struct footnote_item));
324 if (!item)
325 return 0;
326 item->ref = ref;
327
328 if (list->head == NULL) {
329 list->head = list->tail = item;
330 } else {
331 list->tail->next = item;
332 list->tail = item;
333 }
334 list->count++;
335
336 return 1;
337 }
338
339 static struct footnote_ref *
find_footnote_ref(struct footnote_list * list,uint8_t * name,size_t length)340 find_footnote_ref(struct footnote_list *list, uint8_t *name, size_t length)
341 {
342 unsigned int hash = hash_link_ref(name, length);
343 struct footnote_item *item = NULL;
344
345 item = list->head;
346
347 while (item != NULL) {
348 if (item->ref->id == hash)
349 return item->ref;
350 item = item->next;
351 }
352
353 return NULL;
354 }
355
356 static void
free_footnote_ref(struct footnote_ref * ref)357 free_footnote_ref(struct footnote_ref *ref)
358 {
359 hoedown_buffer_free(ref->contents);
360 free(ref);
361 }
362
363 static void
free_footnote_list(struct footnote_list * list,int free_refs)364 free_footnote_list(struct footnote_list *list, int free_refs)
365 {
366 struct footnote_item *item = list->head;
367 struct footnote_item *next;
368
369 while (item) {
370 next = item->next;
371 if (free_refs)
372 free_footnote_ref(item->ref);
373 free(item);
374 item = next;
375 }
376 }
377
378
/*
 * Check whether a char is a Markdown spacing char.
 *
 * Only the plain space and the newline count as spacing here: tabs and
 * carriage returns are filtered out during the preprocessing phase.
 *
 * To be truly UTF-8 compliant this would have to decode a Unicode
 * codepoint and test its space property instead of looking at one byte.
 */
static int
_isspace(int c)
{
	switch (c) {
	case ' ':
	case '\n':
		return 1;
	default:
		return 0;
	}
}
395
/* is_empty_all: returns 1 when all `size` bytes are spacing (also for size 0), else 0 */
static int
is_empty_all(const uint8_t *data, size_t size)
{
	size_t k;

	for (k = 0; k < size; k++) {
		if (!_isspace(data[k]))
			return 0;
	}

	return 1;
}
404
405 /*
406 * Replace all spacing characters in data with spaces. As a special
407 * case, this collapses a newline with the previous space, if possible.
408 */
409 static void
replace_spacing(hoedown_buffer * ob,const uint8_t * data,size_t size)410 replace_spacing(hoedown_buffer *ob, const uint8_t *data, size_t size)
411 {
412 size_t i = 0, mark;
413 hoedown_buffer_grow(ob, size);
414 while (1) {
415 mark = i;
416 while (i < size && data[i] != '\n') i++;
417 hoedown_buffer_put(ob, data + mark, i - mark);
418
419 if (i >= size) break;
420
421 if (!(i > 0 && data[i-1] == ' '))
422 hoedown_buffer_putc(ob, ' ');
423 i++;
424 }
425 }
426
427 /****************************
428 * INLINE PARSING FUNCTIONS *
429 ****************************/
430
/* is_mail_autolink • looks for the address part of a mail autolink and '>' */
/* this is less strict than the original markdown e-mail address matching */
/* returns the length up to and including '>', or 0 when it is no address */
static size_t
is_mail_autolink(uint8_t *data, size_t size)
{
	size_t pos;
	size_t at_count = 0;

	/* address is assumed to be: [-@._a-zA-Z0-9]+ with exactly one '@' */
	for (pos = 0; pos < size; pos++) {
		uint8_t ch = data[pos];

		if (isalnum(ch) || ch == '-' || ch == '.' || ch == '_')
			continue;

		if (ch == '@') {
			at_count++;
			continue;
		}

		/* the closing '>' only counts after exactly one '@' */
		if (ch == '>' && at_count == 1)
			return pos + 1;

		return 0;
	}

	/* ran out of input before the closing '>' */
	return 0;
}
462
463 /* tag_length • returns the length of the given tag, or 0 is it's not valid */
464 static size_t
tag_length(uint8_t * data,size_t size,hoedown_autolink_type * autolink)465 tag_length(uint8_t *data, size_t size, hoedown_autolink_type *autolink)
466 {
467 size_t i, j;
468
469 /* a valid tag can't be shorter than 3 chars */
470 if (size < 3) return 0;
471
472 if (data[0] != '<') return 0;
473
474 /* HTML comment, laxist form */
475 if (size > 5 && data[1] == '!' && data[2] == '-' && data[3] == '-') {
476 i = 5;
477
478 while (i < size && !(data[i - 2] == '-' && data[i - 1] == '-' && data[i] == '>'))
479 i++;
480
481 i++;
482
483 if (i <= size)
484 return i;
485 }
486
487 /* begins with a '<' optionally followed by '/', followed by letter or number */
488 i = (data[1] == '/') ? 2 : 1;
489
490 if (!isalnum(data[i]))
491 return 0;
492
493 /* scheme test */
494 *autolink = HOEDOWN_AUTOLINK_NONE;
495
496 /* try to find the beginning of an URI */
497 while (i < size && (isalnum(data[i]) || data[i] == '.' || data[i] == '+' || data[i] == '-'))
498 i++;
499
500 if (i > 1 && data[i] == '@') {
501 if ((j = is_mail_autolink(data + i, size - i)) != 0) {
502 *autolink = HOEDOWN_AUTOLINK_EMAIL;
503 return i + j;
504 }
505 }
506
507 if (i > 2 && data[i] == ':') {
508 *autolink = HOEDOWN_AUTOLINK_NORMAL;
509 i++;
510 }
511
512 /* completing autolink test: no spacing or ' or " */
513 if (i >= size)
514 *autolink = HOEDOWN_AUTOLINK_NONE;
515
516 else if (*autolink) {
517 j = i;
518
519 while (i < size) {
520 if (data[i] == '\\') i += 2;
521 else if (data[i] == '>' || data[i] == '\'' ||
522 data[i] == '"' || data[i] == ' ' || data[i] == '\n')
523 break;
524 else i++;
525 }
526
527 if (i >= size) return 0;
528 if (i > j && data[i] == '>') return i + 1;
529 /* one of the forbidden chars has been found */
530 *autolink = HOEDOWN_AUTOLINK_NONE;
531 }
532
533 /* looking for something looking like a tag end */
534 while (i < size && data[i] != '>') i++;
535 if (i >= size) return 0;
536 return i + 1;
537 }
538
539 /* parse_inline • parses inline markdown elements */
540 static void
parse_inline(hoedown_buffer * ob,hoedown_document * doc,uint8_t * data,size_t size)541 parse_inline(hoedown_buffer *ob, hoedown_document *doc, uint8_t *data, size_t size)
542 {
543 size_t i = 0, end = 0, consumed = 0;
544 hoedown_buffer work = { 0, 0, 0, 0, NULL, NULL, NULL };
545 uint8_t *active_char = doc->active_char;
546
547 if (doc->work_bufs[BUFFER_SPAN].size +
548 doc->work_bufs[BUFFER_BLOCK].size > doc->max_nesting)
549 return;
550
551 while (i < size) {
552 /* copying inactive chars into the output */
553 while (end < size && active_char[data[end]] == 0)
554 end++;
555
556 if (doc->md.normal_text) {
557 work.data = data + i;
558 work.size = end - i;
559 doc->md.normal_text(ob, &work, &doc->data);
560 }
561 else
562 hoedown_buffer_put(ob, data + i, end - i);
563
564 if (end >= size) break;
565 i = end;
566
567 end = markdown_char_ptrs[ (int)active_char[data[end]] ](ob, doc, data + i, i - consumed, size - i);
568 if (!end) /* no action from the callback */
569 end = i + 1;
570 else {
571 i += end;
572 end = i;
573 consumed = i;
574 }
575 }
576 }
577
/* is_escaped • returns whether special char at data[loc] is escaped by '\\' */
/* only an odd number of directly preceding backslashes escapes the char */
static int
is_escaped(uint8_t *data, size_t loc)
{
	size_t backslashes = 0;

	/* count the unbroken run of backslashes that ends right before loc */
	while (backslashes < loc && data[loc - 1 - backslashes] == '\\')
		backslashes++;

	return backslashes % 2;
}
589
/*
 * find_emph_char • looks for the next emph uint8_t, skipping other constructs
 *
 * Returns the index in data of the next unescaped occurrence of c, stepping
 * over code spans and link-like constructs, or 0 when none is found.
 * Because 0 doubles as "not found", an occurrence at index 0 cannot be
 * reported: callers treat a 0 result as a failed search.
 *
 * Inside a construct that turns out to be unterminated, the first c seen
 * within it (tmp_i) is returned instead.
 */
static size_t
find_emph_char(uint8_t *data, size_t size, uint8_t c)
{
	size_t i = 0;

	while (i < size) {
		/* run ahead to the next char that needs a decision */
		while (i < size && data[i] != c && data[i] != '[' && data[i] != '`')
			i++;

		if (i == size)
			return 0;

		/* not counting escaped chars */
		if (is_escaped(data, i)) {
			i++; continue;
		}

		if (data[i] == c)
			return i;

		/* skipping a codespan */
		if (data[i] == '`') {
			size_t span_nb = 0, bt;
			size_t tmp_i = 0;

			/* counting the number of opening backticks */
			while (i < size && data[i] == '`') {
				i++; span_nb++;
			}

			if (i >= size) return 0;

			/* finding the matching closing sequence */
			bt = 0;
			while (i < size && bt < span_nb) {
				/* remember the first c in case the span never closes */
				if (!tmp_i && data[i] == c) tmp_i = i;
				if (data[i] == '`') bt++;
				else bt = 0;
				i++;
			}

			/* not a well-formed codespan; use found matching emph char */
			if (bt < span_nb && i >= size) return tmp_i;
		}
		/* skipping a link */
		else if (data[i] == '[') {
			size_t tmp_i = 0;
			uint8_t cc;

			/* step over the bracketed text */
			i++;
			while (i < size && data[i] != ']') {
				if (!tmp_i && data[i] == c) tmp_i = i;
				i++;
			}

			/* step over the ']' and any spacing that follows it */
			i++;
			while (i < size && _isspace(data[i]))
				i++;

			if (i >= size)
				return tmp_i;

			/* pick the closer that matches what follows the bracketed text */
			switch (data[i]) {
			case '[':
				cc = ']'; break;

			case '(':
				cc = ')'; break;

			default:
				/* not followed by a second part: not a link after all */
				if (tmp_i)
					return tmp_i;
				else
					continue;
			}

			/* step over the second part, up to its closer */
			i++;
			while (i < size && data[i] != cc) {
				if (!tmp_i && data[i] == c) tmp_i = i;
				i++;
			}

			if (i >= size)
				return tmp_i;

			i++;
		}
	}

	return 0;
}
682
683 /* parse_emph1 • parsing single emphase */
684 /* closed by a symbol not preceded by spacing and not followed by symbol */
685 static size_t
parse_emph1(hoedown_buffer * ob,hoedown_document * doc,uint8_t * data,size_t size,uint8_t c)686 parse_emph1(hoedown_buffer *ob, hoedown_document *doc, uint8_t *data, size_t size, uint8_t c)
687 {
688 size_t i = 0, len;
689 hoedown_buffer *work = 0;
690 int r;
691
692 /* skipping one symbol if coming from emph3 */
693 if (size > 1 && data[0] == c && data[1] == c) i = 1;
694
695 while (i < size) {
696 len = find_emph_char(data + i, size - i, c);
697 if (!len) return 0;
698 i += len;
699 if (i >= size) return 0;
700
701 if (data[i] == c && !_isspace(data[i - 1])) {
702
703 if (doc->ext_flags & HOEDOWN_EXT_NO_INTRA_EMPHASIS) {
704 if (i + 1 < size && isalnum(data[i + 1]))
705 continue;
706 }
707
708 work = newbuf(doc, BUFFER_SPAN);
709 parse_inline(work, doc, data, i);
710
711 if (doc->ext_flags & HOEDOWN_EXT_UNDERLINE && c == '_')
712 r = doc->md.underline(ob, work, &doc->data);
713 else
714 r = doc->md.emphasis(ob, work, &doc->data);
715
716 popbuf(doc, BUFFER_SPAN);
717 return r ? i + 1 : 0;
718 }
719 }
720
721 return 0;
722 }
723
724 /* parse_emph2 • parsing single emphase */
725 static size_t
parse_emph2(hoedown_buffer * ob,hoedown_document * doc,uint8_t * data,size_t size,uint8_t c)726 parse_emph2(hoedown_buffer *ob, hoedown_document *doc, uint8_t *data, size_t size, uint8_t c)
727 {
728 size_t i = 0, len;
729 hoedown_buffer *work = 0;
730 int r;
731
732 while (i < size) {
733 len = find_emph_char(data + i, size - i, c);
734 if (!len) return 0;
735 i += len;
736
737 if (i + 1 < size && data[i] == c && data[i + 1] == c && i && !_isspace(data[i - 1])) {
738 work = newbuf(doc, BUFFER_SPAN);
739 parse_inline(work, doc, data, i);
740
741 if (c == '~')
742 r = doc->md.strikethrough(ob, work, &doc->data);
743 else if (c == '=')
744 r = doc->md.highlight(ob, work, &doc->data);
745 else
746 r = doc->md.double_emphasis(ob, work, &doc->data);
747
748 popbuf(doc, BUFFER_SPAN);
749 return r ? i + 2 : 0;
750 }
751 i++;
752 }
753 return 0;
754 }
755
756 /* parse_emph3 • parsing single emphase */
757 /* finds the first closing tag, and delegates to the other emph */
758 static size_t
parse_emph3(hoedown_buffer * ob,hoedown_document * doc,uint8_t * data,size_t size,uint8_t c)759 parse_emph3(hoedown_buffer *ob, hoedown_document *doc, uint8_t *data, size_t size, uint8_t c)
760 {
761 size_t i = 0, len;
762 int r;
763
764 while (i < size) {
765 len = find_emph_char(data + i, size - i, c);
766 if (!len) return 0;
767 i += len;
768
769 /* skip spacing preceded symbols */
770 if (data[i] != c || _isspace(data[i - 1]))
771 continue;
772
773 if (i + 2 < size && data[i + 1] == c && data[i + 2] == c && doc->md.triple_emphasis) {
774 /* triple symbol found */
775 hoedown_buffer *work = newbuf(doc, BUFFER_SPAN);
776
777 parse_inline(work, doc, data, i);
778 r = doc->md.triple_emphasis(ob, work, &doc->data);
779 popbuf(doc, BUFFER_SPAN);
780 return r ? i + 3 : 0;
781
782 } else if (i + 1 < size && data[i + 1] == c) {
783 /* double symbol found, handing over to emph1 */
784 len = parse_emph1(ob, doc, data - 2, size + 2, c);
785 if (!len) return 0;
786 else return len - 2;
787
788 } else {
789 /* single symbol found, handing over to emph2 */
790 len = parse_emph2(ob, doc, data - 1, size + 1, c);
791 if (!len) return 0;
792 else return len - 1;
793 }
794 }
795 return 0;
796 }
797
798 /* parse_math • parses a math span until the given ending delimiter */
799 static size_t
parse_math(hoedown_buffer * ob,hoedown_document * doc,uint8_t * data,size_t offset,size_t size,const char * end,size_t delimsz,int displaymode)800 parse_math(hoedown_buffer *ob, hoedown_document *doc, uint8_t *data, size_t offset, size_t size, const char *end, size_t delimsz, int displaymode)
801 {
802 hoedown_buffer text = { NULL, 0, 0, 0, NULL, NULL, NULL };
803 size_t i = delimsz;
804
805 if (!doc->md.math)
806 return 0;
807
808 /* find ending delimiter */
809 while (1) {
810 while (i < size && data[i] != (uint8_t)end[0])
811 i++;
812
813 if (i >= size)
814 return 0;
815
816 if (!is_escaped(data, i) && !(i + delimsz > size)
817 && memcmp(data + i, end, delimsz) == 0)
818 break;
819
820 i++;
821 }
822
823 /* prepare buffers */
824 text.data = data + delimsz;
825 text.size = i - delimsz;
826
827 /* if this is a $$ and MATH_EXPLICIT is not active,
828 * guess whether displaymode should be enabled from the context */
829 i += delimsz;
830 if (delimsz == 2 && !(doc->ext_flags & HOEDOWN_EXT_MATH_EXPLICIT))
831 displaymode = is_empty_all(data - offset, offset) && is_empty_all(data + i, size - i);
832
833 /* call callback */
834 if (doc->md.math(ob, &text, displaymode, &doc->data))
835 return i;
836
837 return 0;
838 }
839
/*
 * load_file: reads a whole file into a freshly allocated, NUL-terminated
 * buffer.
 *
 * path        - file name; absolute when it starts with '/'
 * base_folder - folder a relative `path` is resolved against; when NULL a
 *               relative `path` is resolved against the current working
 *               directory, which is what fopen() does on its own
 * size        - receives the number of bytes read (the NUL is not counted)
 *
 * Returns the buffer, which the caller must free(), or NULL when `path` or
 * `size` is NULL, the file cannot be opened or read, or memory runs out.
 * `*size` is 0 on every failure except a NULL `path`, which leaves it
 * untouched.
 */
static char*
load_file(const char* path, char* base_folder, size_t * size)
{
	FILE *f;
	char *contents = NULL;
	long length;
	size_t nread;

	if (path == NULL || size == NULL)
		return NULL;

	*size = 0;

	if (path[0] != '/' && base_folder != NULL) {
		/* room for base_folder, the '/' separator, path and the NUL */
		size_t len = strlen(base_folder) + strlen(path) + 2;
		char *joined = malloc(len);

		if (joined == NULL)
			return NULL;

		snprintf(joined, len, "%s/%s", base_folder, path);
		f = fopen(joined, "rb");
		free(joined);
	} else {
		f = fopen(path, "rb");
	}

	if (f == NULL)
		return NULL;

	/* measure the file by seeking to its end */
	if (fseek(f, 0, SEEK_END) != 0)
		goto out;

	length = ftell(f);
	if (length < 0)
		goto out;

	if (fseek(f, 0, SEEK_SET) != 0)
		goto out;

	contents = malloc((size_t)length + 1);
	if (contents == NULL)
		goto out;

	nread = fread(contents, 1, (size_t)length, f);
	if (nread != (size_t)length && ferror(f)) {
		free(contents);
		contents = NULL;
		goto out;
	}

	/* terminate after what was actually read */
	contents[nread] = 0;
	*size = nread;

out:
	fclose(f);
	return contents;
}
882
883 static size_t
parse_include(hoedown_buffer * ob,hoedown_document * doc,uint8_t * data,size_t offset,size_t size)884 parse_include(hoedown_buffer *ob, hoedown_document *doc, uint8_t *data, size_t offset, size_t size)
885 {
886 /* @include(path) */
887 size_t i = 9;
888 size_t n = 0;
889 for (;i < size; i++)
890 {
891 if (data[i] == ')')
892 {
893 break;
894 }
895 n++;
896 }
897 if (n){
898 char * path = malloc((n+1)*sizeof(uint8_t));
899 path[n] = 0;
900 memcpy(path, data+9, n);
901 if (is_regular_file(path, doc->base_folder)){
902 size_t neu_size = 0;
903 char * buffer = load_file(path, doc->base_folder, &neu_size);
904
905 sub_render(doc, ob, (uint8_t*)buffer, neu_size, 0);
906
907 }
908 free(path);
909 }
910 return i+1;
911 }
912
913
914 /* char_emphasis • single and double emphasis parsing */
915 static size_t
char_emphasis(hoedown_buffer * ob,hoedown_document * doc,uint8_t * data,size_t offset,size_t size)916 char_emphasis(hoedown_buffer *ob, hoedown_document *doc, uint8_t *data, size_t offset, size_t size)
917 {
918 uint8_t c = data[0];
919 size_t ret;
920
921 if (doc->ext_flags & HOEDOWN_EXT_NO_INTRA_EMPHASIS) {
922 if (offset > 0 && !_isspace(data[-1]) && data[-1] != '>' && data[-1] != '(')
923 return 0;
924 }
925
926 if (size > 2 && data[1] != c) {
927 /* spacing cannot follow an opening emphasis;
928 * strikethrough and highlight only takes two characters '~~' */
929 if (c == '~' || c == '=' || _isspace(data[1]) || (ret = parse_emph1(ob, doc, data + 1, size - 1, c)) == 0)
930 return 0;
931
932 return ret + 1;
933 }
934
935 if (size > 3 && data[1] == c && data[2] != c) {
936 if (_isspace(data[2]) || (ret = parse_emph2(ob, doc, data + 2, size - 2, c)) == 0)
937 return 0;
938
939 return ret + 2;
940 }
941
942 if (size > 4 && data[1] == c && data[2] == c && data[3] != c) {
943 if (c == '~' || c == '=' || _isspace(data[3]) || (ret = parse_emph3(ob, doc, data + 3, size - 3, c)) == 0)
944 return 0;
945
946 return ret + 3;
947 }
948
949 return 0;
950 }
951
952
953 /* char_linebreak • '\n' preceded by two spaces (assuming linebreak != 0) */
954 static size_t
char_linebreak(hoedown_buffer * ob,hoedown_document * doc,uint8_t * data,size_t offset,size_t size)955 char_linebreak(hoedown_buffer *ob, hoedown_document *doc, uint8_t *data, size_t offset, size_t size)
956 {
957 if (offset < 2 || data[-1] != ' ' || data[-2] != ' ')
958 return 0;
959
960 /* removing the last space from ob and rendering */
961 while (ob->size && ob->data[ob->size - 1] == ' ')
962 ob->size--;
963
964 return doc->md.linebreak(ob, &doc->data) ? 1 : 0;
965 }
966
967
968 /* char_codespan • '`' parsing a code span (assuming codespan != 0) */
969 static size_t
char_codespan(hoedown_buffer * ob,hoedown_document * doc,uint8_t * data,size_t offset,size_t size)970 char_codespan(hoedown_buffer *ob, hoedown_document *doc, uint8_t *data, size_t offset, size_t size)
971 {
972 hoedown_buffer work = { NULL, 0, 0, 0, NULL, NULL, NULL };
973 size_t end, nb = 0, i, f_begin, f_end;
974
975 /* counting the number of backticks in the delimiter */
976 while (nb < size && data[nb] == '`')
977 nb++;
978
979 /* finding the next delimiter */
980 i = 0;
981 for (end = nb; end < size && i < nb; end++) {
982 if (data[end] == '`') i++;
983 else i = 0;
984 }
985
986 if (i < nb && end >= size)
987 return 0; /* no matching delimiter */
988
989 /* trimming outside spaces */
990 f_begin = nb;
991 while (f_begin < end && data[f_begin] == ' ')
992 f_begin++;
993
994 f_end = end - nb;
995 while (f_end > nb && data[f_end-1] == ' ')
996 f_end--;
997
998 /* real code span */
999 if (f_begin < f_end) {
1000 work.data = data + f_begin;
1001 work.size = f_end - f_begin;
1002
1003 if (!doc->md.codespan(ob, &work, &doc->data))
1004 end = 0;
1005 } else {
1006 if (!doc->md.codespan(ob, 0, &doc->data))
1007 end = 0;
1008 }
1009
1010 return end;
1011 }
1012
1013 /* char_quote • '"' parsing a quote */
1014 static size_t
char_quote(hoedown_buffer * ob,hoedown_document * doc,uint8_t * data,size_t offset,size_t size)1015 char_quote(hoedown_buffer *ob, hoedown_document *doc, uint8_t *data, size_t offset, size_t size)
1016 {
1017 size_t end, nq = 0, i, f_begin, f_end;
1018
1019 /* counting the number of quotes in the delimiter */
1020 while (nq < size && data[nq] == '"')
1021 nq++;
1022
1023 /* finding the next delimiter */
1024 end = nq;
1025 while (1) {
1026 i = end;
1027 end += find_emph_char(data + end, size - end, '"');
1028 if (end == i) return 0; /* no matching delimiter */
1029 i = end;
1030 while (end < size && data[end] == '"' && end - i < nq) end++;
1031 if (end - i >= nq) break;
1032 }
1033
1034 /* trimming outside spaces */
1035 f_begin = nq;
1036 while (f_begin < end && data[f_begin] == ' ')
1037 f_begin++;
1038
1039 f_end = end - nq;
1040 while (f_end > nq && data[f_end-1] == ' ')
1041 f_end--;
1042
1043 /* real quote */
1044 if (f_begin < f_end) {
1045 hoedown_buffer *work = newbuf(doc, BUFFER_SPAN);
1046 parse_inline(work, doc, data + f_begin, f_end - f_begin);
1047
1048 if (!doc->md.quote(ob, work, &doc->data))
1049 end = 0;
1050 popbuf(doc, BUFFER_SPAN);
1051 } else {
1052 if (!doc->md.quote(ob, 0, &doc->data))
1053 end = 0;
1054 }
1055
1056 return end;
1057 }
1058
1059
1060 /* char_escape • '\\' backslash escape */
1061 static size_t
char_escape(hoedown_buffer * ob,hoedown_document * doc,uint8_t * data,size_t offset,size_t size)1062 char_escape(hoedown_buffer *ob, hoedown_document *doc, uint8_t *data, size_t offset, size_t size)
1063 {
1064 static const char *escape_chars = "\\`*_{}[]()#+-.!:|&<>^~=\"$";
1065 hoedown_buffer work = { 0, 0, 0, 0, NULL, NULL, NULL };
1066 size_t w;
1067
1068 if (size > 1) {
1069 if (data[1] == '\\' && (doc->ext_flags & HOEDOWN_EXT_MATH) &&
1070 size > 2 && (data[2] == '(' || data[2] == '[')) {
1071 const char *end = (data[2] == '[') ? "\\\\]" : "\\\\)";
1072 w = parse_math(ob, doc, data, offset, size, end, 3, data[2] == '[');
1073 if (w) return w;
1074 }
1075
1076 if (strchr(escape_chars, data[1]) == NULL)
1077 return 0;
1078
1079 if (doc->md.normal_text) {
1080 work.data = data + 1;
1081 work.size = 1;
1082 doc->md.normal_text(ob, &work, &doc->data);
1083 }
1084 else hoedown_buffer_putc(ob, data[1]);
1085 } else if (size == 1) {
1086 if (doc->md.normal_text) {
1087 work.data = data;
1088 work.size = 1;
1089 doc->md.normal_text(ob, &work, &doc->data);
1090 }
1091 else hoedown_buffer_putc(ob, data[0]);
1092 }
1093
1094 return 2;
1095 }
1096
1097 /* char_entity • '&' escaped when it doesn't belong to an entity */
1098 /* valid entities are assumed to be anything matching &#?[A-Za-z0-9]+; */
1099 static size_t
char_entity(hoedown_buffer * ob,hoedown_document * doc,uint8_t * data,size_t offset,size_t size)1100 char_entity(hoedown_buffer *ob, hoedown_document *doc, uint8_t *data, size_t offset, size_t size)
1101 {
1102 size_t end = 1;
1103 hoedown_buffer work = { 0, 0, 0, 0, NULL, NULL, NULL };
1104
1105 if (end < size && data[end] == '#')
1106 end++;
1107
1108 while (end < size && isalnum(data[end]))
1109 end++;
1110
1111 if (end < size && data[end] == ';')
1112 end++; /* real entity */
1113 else
1114 return 0; /* lone '&' */
1115
1116 if (doc->md.entity) {
1117 work.data = data;
1118 work.size = end;
1119 doc->md.entity(ob, &work, &doc->data);
1120 }
1121 else hoedown_buffer_put(ob, data, end);
1122
1123 return end;
1124 }
1125
1126 /* char_langle_tag • '<' when tags or autolinks are allowed */
1127 static size_t
char_langle_tag(hoedown_buffer * ob,hoedown_document * doc,uint8_t * data,size_t offset,size_t size)1128 char_langle_tag(hoedown_buffer *ob, hoedown_document *doc, uint8_t *data, size_t offset, size_t size)
1129 {
1130 hoedown_buffer work = { NULL, 0, 0, 0, NULL, NULL, NULL };
1131 hoedown_autolink_type altype = HOEDOWN_AUTOLINK_NONE;
1132 size_t end = tag_length(data, size, &altype);
1133 int ret = 0;
1134
1135 work.data = data;
1136 work.size = end;
1137
1138 if (end > 2) {
1139 if (doc->md.autolink && altype != HOEDOWN_AUTOLINK_NONE) {
1140 hoedown_buffer *u_link = newbuf(doc, BUFFER_SPAN);
1141 work.data = data + 1;
1142 work.size = end - 2;
1143 unscape_text(u_link, &work);
1144 ret = doc->md.autolink(ob, u_link, altype, &doc->data);
1145 popbuf(doc, BUFFER_SPAN);
1146 }
1147 else if (doc->md.raw_html)
1148 ret = doc->md.raw_html(ob, &work, &doc->data);
1149 }
1150
1151 if (!ret) return 0;
1152 else return end;
1153 }
1154
1155 static size_t
char_autolink_www(hoedown_buffer * ob,hoedown_document * doc,uint8_t * data,size_t offset,size_t size)1156 char_autolink_www(hoedown_buffer *ob, hoedown_document *doc, uint8_t *data, size_t offset, size_t size)
1157 {
1158 hoedown_buffer *link, *link_url, *link_text;
1159 size_t link_len, rewind;
1160
1161 if (!doc->md.link || doc->in_link_body)
1162 return 0;
1163
1164 link = newbuf(doc, BUFFER_SPAN);
1165
1166 if ((link_len = hoedown_autolink__www(&rewind, link, data, offset, size, HOEDOWN_AUTOLINK_SHORT_DOMAINS)) > 0) {
1167 link_url = newbuf(doc, BUFFER_SPAN);
1168 HOEDOWN_BUFPUTSL(link_url, "http://");
1169 hoedown_buffer_put(link_url, link->data, link->size);
1170
1171 if (ob->size > rewind)
1172 ob->size -= rewind;
1173 else
1174 ob->size = 0;
1175
1176 if (doc->md.normal_text) {
1177 link_text = newbuf(doc, BUFFER_SPAN);
1178 doc->md.normal_text(link_text, link, &doc->data);
1179 doc->md.link(ob, link_text, link_url, NULL, &doc->data);
1180 popbuf(doc, BUFFER_SPAN);
1181 } else {
1182 doc->md.link(ob, link, link_url, NULL, &doc->data);
1183 }
1184 popbuf(doc, BUFFER_SPAN);
1185 }
1186
1187 popbuf(doc, BUFFER_SPAN);
1188 return link_len;
1189 }
1190
1191 static size_t
char_ref(hoedown_buffer * ob,hoedown_document * doc,uint8_t * data,size_t offset,size_t size)1192 char_ref(hoedown_buffer *ob, hoedown_document *doc, uint8_t *data, size_t offset, size_t size)
1193 {
1194
1195 if (startsWith("(#", (char*)data)){
1196 size_t i;
1197 for (i=2; i < size; i++)
1198 {
1199 if (data[i]==')')
1200 break;
1201 }
1202 char * ref_id = malloc((i-1)*sizeof(char));
1203 ref_id[i-2] = 0;
1204 memcpy(ref_id, data+2, i-2);
1205 int count = 0;
1206 if (find_ref(doc->floating_references, ref_id, &count))
1207 {
1208 if (doc->md.ref)
1209 doc->md.ref(ob, ref_id, count);
1210 return i+1;
1211 } else {
1212 if (doc->md.ref)
1213 doc->md.ref(ob, ref_id, -1);
1214 return i+1;
1215 }
1216 }
1217 return 0;
1218 }
1219
1220 static size_t
char_autolink_email(hoedown_buffer * ob,hoedown_document * doc,uint8_t * data,size_t offset,size_t size)1221 char_autolink_email(hoedown_buffer *ob, hoedown_document *doc, uint8_t *data, size_t offset, size_t size)
1222 {
1223
1224 if (startsWith("@include(", (char*)data))
1225 {
1226 return parse_include(ob, doc, data, offset, size);
1227 }
1228 if (startsWith("@\\", (char*)data) && is_separator(data[2])){
1229 if (doc->md.linebreak)
1230 {
1231 doc->md.linebreak(ob, &doc->data);
1232 }
1233 return 3;
1234 }
1235 if (startsWith("@pagebreak", (char*)data))
1236 {
1237 if (doc->md.pagebreak)
1238 {
1239 doc->md.pagebreak(ob);
1240 }
1241 return 10;
1242 }
1243 if (startsWith("@caption(", (char*)data))
1244 {
1245 /** skip it **/
1246 size_t i;
1247 for (i=9; data[i] != '\n' && i < size; i++){
1248 if (data[i] == ')' && data[i-1] != '\\')
1249 break;
1250 }
1251 return i+1;
1252 }
1253 hoedown_buffer *link;
1254 size_t link_len, rewind;
1255
1256 if (!doc->md.autolink || doc->in_link_body)
1257 return 0;
1258
1259 link = newbuf(doc, BUFFER_SPAN);
1260
1261 if ((link_len = hoedown_autolink__email(&rewind, link, data, offset, size, 0)) > 0) {
1262 if (ob->size > rewind)
1263 ob->size -= rewind;
1264 else
1265 ob->size = 0;
1266
1267 doc->md.autolink(ob, link, HOEDOWN_AUTOLINK_EMAIL, &doc->data);
1268 }
1269
1270 popbuf(doc, BUFFER_SPAN);
1271 return link_len;
1272 }
1273
1274 static size_t
char_autolink_url(hoedown_buffer * ob,hoedown_document * doc,uint8_t * data,size_t offset,size_t size)1275 char_autolink_url(hoedown_buffer *ob, hoedown_document *doc, uint8_t *data, size_t offset, size_t size)
1276 {
1277 hoedown_buffer *link;
1278 size_t link_len, rewind;
1279
1280 if (!doc->md.autolink || doc->in_link_body)
1281 return 0;
1282
1283 link = newbuf(doc, BUFFER_SPAN);
1284
1285 if ((link_len = hoedown_autolink__url(&rewind, link, data, offset, size, 0)) > 0) {
1286 if (ob->size > rewind)
1287 ob->size -= rewind;
1288 else
1289 ob->size = 0;
1290
1291 doc->md.autolink(ob, link, HOEDOWN_AUTOLINK_NORMAL, &doc->data);
1292 }
1293
1294 popbuf(doc, BUFFER_SPAN);
1295 return link_len;
1296 }
1297
1298 static size_t
char_image(hoedown_buffer * ob,hoedown_document * doc,uint8_t * data,size_t offset,size_t size)1299 char_image(hoedown_buffer *ob, hoedown_document *doc, uint8_t *data, size_t offset, size_t size) {
1300 size_t ret;
1301
1302 if (size < 2 || data[1] != '[') return 0;
1303
1304 ret = char_link(ob, doc, data + 1, offset + 1, size - 1);
1305 if (!ret) return 0;
1306 return ret + 1;
1307 }
1308
1309 /* char_link • '[': parsing a link, a footnote or an image */
1310 static size_t
char_link(hoedown_buffer * ob,hoedown_document * doc,uint8_t * data,size_t offset,size_t size)1311 char_link(hoedown_buffer *ob, hoedown_document *doc, uint8_t *data, size_t offset, size_t size)
1312 {
1313 int is_img = (offset && data[-1] == '!' && !is_escaped(data - offset, offset - 1));
1314 int is_footnote = (doc->ext_flags & HOEDOWN_EXT_FOOTNOTES && data[1] == '^');
1315 size_t i = 1, txt_e, link_b = 0, link_e = 0, title_b = 0, title_e = 0;
1316 hoedown_buffer *content = NULL;
1317 hoedown_buffer *link = NULL;
1318 hoedown_buffer *title = NULL;
1319 hoedown_buffer *u_link = NULL;
1320 size_t org_work_size = doc->work_bufs[BUFFER_SPAN].size;
1321 int ret = 0, in_title = 0, qtype = 0;
1322
1323 /* checking whether the correct renderer exists */
1324 if ((is_footnote && !doc->md.footnote_ref) || (is_img && !doc->md.image)
1325 || (!is_img && !is_footnote && !doc->md.link))
1326 goto cleanup;
1327
1328 /* looking for the matching closing bracket */
1329 i += find_emph_char(data + i, size - i, ']');
1330 txt_e = i;
1331
1332 if (i < size && data[i] == ']') i++;
1333 else goto cleanup;
1334
1335 /* footnote link */
1336 if (is_footnote) {
1337 hoedown_buffer id = { NULL, 0, 0, 0, NULL, NULL, NULL };
1338 struct footnote_ref *fr;
1339
1340 if (txt_e < 3)
1341 goto cleanup;
1342
1343 id.data = data + 2;
1344 id.size = txt_e - 2;
1345
1346 fr = find_footnote_ref(&doc->footnotes_found, id.data, id.size);
1347
1348 /* mark footnote used */
1349 if (fr && !fr->is_used) {
1350 if(!add_footnote_ref(&doc->footnotes_used, fr))
1351 goto cleanup;
1352 fr->is_used = 1;
1353 fr->num = doc->footnotes_used.count;
1354
1355 /* render */
1356 if (doc->md.footnote_ref)
1357 ret = doc->md.footnote_ref(ob, fr->num, &doc->data);
1358 } else if (doc->md.footnote_ref) {
1359 ret = doc->md.footnote_ref(ob, -1, &doc->data);
1360 }
1361
1362 goto cleanup;
1363 }
1364
1365 /* skip any amount of spacing */
1366 /* (this is much more laxist than original markdown syntax) */
1367 while (i < size && _isspace(data[i]))
1368 i++;
1369
1370 /* inline style link */
1371 if (i < size && data[i] == '(') {
1372 size_t nb_p;
1373
1374 /* skipping initial spacing */
1375 i++;
1376
1377 while (i < size && _isspace(data[i]))
1378 i++;
1379
1380 link_b = i;
1381
1382 /* looking for link end: ' " ) */
1383 /* Count the number of open parenthesis */
1384 nb_p = 0;
1385
1386 while (i < size) {
1387 if (data[i] == '\\') i += 2;
1388 else if (data[i] == '(' && i != 0) {
1389 nb_p++; i++;
1390 }
1391 else if (data[i] == ')') {
1392 if (nb_p == 0) break;
1393 else nb_p--;
1394 i++;
1395 } else if (i >= 1 && _isspace(data[i-1]) && (data[i] == '\'' || data[i] == '"')) break;
1396 else i++;
1397 }
1398
1399 if (i >= size) goto cleanup;
1400 link_e = i;
1401
1402 /* looking for title end if present */
1403 if (data[i] == '\'' || data[i] == '"') {
1404 qtype = data[i];
1405 in_title = 1;
1406 i++;
1407 title_b = i;
1408
1409 while (i < size) {
1410 if (data[i] == '\\') i += 2;
1411 else if (data[i] == qtype) {in_title = 0; i++;}
1412 else if ((data[i] == ')') && !in_title) break;
1413 else i++;
1414 }
1415
1416 if (i >= size) goto cleanup;
1417
1418 /* skipping spacing after title */
1419 title_e = i - 1;
1420 while (title_e > title_b && _isspace(data[title_e]))
1421 title_e--;
1422
1423 /* checking for closing quote presence */
1424 if (data[title_e] != '\'' && data[title_e] != '"') {
1425 title_b = title_e = 0;
1426 link_e = i;
1427 }
1428 }
1429
1430 /* remove spacing at the end of the link */
1431 while (link_e > link_b && _isspace(data[link_e - 1]))
1432 link_e--;
1433
1434 /* remove optional angle brackets around the link */
1435 if (data[link_b] == '<' && data[link_e - 1] == '>') {
1436 link_b++;
1437 link_e--;
1438 }
1439
1440 /* building escaped link and title */
1441 if (link_e > link_b) {
1442 link = newbuf(doc, BUFFER_SPAN);
1443 hoedown_buffer_put(link, data + link_b, link_e - link_b);
1444 }
1445
1446 if (title_e > title_b) {
1447 title = newbuf(doc, BUFFER_SPAN);
1448 hoedown_buffer_put(title, data + title_b, title_e - title_b);
1449 }
1450
1451 i++;
1452 }
1453
1454 /* reference style link */
1455 else if (i < size && data[i] == '[') {
1456 hoedown_buffer *id = newbuf(doc, BUFFER_SPAN);
1457 struct link_ref *lr;
1458
1459 /* looking for the id */
1460 i++;
1461 link_b = i;
1462 while (i < size && data[i] != ']') i++;
1463 if (i >= size) goto cleanup;
1464 link_e = i;
1465
1466 /* finding the link_ref */
1467 if (link_b == link_e)
1468 replace_spacing(id, data + 1, txt_e - 1);
1469 else
1470 hoedown_buffer_put(id, data + link_b, link_e - link_b);
1471
1472 lr = find_link_ref(doc->refs, id->data, id->size);
1473 if (!lr)
1474 goto cleanup;
1475
1476 /* keeping link and title from link_ref */
1477 link = lr->link;
1478 title = lr->title;
1479 i++;
1480 }
1481
1482 /* shortcut reference style link */
1483 else {
1484 hoedown_buffer *id = newbuf(doc, BUFFER_SPAN);
1485 struct link_ref *lr;
1486
1487 /* crafting the id */
1488 replace_spacing(id, data + 1, txt_e - 1);
1489
1490 /* finding the link_ref */
1491 lr = find_link_ref(doc->refs, id->data, id->size);
1492 if (!lr)
1493 goto cleanup;
1494
1495 /* keeping link and title from link_ref */
1496 link = lr->link;
1497 title = lr->title;
1498
1499 /* rewinding the spacing */
1500 i = txt_e + 1;
1501 }
1502
1503 /* building content: img alt is kept, only link content is parsed */
1504 if (txt_e > 1) {
1505 content = newbuf(doc, BUFFER_SPAN);
1506 if (is_img) {
1507 hoedown_buffer_put(content, data + 1, txt_e - 1);
1508 } else {
1509 /* disable autolinking when parsing inline the
1510 * content of a link */
1511 doc->in_link_body = 1;
1512 parse_inline(content, doc, data + 1, txt_e - 1);
1513 doc->in_link_body = 0;
1514 }
1515 }
1516
1517 if (link) {
1518 u_link = newbuf(doc, BUFFER_SPAN);
1519 unscape_text(u_link, link);
1520 }
1521
1522 /* calling the relevant rendering function */
1523 if (is_img) {
1524 ret = doc->md.image(ob, u_link, title, content, &doc->data);
1525 } else {
1526 ret = doc->md.link(ob, content, u_link, title, &doc->data);
1527 }
1528
1529 /* cleanup */
1530 cleanup:
1531 doc->work_bufs[BUFFER_SPAN].size = (int)org_work_size;
1532 return ret ? i : 0;
1533 }
1534
1535 static size_t
char_superscript(hoedown_buffer * ob,hoedown_document * doc,uint8_t * data,size_t offset,size_t size)1536 char_superscript(hoedown_buffer *ob, hoedown_document *doc, uint8_t *data, size_t offset, size_t size)
1537 {
1538 size_t sup_start, sup_len;
1539 hoedown_buffer *sup;
1540
1541 if (!doc->md.superscript)
1542 return 0;
1543
1544 if (size < 2)
1545 return 0;
1546
1547 if (data[1] == '(') {
1548 sup_start = 2;
1549 sup_len = find_emph_char(data + 2, size - 2, ')') + 2;
1550
1551 if (sup_len == size)
1552 return 0;
1553 } else {
1554 sup_start = sup_len = 1;
1555
1556 while (sup_len < size && !_isspace(data[sup_len]))
1557 sup_len++;
1558 }
1559
1560 if (sup_len - sup_start == 0)
1561 return (sup_start == 2) ? 3 : 0;
1562
1563 sup = newbuf(doc, BUFFER_SPAN);
1564 parse_inline(sup, doc, data + sup_start, sup_len - sup_start);
1565 doc->md.superscript(ob, sup, &doc->data);
1566 popbuf(doc, BUFFER_SPAN);
1567
1568 return (sup_start == 2) ? sup_len + 1 : sup_len;
1569 }
1570
1571 static size_t
char_math(hoedown_buffer * ob,hoedown_document * doc,uint8_t * data,size_t offset,size_t size)1572 char_math(hoedown_buffer *ob, hoedown_document *doc, uint8_t *data, size_t offset, size_t size)
1573 {
1574 /* double dollar */
1575 if (size > 1 && data[1] == '$')
1576 return parse_math(ob, doc, data, offset, size, "$$", 2, 1);
1577
1578 /* single dollar allowed only with MATH_EXPLICIT flag */
1579 if (doc->ext_flags & HOEDOWN_EXT_MATH_EXPLICIT)
1580 return parse_math(ob, doc, data, offset, size, "$", 1, 0);
1581
1582 return 0;
1583 }
1584
1585 /*********************************
1586 * BLOCK-LEVEL PARSING FUNCTIONS *
1587 *********************************/
1588
/* is_empty • returns 0 when the first line holds anything but spaces; */
/* otherwise the index just past its newline (size + 1 if none was found) */
static size_t
is_empty(const uint8_t *data, size_t size)
{
	size_t pos = 0;

	while (pos < size && data[pos] != '\n') {
		if (data[pos] != ' ')
			return 0;
		pos++;
	}

	return pos + 1;
}
1601
/* is_hrule • returns whether a line is a horizontal rule: */
/* up to three leading spaces, then only spaces and at least three */
/* occurrences of one of '*', '-' or '_' before the end of line */
static int
is_hrule(uint8_t *data, size_t size)
{
	size_t pos = 0, count = 0;
	uint8_t mark;

	if (size < 3)
		return 0;

	/* skipping up to three initial spaces (size >= 3 keeps this in bounds) */
	while (pos < 3 && data[pos] == ' ')
		pos++;

	/* at least three chars must remain from the rule char on */
	if (pos + 2 >= size)
		return 0;

	/* looking at the rule char */
	mark = data[pos];
	if (mark != '*' && mark != '-' && mark != '_')
		return 0;

	/* the whole line must be the char or space */
	for (; pos < size && data[pos] != '\n'; pos++) {
		if (data[pos] == mark)
			count++;
		else if (data[pos] != ' ')
			return 0;
	}

	return count >= 3;
}
1632
/* is_codefence • checks whether a line opens or closes a code fence: */
/* up to three leading spaces followed by at least three '~' or '`' */
/* returns the index just past the fence chars, or 0 when not a fence; */
/* on success the fence width and char are stored through width and chr, */
/* either of which may be NULL */
static size_t
is_codefence(uint8_t *data, size_t size, size_t *width, uint8_t *chr)
{
	size_t i = 0, n = 1;
	uint8_t c;

	/* skipping initial spaces */
	if (size < 3)
		return 0;

	while (i < 3 && data[i] == ' ')
		i++;

	/* the bound is checked before data[i] is read: with three leading
	 * spaces and size == 3, data[3] lies outside the line */
	if (i + 2 >= size)
		return 0;

	/* looking at the fence char */
	c = data[i];
	if (c != '~' && c != '`')
		return 0;

	/* the fence must be that same character */
	while (++i < size && data[i] == c)
		++n;

	if (n < 3)
		return 0;

	if (width) *width = n;
	if (chr) *chr = c;
	return i;
}
1666
1667 /* expects single line, checks if it's a codefence and extracts language */
1668 static size_t
parse_codefence(uint8_t * data,size_t size,hoedown_buffer * lang,size_t * width,uint8_t * chr)1669 parse_codefence(uint8_t *data, size_t size, hoedown_buffer *lang, size_t *width, uint8_t *chr)
1670 {
1671 size_t i, w, lang_start;
1672
1673 i = w = is_codefence(data, size, width, chr);
1674 if (i == 0)
1675 return 0;
1676
1677 while (i < size && _isspace(data[i]))
1678 i++;
1679
1680 lang_start = i;
1681
1682 while (i < size && !_isspace(data[i]))
1683 i++;
1684
1685 lang->data = data + lang_start;
1686 lang->size = i - lang_start;
1687
1688 /* Avoid parsing a codespan as a fence */
1689 i = lang_start + 2;
1690 while (i < size && !(data[i] == *chr && data[i-1] == *chr && data[i-2] == *chr)) i++;
1691 if (i < size) return 0;
1692
1693 return w;
1694 }
1695
1696 /* is_atxheader • returns whether the line is a hash-prefixed header */
1697 static int
is_atxheader(hoedown_document * doc,uint8_t * data,size_t size)1698 is_atxheader(hoedown_document *doc, uint8_t *data, size_t size)
1699 {
1700 if (data[0] != '#')
1701 return 0;
1702
1703 if (doc->ext_flags & HOEDOWN_EXT_SPACE_HEADERS) {
1704 size_t level = 0;
1705
1706 while (level < size && level < 6 && data[level] == '#')
1707 level++;
1708
1709 if (level < size && data[level] != ' ')
1710 return 0;
1711 }
1712
1713 return 1;
1714 }
1715
/* is_headerline • returns whether the line is a setext-style hdr underline */
/* 1 for a run of '=', 2 for a run of '-', 0 otherwise; only spaces may */
/* follow the run before the end of line */
/* data[0] is read unconditionally: size must be at least 1 */
static int
is_headerline(uint8_t *data, size_t size)
{
	uint8_t mark = data[0];
	size_t pos = 1;
	int level;

	if (mark == '=')
		level = 1;
	else if (mark == '-')
		level = 2;
	else
		return 0;

	/* the underline itself, then optional trailing spaces */
	while (pos < size && data[pos] == mark)
		pos++;
	while (pos < size && data[pos] == ' ')
		pos++;

	/* anything else before the newline disqualifies the line */
	if (pos < size && data[pos] != '\n')
		return 0;

	return level;
}
1736
/* is_next_headerline • returns the is_headerline() result for the line */
/* following the first one, or 0 when there is no following line */
static int
is_next_headerline(uint8_t *data, size_t size)
{
	size_t next;

	/* finding the end of the first line */
	for (next = 0; next < size && data[next] != '\n'; next++)
		;

	/* stepping over the newline */
	next++;
	if (next >= size)
		return 0;

	return is_headerline(data + next, size - next);
}
1750
/* prefix_quote • returns blockquote prefix length: up to three spaces, */
/* a '>' and one optional space; 0 when the line is not a quote */
static size_t
prefix_quote(uint8_t *data, size_t size)
{
	size_t pos = 0;

	/* up to three leading spaces */
	while (pos < 3 && pos < size && data[pos] == ' ')
		pos++;

	if (pos >= size || data[pos] != '>')
		return 0;
	pos++;

	/* a single space after the '>' belongs to the prefix */
	if (pos < size && data[pos] == ' ')
		pos++;

	return pos;
}
1769
/* prefix_code • returns prefix length for block code: 4 when the line */
/* starts with four spaces, 0 otherwise */
static size_t
prefix_code(uint8_t *data, size_t size)
{
	size_t pos;

	if (size <= 3)
		return 0;

	for (pos = 0; pos < 4; pos++) {
		if (data[pos] != ' ')
			return 0;
	}

	return 4;
}
1779
/* prefix_oli • returns ordered list item prefix: up to three spaces, */
/* one or more digits, then ". "; 0 when the line is not such an item */
/* or when the following line is a setext header underline */
static size_t
prefix_oli(uint8_t *data, size_t size)
{
	size_t pos = 0;

	/* up to three leading spaces */
	while (pos < 3 && pos < size && data[pos] == ' ')
		pos++;

	/* at least one digit is required */
	if (pos >= size || data[pos] < '0' || data[pos] > '9')
		return 0;

	do {
		pos++;
	} while (pos < size && data[pos] >= '0' && data[pos] <= '9');

	/* the number must be followed by a dot and a space */
	if (pos + 1 >= size || data[pos] != '.' || data[pos + 1] != ' ')
		return 0;

	if (is_next_headerline(data + pos, size - pos))
		return 0;

	return pos + 2;
}
1804
/* prefix_checkbox • returns unchecked checkbox item prefix: up to three */
/* spaces followed by "- [ ] "; 0 when the line is not such an item or */
/* when the following line is a setext header underline */
static size_t
prefix_checkbox(uint8_t *data, size_t size)
{
	size_t i = 0;

	/* up to three leading spaces */
	while (i < 3 && i < size && data[i] == ' ')
		i++;

	/* the marker is six bytes long, so data[i] .. data[i + 5] must all
	 * exist (checking only i + 3 let the last two reads run past size) */
	if (i + 5 >= size || memcmp(data + i, "- [ ] ", 6) != 0)
		return 0;

	if (is_next_headerline(data + i, size - i))
		return 0;

	return i + 6;
}
1823
/* prefix_checkbox_checked • returns checked checkbox item prefix: up to */
/* three spaces followed by "- [x] "; 0 when the line is not such an item */
/* or when the following line is a setext header underline */
static size_t
prefix_checkbox_checked(uint8_t *data, size_t size)
{
	size_t i = 0;

	/* up to three leading spaces */
	while (i < 3 && i < size && data[i] == ' ')
		i++;

	/* the marker is six bytes long, so data[i] .. data[i + 5] must all
	 * exist (checking only i + 3 let the last two reads run past size) */
	if (i + 5 >= size || memcmp(data + i, "- [x] ", 6) != 0)
		return 0;

	if (is_next_headerline(data + i, size - i))
		return 0;

	return i + 6;
}
1842
/* prefix_uli • returns unordered list item prefix: up to three spaces, */
/* one of '*', '+' or '-', then a space; 0 when the line is not such an */
/* item or when the following line is a setext header underline */
static size_t
prefix_uli(uint8_t *data, size_t size)
{
	size_t pos = 0;
	uint8_t bullet;

	/* up to three leading spaces */
	while (pos < 3 && pos < size && data[pos] == ' ')
		pos++;

	/* the bullet and the space after it must both fit */
	if (pos + 1 >= size)
		return 0;

	bullet = data[pos];
	if (bullet != '*' && bullet != '+' && bullet != '-')
		return 0;

	if (data[pos + 1] != ' ')
		return 0;

	if (is_next_headerline(data + pos, size - pos))
		return 0;

	return pos + 2;
}
1863
/* prefix_float • returns 1 when data starts with one of the '@' block */
/* keywords tested below, 0 otherwise; size is not consulted */
static size_t
prefix_float(uint8_t * data, size_t size)
{
	char *txt = (char*) data;

	if (startsWith("@figure", txt))
		return 1;
	if (startsWith("@table",txt))
		return 1;
	if (startsWith("@code", txt))
		return 1;
	if (startsWith("@listing",txt))
		return 1;
	if (startsWith("@abstract", txt))
		return 1;
	if (startsWith("@equation", txt))
		return 1;
	if (startsWith("@toc", txt))
		return 1;

	return 0;
}
1873
1874 /* parse_block • parsing of one block, returning next uint8_t to parse */
1875 static void parse_block(hoedown_buffer *ob, hoedown_document *doc,
1876 uint8_t *data, size_t size, int position);
1877
1878
1879 /* parse_blockquote • handles parsing of a blockquote fragment */
1880 static size_t
parse_blockquote(hoedown_buffer * ob,hoedown_document * doc,uint8_t * data,size_t size)1881 parse_blockquote(hoedown_buffer *ob, hoedown_document *doc, uint8_t *data, size_t size)
1882 {
1883 size_t beg, end = 0, pre, work_size = 0;
1884 uint8_t *work_data = 0;
1885 hoedown_buffer *out = 0;
1886
1887 out = newbuf(doc, BUFFER_BLOCK);
1888 beg = 0;
1889 while (beg < size) {
1890 for (end = beg + 1; end < size && data[end - 1] != '\n'; end++);
1891
1892 pre = prefix_quote(data + beg, end - beg);
1893
1894 if (pre)
1895 beg += pre; /* skipping prefix */
1896
1897 /* empty line followed by non-quote line */
1898 else if (is_empty(data + beg, end - beg) &&
1899 (end >= size || (prefix_quote(data + end, size - end) == 0 &&
1900 !is_empty(data + end, size - end))))
1901 break;
1902
1903 if (beg < end) { /* copy into the in-place working buffer */
1904 /* hoedown_buffer_put(work, data + beg, end - beg); */
1905 if (!work_data)
1906 work_data = data + beg;
1907 else if (data + beg != work_data + work_size)
1908 memmove(work_data + work_size, data + beg, end - beg);
1909 work_size += end - beg;
1910 }
1911 beg = end;
1912 }
1913
1914 parse_block(out, doc, work_data, work_size, -1);
1915 if (doc->md.blockquote)
1916 doc->md.blockquote(ob, out, &doc->data);
1917 popbuf(doc, BUFFER_BLOCK);
1918 return end;
1919 }
1920
1921 static size_t
1922 parse_htmlblock(hoedown_buffer *ob, hoedown_document *doc, uint8_t *data, size_t size, int do_render);
1923
1924 /* parse_blockquote • handles parsing of a regular paragraph */
1925 static size_t
parse_paragraph(hoedown_buffer * ob,hoedown_document * doc,uint8_t * data,size_t size)1926 parse_paragraph(hoedown_buffer *ob, hoedown_document *doc, uint8_t *data, size_t size)
1927 {
1928 hoedown_buffer work = { NULL, 0, 0, 0, NULL, NULL, NULL };
1929 size_t i = 0, end = 0;
1930 int level = 0;
1931
1932 work.data = data;
1933
1934 while (i < size) {
1935 for (end = i + 1; end < size && data[end - 1] != '\n'; end++) /* empty */;
1936
1937 if (is_empty(data + i, size - i))
1938 break;
1939
1940 if ((level = is_headerline(data + i, size - i)) != 0)
1941 break;
1942
1943 if (is_atxheader(doc, data + i, size - i) ||
1944 is_hrule(data + i, size - i) ||
1945 prefix_quote(data + i, size - i)) {
1946 end = i;
1947 break;
1948 }
1949
1950 i = end;
1951 }
1952
1953 work.size = i;
1954 while (work.size && data[work.size - 1] == '\n')
1955 work.size--;
1956
1957 if (!level) {
1958 hoedown_buffer *tmp = newbuf(doc, BUFFER_BLOCK);
1959 parse_inline(tmp, doc, work.data, work.size);
1960 if (doc->md.paragraph)
1961 doc->md.paragraph(ob, tmp, &doc->data);
1962 popbuf(doc, BUFFER_BLOCK);
1963 } else {
1964 hoedown_buffer *header_work;
1965
1966 if (work.size) {
1967 size_t beg;
1968 i = work.size;
1969 work.size -= 1;
1970
1971 while (work.size && data[work.size] != '\n')
1972 work.size -= 1;
1973
1974 beg = work.size + 1;
1975 while (work.size && data[work.size - 1] == '\n')
1976 work.size -= 1;
1977
1978 if (work.size > 0) {
1979 hoedown_buffer *tmp = newbuf(doc, BUFFER_BLOCK);
1980 parse_inline(tmp, doc, work.data, work.size);
1981
1982 if (doc->md.paragraph)
1983 doc->md.paragraph(ob, tmp, &doc->data);
1984
1985 popbuf(doc, BUFFER_BLOCK);
1986 work.data += beg;
1987 work.size = i - beg;
1988 }
1989 else work.size = i;
1990 }
1991
1992 header_work = newbuf(doc, BUFFER_SPAN);
1993 parse_inline(header_work, doc, work.data, work.size);
1994 if (level == 1)
1995 {
1996 doc->counter.chapter++;
1997 doc->counter.section = 0;
1998 doc->counter.subsection = 0;
1999 } else if (level == 2) {
2000 doc->counter.section++;
2001 doc->counter.subsection=0;
2002 } else if (level == 3) {
2003 doc->counter.subsection++;
2004 }
2005
2006 if (doc->md.header){
2007
2008 doc->md.header(ob, header_work, (int)level, &doc->data, doc->counter, doc->document_metadata->numbering);
2009 }
2010 popbuf(doc, BUFFER_SPAN);
2011 }
2012
2013 return end;
2014 }
2015
2016 /* parse_fencedcode • handles parsing of a block-level code fragment */
2017 static size_t
parse_fencedcode(hoedown_buffer * ob,hoedown_document * doc,uint8_t * data,size_t size)2018 parse_fencedcode(hoedown_buffer *ob, hoedown_document *doc, uint8_t *data, size_t size)
2019 {
2020 hoedown_buffer text = { 0, 0, 0, 0, NULL, NULL, NULL };
2021 hoedown_buffer lang = { 0, 0, 0, 0, NULL, NULL, NULL };
2022 size_t i = 0, text_start, line_start;
2023 size_t w, w2;
2024 size_t width, width2;
2025 uint8_t chr, chr2;
2026
2027 /* parse codefence line */
2028 while (i < size && data[i] != '\n')
2029 i++;
2030
2031 w = parse_codefence(data, i, &lang, &width, &chr);
2032 if (!w)
2033 return 0;
2034
2035 /* search for end */
2036 i++;
2037 text_start = i;
2038 while ((line_start = i) < size) {
2039 while (i < size && data[i] != '\n')
2040 i++;
2041
2042 w2 = is_codefence(data + line_start, i - line_start, &width2, &chr2);
2043 if (w == w2 && width == width2 && chr == chr2 &&
2044 is_empty(data + (line_start+w), i - (line_start+w)))
2045 break;
2046
2047 i++;
2048 }
2049
2050 text.data = data + text_start;
2051 text.size = line_start - text_start;
2052
2053 if (doc->md.blockcode)
2054 doc->md.blockcode(ob, text.size ? &text : NULL, lang.size ? &lang : NULL, &doc->data);
2055
2056 return i;
2057 }
2058
2059 static size_t
parse_blockcode(hoedown_buffer * ob,hoedown_document * doc,uint8_t * data,size_t size)2060 parse_blockcode(hoedown_buffer *ob, hoedown_document *doc, uint8_t *data, size_t size)
2061 {
2062 size_t beg, end, pre;
2063 hoedown_buffer *work = 0;
2064
2065 work = newbuf(doc, BUFFER_BLOCK);
2066
2067 beg = 0;
2068 while (beg < size) {
2069 for (end = beg + 1; end < size && data[end - 1] != '\n'; end++) {};
2070 pre = prefix_code(data + beg, end - beg);
2071
2072 if (pre)
2073 beg += pre; /* skipping prefix */
2074 else if (!is_empty(data + beg, end - beg))
2075 /* non-empty non-prefixed line breaks the pre */
2076 break;
2077
2078 if (beg < end) {
2079 /* verbatim copy to the working buffer,
2080 escaping entities */
2081 if (is_empty(data + beg, end - beg))
2082 hoedown_buffer_putc(work, '\n');
2083 else hoedown_buffer_put(work, data + beg, end - beg);
2084 }
2085 beg = end;
2086 }
2087
2088 while (work->size && work->data[work->size - 1] == '\n')
2089 work->size -= 1;
2090
2091 hoedown_buffer_putc(work, '\n');
2092
2093 if (doc->md.blockcode)
2094 doc->md.blockcode(ob, work, NULL, &doc->data);
2095
2096 popbuf(doc, BUFFER_BLOCK);
2097 return beg;
2098 }
2099
2100
2101
/* parse_listitem • parsing of a single list item */
/* data must start on the item's own marker line: the marker (checkbox, */
/* checked checkbox, bullet or number, tried in that order) is recognised */
/* and skipped here */
/* the item text is gathered into a working buffer, rendered into an */
/* intermediate buffer and handed to md.listitem together with *flags */
/* *flags may gain HOEDOWN_LI_BLOCK (the item spans empty lines) and */
/* HOEDOWN_LI_END (the enclosing list ends with this item) */
/* returns the number of chars consumed, 0 when data holds no item marker */
static size_t
parse_listitem(hoedown_buffer *ob, hoedown_document *doc, uint8_t *data, size_t size, hoedown_list_flags *flags)
{
	hoedown_buffer *work = 0, *inter = 0;
	size_t beg = 0, end, pre, sublist = 0, orgpre = 0, i;
	int in_empty = 0, has_inside_empty = 0, in_fence = 0;

	/* keeping track of the first indentation prefix */
	while (orgpre < 3 && orgpre < size && data[orgpre] == ' ')
		orgpre++;

	/* recognising the marker: the first prefix function returning a */
	/* non-zero length wins */
	beg = prefix_checkbox(data, size);
	if (!beg)
		beg=prefix_checkbox_checked(data,size);
	if (!beg)
		beg = prefix_uli(data, size);
	if (!beg)
		beg = prefix_oli(data, size);

	if (!beg)
		return 0;

	/* skipping to the beginning of the following line */
	end = beg;
	while (end < size && data[end - 1] != '\n')
		end++;

	/* getting working buffers */
	/* work collects the raw item text, inter receives the rendered item */
	work = newbuf(doc, BUFFER_SPAN);
	inter = newbuf(doc, BUFFER_SPAN);

	/* putting the first line into the working buffer */
	hoedown_buffer_put(work, data + beg, end - beg);
	beg = end;

	/* process the following lines */
	while (beg < size) {
		size_t has_next_uli = 0, has_next_oli = 0;

		/* end currently equals beg: move it to the start of the next line */
		end++;

		while (end < size && data[end - 1] != '\n')
			end++;

		/* process an empty line */
		/* it is only remembered here; whether it belongs to the item is */
		/* decided when the next non-empty line is seen */
		if (is_empty(data + beg, end - beg)) {
			in_empty = 1;
			beg = end;
			continue;
		}

		/* calculating the indentation */
		/* at most four leading spaces are counted and later stripped */
		i = 0;
		while (i < 4 && beg + i < end && data[beg + i] == ' ')
			i++;

		pre = i;

		/* every fence line toggles the "inside a fenced block" state */
		if (doc->ext_flags & HOEDOWN_EXT_FENCED_CODE) {
			if (is_codefence(data + beg + i, end - beg - i, NULL, NULL))
				in_fence = !in_fence;
		}

		/* Only check for new list items if we are **not** inside
		 * a fenced code block */
		if (!in_fence) {
			has_next_uli = prefix_uli(data + beg + i, end - beg - i);
			has_next_oli = prefix_oli(data + beg + i, end - beg - i);
		}

		/* checking for a new item */
		/* a bullet line that is also a horizontal rule is not an item */
		if ((has_next_uli && !is_hrule(data + beg + i, end - beg - i)) || has_next_oli) {
			if (in_empty)
				has_inside_empty = 1;

			/* the following item must have the same (or less) indentation */
			if (pre <= orgpre) {
				/* if the following item has different list type, we end this list */
				if (in_empty && (
					((*flags & HOEDOWN_LIST_ORDERED) && has_next_uli) ||
					(!(*flags & HOEDOWN_LIST_ORDERED) && has_next_oli)))
					*flags |= HOEDOWN_LI_END;

				break;
			}

			/* a deeper item: remember where the first sublist starts */
			/* inside the working buffer */
			if (!sublist)
				sublist = work->size;
		}
		/* joining only indented stuff after empty lines;
		 * note that now we only require 1 space of indentation
		 * to continue a list */
		else if (in_empty && pre == 0) {
			*flags |= HOEDOWN_LI_END;
			break;
		}

		/* the pending empty line belongs to this item after all */
		if (in_empty) {
			hoedown_buffer_putc(work, '\n');
			has_inside_empty = 1;
			in_empty = 0;
		}

		/* adding the line without prefix into the working buffer */
		hoedown_buffer_put(work, data + beg + i, end - beg - i);
		beg = end;
	}

	/* render of li contents */
	if (has_inside_empty)
		*flags |= HOEDOWN_LI_BLOCK;

	if (*flags & HOEDOWN_LI_BLOCK) {
		/* intermediate render of block li */
		/* text before the first sublist and the sublist itself are */
		/* parsed as two separate runs of blocks */
		if (sublist && sublist < work->size) {
			parse_block(inter, doc, work->data, sublist, -1);
			parse_block(inter, doc, work->data + sublist, work->size - sublist, -1);
		}
		else
			parse_block(inter, doc, work->data, work->size, -1);
	} else {
		/* intermediate render of inline li */
		/* text before the first sublist is inline, the sublist is a block */
		if (sublist && sublist < work->size) {
			parse_inline(inter, doc, work->data, sublist);
			parse_block(inter, doc, work->data + sublist, work->size - sublist, -1);
		}
		else
			parse_inline(inter, doc, work->data, work->size);
	}

	/* render of li itself */
	if (doc->md.listitem)
		doc->md.listitem(ob, inter, *flags, &doc->data);

	/* releasing inter and work */
	popbuf(doc, BUFFER_SPAN);
	popbuf(doc, BUFFER_SPAN);
	return beg;
}
2242
2243
2244 /* parse_list • parsing ordered or unordered list block */
2245 static size_t
parse_list(hoedown_buffer * ob,hoedown_document * doc,uint8_t * data,size_t size,hoedown_list_flags flags)2246 parse_list(hoedown_buffer *ob, hoedown_document *doc, uint8_t *data, size_t size, hoedown_list_flags flags)
2247 {
2248 hoedown_buffer *work = 0;
2249 size_t i = 0, j;
2250
2251 work = newbuf(doc, BUFFER_BLOCK);
2252
2253 while (i < size) {
2254 j = parse_listitem(work, doc, data + i, size - i, &flags);
2255 i += j;
2256
2257 if (!j || (flags & HOEDOWN_LI_END))
2258 break;
2259 }
2260
2261 if (doc->md.list)
2262 doc->md.list(ob, work, flags, &doc->data);
2263 popbuf(doc, BUFFER_BLOCK);
2264 return i;
2265 }
2266
/* get_atxheader_info • extract the level and title of an ATX header line.
 *
 * data/size: the line, starting at its first byte.
 * level:     out, number of leading '#' characters (counted up to 6).
 * skip:      out, optional; offset of the terminating '\n', or `size` when
 *            the line has no newline.
 *
 * Returns a freshly malloc'ed, NUL-terminated copy of the title with the
 * spaces after the leading '#'s and any trailing '#'s and spaces removed.
 * The caller owns the result and must free() it. Returns NULL when the title
 * is empty or when the allocation fails; `*level` and `*skip` are still set
 * in that case. */
uint8_t *
get_atxheader_info(uint8_t *data, size_t size, size_t * level, size_t * skip)
{
	size_t i, end;
	uint8_t *title;

	*level = 0;
	while (*level < size && *level < 6 && data[*level] == '#')
		(*level)++;

	/* skip the spaces separating the marker from the title */
	for (i = *level; i < size && data[i] == ' '; i++)
		;

	/* find the end of the line */
	for (end = i; end < size && data[end] != '\n'; end++)
		;
	if (skip)
		*skip = end;

	/* drop the optional closing '#'s and the spaces before them; never
	 * walk back past the start of the title */
	while (end > i && data[end - 1] == '#')
		end--;
	while (end > i && data[end - 1] == ' ')
		end--;

	if (end <= i)
		return NULL;

	title = malloc(end - i + 1);
	if (title == NULL)
		return NULL;

	memcpy(title, data + i, end - i);
	title[end - i] = 0;
	return title;
}
2296
2297 /* parse_atxheader • parsing of atx-style headers */
2298 static size_t
parse_atxheader(hoedown_buffer * ob,hoedown_document * doc,uint8_t * data,size_t size)2299 parse_atxheader(hoedown_buffer *ob, hoedown_document *doc, uint8_t *data, size_t size)
2300 {
2301 size_t level = 0;
2302 size_t skip = 0;
2303
2304 uint8_t * title = get_atxheader_info(data, size, &level, &skip);
2305
2306 if (level == 1)
2307 {
2308 doc->counter.chapter ++ ;
2309 doc->counter.section = 0;
2310 doc->counter.subsection = 0;
2311 } else if (level == 2)
2312 {
2313 doc->counter.section ++;
2314 doc->counter.subsection = 0;
2315 } else if (level == 3)
2316 {
2317 doc->counter.subsection ++;
2318 }
2319
2320 if (title) {
2321 hoedown_buffer *work = newbuf(doc, BUFFER_SPAN);
2322
2323 parse_inline(work, doc, title, strlen((char*)title));
2324
2325 if (doc->md.header)
2326 {
2327 doc->md.header(ob, work, (int)level, &doc->data, doc->counter, doc->document_metadata->numbering);
2328 }
2329 popbuf(doc, BUFFER_SPAN);
2330 }
2331
2332 return skip;
2333 }
2334
2335 /* parse_footnote_def • parse a single footnote definition */
2336 static void
parse_footnote_def(hoedown_buffer * ob,hoedown_document * doc,unsigned int num,uint8_t * data,size_t size)2337 parse_footnote_def(hoedown_buffer *ob, hoedown_document *doc, unsigned int num, uint8_t *data, size_t size)
2338 {
2339 hoedown_buffer *work = 0;
2340 work = newbuf(doc, BUFFER_SPAN);
2341
2342 parse_block(work, doc, data, size, -1);
2343
2344 if (doc->md.footnote_def)
2345 doc->md.footnote_def(ob, work, num, &doc->data);
2346 popbuf(doc, BUFFER_SPAN);
2347 }
2348
2349 /* parse_footnote_list • render the contents of the footnotes */
2350 static void
parse_footnote_list(hoedown_buffer * ob,hoedown_document * doc,struct footnote_list * footnotes)2351 parse_footnote_list(hoedown_buffer *ob, hoedown_document *doc, struct footnote_list *footnotes)
2352 {
2353 hoedown_buffer *work = 0;
2354 struct footnote_item *item;
2355 struct footnote_ref *ref;
2356
2357 if (footnotes->count == 0)
2358 return;
2359
2360 work = newbuf(doc, BUFFER_BLOCK);
2361
2362 item = footnotes->head;
2363 while (item) {
2364 ref = item->ref;
2365 parse_footnote_def(work, doc, ref->num, ref->contents->data, ref->contents->size);
2366 item = item->next;
2367 }
2368
2369 if (doc->md.footnotes)
2370 doc->md.footnotes(ob, work, &doc->data);
2371 popbuf(doc, BUFFER_BLOCK);
2372 }
2373
2374 /* htmlblock_is_end • check for end of HTML block : </tag>( *)\n */
2375 /* returns tag length on match, 0 otherwise */
2376 /* assumes data starts with "<" */
2377 static size_t
htmlblock_is_end(const char * tag,size_t tag_len,hoedown_document * doc,uint8_t * data,size_t size)2378 htmlblock_is_end(
2379 const char *tag,
2380 size_t tag_len,
2381 hoedown_document *doc,
2382 uint8_t *data,
2383 size_t size)
2384 {
2385 size_t i = tag_len + 3, w;
2386
2387 /* try to match the end tag */
2388 /* note: we're not considering tags like "</tag >" which are still valid */
2389 if (i > size ||
2390 data[1] != '/' ||
2391 strncasecmp((char *)data + 2, tag, tag_len) != 0 ||
2392 data[tag_len + 2] != '>')
2393 return 0;
2394
2395 /* rest of the line must be empty */
2396 if ((w = is_empty(data + i, size - i)) == 0 && i < size)
2397 return 0;
2398
2399 return i + w;
2400 }
2401
2402 /* htmlblock_find_end • try to find HTML block ending tag */
2403 /* returns the length on match, 0 otherwise */
2404 static size_t
htmlblock_find_end(const char * tag,size_t tag_len,hoedown_document * doc,uint8_t * data,size_t size)2405 htmlblock_find_end(
2406 const char *tag,
2407 size_t tag_len,
2408 hoedown_document *doc,
2409 uint8_t *data,
2410 size_t size)
2411 {
2412 size_t i = 0, w;
2413
2414 while (1) {
2415 while (i < size && data[i] != '<') i++;
2416 if (i >= size) return 0;
2417
2418 w = htmlblock_is_end(tag, tag_len, doc, data + i, size - i);
2419 if (w) return i + w;
2420 i++;
2421 }
2422 }
2423
2424 /* htmlblock_find_end_strict • try to find end of HTML block in strict mode */
2425 /* (it must be an unindented line, and have a blank line afterwads) */
2426 /* returns the length on match, 0 otherwise */
2427 static size_t
htmlblock_find_end_strict(const char * tag,size_t tag_len,hoedown_document * doc,uint8_t * data,size_t size)2428 htmlblock_find_end_strict(
2429 const char *tag,
2430 size_t tag_len,
2431 hoedown_document *doc,
2432 uint8_t *data,
2433 size_t size)
2434 {
2435 size_t i = 0, mark;
2436
2437 while (1) {
2438 mark = i;
2439 while (i < size && data[i] != '\n') i++;
2440 if (i < size) i++;
2441 if (i == mark) return 0;
2442
2443 if (data[mark] == ' ' && mark > 0) continue;
2444 mark += htmlblock_find_end(tag, tag_len, doc, data + mark, i - mark);
2445 if (mark == i && (is_empty(data + i, size - i) || i >= size)) break;
2446 }
2447
2448 return i;
2449 }
2450
2451 /* parse_htmlblock • parsing of inline HTML block */
2452 static size_t
parse_htmlblock(hoedown_buffer * ob,hoedown_document * doc,uint8_t * data,size_t size,int do_render)2453 parse_htmlblock(hoedown_buffer *ob, hoedown_document *doc, uint8_t *data, size_t size, int do_render)
2454 {
2455 hoedown_buffer work = { NULL, 0, 0, 0, NULL, NULL, NULL };
2456 size_t i, j = 0, tag_len, tag_end;
2457 const char *curtag = NULL;
2458
2459 work.data = data;
2460
2461 /* identification of the opening tag */
2462 if (size < 2 || data[0] != '<')
2463 return 0;
2464
2465 i = 1;
2466 while (i < size && data[i] != '>' && data[i] != ' ')
2467 i++;
2468
2469 if (i < size)
2470 curtag = hoedown_find_block_tag((char *)data + 1, (int)i - 1);
2471
2472 /* handling of special cases */
2473 if (!curtag) {
2474
2475 /* HTML comment, laxist form */
2476 if (size > 5 && data[1] == '!' && data[2] == '-' && data[3] == '-') {
2477 i = 5;
2478
2479 while (i < size && !(data[i - 2] == '-' && data[i - 1] == '-' && data[i] == '>'))
2480 i++;
2481
2482 i++;
2483
2484 if (i < size)
2485 j = is_empty(data + i, size - i);
2486
2487 if (j) {
2488 work.size = i + j;
2489 if (do_render && doc->md.blockhtml)
2490 doc->md.blockhtml(ob, &work, &doc->data);
2491 return work.size;
2492 }
2493 }
2494
2495 /* HR, which is the only self-closing block tag considered */
2496 if (size > 4 && (data[1] == 'h' || data[1] == 'H') && (data[2] == 'r' || data[2] == 'R')) {
2497 i = 3;
2498 while (i < size && data[i] != '>')
2499 i++;
2500
2501 if (i + 1 < size) {
2502 i++;
2503 j = is_empty(data + i, size - i);
2504 if (j) {
2505 work.size = i + j;
2506 if (do_render && doc->md.blockhtml)
2507 doc->md.blockhtml(ob, &work, &doc->data);
2508 return work.size;
2509 }
2510 }
2511 }
2512
2513 /* no special case recognised */
2514 return 0;
2515 }
2516
2517 /* looking for a matching closing tag in strict mode */
2518 tag_len = strlen(curtag);
2519 tag_end = htmlblock_find_end_strict(curtag, tag_len, doc, data, size);
2520
2521 /* if not found, trying a second pass looking for indented match */
2522 /* but not if tag is "ins" or "del" (following original Markdown.pl) */
2523 if (!tag_end && strcmp(curtag, "ins") != 0 && strcmp(curtag, "del") != 0)
2524 tag_end = htmlblock_find_end(curtag, tag_len, doc, data, size);
2525
2526 if (!tag_end)
2527 return 0;
2528
2529 /* the end of the block has been found */
2530 work.size = tag_end;
2531 if (do_render && doc->md.blockhtml)
2532 doc->md.blockhtml(ob, &work, &doc->data);
2533
2534 return tag_end;
2535 }
2536
2537 static void
parse_table_row(hoedown_buffer * ob,hoedown_document * doc,uint8_t * data,size_t size,size_t columns,hoedown_table_flags * col_data,hoedown_table_flags header_flag)2538 parse_table_row(
2539 hoedown_buffer *ob,
2540 hoedown_document *doc,
2541 uint8_t *data,
2542 size_t size,
2543 size_t columns,
2544 hoedown_table_flags *col_data,
2545 hoedown_table_flags header_flag)
2546 {
2547 size_t i = 0, col, len;
2548 hoedown_buffer *row_work = 0;
2549
2550 if (!doc->md.table_cell || !doc->md.table_row)
2551 return;
2552
2553 row_work = newbuf(doc, BUFFER_SPAN);
2554
2555 if (i < size && data[i] == '|')
2556 i++;
2557
2558 for (col = 0; col < columns && i < size; ++col) {
2559 size_t cell_start, cell_end;
2560 hoedown_buffer *cell_work;
2561
2562 cell_work = newbuf(doc, BUFFER_SPAN);
2563
2564 while (i < size && _isspace(data[i]))
2565 i++;
2566
2567 cell_start = i;
2568
2569 len = find_emph_char(data + i, size - i, '|');
2570
2571 /* Two possibilities for len == 0:
2572 1) No more pipe char found in the current line.
2573 2) The next pipe is right after the current one, i.e. empty cell.
2574 For case 1, we skip to the end of line; for case 2 we just continue.
2575 */
2576 if (len == 0 && i < size && data[i] != '|')
2577 len = size - i;
2578 i += len;
2579
2580 cell_end = i - 1;
2581
2582 while (cell_end > cell_start && _isspace(data[cell_end]))
2583 cell_end--;
2584
2585 parse_inline(cell_work, doc, data + cell_start, 1 + cell_end - cell_start);
2586 doc->md.table_cell(row_work, cell_work, col_data[col] | header_flag, &doc->data);
2587
2588 popbuf(doc, BUFFER_SPAN);
2589 i++;
2590 }
2591
2592 for (; col < columns; ++col) {
2593 hoedown_buffer empty_cell = { 0, 0, 0, 0, NULL, NULL, NULL };
2594 doc->md.table_cell(row_work, &empty_cell, col_data[col] | header_flag, &doc->data);
2595 }
2596
2597 doc->md.table_row(ob, row_work, &doc->data);
2598
2599 popbuf(doc, BUFFER_SPAN);
2600 }
2601
2602 static size_t
parse_table_header(hoedown_buffer * ob,hoedown_document * doc,uint8_t * data,size_t size,size_t * columns,hoedown_table_flags ** column_data)2603 parse_table_header(
2604 hoedown_buffer *ob,
2605 hoedown_document *doc,
2606 uint8_t *data,
2607 size_t size,
2608 size_t *columns,
2609 hoedown_table_flags **column_data)
2610 {
2611 int pipes;
2612 size_t i = 0, col, header_end, under_end;
2613
2614 pipes = 0;
2615 while (i < size && data[i] != '\n')
2616 if (data[i++] == '|')
2617 pipes++;
2618
2619 if (i == size || pipes == 0)
2620 return 0;
2621
2622 header_end = i;
2623
2624 while (header_end > 0 && _isspace(data[header_end - 1]))
2625 header_end--;
2626
2627 if (data[0] == '|')
2628 pipes--;
2629
2630 if (header_end && data[header_end - 1] == '|')
2631 pipes--;
2632
2633 if (pipes < 0)
2634 return 0;
2635
2636 *columns = pipes + 1;
2637 *column_data = hoedown_calloc(*columns, sizeof(hoedown_table_flags));
2638
2639 /* Parse the header underline */
2640 i++;
2641 if (i < size && data[i] == '|')
2642 i++;
2643
2644 under_end = i;
2645 while (under_end < size && data[under_end] != '\n')
2646 under_end++;
2647
2648 for (col = 0; col < *columns && i < under_end; ++col) {
2649 size_t dashes = 0;
2650
2651 while (i < under_end && data[i] == ' ')
2652 i++;
2653
2654 if (data[i] == ':') {
2655 i++; (*column_data)[col] |= HOEDOWN_TABLE_ALIGN_LEFT;
2656 dashes++;
2657 }
2658
2659 while (i < under_end && data[i] == '-') {
2660 i++; dashes++;
2661 }
2662
2663 if (i < under_end && data[i] == ':') {
2664 i++; (*column_data)[col] |= HOEDOWN_TABLE_ALIGN_RIGHT;
2665 dashes++;
2666 }
2667
2668 while (i < under_end && data[i] == ' ')
2669 i++;
2670
2671 if (i < under_end && data[i] != '|' && data[i] != '+')
2672 break;
2673
2674 if (dashes < 3)
2675 break;
2676
2677 i++;
2678 }
2679
2680 if (col < *columns)
2681 return 0;
2682
2683 parse_table_row(
2684 ob, doc, data,
2685 header_end,
2686 *columns,
2687 *column_data,
2688 HOEDOWN_TABLE_HEADER
2689 );
2690
2691 return under_end + 1;
2692 }
2693
2694 static size_t
parse_table(hoedown_buffer * ob,hoedown_document * doc,uint8_t * data,size_t size)2695 parse_table(
2696 hoedown_buffer *ob,
2697 hoedown_document *doc,
2698 uint8_t *data,
2699 size_t size)
2700 {
2701 size_t i;
2702
2703 hoedown_buffer *work = 0;
2704 hoedown_buffer *header_work = 0;
2705 hoedown_buffer *body_work = 0;
2706
2707 size_t columns;
2708 hoedown_table_flags *col_data = NULL;
2709
2710 work = newbuf(doc, BUFFER_BLOCK);
2711 header_work = newbuf(doc, BUFFER_SPAN);
2712 body_work = newbuf(doc, BUFFER_BLOCK);
2713
2714 i = parse_table_header(header_work, doc, data, size, &columns, &col_data);
2715 if (i > 0) {
2716
2717 while (i < size) {
2718 size_t row_start;
2719 int pipes = 0;
2720
2721 row_start = i;
2722
2723 while (i < size && data[i] != '\n')
2724 if (data[i++] == '|')
2725 pipes++;
2726
2727 if (pipes == 0 || i == size) {
2728 i = row_start;
2729 break;
2730 }
2731
2732 parse_table_row(
2733 body_work,
2734 doc,
2735 data + row_start,
2736 i - row_start,
2737 columns,
2738 col_data, 0
2739 );
2740
2741 i++;
2742 }
2743
2744 if (doc->md.table_header)
2745 doc->md.table_header(work, header_work, &doc->data);
2746
2747 if (doc->md.table_body)
2748 doc->md.table_body(work, body_work, &doc->data);
2749
2750 if (doc->md.table)
2751 doc->md.table(ob, work, &doc->data, col_data, columns);
2752 }
2753
2754 free(col_data);
2755 popbuf(doc, BUFFER_SPAN);
2756 popbuf(doc, BUFFER_BLOCK);
2757 popbuf(doc, BUFFER_BLOCK);
2758 return i;
2759 }
2760
2761 static size_t
parse_abstract(hoedown_buffer * ob,hoedown_document * doc,uint8_t * data,size_t size)2762 parse_abstract(
2763 hoedown_buffer *ob,
2764 hoedown_document *doc,
2765 uint8_t *data,
2766 size_t size)
2767 {
2768 size_t skip = 0;
2769 while (skip < size && !startsWith("\n@/\n", (char*)data+skip))
2770 {
2771 skip ++;
2772 }
2773
2774
2775 if (doc->md.abstract)
2776 {
2777 doc->md.abstract(ob);
2778 parse_block(ob, doc, data, skip, -1);
2779 if (doc->md.keywords && doc->document_metadata->keywords)
2780 {
2781 hoedown_buffer * b = hoedown_buffer_new(1);
2782 hoedown_buffer_puts(b, doc->document_metadata->keywords);
2783 doc->md.keywords(ob,b,NULL);
2784 hoedown_buffer_free(b);
2785
2786 }
2787 doc->md.close(ob);
2788 }
2789 if (skip < size)
2790 {
2791 skip += 4;
2792 }
2793 return skip;
2794 }
2795 uint8_t *
parse_caption(hoedown_document * doc,uint8_t * data,size_t size)2796 parse_caption(hoedown_document *doc,
2797 uint8_t *data,
2798 size_t size)
2799 {
2800 if (!data || size <= 0)
2801 return NULL;
2802 uint32_t i=0;
2803 while (i < size && data[i] !='\n'){
2804 if (data[i] == ')' && (i==0 || data[i-1] != '\\'))
2805 break;
2806 i++;
2807 }
2808 if (i) {
2809 hoedown_buffer * buf = hoedown_buffer_new(1);
2810 parse_inline(buf, doc, data, i);
2811 uint8_t * tmp = malloc(sizeof(uint8_t) * (buf->size+1));
2812 tmp[buf->size] = 0;
2813 memcpy(tmp, buf->data, buf->size);
2814 // clean escape chars
2815 tmp = (uint8_t*)clean_string((char*)tmp, buf->size);
2816 hoedown_buffer_free(buf);
2817 return tmp;
2818 }
2819 return NULL;
2820 }
2821
2822 static size_t
parse_fl(hoedown_buffer * ob,hoedown_document * doc,uint8_t * data,size_t size,float_type type)2823 parse_fl(
2824 hoedown_buffer *ob,
2825 hoedown_document *doc,
2826 uint8_t *data,
2827 size_t size,
2828 float_type type)
2829 {
2830 size_t begin = 0;
2831 size_t skip = 0;
2832 float_args args = {};
2833 args.type = type;
2834 args.caption = NULL;
2835
2836 if (data[0] == '(')
2837 {
2838 begin ++;
2839 while (begin < size && (data[begin] !=')' && data[begin] !='\n')){
2840 begin ++;
2841 }
2842 if (begin > 2){
2843 args.id = malloc(sizeof(char)*(begin));
2844 args.id[begin-1] = 0;
2845 memcpy(args.id, data+1, begin-1);
2846 }
2847 begin++;
2848
2849 }
2850 while (skip+begin < size && !startsWith("\n@/", (char*)data+skip+begin))
2851 {
2852 if (startsWith("\n@caption(",(char*) data+skip+begin))
2853 {
2854 args.caption = (char*)parse_caption(doc, data+skip+begin+10, size-begin-skip-10);
2855 }
2856 skip ++;
2857 }
2858
2859
2860 if (doc->md.open_float)
2861 {
2862 doc->md.open_float(ob, args, &doc->data);
2863 parse_block(ob, doc, data+begin, skip, -1);
2864 doc->md.close_float(ob, args, &doc->data);
2865 }
2866 if (skip < size)
2867 {
2868 skip += 4;
2869 }
2870 return skip + begin;
2871 }
2872
2873 static size_t
parse_eq(hoedown_buffer * ob,hoedown_document * doc,uint8_t * data,size_t size)2874 parse_eq(
2875 hoedown_buffer *ob,
2876 hoedown_document *doc,
2877 uint8_t *data,
2878 size_t size)
2879 {
2880 size_t begin = 0;
2881 size_t skip = 0;
2882 float_args args = {};
2883 args.type = EQUATION;
2884
2885 if (data[0] == '(')
2886 {
2887 begin ++;
2888 while (begin < size && (data[begin] !=')' && data[begin] !='\n')){
2889 begin ++;
2890 }
2891 args.id = malloc(sizeof(char)*(begin));
2892 args.id[begin-1] = 0;
2893 memcpy(args.id, data+1, begin-1);
2894 begin++;
2895 }
2896 while (skip+begin < size && !startsWith("\n@/", (char*)data+skip+begin))
2897 {
2898 skip ++;
2899 }
2900
2901 if (doc->md.opn_equation && skip)
2902 {
2903 doc->md.opn_equation(ob, args.id, &doc->data);
2904 hoedown_buffer * text = hoedown_buffer_new(skip);
2905 hoedown_buffer_put(text, data+begin, skip);
2906 if (doc->md.eq_math)
2907 doc->md.eq_math(ob, text, 2, &doc->data);
2908 doc->md.cls_equation(ob, &doc->data);
2909 }
2910 if (skip < size)
2911 {
2912 skip += 4;
2913 }
2914 return skip + begin;
2915 }
2916
2917
2918 static size_t
parse_float(hoedown_buffer * ob,hoedown_document * doc,uint8_t * data,size_t size)2919 parse_float(
2920 hoedown_buffer *ob,
2921 hoedown_document *doc,
2922 uint8_t *data,
2923 size_t size)
2924 {
2925 if (startsWith("@abstract", (char*)data) && is_separator(data[9])) {
2926 return parse_abstract(ob, doc, data+9,size-9)+9;
2927 }
2928 if (startsWith("@figure", (char*)data) && is_separator(data[7])) {
2929 return parse_fl(ob, doc, data+7, size-7, FIGURE)+7;
2930 }
2931 if (startsWith("@table", (char*)data) && is_separator(data[6])) {
2932 return parse_fl(ob, doc, data+6, size-6, TABLE)+6;
2933 }
2934 if (startsWith("@listing", (char*)data) && is_separator(data[8])) {
2935 return parse_fl(ob, doc, data+8, size-8, LISTING)+8;
2936 }
2937 if (startsWith("@equation", (char*)data) && is_separator(data[9])) {
2938 return parse_eq(ob, doc, data+9, size-9) + 9;
2939 }
2940 if (startsWith("@toc", (char*)data) && is_separator(data[4]))
2941 {
2942 if (doc->md.toc && doc->table_of_contents)
2943 doc->md.toc(ob, doc->table_of_contents, doc->document_metadata->numbering);
2944 return 4;
2945 }
2946
2947 return 1;
2948 }
2949
2950 static void
parse_position(hoedown_buffer * ob,hoedown_document * doc)2951 parse_position(hoedown_buffer *ob, hoedown_document *doc){
2952 if (doc->md.position){
2953 doc->md.position(ob);
2954 }
2955 }
2956
2957 /* parse_block • parsing of one block, returning next uint8_t to parse */
2958 static void
parse_block(hoedown_buffer * ob,hoedown_document * doc,uint8_t * data,size_t size,int position)2959 parse_block(hoedown_buffer *ob, hoedown_document *doc, uint8_t *data, size_t size, int position)
2960 {
2961 size_t beg, end, i;
2962 uint8_t *txt_data;
2963 beg = 0;
2964
2965 if (doc->work_bufs[BUFFER_SPAN].size +
2966 doc->work_bufs[BUFFER_BLOCK].size > doc->max_nesting)
2967 return;
2968
2969 while (beg < size) {
2970 if (position >= 0 && beg >= position) {
2971 position = -1;
2972 parse_position(ob, doc);
2973 }
2974 txt_data = data + beg;
2975 end = size - beg;
2976
2977 if (is_atxheader(doc, txt_data, end))
2978 beg += parse_atxheader(ob, doc, txt_data, end);
2979
2980 else if (data[beg] == '<' && doc->md.blockhtml &&
2981 (i = parse_htmlblock(ob, doc, txt_data, end, 1)) != 0)
2982 beg += i;
2983
2984 else if ((i = is_empty(txt_data, end)) != 0)
2985 beg += i;
2986
2987 else if (is_hrule(txt_data, end)) {
2988 if (doc->md.hrule)
2989 doc->md.hrule(ob, &doc->data);
2990
2991 while (beg < size && data[beg] != '\n')
2992 beg++;
2993
2994 beg++;
2995 }
2996
2997 else if ((doc->ext_flags & HOEDOWN_EXT_FENCED_CODE) != 0 &&
2998 (i = parse_fencedcode(ob, doc, txt_data, end)) != 0)
2999 beg += i;
3000
3001 else if ((doc->ext_flags & HOEDOWN_EXT_TABLES) != 0 &&
3002 (i = parse_table(ob, doc, txt_data, end)) != 0)
3003 beg += i;
3004
3005 else if (prefix_quote(txt_data, end))
3006 beg += parse_blockquote(ob, doc, txt_data, end);
3007
3008 else if (!(doc->ext_flags & HOEDOWN_EXT_DISABLE_INDENTED_CODE) && prefix_code(txt_data, end))
3009 beg += parse_blockcode(ob, doc, txt_data, end);
3010
3011 else if (prefix_float(txt_data, end))
3012 beg += parse_float(ob, doc, txt_data, end);
3013
3014 else if (prefix_uli(txt_data, end))
3015 beg += parse_list(ob, doc, txt_data, end, 0);
3016
3017 else if (prefix_oli(txt_data, end))
3018 beg += parse_list(ob, doc, txt_data, end, HOEDOWN_LIST_ORDERED);
3019
3020 else
3021 beg += parse_paragraph(ob, doc, txt_data, end);
3022 }
3023 if (position > 0) {
3024 parse_position(ob, doc);
3025 }
3026 }
3027
3028
3029
3030 /*********************
3031 * REFERENCE PARSING *
3032 *********************/
3033 void load_notes(const uint8_t * text, size_t size, char* base_folder, struct footnote_list *list);
3034
3035 /* is_footnote • returns whether a line is a footnote definition or not */
3036 static int
is_footnote(const uint8_t * data,size_t beg,size_t end,size_t * last,char * base_folder,struct footnote_list * list)3037 is_footnote(const uint8_t *data, size_t beg, size_t end, size_t *last, char* base_folder, struct footnote_list *list)
3038 {
3039 if (startsWith("@bib(", (char*)data+beg))
3040 {
3041
3042 size_t n = 0;
3043 size_t i = 5+beg;
3044 while(data[i] != '\n' && i != end)
3045 {
3046 if (data[i]==')')
3047 break;
3048 n++;
3049 i++;
3050 }
3051
3052 if (n){
3053 char * path = malloc((n+1)*sizeof(char));
3054 path[n] = 0;
3055 strncpy(path, (char*)data+beg+5, n);
3056 if (is_regular_file(path, base_folder)){
3057 size_t size = 0;
3058 char * bib = load_file(path, base_folder, &size);
3059 load_notes((uint8_t*)bib, size, base_folder, list);
3060 free(bib);
3061 }
3062 free(path);
3063 }
3064
3065 i = beg;
3066 while(data[i]!='\n')
3067 i ++;
3068 *last = i;
3069
3070 return 1;
3071 }
3072 size_t i = 0;
3073 hoedown_buffer *contents = 0;
3074 size_t ind = 0;
3075 int in_empty = 0;
3076 size_t start = 0;
3077
3078 size_t id_offset, id_end;
3079
3080 /* up to 3 optional leading spaces */
3081 if (beg + 3 >= end) return 0;
3082 if (data[beg] == ' ') { i = 1;
3083 if (data[beg + 1] == ' ') { i = 2;
3084 if (data[beg + 2] == ' ') { i = 3;
3085 if (data[beg + 3] == ' ') return 0; } } }
3086 i += beg;
3087
3088 /* id part: caret followed by anything between brackets */
3089 if (data[i] != '[') return 0;
3090 i++;
3091 if (i >= end || data[i] != '^') return 0;
3092 i++;
3093 id_offset = i;
3094 while (i < end && data[i] != '\n' && data[i] != '\r' && data[i] != ']')
3095 i++;
3096 if (i >= end || data[i] != ']') return 0;
3097 id_end = i;
3098
3099 /* spacer: colon (space | tab)* newline? (space | tab)* */
3100 i++;
3101 if (i >= end || data[i] != ':') return 0;
3102 i++;
3103
3104 /* getting content buffer */
3105 contents = hoedown_buffer_new(64);
3106
3107 start = i;
3108
3109 /* process lines similar to a list item */
3110 while (i < end) {
3111 while (i < end && data[i] != '\n' && data[i] != '\r') i++;
3112
3113 /* process an empty line */
3114 if (is_empty(data + start, i - start)) {
3115 in_empty = 1;
3116 if (i < end && (data[i] == '\n' || data[i] == '\r')) {
3117 i++;
3118 if (i < end && data[i] == '\n' && data[i - 1] == '\r') i++;
3119 }
3120 start = i;
3121 continue;
3122 }
3123
3124 /* calculating the indentation */
3125 ind = 0;
3126 while (ind < 4 && start + ind < end && data[start + ind] == ' ')
3127 ind++;
3128
3129 /* joining only indented stuff after empty lines;
3130 * note that now we only require 1 space of indentation
3131 * to continue, just like lists */
3132 if (ind == 0) {
3133 if (start == id_end + 2 && data[start] == '\t') {}
3134 else break;
3135 }
3136 else if (in_empty) {
3137 hoedown_buffer_putc(contents, '\n');
3138 }
3139
3140 in_empty = 0;
3141
3142 /* adding the line into the content buffer */
3143 hoedown_buffer_put(contents, data + start + ind, i - start - ind);
3144 /* add carriage return */
3145 if (i < end) {
3146 hoedown_buffer_putc(contents, '\n');
3147 if (i < end && (data[i] == '\n' || data[i] == '\r')) {
3148 i++;
3149 if (i < end && data[i] == '\n' && data[i - 1] == '\r') i++;
3150 }
3151 }
3152 start = i;
3153 }
3154
3155 if (last)
3156 *last = start;
3157
3158 if (list) {
3159 struct footnote_ref *ref;
3160 ref = create_footnote_ref(list, data + id_offset, id_end - id_offset);
3161 if (!ref)
3162 return 0;
3163 if (!add_footnote_ref(list, ref)) {
3164 free_footnote_ref(ref);
3165 return 0;
3166 }
3167 ref->contents = contents;
3168 }
3169
3170 return 1;
3171 }
3172
3173 /* is_ref • returns whether a line is a reference or not */
3174 static int
is_ref(const uint8_t * data,size_t beg,size_t end,size_t * last,struct link_ref ** refs)3175 is_ref(const uint8_t *data, size_t beg, size_t end, size_t *last, struct link_ref **refs)
3176 {
3177 /* int n; */
3178
3179 size_t i = 0;
3180 size_t id_offset, id_end;
3181 size_t link_offset, link_end;
3182 size_t title_offset, title_end;
3183 size_t line_end;
3184
3185 /* up to 3 optional leading spaces */
3186 if (beg + 3 >= end) return 0;
3187 if (data[beg] == ' ') { i = 1;
3188 if (data[beg + 1] == ' ') { i = 2;
3189 if (data[beg + 2] == ' ') { i = 3;
3190 if (data[beg + 3] == ' ') return 0; } } }
3191 i += beg;
3192
3193 /* id part: anything but a newline between brackets */
3194 if (data[i] != '[') return 0;
3195 i++;
3196 id_offset = i;
3197 while (i < end && data[i] != '\n' && data[i] != '\r' && data[i] != ']')
3198 i++;
3199 if (i >= end || data[i] != ']') return 0;
3200 id_end = i;
3201
3202 /* spacer: colon (space | tab)* newline? (space | tab)* */
3203 i++;
3204 if (i >= end || data[i] != ':') return 0;
3205 i++;
3206 while (i < end && data[i] == ' ') i++;
3207 if (i < end && (data[i] == '\n' || data[i] == '\r')) {
3208 i++;
3209 if (i < end && data[i] == '\r' && data[i - 1] == '\n') i++; }
3210 while (i < end && data[i] == ' ') i++;
3211 if (i >= end) return 0;
3212
3213 /* link: spacing-free sequence, optionally between angle brackets */
3214 if (data[i] == '<')
3215 i++;
3216
3217 link_offset = i;
3218
3219 while (i < end && data[i] != ' ' && data[i] != '\n' && data[i] != '\r')
3220 i++;
3221
3222 if (data[i - 1] == '>') link_end = i - 1;
3223 else link_end = i;
3224
3225 /* optional spacer: (space | tab)* (newline | '\'' | '"' | '(' ) */
3226 while (i < end && data[i] == ' ') i++;
3227 if (i < end && data[i] != '\n' && data[i] != '\r'
3228 && data[i] != '\'' && data[i] != '"' && data[i] != '(')
3229 return 0;
3230 line_end = 0;
3231 /* computing end-of-line */
3232 if (i >= end || data[i] == '\r' || data[i] == '\n') line_end = i;
3233 if (i + 1 < end && data[i] == '\n' && data[i + 1] == '\r')
3234 line_end = i + 1;
3235
3236 /* optional (space|tab)* spacer after a newline */
3237 if (line_end) {
3238 i = line_end + 1;
3239 while (i < end && data[i] == ' ') i++; }
3240
3241 /* optional title: any non-newline sequence enclosed in '"()
3242 alone on its line */
3243 title_offset = title_end = 0;
3244 if (i + 1 < end
3245 && (data[i] == '\'' || data[i] == '"' || data[i] == '(')) {
3246 i++;
3247 title_offset = i;
3248 /* looking for EOL */
3249 while (i < end && data[i] != '\n' && data[i] != '\r') i++;
3250 if (i + 1 < end && data[i] == '\n' && data[i + 1] == '\r')
3251 title_end = i + 1;
3252 else title_end = i;
3253 /* stepping back */
3254 i -= 1;
3255 while (i > title_offset && data[i] == ' ')
3256 i -= 1;
3257 if (i > title_offset
3258 && (data[i] == '\'' || data[i] == '"' || data[i] == ')')) {
3259 line_end = title_end;
3260 title_end = i; } }
3261
3262 if (!line_end || link_end == link_offset)
3263 return 0; /* garbage after the link empty link */
3264
3265 /* a valid ref has been found, filling-in return structures */
3266 if (last)
3267 *last = line_end;
3268
3269 if (refs) {
3270 struct link_ref *ref;
3271
3272 ref = add_link_ref(refs, data + id_offset, id_end - id_offset);
3273 if (!ref)
3274 return 0;
3275
3276 ref->link = hoedown_buffer_new(link_end - link_offset);
3277 hoedown_buffer_put(ref->link, data + link_offset, link_end - link_offset);
3278
3279 if (title_end > title_offset) {
3280 ref->title = hoedown_buffer_new(title_end - title_offset);
3281 hoedown_buffer_put(ref->title, data + title_offset, title_end - title_offset);
3282 }
3283 }
3284
3285 return 1;
3286 }
3287
3288
/* load_notes • collect the footnote definitions found in a text.
 *
 * Walks `data` line by line; every position is offered to is_footnote(),
 * which registers what it recognises in `list` and reports where to
 * continue. Lines that are not recognised are skipped together with the
 * line terminators that follow them. */
void
load_notes(const uint8_t * data, size_t size, char* base_folder, struct footnote_list *list)
{
	static const uint8_t UTF8_BOM[] = {0xEF, 0xBB, 0xBF};
	size_t pos = 0, next;

	/* Skip a possible UTF-8 BOM, even though the Unicode standard
	 * discourages having these in UTF-8 documents */
	if (size >= 3 && memcmp(data, UTF8_BOM, 3) == 0)
		pos += 3;

	/* iterating over lines */
	while (pos < size) {
		if (!is_footnote(data, pos, size, &next, base_folder, list)) {
			/* skipping to the next line */
			next = pos;
			while (next < size && data[next] != '\n' && data[next] != '\r')
				next++;
			while (next < size && (data[next] == '\n' || data[next] == '\r'))
				next++;
		}
		pos = next;
	}
}
/* expand_tabs • copy a line into `ob`, replacing every tab with spaces up
 * to the next multiple-of-4 column (at least one space per tab). */
static void expand_tabs(hoedown_buffer *ob, const uint8_t *line, size_t size)
{
	/* This code makes two assumptions:
	 * - Input is valid UTF-8.  (Any byte with top two bits 10 is skipped,
	 *   whether or not it is a valid UTF-8 continuation byte.)
	 * - Input contains no combining characters.  (Combining characters
	 *   should be skipped but are not.)
	 */
	size_t pos = 0, column = 0;

	while (pos < size) {
		size_t run_start = pos;

		/* advance over the run of non-tab bytes, tracking the column */
		for (; pos < size && line[pos] != '\t'; pos++) {
			/* ignore UTF-8 continuation bytes */
			if ((line[pos] & 0xc0) != 0x80)
				column++;
		}

		if (pos > run_start)
			hoedown_buffer_put(ob, line + run_start, pos - run_start);

		if (pos >= size)
			break;

		/* pad up to the next tab stop */
		do {
			hoedown_buffer_putc(ob, ' ');
			column++;
		} while (column % 4);

		/* step over the tab itself */
		pos++;
	}
}
3349
3350 /**********************
3351 * EXPORTED FUNCTIONS *
3352 **********************/
3353
3354 hoedown_document *
hoedown_document_new(const hoedown_renderer * renderer,hoedown_extensions extensions,ext_definition * user_ext,const char * base_folder,size_t max_nesting)3355 hoedown_document_new(
3356 const hoedown_renderer *renderer,
3357 hoedown_extensions extensions,
3358 ext_definition * user_ext,
3359 const char * base_folder,
3360 size_t max_nesting)
3361 {
3362 hoedown_document *doc = NULL;
3363
3364 assert(max_nesting > 0 && renderer);
3365
3366 doc = hoedown_malloc(sizeof(hoedown_document));
3367 memcpy(&doc->md, renderer, sizeof(hoedown_renderer));
3368
3369 doc->extensions = user_ext;
3370 doc->base_folder = (base_folder != NULL) ? strdup (base_folder) : NULL;
3371
3372 doc->counter = (h_counter){0, 0, 0};
3373
3374 doc->floating_references = NULL;
3375 doc->document_metadata = NULL;
3376 doc->table_of_contents = NULL;
3377 doc->data.opaque = renderer->opaque;
3378 doc->data.meta = NULL;
3379
3380 hoedown_stack_init(&doc->work_bufs[BUFFER_BLOCK], 4);
3381 hoedown_stack_init(&doc->work_bufs[BUFFER_SPAN], 8);
3382
3383 memset(doc->active_char, 0x0, 256);
3384
3385 if (extensions & HOEDOWN_EXT_UNDERLINE && doc->md.underline) {
3386 doc->active_char['_'] = MD_CHAR_EMPHASIS;
3387 }
3388
3389 if (doc->md.emphasis || doc->md.double_emphasis || doc->md.triple_emphasis) {
3390 doc->active_char['*'] = MD_CHAR_EMPHASIS;
3391 doc->active_char['_'] = MD_CHAR_EMPHASIS;
3392 if (extensions & HOEDOWN_EXT_STRIKETHROUGH)
3393 doc->active_char['~'] = MD_CHAR_EMPHASIS;
3394 if (extensions & HOEDOWN_EXT_HIGHLIGHT)
3395 doc->active_char['='] = MD_CHAR_EMPHASIS;
3396 }
3397
3398 if (doc->md.codespan)
3399 doc->active_char['`'] = MD_CHAR_CODESPAN;
3400
3401 if (doc->md.linebreak)
3402 doc->active_char['\n'] = MD_CHAR_LINEBREAK;
3403
3404 if (doc->md.image || doc->md.link || doc->md.footnotes || doc->md.footnote_ref) {
3405 doc->active_char['['] = MD_CHAR_LINK;
3406 doc->active_char['!'] = MD_CHAR_IMAGE;
3407 }
3408
3409 doc->active_char['<'] = MD_CHAR_LANGLE;
3410 doc->active_char['\\'] = MD_CHAR_ESCAPE;
3411 doc->active_char['&'] = MD_CHAR_ENTITY;
3412
3413 if (extensions & HOEDOWN_EXT_AUTOLINK) {
3414 doc->active_char[':'] = MD_CHAR_AUTOLINK_URL;
3415 doc->active_char['@'] = MD_CHAR_AUTOLINK_EMAIL;
3416 doc->active_char['w'] = MD_CHAR_AUTOLINK_WWW;
3417 }
3418
3419 if (extensions & HOEDOWN_EXT_SUPERSCRIPT)
3420 doc->active_char['^'] = MD_CHAR_SUPERSCRIPT;
3421
3422 if (extensions & HOEDOWN_EXT_QUOTE)
3423 doc->active_char['"'] = MD_CHAR_QUOTE;
3424
3425 if (extensions & HOEDOWN_EXT_MATH)
3426 doc->active_char['$'] = MD_CHAR_MATH;
3427
3428 doc->active_char['('] = MD_CHAR_REF;
3429
3430 /* Extension data */
3431 doc->ext_flags = extensions;
3432 doc->max_nesting = max_nesting;
3433 doc->in_link_body = 0;
3434
3435 return doc;
3436 }
3437 size_t
skip_yaml(hoedown_document * doc,hoedown_buffer * ob,const uint8_t * data,size_t size)3438 skip_yaml(hoedown_document *doc, hoedown_buffer *ob, const uint8_t *data, size_t size)
3439 {
3440 size_t skip = 0;
3441 if (startsWith("---", (char*)data) && is_separator(data[3])){
3442 skip += 4;
3443 while (skip < size && !(startsWith("\n---", (char*)data+skip) &&
3444 (skip + 4 >= size || is_separator(data[skip+4])))) {
3445 skip ++;
3446 }
3447 if (skip < size)
3448 {
3449 skip += 5;
3450 }
3451 }
3452 return skip;
3453 }
3454
3455 void
sub_render(hoedown_document * doc,hoedown_buffer * ob,const uint8_t * data,size_t size,int position)3456 sub_render(hoedown_document *doc, hoedown_buffer *ob, const uint8_t *data, size_t size, int position)
3457 {
3458 static const uint8_t UTF8_BOM[] = {0xEF, 0xBB, 0xBF};
3459
3460 hoedown_buffer *text;
3461 size_t beg, end;
3462 text = hoedown_buffer_new(64);
3463
3464 /* Preallocate enough space for our buffer to avoid expanding while copying */
3465 hoedown_buffer_grow(text, size);
3466 /* first pass: looking for references, copying everything else */
3467 beg = 0;
3468
3469 int footnotes_enabled = doc->ext_flags & HOEDOWN_EXT_FOOTNOTES;
3470
3471 /* Skip a possible UTF-8 BOM, even though the Unicode standard
3472 * discourages having these in UTF-8 documents */
3473 if (size >= 3 && memcmp(data, UTF8_BOM, 3) == 0)
3474 beg += 3;
3475
3476 while (beg < size) /* iterating over lines */
3477 if (footnotes_enabled && is_footnote(data, beg, size, &end, doc->base_folder, &doc->footnotes_found))
3478 beg = end;
3479 else if (is_ref(data, beg, size, &end, doc->refs))
3480 beg = end;
3481 else { /* skipping to the next line */
3482 end = beg;
3483 while (end < size && data[end] != '\n' && data[end] != '\r')
3484 end++;
3485
3486 /* adding the line body if present */
3487 if (end > beg)
3488 expand_tabs(text, data + beg, end - beg);
3489
3490 while (end < size && (data[end] == '\n' || data[end] == '\r')) {
3491 /* add one \n per newline */
3492 if (data[end] == '\n' || (end + 1 < size && data[end + 1] != '\n'))
3493 hoedown_buffer_putc(text, '\n');
3494 end++;
3495 }
3496
3497 beg = end;
3498 }
3499
3500 /* pre-grow the output buffer to minimize allocations */
3501 hoedown_buffer_grow(ob, text->size + (text->size >> 1));
3502
3503 /* second pass: actual rendering */
3504 if (doc->md.doc_header)
3505 doc->md.doc_header(ob, 0, &doc->data);
3506
3507 if (text->size) {
3508 size_t skip = skip_yaml(doc, ob, text->data, text->size);
3509 /* adding a final newline if not already present */
3510 if (text->data[text->size - 1] != '\n' && text->data[text->size - 1] != '\r')
3511 hoedown_buffer_putc(text, '\n');
3512
3513 parse_block(ob, doc, text->data+skip, text->size-skip, position-skip);
3514 }
3515 hoedown_buffer_free(text);
3516 }
3517
/*
 * parse_keyword: store one front-matter "key: value" pair into `meta`.
 *
 * keyword - NUL-terminated key; spaces, newlines and tabs are removed in place
 * meta    - destination; its string fields must be NULL or heap-allocated
 * data    - bytes following the ':' of the key
 * size    - number of readable bytes in `data`
 *
 * The value runs from the first non-space byte up to the byte before the
 * first '\n' found at index >= 1 (or the end of `data`). A '\n' at data[0]
 * is not treated as the end of the line. When that scan does not advance
 * (size <= 1, or data[1] is '\n') nothing is stored and 1 is returned.
 *
 * Returns the offset of the terminating '\n' (or of the end of `data`),
 * which callers use to advance past the value.
 *
 * Ownership: the value is copied into a fresh heap string. For "title",
 * "keywords", "style" and "affiliation" it replaces (and frees) any previous
 * value; for "author" it is handed to add_string(); in all other cases it is
 * released before returning.
 */
int parse_keyword(char * keyword, metadata * meta, const uint8_t *data, size_t size)
{
	size_t last = 0;     /* index of the last byte belonging to the value */
	size_t leading = 0;  /* count of leading spaces before the value text */
	int seen_text = 0;
	size_t len;
	char *word;

	/** clean keyword **/
	remove_char(keyword, ' ');
	remove_char(keyword, '\n');
	remove_char(keyword, '\t');

	for (last = 0; last + 1 < size && data[last + 1] != '\n'; last++) {
		if (seen_text)
			continue;
		if (data[last] == ' ')
			leading++;
		else
			seen_text = 1;
	}
	if (last == 0)
		return 1;

	/* leading <= last always holds, so len >= 1 */
	len = last - leading + 1;
	word = malloc(len + 1);
	if (!word)
		return (int)(last + 1);
	memcpy(word, data + leading, len);
	word[len] = '\0';

	if (!strcmp(keyword, "title")) {
		free(meta->title);
		meta->title = word;
	} else if (!strcmp(keyword, "author")) {
		meta->authors = add_string(meta->authors, word);
	} else if (!strcmp(keyword, "keywords")) {
		free(meta->keywords);
		meta->keywords = word;
	} else if (!strcmp(keyword, "style")) {
		free(meta->style);
		meta->style = word;
	} else if (!strcmp(keyword, "affiliation")) {
		free(meta->affiliation);
		meta->affiliation = word;
	} else {
		/* the remaining keys only read the text, so it is released below */
		if (!strcmp(keyword, "numbering")) {
			meta->numbering = !strcmp(word, "true");
		} else if (!strcmp(keyword, "paper")) {
			meta->paper_size = string_to_paper(word);
		} else if (!strcmp(keyword, "class")) {
			meta->doc_class = string_to_class(word);
		} else if (!strcmp(keyword, "font-size")) {
			meta->font_size = atoi(word);
		}
		free(word);
	}

	return (int)(last + 1);
}
3567
3568 void
append(reference * head,reference * next)3569 append(reference * head, reference * next)
3570 {
3571 if (!head)
3572 return;
3573 if (head->next)
3574 append(head->next, next);
3575 else
3576 head->next = next;
3577 }
3578
3579 reference *
add_reference(char * id,int counter,float_type type,reference * ref)3580 add_reference(char * id, int counter, float_type type, reference * ref)
3581 {
3582 reference * next = malloc(sizeof(reference));
3583 next->next = NULL;
3584 next->id = id;
3585 next->type = type;
3586 next->counter = counter;
3587 if (ref)
3588 {
3589 append(ref, next);
3590 return ref;
3591 }
3592 return next;
3593 }
3594
3595 metadata *
parse_yaml(const uint8_t * data,size_t size)3596 parse_yaml(const uint8_t *data, size_t size)
3597 {
3598 metadata * meta = malloc(sizeof(metadata));
3599
3600 meta->keywords = NULL;
3601 meta->authors = NULL;
3602 meta->style = NULL;
3603 meta->title = NULL;
3604
3605 meta->paper_size = A4PAPER;
3606 meta->doc_class = CLASS_ARTICLE;
3607 meta->font_size = 10;
3608
3609 meta->numbering = 0;
3610 meta->affiliation = NULL;
3611
3612 if (startsWith("---", (char*)data) && is_separator(data[3])){
3613 int i = 4;
3614 while (i < size){
3615 if (startsWith("---\n", (char*)data+i))
3616 break;
3617 int j;
3618 for (j = 0 ; j+i+1 < size && data[i+j+1] != ':' && data[i+j+1] != '\n'; j++){}
3619 if (data[j+i+1] == ':'){
3620 char type[j+3];
3621 memset(type, 0, j+3);
3622 memcpy(type, data+i, j+1);
3623 j += parse_keyword(type, meta, data+i+j+2, size - i - j - 2);
3624 }
3625
3626 i+=j+3;
3627 }
3628 }
3629 return meta;
3630 }
3631
3632 void
render_metadata(hoedown_document * doc,hoedown_buffer * ob,metadata * meta)3633 render_metadata(hoedown_document *doc, hoedown_buffer *ob, metadata * meta)
3634 {
3635
3636 if (meta->title != NULL && doc->md.title)
3637 {
3638 hoedown_buffer * b = hoedown_buffer_new(1);
3639 hoedown_buffer_puts(b, meta->title);
3640 doc->md.title(ob,b, meta);
3641 hoedown_buffer_free(b);
3642 }
3643 if (meta->authors != NULL && doc->md.authors)
3644 {
3645 hoedown_buffer * b = hoedown_buffer_new(1);
3646
3647 doc->md.authors(ob,meta->authors);
3648 hoedown_buffer_free(b);
3649 }
3650 if (meta->affiliation != NULL && doc->md.affiliation)
3651 {
3652 hoedown_buffer * b = hoedown_buffer_new(1);
3653 hoedown_buffer_puts(b, meta->affiliation);
3654 doc->md.affiliation(ob,b,NULL);
3655 hoedown_buffer_free(b);
3656 }
3657
3658 }
/*
 * find_ref: look up `id` in the reference list `refs`.
 *
 * On the first node whose id compares equal, stores that node's counter in
 * *counter and returns 1. Returns 0 (leaving *counter untouched) when no
 * node matches or the list is empty.
 */
int find_ref(reference * refs, char*id, int *counter)
{
	reference *node;

	for (node = refs; node; node = node->next) {
		if (strcmp(node->id, id) == 0) {
			*counter = node->counter;
			return 1;
		}
	}
	return 0;
}
3672
3673 void
check_for_ref(hoedown_document * doc,const uint8_t * data,size_t size,html_counter * counter,float_type type)3674 check_for_ref(hoedown_document *doc, const uint8_t *data, size_t size, html_counter * counter, float_type type)
3675 {
3676 int caption = 0;
3677 size_t i = 0;
3678 while (i < size && !startsWith("@/\n", (char*)data+i)){
3679 i++;
3680 if (startsWith("@caption(", (char*)data+i)){
3681 caption = 1;
3682 }
3683 }
3684 if (caption || type==EQUATION){
3685 int c =0;
3686 switch (type)
3687 {
3688 case EQUATION:
3689 c = ++(counter->equation);
3690 break;
3691 case TABLE:
3692 c = ++(counter->table);
3693 break;
3694 case LISTING:
3695 c = ++(counter->listing);
3696 break;
3697 case FIGURE:
3698 c = ++(counter->figure);
3699 break;
3700 }
3701
3702 if (data[0] == '('){
3703 i = 1;
3704 while (i < size && data[i] != '\n' && data[i] !=')')
3705 {
3706 i ++ ;
3707 }
3708 if (i > 1)
3709 {
3710 char * id = malloc((i)*sizeof(char));
3711 memset(id, 0, i);
3712 memcpy(id, data+1, i-1);
3713 doc->floating_references = add_reference(id, c, type, doc->floating_references);
3714 }
3715 }
3716 }
3717 }
3718
3719
3720 void
look_for_ref(hoedown_document * doc,const uint8_t * data,size_t size,html_counter * counter)3721 look_for_ref(hoedown_document *doc, const uint8_t *data, size_t size, html_counter * counter)
3722 {
3723
3724 if (startsWith("@figure", (char*)data))
3725 {
3726 check_for_ref(doc, data+7, size-7, counter, FIGURE);
3727 }
3728 if (startsWith("@table", (char*)data))
3729 {
3730 check_for_ref(doc, data+6, size-6,counter, TABLE);
3731 }
3732 if (startsWith("@listing", (char*)data))
3733 {
3734 check_for_ref(doc, data+8, size-8,counter, LISTING);
3735 }
3736 if (startsWith("@equation", (char*)data))
3737 {
3738 check_for_ref(doc, data+9, size-9,counter, EQUATION);
3739 }
3740 }
3741
/*
 * load_text: load the file named by an "@include(path)" directive.
 *
 * data/size   - input positioned at the '@' of "@include("
 * base_folder - passed through to is_regular_file() and load_file()
 * new_size    - set to 0 on entry; load_file() receives it when the file
 *               is loaded
 *
 * Returns the buffer produced by load_file(), which the caller must free, or
 * NULL when the directive has no closing ')', the path is empty, the path is
 * not a regular file, or memory runs out.
 *
 * NOTE(review): the path comes straight from the document and is not checked
 * for ".." or absolute paths here -- confirm whether is_regular_file()
 * restricts it to base_folder.
 */
char*
load_text(uint8_t *data, size_t size, char* base_folder, size_t * new_size)
{
	const size_t prefix_len = 9; /* length of "@include(" */
	size_t close;
	size_t path_len;
	char *path;
	char *buffer = NULL;

	*new_size = 0;

	if (!data || size <= prefix_len)
		return NULL;

	close = prefix_len;
	while (close < size && data[close] != ')')
		close++;

	/* no ')' before the end of the input: not a complete directive */
	if (close == size)
		return NULL;

	path_len = close - prefix_len;
	if (path_len == 0)
		return NULL;

	path = malloc(path_len + 1);
	if (!path)
		return NULL;
	memcpy(path, data + prefix_len, path_len);
	path[path_len] = '\0';

	if (is_regular_file(path, base_folder))
		buffer = load_file(path, base_folder, new_size);

	free(path);
	return buffer;
}
3771
3772
3773
3774 void
find_references(hoedown_document * doc,const uint8_t * data,size_t size,html_counter * counter)3775 find_references(hoedown_document *doc, const uint8_t *data, size_t size, html_counter * counter)
3776 {
3777 size_t i;
3778 for (i = 0; i < size; i++)
3779 {
3780 if (prefix_float((uint8_t*)data+i, size-i))
3781 {
3782 look_for_ref(doc, data+i, size-i, counter);
3783 }
3784 else if (startsWith("@include(", (char*) data+i))
3785 {
3786 size_t text_size;
3787 char * text = load_text((uint8_t*)data+i, size-i, doc->base_folder, &text_size);
3788 if (text_size && text)
3789 {
3790 find_references(doc,(const uint8_t*) text, text_size, counter);
3791 free(text);
3792 }
3793 }
3794 }
3795 }
3796
3797 toc *
generate_toc(hoedown_document * doc,const uint8_t * data,size_t size,toc * parent)3798 generate_toc(hoedown_document * doc, const uint8_t * data, size_t size, toc* parent)
3799 {
3800 if (!data || !size)
3801 return parent;
3802 size_t i = 0;
3803 toc * root = parent;
3804 toc * current = root;
3805 char code_block = 0;
3806
3807 if (size > 4 && startsWith("---", (char*)data) && is_separator(data[3])){
3808 i = 4;
3809 while (i < size) {
3810 if (data[i-1] == '\n' && startsWith("---", (char*)data + i) && is_separator(data[i + 3])) {
3811 i += 3;
3812 break;
3813 }
3814 i++;
3815 }
3816
3817 }
3818
3819 for (; i < size-1; i++)
3820 {
3821 if (i == 0 || data[i-1] == '\n')
3822 {
3823 if (!code_block) {
3824 if (is_atxheader(doc, (uint8_t*)data+i, size-i))
3825 {
3826 size_t level = 0;
3827 uint8_t * title = get_atxheader_info((uint8_t*)data+i, size-i, &level, NULL);
3828 if (level <= 3 && title)
3829 {
3830 toc * next = malloc(sizeof(toc));
3831 next->sibling = NULL;
3832 next->nesting = level;
3833 next->text = (char*) title;
3834 if (!current) {
3835 root = next;
3836 } else {
3837 current->sibling = next;
3838 }
3839 current = next;
3840 }
3841 } else if (i > 0 && is_headerline((uint8_t*)data+i, size-i)){
3842 size_t j = i - 1;
3843 int somechar = 0;
3844 while (data[j - 1] != '\n') {
3845 if (j == 0)
3846 break;
3847 if (!is_separator(data[j -1]))
3848 somechar = 1;
3849 j --;
3850 }
3851 if ((i - j) > 1 && somechar) {
3852 size_t level = data[i] == '-' ? 2 : 1;
3853 char * title = malloc(i - j - 1);
3854 memcpy(title, data+j, i-j-2);
3855 title[i - j - 2] = 0;
3856
3857 toc * next = malloc(sizeof(toc));
3858 next->sibling = NULL;
3859 next->nesting = level;
3860 next->text = (char*) title;
3861 if (!current) {
3862 root = next;
3863 } else {
3864 current->sibling = next;
3865 }
3866 current = next;
3867
3868 }
3869 /* fprintf(stderr, "(document.c: generate_toc()): Headerline not yet implemented\n"); */
3870 //printf("Header line!\n");
3871 } else if (is_codefence((uint8_t*)data+i, size-i, NULL, NULL)) {
3872 code_block = data[i];
3873 }
3874 } else if (data[i] == code_block && is_codefence((uint8_t*)data+i, size-i, NULL, NULL)) {
3875 code_block = 0;
3876 }
3877 }
3878 if (!code_block && data[i] == '@' && startsWith("@include(", (char*)data+i))
3879 {
3880 size_t text_size;
3881 char * text = load_text((uint8_t*)data+i, size-i, doc->base_folder, &text_size);
3882 if (text_size && text)
3883 {
3884
3885 toc * t = generate_toc(doc,(const uint8_t*) text, text_size, current);
3886 if (!root && t)
3887 {
3888 root = t;
3889 }
3890 free(text);
3891 }
3892 }
3893 }
3894 return root;
3895 }
3896
3897
document_metadata(const uint8_t * data,size_t size)3898 metadata* document_metadata(const uint8_t *data, size_t size)
3899 {
3900 return parse_yaml(data, size);
3901 }
3902
3903 void
hoedown_document_render(hoedown_document * doc,hoedown_buffer * ob,const uint8_t * data,size_t size,int position)3904 hoedown_document_render(hoedown_document *doc, hoedown_buffer *ob, const uint8_t *data, size_t size, int position)
3905 {
3906
3907 int footnotes_enabled;
3908
3909 /* reset the references table */
3910 memset(&doc->refs, 0x0, REF_TABLE_SIZE * sizeof(void *));
3911
3912 footnotes_enabled = doc->ext_flags & HOEDOWN_EXT_FOOTNOTES;
3913
3914 /* reset the footnotes lists */
3915 if (footnotes_enabled) {
3916 memset(&doc->footnotes_found, 0x0, sizeof(doc->footnotes_found));
3917 memset(&doc->footnotes_used, 0x0, sizeof(doc->footnotes_used));
3918 }
3919 html_counter counter = {0,0,0,0};
3920 find_references(doc, data, size, &counter);
3921
3922
3923 doc->table_of_contents = generate_toc(doc, data, size, NULL);
3924
3925 metadata * meta = parse_yaml(data, size);
3926 doc->document_metadata = meta;
3927 doc->data.meta = meta;
3928
3929 if (doc->md.head)
3930 doc->md.head(ob, meta, doc->extensions);
3931 if (doc->md.begin)
3932 doc->md.begin(ob, &doc->data);
3933 render_metadata(doc, ob, meta);
3934
3935 if (doc->md.inner)
3936 doc->md.inner(ob, &doc->data);
3937
3938 sub_render(doc, ob, data, size, position);
3939 /* footnotes */
3940 if (footnotes_enabled)
3941 parse_footnote_list(ob, doc, &doc->footnotes_used);
3942
3943 if (doc->md.doc_footer)
3944 doc->md.doc_footer(ob, 0, &doc->data);
3945 if (doc->md.end)
3946 doc->md.end(ob, doc->extensions, &doc->data);
3947 /* clean-up */
3948
3949 free_link_refs(doc->refs);
3950 if (footnotes_enabled) {
3951 free_footnote_list(&doc->footnotes_found, 1);
3952 free_footnote_list(&doc->footnotes_used, 0);
3953 }
3954
3955 assert(doc->work_bufs[BUFFER_SPAN].size == 0);
3956 assert(doc->work_bufs[BUFFER_BLOCK].size == 0);
3957 }
3958
3959 void
hoedown_document_render_inline(hoedown_document * doc,hoedown_buffer * ob,const uint8_t * data,size_t size,int position)3960 hoedown_document_render_inline(hoedown_document *doc, hoedown_buffer *ob, const uint8_t *data, size_t size, int position)
3961 {
3962 size_t i = 0, mark;
3963 hoedown_buffer *text = hoedown_buffer_new(64);
3964
3965 /* reset the references table */
3966 memset(&doc->refs, 0x0, REF_TABLE_SIZE * sizeof(void *));
3967
3968 /* first pass: expand tabs and process newlines */
3969 hoedown_buffer_grow(text, size);
3970 while (1) {
3971 mark = i;
3972 while (i < size && data[i] != '\n' && data[i] != '\r')
3973 i++;
3974
3975 expand_tabs(text, data + mark, i - mark);
3976
3977 if (i >= size)
3978 break;
3979
3980 while (i < size && (data[i] == '\n' || data[i] == '\r')) {
3981 /* add one \n per newline */
3982 if (data[i] == '\n' || (i + 1 < size && data[i + 1] != '\n'))
3983 hoedown_buffer_putc(text, '\n');
3984 i++;
3985 }
3986 }
3987
3988 /* second pass: actual rendering */
3989 hoedown_buffer_grow(ob, text->size + (text->size >> 1));
3990
3991 if (doc->md.doc_header)
3992 doc->md.doc_header(ob, 1, &doc->data);
3993
3994 parse_inline(ob, doc, text->data, text->size);
3995
3996 if (doc->md.doc_footer)
3997 doc->md.doc_footer(ob, 1, &doc->data);
3998
3999 /* clean-up */
4000 hoedown_buffer_free(text);
4001 assert(doc->work_bufs[BUFFER_SPAN].size == 0);
4002 assert(doc->work_bufs[BUFFER_BLOCK].size == 0);
4003 }
4004
4005 void
free_references(reference * ref)4006 free_references(reference * ref)
4007 {
4008 if (ref)
4009 {
4010 free(ref->id);
4011 free_references(ref->next);
4012 free(ref->next);
4013 }
4014 }
4015
4016 void
free_toc(toc * ToC)4017 free_toc(toc * ToC)
4018 {
4019 if (ToC)
4020 {
4021 free(ToC->text);
4022 free_toc(ToC->sibling);
4023 free(ToC);
4024 }
4025 }
4026
4027 void
free_meta(metadata * meta)4028 free_meta(metadata * meta)
4029 {
4030 if (!meta)
4031 return;
4032 if (meta->affiliation)
4033 free(meta->affiliation);
4034 if (meta->keywords)
4035 free(meta->keywords);
4036 if (meta->style)
4037 free(meta->style);
4038 if (meta->title)
4039 free(meta->title);
4040 free_strings(meta->authors);
4041 free(meta);
4042 }
4043
4044 void
hoedown_document_free(hoedown_document * doc)4045 hoedown_document_free(hoedown_document *doc)
4046 {
4047 size_t i;
4048
4049 for (i = 0; i < (size_t)doc->work_bufs[BUFFER_SPAN].asize; ++i)
4050 hoedown_buffer_free(doc->work_bufs[BUFFER_SPAN].item[i]);
4051
4052 for (i = 0; i < (size_t)doc->work_bufs[BUFFER_BLOCK].asize; ++i)
4053 hoedown_buffer_free(doc->work_bufs[BUFFER_BLOCK].item[i]);
4054
4055 hoedown_stack_uninit(&doc->work_bufs[BUFFER_SPAN]);
4056 hoedown_stack_uninit(&doc->work_bufs[BUFFER_BLOCK]);
4057 free_references(doc->floating_references);
4058 free_toc(doc->table_of_contents);
4059 free_meta(doc->document_metadata);
4060 if (doc->base_folder)
4061 free(doc->base_folder);
4062 free(doc);
4063 }
4064