1 #include "document.h"
2 
3 #include <assert.h>
4 #include <string.h>
5 #include <ctype.h>
6 #include <stdio.h>
7 #include <sys/types.h>
8 #include <sys/stat.h>
9 #include <unistd.h>
10 
11 #include "stack.h"
12 
13 #ifndef _MSC_VER
14 #include <strings.h>
15 #else
16 #define strncasecmp	_strnicmp
17 #endif
18 
19 #define REF_TABLE_SIZE 8
20 
21 #define BUFFER_BLOCK 0
22 #define BUFFER_SPAN 1
23 
24 #define HOEDOWN_LI_END 8	/* internal list flag */
25 
26 const char *hoedown_find_block_tag(const char *str, unsigned int len);
27 int find_ref(reference * refs, char*id, int *counter);
28 
29 /***************
30  * LOCAL TYPES *
31  ***************/
32 
33 /* link_ref: reference to a link */
34 struct link_ref {
35 	unsigned int id;
36 
37 	hoedown_buffer *link;
38 	hoedown_buffer *title;
39 
40 	struct link_ref *next;
41 };
42 
43 /* footnote_ref: reference to a footnote */
44 struct footnote_ref {
45 	unsigned int id;
46 
47 	int is_used;
48 	unsigned int num;
49 
50 	hoedown_buffer *contents;
51 };
52 
/* footnote_item: singly linked node of a footnote_list */
struct footnote_item {
	struct footnote_ref *ref;	/* footnote carried by this node */
	struct footnote_item *next;	/* following node, NULL at the tail */
};
58 
/* footnote_list: linked list of footnote_item with O(1) append */
struct footnote_list {
	unsigned int count;			/* number of items appended so far */
	struct footnote_item *head, *tail;	/* first and last node, NULL when empty */
};
65 
66 /* char_trigger: function pointer to render active chars */
67 /*   returns the number of chars taken care of */
68 /*   data is the pointer of the beginning of the span */
69 /*   offset is the number of valid chars before data */
70 typedef size_t
71 (*char_trigger)(hoedown_buffer *ob, hoedown_document *doc, uint8_t *data, size_t offset, size_t size);
72 
73 static size_t char_emphasis(hoedown_buffer *ob, hoedown_document *doc, uint8_t *data, size_t offset, size_t size);
74 static size_t char_quote(hoedown_buffer *ob, hoedown_document *doc, uint8_t *data, size_t offset, size_t size);
75 static size_t char_linebreak(hoedown_buffer *ob, hoedown_document *doc, uint8_t *data, size_t offset, size_t size);
76 static size_t char_codespan(hoedown_buffer *ob, hoedown_document *doc, uint8_t *data, size_t offset, size_t size);
77 static size_t char_escape(hoedown_buffer *ob, hoedown_document *doc, uint8_t *data, size_t offset, size_t size);
78 static size_t char_entity(hoedown_buffer *ob, hoedown_document *doc, uint8_t *data, size_t offset, size_t size);
79 static size_t char_langle_tag(hoedown_buffer *ob, hoedown_document *doc, uint8_t *data, size_t offset, size_t size);
80 static size_t char_autolink_url(hoedown_buffer *ob, hoedown_document *doc, uint8_t *data, size_t offset, size_t size);
81 static size_t char_autolink_email(hoedown_buffer *ob, hoedown_document *doc, uint8_t *data, size_t offset, size_t size);
82 static size_t char_autolink_www(hoedown_buffer *ob, hoedown_document *doc, uint8_t *data, size_t offset, size_t size);
83 static size_t char_link(hoedown_buffer *ob, hoedown_document *doc, uint8_t *data, size_t offset, size_t size);
84 static size_t char_image(hoedown_buffer *ob, hoedown_document *doc, uint8_t *data, size_t offset, size_t size);
85 static size_t char_superscript(hoedown_buffer *ob, hoedown_document *doc, uint8_t *data, size_t offset, size_t size);
86 static size_t char_math(hoedown_buffer *ob, hoedown_document *doc, uint8_t *data, size_t offset, size_t size);
87 static size_t char_ref(hoedown_buffer *ob, hoedown_document *doc, uint8_t *data, size_t offset, size_t size);
88 
89 void sub_render(hoedown_document *doc, hoedown_buffer *ob, const uint8_t *data, size_t size, int position);
90 
/* markdown_char_t: handler ids; each value is an index into markdown_char_ptrs */
enum markdown_char_t {
	MD_CHAR_NONE           = 0,	/* inactive char, no handler */
	MD_CHAR_EMPHASIS       = 1,
	MD_CHAR_CODESPAN       = 2,
	MD_CHAR_LINEBREAK      = 3,
	MD_CHAR_LINK           = 4,
	MD_CHAR_IMAGE          = 5,
	MD_CHAR_LANGLE         = 6,
	MD_CHAR_ESCAPE         = 7,
	MD_CHAR_ENTITY         = 8,
	MD_CHAR_AUTOLINK_URL   = 9,
	MD_CHAR_AUTOLINK_EMAIL = 10,
	MD_CHAR_AUTOLINK_WWW   = 11,
	MD_CHAR_SUPERSCRIPT    = 12,
	MD_CHAR_QUOTE          = 13,
	MD_CHAR_MATH           = 14,
	MD_CHAR_REF            = 15
};
109 
110 static char_trigger markdown_char_ptrs[] = {
111 	NULL,
112 	&char_emphasis,
113 	&char_codespan,
114 	&char_linebreak,
115 	&char_link,
116 	&char_image,
117 	&char_langle_tag,
118 	&char_escape,
119 	&char_entity,
120 	&char_autolink_url,
121 	&char_autolink_email,
122 	&char_autolink_www,
123 	&char_superscript,
124 	&char_quote,
125 	&char_math,
126 	&char_ref
127 };
128 
129 struct hoedown_document {
130 	hoedown_renderer md;
131 	hoedown_renderer_data data;
132 	metadata * document_metadata;
133 	reference * floating_references;
134 	ext_definition * extensions;
135 	toc * table_of_contents;
136 	h_counter counter;
137 
138 	char * base_folder;
139 
140 	struct link_ref *refs[REF_TABLE_SIZE];
141 	struct footnote_list footnotes_found;
142 	struct footnote_list footnotes_used;
143 	uint8_t active_char[256];
144 	hoedown_stack work_bufs[2];
145 	hoedown_extensions ext_flags;
146 	size_t max_nesting;
147 	int in_link_body;
148 };
149 
150 /***************************
151  * HELPER FUNCTIONS *
152  ***************************/
153 
/* startsWith • returns 1 when str begins with pre, 0 otherwise */
/*   a NULL argument yields 0; an empty prefix matches every string */
static int
startsWith(char *pre, char *str)
{
	size_t prefix_len, text_len;

	if (pre == NULL || str == NULL)
		return 0;

	prefix_len = strlen(pre);
	text_len = strlen(str);

	/* a prefix longer than the text can never match */
	if (text_len < prefix_len)
		return 0;

	return memcmp(pre, str, prefix_len) == 0;
}
163 
/* is_separator • returns 1 for a space, tab, newline or '(', 0 otherwise */
int
is_separator(uint8_t chr)
{
	switch (chr) {
	case ' ':
	case '(':
	case '\t':
	case '\n':
		return 1;

	default:
		return 0;
	}
}
169 
/* is_regular_file • tells whether path names an existing regular file */
/*   path        - absolute path (leading '/') or relative path */
/*   base_folder - when not NULL, relative paths are resolved as */
/*                 base_folder "/" path; when NULL they are left to the */
/*                 OS, which resolves them against the working directory */
/*   returns 1 for a regular file, 0 otherwise (including a NULL path, */
/*   an allocation failure, or a path that cannot be stat'ed) */
static int
is_regular_file(const char *path, char *base_folder)
{
	struct stat path_stat;
	int rc;

	if (path == NULL)
		return 0;

	if (path[0] != '/' && base_folder != NULL) {
		size_t folder_len = strlen(base_folder);
		size_t path_len = strlen(path);
		char *joined = malloc(folder_len + path_len + 2);

		if (joined == NULL)
			return 0;

		memcpy(joined, base_folder, folder_len);
		joined[folder_len] = '/';
		/* path_len + 1 also copies the terminating NUL */
		memcpy(joined + folder_len + 1, path, path_len + 1);

		rc = stat(joined, &path_stat);
		free(joined);
	} else {
		/* absolute path, or relative to the working directory:
		 * no need to build the name by hand in a fixed-size buffer */
		rc = stat(path, &path_stat);
	}

	/* path_stat is only meaningful when stat() succeeded */
	if (rc != 0)
		return 0;

	return S_ISREG(path_stat.st_mode) ? 1 : 0;
}
202 
203 static hoedown_buffer *
newbuf(hoedown_document * doc,int type)204 newbuf(hoedown_document *doc, int type)
205 {
206 	static const size_t buf_size[2] = {256, 64};
207 	hoedown_buffer *work = NULL;
208 	hoedown_stack *pool = &doc->work_bufs[type];
209 
210 	if (pool->size < pool->asize &&
211 		pool->item[pool->size] != NULL) {
212 		work = pool->item[pool->size++];
213 		work->size = 0;
214 	} else {
215 		work = hoedown_buffer_new(buf_size[type]);
216 		hoedown_stack_push(pool, work);
217 	}
218 
219 	return work;
220 }
221 
222 static void
popbuf(hoedown_document * doc,int type)223 popbuf(hoedown_document *doc, int type)
224 {
225 	doc->work_bufs[type].size--;
226 }
227 
228 static void
unscape_text(hoedown_buffer * ob,hoedown_buffer * src)229 unscape_text(hoedown_buffer *ob, hoedown_buffer *src)
230 {
231 	size_t i = 0, org;
232 	while (i < src->size) {
233 		org = i;
234 		while (i < src->size && src->data[i] != '\\')
235 			i++;
236 
237 		if (i > org)
238 			hoedown_buffer_put(ob, src->data + org, i - org);
239 
240 		if (i + 1 >= src->size)
241 			break;
242 
243 		hoedown_buffer_putc(ob, src->data[i + 1]);
244 		i += 2;
245 	}
246 }
247 
/* hash_link_ref • case-insensitive hash of a reference name */
/*   every byte is lowercased with tolower() before being mixed in; */
/*   the empty name hashes to 0 */
static unsigned int
hash_link_ref(const uint8_t *link_ref, size_t length)
{
	const uint8_t *cursor = link_ref;
	const uint8_t *stop = link_ref + length;
	unsigned int hash = 0;

	/* 65599 == (1 << 6) + (1 << 16) - 1, i.e. the shift form multiplied out */
	while (cursor < stop) {
		hash = (unsigned int)tolower(*cursor) + hash * 65599u;
		cursor++;
	}

	return hash;
}
259 
260 static struct link_ref *
add_link_ref(struct link_ref ** references,const uint8_t * name,size_t name_size)261 add_link_ref(
262 	struct link_ref **references,
263 	const uint8_t *name, size_t name_size)
264 {
265 	struct link_ref *ref = hoedown_calloc(1, sizeof(struct link_ref));
266 
267 	ref->id = hash_link_ref(name, name_size);
268 	ref->next = references[ref->id % REF_TABLE_SIZE];
269 
270 	references[ref->id % REF_TABLE_SIZE] = ref;
271 	return ref;
272 }
273 
274 static struct link_ref *
find_link_ref(struct link_ref ** references,uint8_t * name,size_t length)275 find_link_ref(struct link_ref **references, uint8_t *name, size_t length)
276 {
277 	unsigned int hash = hash_link_ref(name, length);
278 	struct link_ref *ref = NULL;
279 
280 	ref = references[hash % REF_TABLE_SIZE];
281 
282 	while (ref != NULL) {
283 		if (ref->id == hash)
284 			return ref;
285 
286 		ref = ref->next;
287 	}
288 
289 	return NULL;
290 }
291 
292 static void
free_link_refs(struct link_ref ** references)293 free_link_refs(struct link_ref **references)
294 {
295 	size_t i;
296 
297 	for (i = 0; i < REF_TABLE_SIZE; ++i) {
298 		struct link_ref *r = references[i];
299 		struct link_ref *next;
300 
301 		while (r) {
302 			next = r->next;
303 			hoedown_buffer_free(r->link);
304 			hoedown_buffer_free(r->title);
305 			free(r);
306 			r = next;
307 		}
308 	}
309 }
310 
311 static struct footnote_ref *
create_footnote_ref(struct footnote_list * list,const uint8_t * name,size_t name_size)312 create_footnote_ref(struct footnote_list *list, const uint8_t *name, size_t name_size)
313 {
314 	struct footnote_ref *ref = hoedown_calloc(1, sizeof(struct footnote_ref));
315 	ref->id = hash_link_ref(name, name_size);
316 
317 	return ref;
318 }
319 
320 static int
add_footnote_ref(struct footnote_list * list,struct footnote_ref * ref)321 add_footnote_ref(struct footnote_list *list, struct footnote_ref *ref)
322 {
323 	struct footnote_item *item = hoedown_calloc(1, sizeof(struct footnote_item));
324 	if (!item)
325 		return 0;
326 	item->ref = ref;
327 
328 	if (list->head == NULL) {
329 		list->head = list->tail = item;
330 	} else {
331 		list->tail->next = item;
332 		list->tail = item;
333 	}
334 	list->count++;
335 
336 	return 1;
337 }
338 
339 static struct footnote_ref *
find_footnote_ref(struct footnote_list * list,uint8_t * name,size_t length)340 find_footnote_ref(struct footnote_list *list, uint8_t *name, size_t length)
341 {
342 	unsigned int hash = hash_link_ref(name, length);
343 	struct footnote_item *item = NULL;
344 
345 	item = list->head;
346 
347 	while (item != NULL) {
348 		if (item->ref->id == hash)
349 			return item->ref;
350 		item = item->next;
351 	}
352 
353 	return NULL;
354 }
355 
356 static void
free_footnote_ref(struct footnote_ref * ref)357 free_footnote_ref(struct footnote_ref *ref)
358 {
359 	hoedown_buffer_free(ref->contents);
360 	free(ref);
361 }
362 
363 static void
free_footnote_list(struct footnote_list * list,int free_refs)364 free_footnote_list(struct footnote_list *list, int free_refs)
365 {
366 	struct footnote_item *item = list->head;
367 	struct footnote_item *next;
368 
369 	while (item) {
370 		next = item->next;
371 		if (free_refs)
372 			free_footnote_ref(item->ref);
373 		free(item);
374 		item = next;
375 	}
376 }
377 
378 
/*
 * Check whether a char is a Markdown spacing char.
 *
 * Only the plain space and the newline count as spacing here; the
 * original note states that tabs and carriage returns are filtered
 * out during the preprocessing phase.
 *
 * A properly UTF-8 aware version would decode a Unicode codepoint
 * and test its space property instead of looking at a single byte.
 */
static int
_isspace(int c)
{
	switch (c) {
	case ' ':
	case '\n':
		return 1;

	default:
		return 0;
	}
}
395 
/* is_empty_all • returns 1 when data holds nothing but spacing, 0 otherwise */
/*   a zero-length range counts as empty */
static int
is_empty_all(const uint8_t *data, size_t size)
{
	size_t pos;

	for (pos = 0; pos < size; pos++) {
		if (!_isspace(data[pos]))
			return 0;
	}

	return 1;
}
404 
405 /*
406  * Replace all spacing characters in data with spaces. As a special
407  * case, this collapses a newline with the previous space, if possible.
408  */
409 static void
replace_spacing(hoedown_buffer * ob,const uint8_t * data,size_t size)410 replace_spacing(hoedown_buffer *ob, const uint8_t *data, size_t size)
411 {
412 	size_t i = 0, mark;
413 	hoedown_buffer_grow(ob, size);
414 	while (1) {
415 		mark = i;
416 		while (i < size && data[i] != '\n') i++;
417 		hoedown_buffer_put(ob, data + mark, i - mark);
418 
419 		if (i >= size) break;
420 
421 		if (!(i > 0 && data[i-1] == ' '))
422 			hoedown_buffer_putc(ob, ' ');
423 		i++;
424 	}
425 }
426 
427 /****************************
428  * INLINE PARSING FUNCTIONS *
429  ****************************/
430 
/* is_mail_autolink • looks for the address part of a mail autolink and '>' */
/*   accepted bytes are [-@._a-zA-Z0-9]; this is less strict than the */
/*   original markdown e-mail address matching */
/*   returns the length up to and including '>' when exactly one '@' was */
/*   seen, 0 in every other case (bad byte, no '>', wrong number of '@') */
static size_t
is_mail_autolink(uint8_t *data, size_t size)
{
	size_t pos, at_signs = 0;

	for (pos = 0; pos < size; pos++) {
		uint8_t ch = data[pos];

		if (isalnum(ch) || ch == '-' || ch == '.' || ch == '_')
			continue;

		if (ch == '@') {
			at_signs++;
			continue;
		}

		if (ch == '>')
			return (at_signs == 1) ? pos + 1 : 0;

		/* any other byte cannot be part of an address */
		return 0;
	}

	/* ran out of input before the closing '>' */
	return 0;
}
462 
463 /* tag_length • returns the length of the given tag, or 0 is it's not valid */
464 static size_t
tag_length(uint8_t * data,size_t size,hoedown_autolink_type * autolink)465 tag_length(uint8_t *data, size_t size, hoedown_autolink_type *autolink)
466 {
467 	size_t i, j;
468 
469 	/* a valid tag can't be shorter than 3 chars */
470 	if (size < 3) return 0;
471 
472 	if (data[0] != '<') return 0;
473 
474         /* HTML comment, laxist form */
475         if (size > 5 && data[1] == '!' && data[2] == '-' && data[3] == '-') {
476 		i = 5;
477 
478 		while (i < size && !(data[i - 2] == '-' && data[i - 1] == '-' && data[i] == '>'))
479 			i++;
480 
481 		i++;
482 
483 		if (i <= size)
484 			return i;
485         }
486 
487 	/* begins with a '<' optionally followed by '/', followed by letter or number */
488         i = (data[1] == '/') ? 2 : 1;
489 
490 	if (!isalnum(data[i]))
491 		return 0;
492 
493 	/* scheme test */
494 	*autolink = HOEDOWN_AUTOLINK_NONE;
495 
496 	/* try to find the beginning of an URI */
497 	while (i < size && (isalnum(data[i]) || data[i] == '.' || data[i] == '+' || data[i] == '-'))
498 		i++;
499 
500 	if (i > 1 && data[i] == '@') {
501 		if ((j = is_mail_autolink(data + i, size - i)) != 0) {
502 			*autolink = HOEDOWN_AUTOLINK_EMAIL;
503 			return i + j;
504 		}
505 	}
506 
507 	if (i > 2 && data[i] == ':') {
508 		*autolink = HOEDOWN_AUTOLINK_NORMAL;
509 		i++;
510 	}
511 
512 	/* completing autolink test: no spacing or ' or " */
513 	if (i >= size)
514 		*autolink = HOEDOWN_AUTOLINK_NONE;
515 
516 	else if (*autolink) {
517 		j = i;
518 
519 		while (i < size) {
520 			if (data[i] == '\\') i += 2;
521 			else if (data[i] == '>' || data[i] == '\'' ||
522 					data[i] == '"' || data[i] == ' ' || data[i] == '\n')
523 					break;
524 			else i++;
525 		}
526 
527 		if (i >= size) return 0;
528 		if (i > j && data[i] == '>') return i + 1;
529 		/* one of the forbidden chars has been found */
530 		*autolink = HOEDOWN_AUTOLINK_NONE;
531 	}
532 
533 	/* looking for something looking like a tag end */
534 	while (i < size && data[i] != '>') i++;
535 	if (i >= size) return 0;
536 	return i + 1;
537 }
538 
539 /* parse_inline • parses inline markdown elements */
540 static void
parse_inline(hoedown_buffer * ob,hoedown_document * doc,uint8_t * data,size_t size)541 parse_inline(hoedown_buffer *ob, hoedown_document *doc, uint8_t *data, size_t size)
542 {
543 	size_t i = 0, end = 0, consumed = 0;
544 	hoedown_buffer work = { 0, 0, 0, 0, NULL, NULL, NULL };
545 	uint8_t *active_char = doc->active_char;
546 
547 	if (doc->work_bufs[BUFFER_SPAN].size +
548 		doc->work_bufs[BUFFER_BLOCK].size > doc->max_nesting)
549 		return;
550 
551 	while (i < size) {
552 		/* copying inactive chars into the output */
553 		while (end < size && active_char[data[end]] == 0)
554 			end++;
555 
556 		if (doc->md.normal_text) {
557 			work.data = data + i;
558 			work.size = end - i;
559 			doc->md.normal_text(ob, &work, &doc->data);
560 		}
561 		else
562 			hoedown_buffer_put(ob, data + i, end - i);
563 
564 		if (end >= size) break;
565 		i = end;
566 
567 		end = markdown_char_ptrs[ (int)active_char[data[end]] ](ob, doc, data + i, i - consumed, size - i);
568 		if (!end) /* no action from the callback */
569 			end = i + 1;
570 		else {
571 			i += end;
572 			end = i;
573 			consumed = i;
574 		}
575 	}
576 }
577 
/* is_escaped • returns whether special char at data[loc] is escaped by '\\' */
/*   counts the backslashes immediately before data[loc]; an odd count */
/*   escapes it (result 1), an even count does not (result 0) */
static int
is_escaped(uint8_t *data, size_t loc)
{
	size_t backslashes = 0;

	while (backslashes < loc && data[loc - backslashes - 1] == '\\')
		backslashes++;

	return backslashes % 2;
}
589 
/* find_emph_char • looks for the next emph char, skipping other constructs */
/*   escaped chars, code spans and links are stepped over; when such a */
/*   construct turns out to be unterminated, the first `c` seen inside it */
/*   is returned instead */
/*   returns the index of the char found, 0 when there is none (an index */
/*   of 0 is therefore never distinguishable from "not found") */
static size_t
find_emph_char(uint8_t *data, size_t size, uint8_t c)
{
	size_t pos = 0;

	while (pos < size) {
		/* jump to the next char of interest */
		while (pos < size && data[pos] != c && data[pos] != '[' && data[pos] != '`')
			pos++;

		if (pos == size)
			return 0;

		/* not counting escaped chars */
		if (is_escaped(data, pos)) {
			pos++;
			continue;
		}

		if (data[pos] == c)
			return pos;

		if (data[pos] == '`') {
			/* skipping a codespan */
			size_t opening = 0, run = 0;
			size_t fallback = 0;

			/* counting the number of opening backticks */
			while (pos < size && data[pos] == '`') {
				pos++;
				opening++;
			}

			if (pos >= size)
				return 0;

			/* finding the matching closing sequence */
			while (pos < size && run < opening) {
				if (!fallback && data[pos] == c)
					fallback = pos;

				if (data[pos] == '`')
					run++;
				else
					run = 0;

				pos++;
			}

			/* not a well-formed codespan; use found matching emph char */
			if (run < opening && pos >= size)
				return fallback;
		} else if (data[pos] == '[') {
			/* skipping a link */
			size_t fallback = 0;
			uint8_t closer;

			pos++;
			while (pos < size && data[pos] != ']') {
				if (!fallback && data[pos] == c)
					fallback = pos;
				pos++;
			}

			pos++;
			while (pos < size && _isspace(data[pos]))
				pos++;

			if (pos >= size)
				return fallback;

			/* the link text may be followed by [id] or (url) */
			if (data[pos] == '[') {
				closer = ']';
			} else if (data[pos] == '(') {
				closer = ')';
			} else {
				/* not a link after all */
				if (fallback)
					return fallback;
				continue;
			}

			pos++;
			while (pos < size && data[pos] != closer) {
				if (!fallback && data[pos] == c)
					fallback = pos;
				pos++;
			}

			if (pos >= size)
				return fallback;

			pos++;
		}
	}

	return 0;
}
682 
683 /* parse_emph1 • parsing single emphase */
684 /* closed by a symbol not preceded by spacing and not followed by symbol */
685 static size_t
parse_emph1(hoedown_buffer * ob,hoedown_document * doc,uint8_t * data,size_t size,uint8_t c)686 parse_emph1(hoedown_buffer *ob, hoedown_document *doc, uint8_t *data, size_t size, uint8_t c)
687 {
688 	size_t i = 0, len;
689 	hoedown_buffer *work = 0;
690 	int r;
691 
692 	/* skipping one symbol if coming from emph3 */
693 	if (size > 1 && data[0] == c && data[1] == c) i = 1;
694 
695 	while (i < size) {
696 		len = find_emph_char(data + i, size - i, c);
697 		if (!len) return 0;
698 		i += len;
699 		if (i >= size) return 0;
700 
701 		if (data[i] == c && !_isspace(data[i - 1])) {
702 
703 			if (doc->ext_flags & HOEDOWN_EXT_NO_INTRA_EMPHASIS) {
704 				if (i + 1 < size && isalnum(data[i + 1]))
705 					continue;
706 			}
707 
708 			work = newbuf(doc, BUFFER_SPAN);
709 			parse_inline(work, doc, data, i);
710 
711 			if (doc->ext_flags & HOEDOWN_EXT_UNDERLINE && c == '_')
712 				r = doc->md.underline(ob, work, &doc->data);
713 			else
714 				r = doc->md.emphasis(ob, work, &doc->data);
715 
716 			popbuf(doc, BUFFER_SPAN);
717 			return r ? i + 1 : 0;
718 		}
719 	}
720 
721 	return 0;
722 }
723 
724 /* parse_emph2 • parsing single emphase */
725 static size_t
parse_emph2(hoedown_buffer * ob,hoedown_document * doc,uint8_t * data,size_t size,uint8_t c)726 parse_emph2(hoedown_buffer *ob, hoedown_document *doc, uint8_t *data, size_t size, uint8_t c)
727 {
728 	size_t i = 0, len;
729 	hoedown_buffer *work = 0;
730 	int r;
731 
732 	while (i < size) {
733 		len = find_emph_char(data + i, size - i, c);
734 		if (!len) return 0;
735 		i += len;
736 
737 		if (i + 1 < size && data[i] == c && data[i + 1] == c && i && !_isspace(data[i - 1])) {
738 			work = newbuf(doc, BUFFER_SPAN);
739 			parse_inline(work, doc, data, i);
740 
741 			if (c == '~')
742 				r = doc->md.strikethrough(ob, work, &doc->data);
743 			else if (c == '=')
744 				r = doc->md.highlight(ob, work, &doc->data);
745 			else
746 				r = doc->md.double_emphasis(ob, work, &doc->data);
747 
748 			popbuf(doc, BUFFER_SPAN);
749 			return r ? i + 2 : 0;
750 		}
751 		i++;
752 	}
753 	return 0;
754 }
755 
756 /* parse_emph3 • parsing single emphase */
757 /* finds the first closing tag, and delegates to the other emph */
758 static size_t
parse_emph3(hoedown_buffer * ob,hoedown_document * doc,uint8_t * data,size_t size,uint8_t c)759 parse_emph3(hoedown_buffer *ob, hoedown_document *doc, uint8_t *data, size_t size, uint8_t c)
760 {
761 	size_t i = 0, len;
762 	int r;
763 
764 	while (i < size) {
765 		len = find_emph_char(data + i, size - i, c);
766 		if (!len) return 0;
767 		i += len;
768 
769 		/* skip spacing preceded symbols */
770 		if (data[i] != c || _isspace(data[i - 1]))
771 			continue;
772 
773 		if (i + 2 < size && data[i + 1] == c && data[i + 2] == c && doc->md.triple_emphasis) {
774 			/* triple symbol found */
775 			hoedown_buffer *work = newbuf(doc, BUFFER_SPAN);
776 
777 			parse_inline(work, doc, data, i);
778 			r = doc->md.triple_emphasis(ob, work, &doc->data);
779 			popbuf(doc, BUFFER_SPAN);
780 			return r ? i + 3 : 0;
781 
782 		} else if (i + 1 < size && data[i + 1] == c) {
783 			/* double symbol found, handing over to emph1 */
784 			len = parse_emph1(ob, doc, data - 2, size + 2, c);
785 			if (!len) return 0;
786 			else return len - 2;
787 
788 		} else {
789 			/* single symbol found, handing over to emph2 */
790 			len = parse_emph2(ob, doc, data - 1, size + 1, c);
791 			if (!len) return 0;
792 			else return len - 1;
793 		}
794 	}
795 	return 0;
796 }
797 
798 /* parse_math • parses a math span until the given ending delimiter */
799 static size_t
parse_math(hoedown_buffer * ob,hoedown_document * doc,uint8_t * data,size_t offset,size_t size,const char * end,size_t delimsz,int displaymode)800 parse_math(hoedown_buffer *ob, hoedown_document *doc, uint8_t *data, size_t offset, size_t size, const char *end, size_t delimsz, int displaymode)
801 {
802 	hoedown_buffer text = { NULL, 0, 0, 0, NULL, NULL, NULL };
803 	size_t i = delimsz;
804 
805 	if (!doc->md.math)
806 		return 0;
807 
808 	/* find ending delimiter */
809 	while (1) {
810 		while (i < size && data[i] != (uint8_t)end[0])
811 			i++;
812 
813 		if (i >= size)
814 			return 0;
815 
816 		if (!is_escaped(data, i) && !(i + delimsz > size)
817 			&& memcmp(data + i, end, delimsz) == 0)
818 			break;
819 
820 		i++;
821 	}
822 
823 	/* prepare buffers */
824 	text.data = data + delimsz;
825 	text.size = i - delimsz;
826 
827 	/* if this is a $$ and MATH_EXPLICIT is not active,
828 	 * guess whether displaymode should be enabled from the context */
829 	i += delimsz;
830 	if (delimsz == 2 && !(doc->ext_flags & HOEDOWN_EXT_MATH_EXPLICIT))
831 		displaymode = is_empty_all(data - offset, offset) && is_empty_all(data + i, size - i);
832 
833 	/* call callback */
834 	if (doc->md.math(ob, &text, displaymode, &doc->data))
835 		return i;
836 
837 	return 0;
838 }
839 
/* load_file • reads a whole file into a freshly allocated, NUL-terminated buffer */
/*   path        - absolute path (leading '/') or relative path */
/*   base_folder - when not NULL, relative paths are opened as */
/*                 base_folder "/" path; when NULL they are left to the */
/*                 OS, which resolves them against the working directory */
/*   size        - receives the number of bytes read, 0 on failure */
/*   returns the buffer, which the caller must free(), or NULL when path */
/*   or size is NULL, the file cannot be opened or read, or memory runs out */
static char*
load_file(const char* path, char* base_folder, size_t * size)
{
	FILE *f;
	char *contents = NULL;
	long length;
	size_t got;

	if (size != NULL)
		*size = 0;
	if (path == NULL || size == NULL)
		return NULL;

	if (path[0] != '/' && base_folder != NULL) {
		size_t folder_len = strlen(base_folder);
		size_t path_len = strlen(path);
		char *joined = malloc(folder_len + path_len + 2);

		if (joined == NULL)
			return NULL;

		memcpy(joined, base_folder, folder_len);
		joined[folder_len] = '/';
		/* path_len + 1 also copies the terminating NUL */
		memcpy(joined + folder_len + 1, path, path_len + 1);

		f = fopen(joined, "rb");
		free(joined);
	} else {
		/* absolute path, or relative to the working directory:
		 * no need to build the name by hand in a fixed-size buffer */
		f = fopen(path, "rb");
	}

	if (f == NULL)
		return NULL;

	/* measure the file; ftell reports -1 on failure */
	if (fseek(f, 0, SEEK_END) != 0)
		goto done;
	length = ftell(f);
	if (length < 0 || fseek(f, 0, SEEK_SET) != 0)
		goto done;

	contents = malloc((size_t)length + 1);
	if (contents == NULL)
		goto done;

	got = fread(contents, 1, (size_t)length, f);
	if (got != (size_t)length && ferror(f)) {
		free(contents);
		contents = NULL;
		goto done;
	}

	/* terminate at what was actually read, in case of a short read */
	contents[got] = 0;
	*size = got;

done:
	fclose(f);
	return contents;
}
882 
883 static size_t
parse_include(hoedown_buffer * ob,hoedown_document * doc,uint8_t * data,size_t offset,size_t size)884 parse_include(hoedown_buffer *ob, hoedown_document *doc, uint8_t *data, size_t offset, size_t size)
885 {
886 	/* @include(path) */
887 	size_t i = 9;
888 	size_t n = 0;
889 	for (;i < size; i++)
890 	{
891 		if (data[i] == ')')
892 		{
893 			break;
894 		}
895 		n++;
896 	}
897 	if (n){
898 		char * path = malloc((n+1)*sizeof(uint8_t));
899 		path[n] = 0;
900 		memcpy(path, data+9, n);
901 		if (is_regular_file(path, doc->base_folder)){
902 			size_t neu_size = 0;
903 			char * buffer = load_file(path, doc->base_folder, &neu_size);
904 
905 			sub_render(doc, ob, (uint8_t*)buffer, neu_size, 0);
906 
907 		}
908 		free(path);
909 	}
910 	return i+1;
911 }
912 
913 
914 /* char_emphasis • single and double emphasis parsing */
915 static size_t
char_emphasis(hoedown_buffer * ob,hoedown_document * doc,uint8_t * data,size_t offset,size_t size)916 char_emphasis(hoedown_buffer *ob, hoedown_document *doc, uint8_t *data, size_t offset, size_t size)
917 {
918 	uint8_t c = data[0];
919 	size_t ret;
920 
921 	if (doc->ext_flags & HOEDOWN_EXT_NO_INTRA_EMPHASIS) {
922 		if (offset > 0 && !_isspace(data[-1]) && data[-1] != '>' && data[-1] != '(')
923 			return 0;
924 	}
925 
926 	if (size > 2 && data[1] != c) {
927 		/* spacing cannot follow an opening emphasis;
928 		 * strikethrough and highlight only takes two characters '~~' */
929 		if (c == '~' || c == '=' || _isspace(data[1]) || (ret = parse_emph1(ob, doc, data + 1, size - 1, c)) == 0)
930 			return 0;
931 
932 		return ret + 1;
933 	}
934 
935 	if (size > 3 && data[1] == c && data[2] != c) {
936 		if (_isspace(data[2]) || (ret = parse_emph2(ob, doc, data + 2, size - 2, c)) == 0)
937 			return 0;
938 
939 		return ret + 2;
940 	}
941 
942 	if (size > 4 && data[1] == c && data[2] == c && data[3] != c) {
943 		if (c == '~' || c == '=' || _isspace(data[3]) || (ret = parse_emph3(ob, doc, data + 3, size - 3, c)) == 0)
944 			return 0;
945 
946 		return ret + 3;
947 	}
948 
949 	return 0;
950 }
951 
952 
953 /* char_linebreak • '\n' preceded by two spaces (assuming linebreak != 0) */
954 static size_t
char_linebreak(hoedown_buffer * ob,hoedown_document * doc,uint8_t * data,size_t offset,size_t size)955 char_linebreak(hoedown_buffer *ob, hoedown_document *doc, uint8_t *data, size_t offset, size_t size)
956 {
957 	if (offset < 2 || data[-1] != ' ' || data[-2] != ' ')
958 		return 0;
959 
960 	/* removing the last space from ob and rendering */
961 	while (ob->size && ob->data[ob->size - 1] == ' ')
962 		ob->size--;
963 
964 	return doc->md.linebreak(ob, &doc->data) ? 1 : 0;
965 }
966 
967 
968 /* char_codespan • '`' parsing a code span (assuming codespan != 0) */
969 static size_t
char_codespan(hoedown_buffer * ob,hoedown_document * doc,uint8_t * data,size_t offset,size_t size)970 char_codespan(hoedown_buffer *ob, hoedown_document *doc, uint8_t *data, size_t offset, size_t size)
971 {
972 	hoedown_buffer work = { NULL, 0, 0, 0, NULL, NULL, NULL };
973 	size_t end, nb = 0, i, f_begin, f_end;
974 
975 	/* counting the number of backticks in the delimiter */
976 	while (nb < size && data[nb] == '`')
977 		nb++;
978 
979 	/* finding the next delimiter */
980 	i = 0;
981 	for (end = nb; end < size && i < nb; end++) {
982 		if (data[end] == '`') i++;
983 		else i = 0;
984 	}
985 
986 	if (i < nb && end >= size)
987 		return 0; /* no matching delimiter */
988 
989 	/* trimming outside spaces */
990 	f_begin = nb;
991 	while (f_begin < end && data[f_begin] == ' ')
992 		f_begin++;
993 
994 	f_end = end - nb;
995 	while (f_end > nb && data[f_end-1] == ' ')
996 		f_end--;
997 
998 	/* real code span */
999 	if (f_begin < f_end) {
1000 		work.data = data + f_begin;
1001 		work.size = f_end - f_begin;
1002 
1003 		if (!doc->md.codespan(ob, &work, &doc->data))
1004 			end = 0;
1005 	} else {
1006 		if (!doc->md.codespan(ob, 0, &doc->data))
1007 			end = 0;
1008 	}
1009 
1010 	return end;
1011 }
1012 
1013 /* char_quote • '"' parsing a quote */
1014 static size_t
char_quote(hoedown_buffer * ob,hoedown_document * doc,uint8_t * data,size_t offset,size_t size)1015 char_quote(hoedown_buffer *ob, hoedown_document *doc, uint8_t *data, size_t offset, size_t size)
1016 {
1017 	size_t end, nq = 0, i, f_begin, f_end;
1018 
1019 	/* counting the number of quotes in the delimiter */
1020 	while (nq < size && data[nq] == '"')
1021 		nq++;
1022 
1023 	/* finding the next delimiter */
1024 	end = nq;
1025 	while (1) {
1026 		i = end;
1027 		end += find_emph_char(data + end, size - end, '"');
1028 		if (end == i) return 0;		/* no matching delimiter */
1029 		i = end;
1030 		while (end < size && data[end] == '"' && end - i < nq) end++;
1031 		if (end - i >= nq) break;
1032 	}
1033 
1034 	/* trimming outside spaces */
1035 	f_begin = nq;
1036 	while (f_begin < end && data[f_begin] == ' ')
1037 		f_begin++;
1038 
1039 	f_end = end - nq;
1040 	while (f_end > nq && data[f_end-1] == ' ')
1041 		f_end--;
1042 
1043 	/* real quote */
1044 	if (f_begin < f_end) {
1045 		hoedown_buffer *work = newbuf(doc, BUFFER_SPAN);
1046 		parse_inline(work, doc, data + f_begin, f_end - f_begin);
1047 
1048 		if (!doc->md.quote(ob, work, &doc->data))
1049 			end = 0;
1050 		popbuf(doc, BUFFER_SPAN);
1051 	} else {
1052 		if (!doc->md.quote(ob, 0, &doc->data))
1053 			end = 0;
1054 	}
1055 
1056 	return end;
1057 }
1058 
1059 
1060 /* char_escape • '\\' backslash escape */
1061 static size_t
char_escape(hoedown_buffer * ob,hoedown_document * doc,uint8_t * data,size_t offset,size_t size)1062 char_escape(hoedown_buffer *ob, hoedown_document *doc, uint8_t *data, size_t offset, size_t size)
1063 {
1064 	static const char *escape_chars = "\\`*_{}[]()#+-.!:|&<>^~=\"$";
1065 	hoedown_buffer work = { 0, 0, 0, 0, NULL, NULL, NULL };
1066 	size_t w;
1067 
1068 	if (size > 1) {
1069 		if (data[1] == '\\' && (doc->ext_flags & HOEDOWN_EXT_MATH) &&
1070 			size > 2 && (data[2] == '(' || data[2] == '[')) {
1071 			const char *end = (data[2] == '[') ? "\\\\]" : "\\\\)";
1072 			w = parse_math(ob, doc, data, offset, size, end, 3, data[2] == '[');
1073 			if (w) return w;
1074 		}
1075 
1076 		if (strchr(escape_chars, data[1]) == NULL)
1077 			return 0;
1078 
1079 		if (doc->md.normal_text) {
1080 			work.data = data + 1;
1081 			work.size = 1;
1082 			doc->md.normal_text(ob, &work, &doc->data);
1083 		}
1084 		else hoedown_buffer_putc(ob, data[1]);
1085 	} else if (size == 1) {
1086 		if (doc->md.normal_text) {
1087 			work.data = data;
1088 			work.size = 1;
1089 			doc->md.normal_text(ob, &work, &doc->data);
1090 		}
1091 		else hoedown_buffer_putc(ob, data[0]);
1092 	}
1093 
1094 	return 2;
1095 }
1096 
1097 /* char_entity • '&' escaped when it doesn't belong to an entity */
1098 /* valid entities are assumed to be anything matching &#?[A-Za-z0-9]+; */
1099 static size_t
char_entity(hoedown_buffer * ob,hoedown_document * doc,uint8_t * data,size_t offset,size_t size)1100 char_entity(hoedown_buffer *ob, hoedown_document *doc, uint8_t *data, size_t offset, size_t size)
1101 {
1102 	size_t end = 1;
1103 	hoedown_buffer work = { 0, 0, 0, 0, NULL, NULL, NULL };
1104 
1105 	if (end < size && data[end] == '#')
1106 		end++;
1107 
1108 	while (end < size && isalnum(data[end]))
1109 		end++;
1110 
1111 	if (end < size && data[end] == ';')
1112 		end++; /* real entity */
1113 	else
1114 		return 0; /* lone '&' */
1115 
1116 	if (doc->md.entity) {
1117 		work.data = data;
1118 		work.size = end;
1119 		doc->md.entity(ob, &work, &doc->data);
1120 	}
1121 	else hoedown_buffer_put(ob, data, end);
1122 
1123 	return end;
1124 }
1125 
1126 /* char_langle_tag • '<' when tags or autolinks are allowed */
1127 static size_t
char_langle_tag(hoedown_buffer * ob,hoedown_document * doc,uint8_t * data,size_t offset,size_t size)1128 char_langle_tag(hoedown_buffer *ob, hoedown_document *doc, uint8_t *data, size_t offset, size_t size)
1129 {
1130 	hoedown_buffer work = { NULL, 0, 0, 0, NULL, NULL, NULL };
1131 	hoedown_autolink_type altype = HOEDOWN_AUTOLINK_NONE;
1132 	size_t end = tag_length(data, size, &altype);
1133 	int ret = 0;
1134 
1135 	work.data = data;
1136 	work.size = end;
1137 
1138 	if (end > 2) {
1139 		if (doc->md.autolink && altype != HOEDOWN_AUTOLINK_NONE) {
1140 			hoedown_buffer *u_link = newbuf(doc, BUFFER_SPAN);
1141 			work.data = data + 1;
1142 			work.size = end - 2;
1143 			unscape_text(u_link, &work);
1144 			ret = doc->md.autolink(ob, u_link, altype, &doc->data);
1145 			popbuf(doc, BUFFER_SPAN);
1146 		}
1147 		else if (doc->md.raw_html)
1148 			ret = doc->md.raw_html(ob, &work, &doc->data);
1149 	}
1150 
1151 	if (!ret) return 0;
1152 	else return end;
1153 }
1154 
1155 static size_t
char_autolink_www(hoedown_buffer * ob,hoedown_document * doc,uint8_t * data,size_t offset,size_t size)1156 char_autolink_www(hoedown_buffer *ob, hoedown_document *doc, uint8_t *data, size_t offset, size_t size)
1157 {
1158 	hoedown_buffer *link, *link_url, *link_text;
1159 	size_t link_len, rewind;
1160 
1161 	if (!doc->md.link || doc->in_link_body)
1162 		return 0;
1163 
1164 	link = newbuf(doc, BUFFER_SPAN);
1165 
1166 	if ((link_len = hoedown_autolink__www(&rewind, link, data, offset, size, HOEDOWN_AUTOLINK_SHORT_DOMAINS)) > 0) {
1167 		link_url = newbuf(doc, BUFFER_SPAN);
1168 		HOEDOWN_BUFPUTSL(link_url, "http://");
1169 		hoedown_buffer_put(link_url, link->data, link->size);
1170 
1171 		if (ob->size > rewind)
1172 			ob->size -= rewind;
1173 		else
1174 			ob->size = 0;
1175 
1176 		if (doc->md.normal_text) {
1177 			link_text = newbuf(doc, BUFFER_SPAN);
1178 			doc->md.normal_text(link_text, link, &doc->data);
1179 			doc->md.link(ob, link_text, link_url, NULL, &doc->data);
1180 			popbuf(doc, BUFFER_SPAN);
1181 		} else {
1182 			doc->md.link(ob, link, link_url, NULL, &doc->data);
1183 		}
1184 		popbuf(doc, BUFFER_SPAN);
1185 	}
1186 
1187 	popbuf(doc, BUFFER_SPAN);
1188 	return link_len;
1189 }
1190 
1191 static size_t
char_ref(hoedown_buffer * ob,hoedown_document * doc,uint8_t * data,size_t offset,size_t size)1192 char_ref(hoedown_buffer *ob, hoedown_document *doc, uint8_t *data, size_t offset, size_t size)
1193 {
1194 
1195 	if (startsWith("(#", (char*)data)){
1196 		size_t i;
1197 		for (i=2; i < size; i++)
1198 		{
1199 			if (data[i]==')')
1200 				break;
1201 		}
1202 		char * ref_id = malloc((i-1)*sizeof(char));
1203 		ref_id[i-2] = 0;
1204 		memcpy(ref_id, data+2, i-2);
1205 		int count = 0;
1206 		if (find_ref(doc->floating_references, ref_id, &count))
1207 		{
1208 			if (doc->md.ref)
1209 				doc->md.ref(ob, ref_id, count);
1210 			return i+1;
1211 		} else {
1212 			if (doc->md.ref)
1213 				doc->md.ref(ob, ref_id, -1);
1214 			return i+1;
1215 		}
1216 	}
1217 	return 0;
1218 }
1219 
1220 static size_t
char_autolink_email(hoedown_buffer * ob,hoedown_document * doc,uint8_t * data,size_t offset,size_t size)1221 char_autolink_email(hoedown_buffer *ob, hoedown_document *doc, uint8_t *data, size_t offset, size_t size)
1222 {
1223 
1224 	if (startsWith("@include(", (char*)data))
1225     {
1226     	return parse_include(ob, doc, data, offset, size);
1227     }
1228     if (startsWith("@\\", (char*)data) && is_separator(data[2])){
1229 	    if (doc->md.linebreak)
1230 	    {
1231 	    	doc->md.linebreak(ob, &doc->data);
1232 	    }
1233 	    return 3;
1234     }
1235     if (startsWith("@pagebreak", (char*)data))
1236    	{
1237 	   	if (doc->md.pagebreak)
1238    		{
1239   			doc->md.pagebreak(ob);
1240    		}
1241    		return 10;
1242    	}
1243     if (startsWith("@caption(", (char*)data))
1244    	{
1245    		/** skip it **/
1246    		size_t i;
1247    		for (i=9; data[i] != '\n' && i < size; i++){
1248    			if (data[i] == ')' && data[i-1] != '\\')
1249    				break;
1250    		}
1251    		return i+1;
1252    	}
1253 	hoedown_buffer *link;
1254 	size_t link_len, rewind;
1255 
1256 	if (!doc->md.autolink || doc->in_link_body)
1257 		return 0;
1258 
1259 	link = newbuf(doc, BUFFER_SPAN);
1260 
1261 	if ((link_len = hoedown_autolink__email(&rewind, link, data, offset, size, 0)) > 0) {
1262 		if (ob->size > rewind)
1263 			ob->size -= rewind;
1264 		else
1265 			ob->size = 0;
1266 
1267 		doc->md.autolink(ob, link, HOEDOWN_AUTOLINK_EMAIL, &doc->data);
1268 	}
1269 
1270 	popbuf(doc, BUFFER_SPAN);
1271 	return link_len;
1272 }
1273 
1274 static size_t
char_autolink_url(hoedown_buffer * ob,hoedown_document * doc,uint8_t * data,size_t offset,size_t size)1275 char_autolink_url(hoedown_buffer *ob, hoedown_document *doc, uint8_t *data, size_t offset, size_t size)
1276 {
1277 	hoedown_buffer *link;
1278 	size_t link_len, rewind;
1279 
1280 	if (!doc->md.autolink || doc->in_link_body)
1281 		return 0;
1282 
1283 	link = newbuf(doc, BUFFER_SPAN);
1284 
1285 	if ((link_len = hoedown_autolink__url(&rewind, link, data, offset, size, 0)) > 0) {
1286 		if (ob->size > rewind)
1287 			ob->size -= rewind;
1288 		else
1289 			ob->size = 0;
1290 
1291 		doc->md.autolink(ob, link, HOEDOWN_AUTOLINK_NORMAL, &doc->data);
1292 	}
1293 
1294 	popbuf(doc, BUFFER_SPAN);
1295 	return link_len;
1296 }
1297 
1298 static size_t
char_image(hoedown_buffer * ob,hoedown_document * doc,uint8_t * data,size_t offset,size_t size)1299 char_image(hoedown_buffer *ob, hoedown_document *doc, uint8_t *data, size_t offset, size_t size) {
1300 	size_t ret;
1301 
1302 	if (size < 2 || data[1] != '[') return 0;
1303 
1304 	ret = char_link(ob, doc, data + 1, offset + 1, size - 1);
1305 	if (!ret) return 0;
1306 	return ret + 1;
1307 }
1308 
1309 /* char_link • '[': parsing a link, a footnote or an image */
1310 static size_t
char_link(hoedown_buffer * ob,hoedown_document * doc,uint8_t * data,size_t offset,size_t size)1311 char_link(hoedown_buffer *ob, hoedown_document *doc, uint8_t *data, size_t offset, size_t size)
1312 {
1313 	int is_img = (offset && data[-1] == '!' && !is_escaped(data - offset, offset - 1));
1314 	int is_footnote = (doc->ext_flags & HOEDOWN_EXT_FOOTNOTES && data[1] == '^');
1315 	size_t i = 1, txt_e, link_b = 0, link_e = 0, title_b = 0, title_e = 0;
1316 	hoedown_buffer *content = NULL;
1317 	hoedown_buffer *link = NULL;
1318 	hoedown_buffer *title = NULL;
1319 	hoedown_buffer *u_link = NULL;
1320 	size_t org_work_size = doc->work_bufs[BUFFER_SPAN].size;
1321 	int ret = 0, in_title = 0, qtype = 0;
1322 
1323 	/* checking whether the correct renderer exists */
1324 	if ((is_footnote && !doc->md.footnote_ref) || (is_img && !doc->md.image)
1325 		|| (!is_img && !is_footnote && !doc->md.link))
1326 		goto cleanup;
1327 
1328 	/* looking for the matching closing bracket */
1329 	i += find_emph_char(data + i, size - i, ']');
1330 	txt_e = i;
1331 
1332 	if (i < size && data[i] == ']') i++;
1333 	else goto cleanup;
1334 
1335 	/* footnote link */
1336 	if (is_footnote) {
1337 		hoedown_buffer id = { NULL, 0, 0, 0, NULL, NULL, NULL };
1338 		struct footnote_ref *fr;
1339 
1340 		if (txt_e < 3)
1341 			goto cleanup;
1342 
1343 		id.data = data + 2;
1344 		id.size = txt_e - 2;
1345 
1346 		fr = find_footnote_ref(&doc->footnotes_found, id.data, id.size);
1347 
1348 		/* mark footnote used */
1349 		if (fr && !fr->is_used) {
1350 			if(!add_footnote_ref(&doc->footnotes_used, fr))
1351 				goto cleanup;
1352 			fr->is_used = 1;
1353 			fr->num = doc->footnotes_used.count;
1354 
1355 			/* render */
1356 			if (doc->md.footnote_ref)
1357 				ret = doc->md.footnote_ref(ob, fr->num, &doc->data);
1358 		} else if (doc->md.footnote_ref) {
1359 			ret = doc->md.footnote_ref(ob, -1, &doc->data);
1360 		}
1361 
1362 		goto cleanup;
1363 	}
1364 
1365 	/* skip any amount of spacing */
1366 	/* (this is much more laxist than original markdown syntax) */
1367 	while (i < size && _isspace(data[i]))
1368 		i++;
1369 
1370 	/* inline style link */
1371 	if (i < size && data[i] == '(') {
1372 		size_t nb_p;
1373 
1374 		/* skipping initial spacing */
1375 		i++;
1376 
1377 		while (i < size && _isspace(data[i]))
1378 			i++;
1379 
1380 		link_b = i;
1381 
1382 		/* looking for link end: ' " ) */
1383 		/* Count the number of open parenthesis */
1384 		nb_p = 0;
1385 
1386 		while (i < size) {
1387 			if (data[i] == '\\') i += 2;
1388 			else if (data[i] == '(' && i != 0) {
1389 				nb_p++; i++;
1390 			}
1391 			else if (data[i] == ')') {
1392 				if (nb_p == 0) break;
1393 				else nb_p--;
1394 				i++;
1395 			} else if (i >= 1 && _isspace(data[i-1]) && (data[i] == '\'' || data[i] == '"')) break;
1396 			else i++;
1397 		}
1398 
1399 		if (i >= size) goto cleanup;
1400 		link_e = i;
1401 
1402 		/* looking for title end if present */
1403 		if (data[i] == '\'' || data[i] == '"') {
1404 			qtype = data[i];
1405 			in_title = 1;
1406 			i++;
1407 			title_b = i;
1408 
1409 			while (i < size) {
1410 				if (data[i] == '\\') i += 2;
1411 				else if (data[i] == qtype) {in_title = 0; i++;}
1412 				else if ((data[i] == ')') && !in_title) break;
1413 				else i++;
1414 			}
1415 
1416 			if (i >= size) goto cleanup;
1417 
1418 			/* skipping spacing after title */
1419 			title_e = i - 1;
1420 			while (title_e > title_b && _isspace(data[title_e]))
1421 				title_e--;
1422 
1423 			/* checking for closing quote presence */
1424 			if (data[title_e] != '\'' &&  data[title_e] != '"') {
1425 				title_b = title_e = 0;
1426 				link_e = i;
1427 			}
1428 		}
1429 
1430 		/* remove spacing at the end of the link */
1431 		while (link_e > link_b && _isspace(data[link_e - 1]))
1432 			link_e--;
1433 
1434 		/* remove optional angle brackets around the link */
1435 		if (data[link_b] == '<' && data[link_e - 1] == '>') {
1436 			link_b++;
1437 			link_e--;
1438 		}
1439 
1440 		/* building escaped link and title */
1441 		if (link_e > link_b) {
1442 			link = newbuf(doc, BUFFER_SPAN);
1443 			hoedown_buffer_put(link, data + link_b, link_e - link_b);
1444 		}
1445 
1446 		if (title_e > title_b) {
1447 			title = newbuf(doc, BUFFER_SPAN);
1448 			hoedown_buffer_put(title, data + title_b, title_e - title_b);
1449 		}
1450 
1451 		i++;
1452 	}
1453 
1454 	/* reference style link */
1455 	else if (i < size && data[i] == '[') {
1456 		hoedown_buffer *id = newbuf(doc, BUFFER_SPAN);
1457 		struct link_ref *lr;
1458 
1459 		/* looking for the id */
1460 		i++;
1461 		link_b = i;
1462 		while (i < size && data[i] != ']') i++;
1463 		if (i >= size) goto cleanup;
1464 		link_e = i;
1465 
1466 		/* finding the link_ref */
1467 		if (link_b == link_e)
1468 			replace_spacing(id, data + 1, txt_e - 1);
1469 		else
1470 			hoedown_buffer_put(id, data + link_b, link_e - link_b);
1471 
1472 		lr = find_link_ref(doc->refs, id->data, id->size);
1473 		if (!lr)
1474 			goto cleanup;
1475 
1476 		/* keeping link and title from link_ref */
1477 		link = lr->link;
1478 		title = lr->title;
1479 		i++;
1480 	}
1481 
1482 	/* shortcut reference style link */
1483 	else {
1484 		hoedown_buffer *id = newbuf(doc, BUFFER_SPAN);
1485 		struct link_ref *lr;
1486 
1487 		/* crafting the id */
1488 		replace_spacing(id, data + 1, txt_e - 1);
1489 
1490 		/* finding the link_ref */
1491 		lr = find_link_ref(doc->refs, id->data, id->size);
1492 		if (!lr)
1493 			goto cleanup;
1494 
1495 		/* keeping link and title from link_ref */
1496 		link = lr->link;
1497 		title = lr->title;
1498 
1499 		/* rewinding the spacing */
1500 		i = txt_e + 1;
1501 	}
1502 
1503 	/* building content: img alt is kept, only link content is parsed */
1504 	if (txt_e > 1) {
1505 		content = newbuf(doc, BUFFER_SPAN);
1506 		if (is_img) {
1507 			hoedown_buffer_put(content, data + 1, txt_e - 1);
1508 		} else {
1509 			/* disable autolinking when parsing inline the
1510 			 * content of a link */
1511 			doc->in_link_body = 1;
1512 			parse_inline(content, doc, data + 1, txt_e - 1);
1513 			doc->in_link_body = 0;
1514 		}
1515 	}
1516 
1517 	if (link) {
1518 		u_link = newbuf(doc, BUFFER_SPAN);
1519 		unscape_text(u_link, link);
1520 	}
1521 
1522 	/* calling the relevant rendering function */
1523 	if (is_img) {
1524 		ret = doc->md.image(ob, u_link, title, content, &doc->data);
1525 	} else {
1526 		ret = doc->md.link(ob, content, u_link, title, &doc->data);
1527 	}
1528 
1529 	/* cleanup */
1530 cleanup:
1531 	doc->work_bufs[BUFFER_SPAN].size = (int)org_work_size;
1532 	return ret ? i : 0;
1533 }
1534 
1535 static size_t
char_superscript(hoedown_buffer * ob,hoedown_document * doc,uint8_t * data,size_t offset,size_t size)1536 char_superscript(hoedown_buffer *ob, hoedown_document *doc, uint8_t *data, size_t offset, size_t size)
1537 {
1538 	size_t sup_start, sup_len;
1539 	hoedown_buffer *sup;
1540 
1541 	if (!doc->md.superscript)
1542 		return 0;
1543 
1544 	if (size < 2)
1545 		return 0;
1546 
1547 	if (data[1] == '(') {
1548 		sup_start = 2;
1549 		sup_len = find_emph_char(data + 2, size - 2, ')') + 2;
1550 
1551 		if (sup_len == size)
1552 			return 0;
1553 	} else {
1554 		sup_start = sup_len = 1;
1555 
1556 		while (sup_len < size && !_isspace(data[sup_len]))
1557 			sup_len++;
1558 	}
1559 
1560 	if (sup_len - sup_start == 0)
1561 		return (sup_start == 2) ? 3 : 0;
1562 
1563 	sup = newbuf(doc, BUFFER_SPAN);
1564 	parse_inline(sup, doc, data + sup_start, sup_len - sup_start);
1565 	doc->md.superscript(ob, sup, &doc->data);
1566 	popbuf(doc, BUFFER_SPAN);
1567 
1568 	return (sup_start == 2) ? sup_len + 1 : sup_len;
1569 }
1570 
1571 static size_t
char_math(hoedown_buffer * ob,hoedown_document * doc,uint8_t * data,size_t offset,size_t size)1572 char_math(hoedown_buffer *ob, hoedown_document *doc, uint8_t *data, size_t offset, size_t size)
1573 {
1574 	/* double dollar */
1575 	if (size > 1 && data[1] == '$')
1576 		return parse_math(ob, doc, data, offset, size, "$$", 2, 1);
1577 
1578 	/* single dollar allowed only with MATH_EXPLICIT flag */
1579 	if (doc->ext_flags & HOEDOWN_EXT_MATH_EXPLICIT)
1580 		return parse_math(ob, doc, data, offset, size, "$", 1, 0);
1581 
1582 	return 0;
1583 }
1584 
1585 /*********************************
1586  * BLOCK-LEVEL PARSING FUNCTIONS *
1587  *********************************/
1588 
/* is_empty • returns 0 when the line holds anything but spaces; otherwise */
/*   the offset just past its newline (size + 1 when there is no newline) */
static size_t
is_empty(const uint8_t *data, size_t size)
{
	size_t pos = 0;

	while (pos < size && data[pos] != '\n') {
		if (data[pos] != ' ')
			return 0;
		pos++;
	}

	return pos + 1;
}
1601 
/* is_hrule • returns whether a line is a horizontal rule: at least three */
/*   '*', '-' or '_' (all the same), possibly mixed with spaces */
static int
is_hrule(uint8_t *data, size_t size)
{
	size_t pos = 0, marks = 0;
	uint8_t mark;

	if (size < 3)
		return 0;

	/* skipping up to three initial spaces */
	while (pos < 3 && data[pos] == ' ')
		pos++;

	/* looking at the rule char */
	if (pos + 2 >= size)
		return 0;

	mark = data[pos];
	if (mark != '*' && mark != '-' && mark != '_')
		return 0;

	/* the whole line must be the char or space */
	for (; pos < size && data[pos] != '\n'; pos++) {
		if (data[pos] == mark)
			marks++;
		else if (data[pos] != ' ')
			return 0;
	}

	return marks >= 3;
}
1632 
/* check if a line is a code fence; return the
 * end of the code fence. if passed, width of
 * the fence rule and character will be returned.
 * a fence is at least three '~' or '`' (all the same)
 * after at most three spaces; 0 is returned otherwise */
static size_t
is_codefence(uint8_t *data, size_t size, size_t *width, uint8_t *chr)
{
	size_t i = 0, n = 1;
	uint8_t c;

	/* skipping initial spaces */
	if (size < 3)
		return 0;

	if (data[0] == ' ') { i++;
	if (data[1] == ' ') { i++;
	if (data[2] == ' ') { i++; } } }

	/* looking at the fence char; the bounds are checked first, as i
	 * equals size when the line is made of exactly three spaces */
	if (i + 2 >= size)
		return 0;

	c = data[i];
	if (!(c=='~' || c=='`'))
		return 0;

	/* the fence must be that same character */
	while (++i < size && data[i] == c)
		++n;

	if (n < 3)
		return 0;

	if (width) *width = n;
	if (chr) *chr = c;
	return i;
}
1666 
1667 /* expects single line, checks if it's a codefence and extracts language */
1668 static size_t
parse_codefence(uint8_t * data,size_t size,hoedown_buffer * lang,size_t * width,uint8_t * chr)1669 parse_codefence(uint8_t *data, size_t size, hoedown_buffer *lang, size_t *width, uint8_t *chr)
1670 {
1671 	size_t i, w, lang_start;
1672 
1673 	i = w = is_codefence(data, size, width, chr);
1674 	if (i == 0)
1675 		return 0;
1676 
1677 	while (i < size && _isspace(data[i]))
1678 		i++;
1679 
1680 	lang_start = i;
1681 
1682 	while (i < size && !_isspace(data[i]))
1683 		i++;
1684 
1685 	lang->data = data + lang_start;
1686 	lang->size = i - lang_start;
1687 
1688 	/* Avoid parsing a codespan as a fence */
1689 	i = lang_start + 2;
1690 	while (i < size && !(data[i] == *chr && data[i-1] == *chr && data[i-2] == *chr)) i++;
1691 	if (i < size) return 0;
1692 
1693 	return w;
1694 }
1695 
1696 /* is_atxheader • returns whether the line is a hash-prefixed header */
1697 static int
is_atxheader(hoedown_document * doc,uint8_t * data,size_t size)1698 is_atxheader(hoedown_document *doc, uint8_t *data, size_t size)
1699 {
1700 	if (data[0] != '#')
1701 		return 0;
1702 
1703 	if (doc->ext_flags & HOEDOWN_EXT_SPACE_HEADERS) {
1704 		size_t level = 0;
1705 
1706 		while (level < size && level < 6 && data[level] == '#')
1707 			level++;
1708 
1709 		if (level < size && data[level] != ' ')
1710 			return 0;
1711 	}
1712 
1713 	return 1;
1714 }
1715 
/* is_headerline • returns whether the line is a setext-style hdr underline */
/*   1 for a run of '=', 2 for a run of '-', 0 otherwise; the run may only */
/*   be followed by spaces up to the end of the line */
static int
is_headerline(uint8_t *data, size_t size)
{
	const uint8_t mark = data[0];
	size_t pos = 1;
	int level;

	if (mark == '=')
		level = 1;
	else if (mark == '-')
		level = 2;
	else
		return 0;

	/* the run of marks, then the trailing spaces */
	while (pos < size && data[pos] == mark)
		pos++;
	while (pos < size && data[pos] == ' ')
		pos++;

	if (pos < size && data[pos] != '\n')
		return 0;

	return level;
}
1736 
/* is_next_headerline • returns the is_headerline() value of the line that */
/*   follows the first one in data, 0 when there is no such line */
static int
is_next_headerline(uint8_t *data, size_t size)
{
	uint8_t *newline = memchr(data, '\n', size);
	size_t next;

	if (!newline)
		return 0;

	/* offset of the first char after the newline */
	next = (size_t)(newline - data) + 1;
	if (next >= size)
		return 0;

	return is_headerline(data + next, size - next);
}
1750 
/* prefix_quote • returns blockquote prefix length */
/*   the prefix is up to three spaces, a '>' and one optional space; */
/*   0 is returned when the line does not start with it */
static size_t
prefix_quote(uint8_t *data, size_t size)
{
	size_t pos = 0;

	while (pos < 3 && pos < size && data[pos] == ' ')
		pos++;

	if (pos >= size || data[pos] != '>')
		return 0;
	pos++;

	/* one space after the '>' belongs to the prefix */
	if (pos < size && data[pos] == ' ')
		pos++;

	return pos;
}
1769 
/* prefix_code • returns prefix length for block code */
/*   4 when the line starts with four spaces, 0 otherwise */
static size_t
prefix_code(uint8_t *data, size_t size)
{
	size_t k;

	if (size <= 3)
		return 0;

	for (k = 0; k < 4; k++) {
		if (data[k] != ' ')
			return 0;
	}

	return 4;
}
1779 
/* prefix_oli • returns ordered list item prefix */
/*   the prefix is up to three spaces, digits, a '.' and a space; 0 is */
/*   returned when it is missing or when the next line is a header underline */
static size_t
prefix_oli(uint8_t *data, size_t size)
{
	size_t pos = 0, digits_at;

	while (pos < 3 && pos < size && data[pos] == ' ')
		pos++;

	/* at least one digit is needed */
	digits_at = pos;
	while (pos < size && data[pos] >= '0' && data[pos] <= '9')
		pos++;

	if (pos == digits_at)
		return 0;

	if (pos + 1 >= size)
		return 0;

	if (data[pos] != '.' || data[pos + 1] != ' ')
		return 0;

	if (is_next_headerline(data + pos, size - pos))
		return 0;

	return pos + 2;
}
1804 
/* prefix_checkbox • returns open checkbox list item prefix */
/*   the prefix is up to three spaces followed by "- [ ] "; 0 is returned */
/*   when it is missing or when the next line is a header underline */
static size_t
prefix_checkbox(uint8_t *data, size_t size)
{
	size_t i = 0;
	if (i < size && data[i] == ' ') i++;
	if (i < size && data[i] == ' ') i++;
	if (i < size && data[i] == ' ') i++;

	/* the marker is six chars long: data[i] to data[i + 5] must all lie
	 * inside the data before they are compared */
	if (i + 5 >= size || memcmp(data + i, "- [ ] ", 6) != 0)
		return 0;

	if (is_next_headerline(data + i, size - i))
		return 0;
	return i + 6;
}
1823 
/* prefix_checkbox_checked • returns checked checkbox list item prefix */
/*   the prefix is up to three spaces followed by "- [x] "; 0 is returned */
/*   when it is missing or when the next line is a header underline */
static size_t
prefix_checkbox_checked(uint8_t *data, size_t size)
{
	size_t i = 0;
	if (i < size && data[i] == ' ') i++;
	if (i < size && data[i] == ' ') i++;
	if (i < size && data[i] == ' ') i++;

	/* the marker is six chars long: data[i] to data[i + 5] must all lie
	 * inside the data before they are compared */
	if (i + 5 >= size || memcmp(data + i, "- [x] ", 6) != 0)
		return 0;

	if (is_next_headerline(data + i, size - i))
		return 0;
	return i + 6;
}
1842 
/* prefix_uli • returns unordered list item prefix */
/*   the prefix is up to three spaces, one of '*', '+' or '-', and a space; */
/*   0 is returned when it is missing or when the next line is a header underline */
static size_t
prefix_uli(uint8_t *data, size_t size)
{
	size_t pos = 0;

	while (pos < 3 && pos < size && data[pos] == ' ')
		pos++;

	if (pos + 1 >= size)
		return 0;

	switch (data[pos]) {
	case '*':
	case '+':
	case '-':
		break;
	default:
		return 0;
	}

	if (data[pos + 1] != ' ')
		return 0;

	if (is_next_headerline(data + pos, size - pos))
		return 0;

	return pos + 2;
}
1863 
/* prefix_float • returns 1 when data starts with one of the '@' block */
/*   keywords listed below, 0 otherwise (size is not used) */
static size_t
prefix_float(uint8_t * data, size_t size)
{
	char *txt = (char*) data;

	if (startsWith("@figure", txt))
		return 1;
	if (startsWith("@table", txt))
		return 1;
	if (startsWith("@code", txt))
		return 1;
	if (startsWith("@listing", txt))
		return 1;
	if (startsWith("@abstract", txt))
		return 1;
	if (startsWith("@equation", txt))
		return 1;
	if (startsWith("@toc", txt))
		return 1;

	return 0;
}
1873 
/* parse_block • parsing of one block; declared ahead because the block parsers below call it */
1875 static void parse_block(hoedown_buffer *ob, hoedown_document *doc,
1876 			uint8_t *data, size_t size, int position);
1877 
1878 
1879 /* parse_blockquote • handles parsing of a blockquote fragment */
1880 static size_t
parse_blockquote(hoedown_buffer * ob,hoedown_document * doc,uint8_t * data,size_t size)1881 parse_blockquote(hoedown_buffer *ob, hoedown_document *doc, uint8_t *data, size_t size)
1882 {
1883 	size_t beg, end = 0, pre, work_size = 0;
1884 	uint8_t *work_data = 0;
1885 	hoedown_buffer *out = 0;
1886 
1887 	out = newbuf(doc, BUFFER_BLOCK);
1888 	beg = 0;
1889 	while (beg < size) {
1890 		for (end = beg + 1; end < size && data[end - 1] != '\n'; end++);
1891 
1892 		pre = prefix_quote(data + beg, end - beg);
1893 
1894 		if (pre)
1895 			beg += pre; /* skipping prefix */
1896 
1897 		/* empty line followed by non-quote line */
1898 		else if (is_empty(data + beg, end - beg) &&
1899 				(end >= size || (prefix_quote(data + end, size - end) == 0 &&
1900 				!is_empty(data + end, size - end))))
1901 			break;
1902 
1903 		if (beg < end) { /* copy into the in-place working buffer */
1904 			/* hoedown_buffer_put(work, data + beg, end - beg); */
1905 			if (!work_data)
1906 				work_data = data + beg;
1907 			else if (data + beg != work_data + work_size)
1908 				memmove(work_data + work_size, data + beg, end - beg);
1909 			work_size += end - beg;
1910 		}
1911 		beg = end;
1912 	}
1913 
1914 	parse_block(out, doc, work_data, work_size, -1);
1915 	if (doc->md.blockquote)
1916 		doc->md.blockquote(ob, out, &doc->data);
1917 	popbuf(doc, BUFFER_BLOCK);
1918 	return end;
1919 }
1920 
1921 static size_t
1922 parse_htmlblock(hoedown_buffer *ob, hoedown_document *doc, uint8_t *data, size_t size, int do_render);
1923 
1924 /* parse_blockquote • handles parsing of a regular paragraph */
1925 static size_t
parse_paragraph(hoedown_buffer * ob,hoedown_document * doc,uint8_t * data,size_t size)1926 parse_paragraph(hoedown_buffer *ob, hoedown_document *doc, uint8_t *data, size_t size)
1927 {
1928 	hoedown_buffer work = { NULL, 0, 0, 0, NULL, NULL, NULL };
1929 	size_t i = 0, end = 0;
1930 	int level = 0;
1931 
1932 	work.data = data;
1933 
1934 	while (i < size) {
1935 		for (end = i + 1; end < size && data[end - 1] != '\n'; end++) /* empty */;
1936 
1937 		if (is_empty(data + i, size - i))
1938 			break;
1939 
1940 		if ((level = is_headerline(data + i, size - i)) != 0)
1941 			break;
1942 
1943 		if (is_atxheader(doc, data + i, size - i) ||
1944 			is_hrule(data + i, size - i) ||
1945 			prefix_quote(data + i, size - i)) {
1946 			end = i;
1947 			break;
1948 		}
1949 
1950 		i = end;
1951 	}
1952 
1953 	work.size = i;
1954 	while (work.size && data[work.size - 1] == '\n')
1955 		work.size--;
1956 
1957 	if (!level) {
1958 		hoedown_buffer *tmp = newbuf(doc, BUFFER_BLOCK);
1959 		parse_inline(tmp, doc, work.data, work.size);
1960 		if (doc->md.paragraph)
1961 			doc->md.paragraph(ob, tmp, &doc->data);
1962 		popbuf(doc, BUFFER_BLOCK);
1963 	} else {
1964 		hoedown_buffer *header_work;
1965 
1966 		if (work.size) {
1967 			size_t beg;
1968 			i = work.size;
1969 			work.size -= 1;
1970 
1971 			while (work.size && data[work.size] != '\n')
1972 				work.size -= 1;
1973 
1974 			beg = work.size + 1;
1975 			while (work.size && data[work.size - 1] == '\n')
1976 				work.size -= 1;
1977 
1978 			if (work.size > 0) {
1979 				hoedown_buffer *tmp = newbuf(doc, BUFFER_BLOCK);
1980 				parse_inline(tmp, doc, work.data, work.size);
1981 
1982 				if (doc->md.paragraph)
1983 					doc->md.paragraph(ob, tmp, &doc->data);
1984 
1985 				popbuf(doc, BUFFER_BLOCK);
1986 				work.data += beg;
1987 				work.size = i - beg;
1988 			}
1989 			else work.size = i;
1990 		}
1991 
1992 		header_work = newbuf(doc, BUFFER_SPAN);
1993 		parse_inline(header_work, doc, work.data, work.size);
1994 		if (level == 1)
1995 		{
1996 			doc->counter.chapter++;
1997 			doc->counter.section = 0;
1998 			doc->counter.subsection = 0;
1999 		} else if (level == 2) {
2000 			doc->counter.section++;
2001 			doc->counter.subsection=0;
2002 		} else if (level == 3) {
2003 			doc->counter.subsection++;
2004 		}
2005 
2006 		if (doc->md.header){
2007 
2008 			doc->md.header(ob, header_work, (int)level, &doc->data, doc->counter, doc->document_metadata->numbering);
2009 		}
2010 		popbuf(doc, BUFFER_SPAN);
2011 	}
2012 
2013 	return end;
2014 }
2015 
2016 /* parse_fencedcode • handles parsing of a block-level code fragment */
2017 static size_t
parse_fencedcode(hoedown_buffer * ob,hoedown_document * doc,uint8_t * data,size_t size)2018 parse_fencedcode(hoedown_buffer *ob, hoedown_document *doc, uint8_t *data, size_t size)
2019 {
2020 	hoedown_buffer text = { 0, 0, 0, 0, NULL, NULL, NULL };
2021 	hoedown_buffer lang = { 0, 0, 0, 0, NULL, NULL, NULL };
2022 	size_t i = 0, text_start, line_start;
2023 	size_t w, w2;
2024 	size_t width, width2;
2025 	uint8_t chr, chr2;
2026 
2027 	/* parse codefence line */
2028 	while (i < size && data[i] != '\n')
2029 		i++;
2030 
2031 	w = parse_codefence(data, i, &lang, &width, &chr);
2032 	if (!w)
2033 		return 0;
2034 
2035 	/* search for end */
2036 	i++;
2037 	text_start = i;
2038 	while ((line_start = i) < size) {
2039 		while (i < size && data[i] != '\n')
2040 			i++;
2041 
2042 		w2 = is_codefence(data + line_start, i - line_start, &width2, &chr2);
2043 		if (w == w2 && width == width2 && chr == chr2 &&
2044 		    is_empty(data + (line_start+w), i - (line_start+w)))
2045 			break;
2046 
2047 		i++;
2048 	}
2049 
2050 	text.data = data + text_start;
2051 	text.size = line_start - text_start;
2052 
2053 	if (doc->md.blockcode)
2054 		doc->md.blockcode(ob, text.size ? &text : NULL, lang.size ? &lang : NULL, &doc->data);
2055 
2056 	return i;
2057 }
2058 
2059 static size_t
parse_blockcode(hoedown_buffer * ob,hoedown_document * doc,uint8_t * data,size_t size)2060 parse_blockcode(hoedown_buffer *ob, hoedown_document *doc, uint8_t *data, size_t size)
2061 {
2062 	size_t beg, end, pre;
2063 	hoedown_buffer *work = 0;
2064 
2065 	work = newbuf(doc, BUFFER_BLOCK);
2066 
2067 	beg = 0;
2068 	while (beg < size) {
2069 		for (end = beg + 1; end < size && data[end - 1] != '\n'; end++) {};
2070 		pre = prefix_code(data + beg, end - beg);
2071 
2072 		if (pre)
2073 			beg += pre; /* skipping prefix */
2074 		else if (!is_empty(data + beg, end - beg))
2075 			/* non-empty non-prefixed line breaks the pre */
2076 			break;
2077 
2078 		if (beg < end) {
2079 			/* verbatim copy to the working buffer,
2080 				escaping entities */
2081 			if (is_empty(data + beg, end - beg))
2082 				hoedown_buffer_putc(work, '\n');
2083 			else hoedown_buffer_put(work, data + beg, end - beg);
2084 		}
2085 		beg = end;
2086 	}
2087 
2088 	while (work->size && work->data[work->size - 1] == '\n')
2089 		work->size -= 1;
2090 
2091 	hoedown_buffer_putc(work, '\n');
2092 
2093 	if (doc->md.blockcode)
2094 		doc->md.blockcode(ob, work, NULL, &doc->data);
2095 
2096 	popbuf(doc, BUFFER_BLOCK);
2097 	return beg;
2098 }
2099 
2100 
2101 
2102 /* parse_listitem • parsing of a single list item */
2103 /*	assuming initial prefix is already removed */
2104 static size_t
parse_listitem(hoedown_buffer * ob,hoedown_document * doc,uint8_t * data,size_t size,hoedown_list_flags * flags)2105 parse_listitem(hoedown_buffer *ob, hoedown_document *doc, uint8_t *data, size_t size, hoedown_list_flags *flags)
2106 {
2107 	hoedown_buffer *work = 0, *inter = 0;
2108 	size_t beg = 0, end, pre, sublist = 0, orgpre = 0, i;
2109 	int in_empty = 0, has_inside_empty = 0, in_fence = 0;
2110 
2111 	/* keeping track of the first indentation prefix */
2112 	while (orgpre < 3 && orgpre < size && data[orgpre] == ' ')
2113 		orgpre++;
2114 
2115 	beg = prefix_checkbox(data, size);
2116 	if (!beg)
2117 		beg=prefix_checkbox_checked(data,size);
2118 	if (!beg)
2119 		beg = prefix_uli(data, size);
2120 	if (!beg)
2121 		beg = prefix_oli(data, size);
2122 
2123 	if (!beg)
2124 		return 0;
2125 
2126 	/* skipping to the beginning of the following line */
2127 	end = beg;
2128 	while (end < size && data[end - 1] != '\n')
2129 		end++;
2130 
2131 	/* getting working buffers */
2132 	work = newbuf(doc, BUFFER_SPAN);
2133 	inter = newbuf(doc, BUFFER_SPAN);
2134 
2135 	/* putting the first line into the working buffer */
2136 	hoedown_buffer_put(work, data + beg, end - beg);
2137 	beg = end;
2138 
2139 	/* process the following lines */
2140 	while (beg < size) {
2141 		size_t has_next_uli = 0, has_next_oli = 0;
2142 
2143 		end++;
2144 
2145 		while (end < size && data[end - 1] != '\n')
2146 			end++;
2147 
2148 		/* process an empty line */
2149 		if (is_empty(data + beg, end - beg)) {
2150 			in_empty = 1;
2151 			beg = end;
2152 			continue;
2153 		}
2154 
2155 		/* calculating the indentation */
2156 		i = 0;
2157 		while (i < 4 && beg + i < end && data[beg + i] == ' ')
2158 			i++;
2159 
2160 		pre = i;
2161 
2162 		if (doc->ext_flags & HOEDOWN_EXT_FENCED_CODE) {
2163 			if (is_codefence(data + beg + i, end - beg - i, NULL, NULL))
2164 				in_fence = !in_fence;
2165 		}
2166 
2167 		/* Only check for new list items if we are **not** inside
2168 		 * a fenced code block */
2169 		if (!in_fence) {
2170 			has_next_uli = prefix_uli(data + beg + i, end - beg - i);
2171 			has_next_oli = prefix_oli(data + beg + i, end - beg - i);
2172 		}
2173 
2174 		/* checking for a new item */
2175 		if ((has_next_uli && !is_hrule(data + beg + i, end - beg - i)) || has_next_oli) {
2176 			if (in_empty)
2177 				has_inside_empty = 1;
2178 
2179 			/* the following item must have the same (or less) indentation */
2180 			if (pre <= orgpre) {
2181 				/* if the following item has different list type, we end this list */
2182 				if (in_empty && (
2183 					((*flags & HOEDOWN_LIST_ORDERED) && has_next_uli) ||
2184 					(!(*flags & HOEDOWN_LIST_ORDERED) && has_next_oli)))
2185 					*flags |= HOEDOWN_LI_END;
2186 
2187 				break;
2188 			}
2189 
2190 			if (!sublist)
2191 				sublist = work->size;
2192 		}
2193 		/* joining only indented stuff after empty lines;
2194 		 * note that now we only require 1 space of indentation
2195 		 * to continue a list */
2196 		else if (in_empty && pre == 0) {
2197 			*flags |= HOEDOWN_LI_END;
2198 			break;
2199 		}
2200 
2201 		if (in_empty) {
2202 			hoedown_buffer_putc(work, '\n');
2203 			has_inside_empty = 1;
2204 			in_empty = 0;
2205 		}
2206 
2207 		/* adding the line without prefix into the working buffer */
2208 		hoedown_buffer_put(work, data + beg + i, end - beg - i);
2209 		beg = end;
2210 	}
2211 
2212 	/* render of li contents */
2213 	if (has_inside_empty)
2214 		*flags |= HOEDOWN_LI_BLOCK;
2215 
2216 	if (*flags & HOEDOWN_LI_BLOCK) {
2217 		/* intermediate render of block li */
2218 		if (sublist && sublist < work->size) {
2219 			parse_block(inter, doc, work->data, sublist, -1);
2220 			parse_block(inter, doc, work->data + sublist, work->size - sublist, -1);
2221 		}
2222 		else
2223 			parse_block(inter, doc, work->data, work->size, -1);
2224 	} else {
2225 		/* intermediate render of inline li */
2226 		if (sublist && sublist < work->size) {
2227 			parse_inline(inter, doc, work->data, sublist);
2228 			parse_block(inter, doc, work->data + sublist, work->size - sublist, -1);
2229 		}
2230 		else
2231 			parse_inline(inter, doc, work->data, work->size);
2232 	}
2233 
2234 	/* render of li itself */
2235 	if (doc->md.listitem)
2236 		doc->md.listitem(ob, inter, *flags, &doc->data);
2237 
2238 	popbuf(doc, BUFFER_SPAN);
2239 	popbuf(doc, BUFFER_SPAN);
2240 	return beg;
2241 }
2242 
2243 
2244 /* parse_list • parsing ordered or unordered list block */
2245 static size_t
parse_list(hoedown_buffer * ob,hoedown_document * doc,uint8_t * data,size_t size,hoedown_list_flags flags)2246 parse_list(hoedown_buffer *ob, hoedown_document *doc, uint8_t *data, size_t size, hoedown_list_flags flags)
2247 {
2248 	hoedown_buffer *work = 0;
2249 	size_t i = 0, j;
2250 
2251 	work = newbuf(doc, BUFFER_BLOCK);
2252 
2253 	while (i < size) {
2254 		j = parse_listitem(work, doc, data + i, size - i, &flags);
2255 		i += j;
2256 
2257 		if (!j || (flags & HOEDOWN_LI_END))
2258 			break;
2259 	}
2260 
2261 	if (doc->md.list)
2262 		doc->md.list(ob, work, flags, &doc->data);
2263 	popbuf(doc, BUFFER_BLOCK);
2264 	return i;
2265 }
2266 
/* get_atxheader_info • extract the level and title of an atx-style header */
/*	*level receives the number of leading '#' characters (at most 6) */
/*	*skip, when skip is not NULL, receives the offset of the end of the */
/*	header line (the newline, or size when there is none) */
/*	returns a malloc()ed, NUL-terminated copy of the title with the */
/*	trailing '#' run and surrounding spaces removed; the caller owns it */
/*	and must free() it. Returns NULL when the title is empty or when */
/*	the allocation fails (level and skip are still set) */
uint8_t *
get_atxheader_info(uint8_t *data, size_t size, size_t * level, size_t * skip)
{
	size_t i, end, title_len;
	uint8_t *title;

	*level = 0;
	while (*level < size && *level < 6 && data[*level] == '#') {
		(*level)++;
	}

	/* skip the spaces between the '#' run and the title */
	for (i = *level; i < size && data[i] == ' '; i++)
		;

	/* locate the end of the line */
	for (end = i; end < size && data[end] != '\n'; end++)
		;
	if (skip)
		*skip = end;

	/* drop the optional closing '#' run, then the spaces before it */
	while (end && data[end - 1] == '#')
		end--;

	while (end && data[end - 1] == ' ')
		end--;

	if (end <= i)
		return NULL;

	title_len = end - i;
	title = malloc(title_len + 1);
	if (!title)
		return NULL;

	memcpy(title, data + i, title_len);
	title[title_len] = 0;
	return title;
}
2296 
2297 /* parse_atxheader • parsing of atx-style headers */
2298 static size_t
parse_atxheader(hoedown_buffer * ob,hoedown_document * doc,uint8_t * data,size_t size)2299 parse_atxheader(hoedown_buffer *ob, hoedown_document *doc, uint8_t *data, size_t size)
2300 {
2301 	size_t level = 0;
2302 	size_t skip = 0;
2303 
2304 	uint8_t * title = get_atxheader_info(data, size, &level, &skip);
2305 
2306 	if (level == 1)
2307 	{
2308 		doc->counter.chapter ++ ;
2309 		doc->counter.section = 0;
2310 		doc->counter.subsection = 0;
2311 	} else if (level == 2)
2312 	{
2313 		doc->counter.section ++;
2314 		doc->counter.subsection = 0;
2315 	} else if (level == 3)
2316 	{
2317 		doc->counter.subsection ++;
2318 	}
2319 
2320 	if (title) {
2321 		hoedown_buffer *work = newbuf(doc, BUFFER_SPAN);
2322 
2323 		parse_inline(work, doc, title, strlen((char*)title));
2324 
2325 		if (doc->md.header)
2326 		{
2327 			doc->md.header(ob, work, (int)level, &doc->data, doc->counter, doc->document_metadata->numbering);
2328 		}
2329 		popbuf(doc, BUFFER_SPAN);
2330 	}
2331 
2332 	return skip;
2333 }
2334 
2335 /* parse_footnote_def • parse a single footnote definition */
2336 static void
parse_footnote_def(hoedown_buffer * ob,hoedown_document * doc,unsigned int num,uint8_t * data,size_t size)2337 parse_footnote_def(hoedown_buffer *ob, hoedown_document *doc, unsigned int num, uint8_t *data, size_t size)
2338 {
2339 	hoedown_buffer *work = 0;
2340 	work = newbuf(doc, BUFFER_SPAN);
2341 
2342 	parse_block(work, doc, data, size, -1);
2343 
2344 	if (doc->md.footnote_def)
2345 	doc->md.footnote_def(ob, work, num, &doc->data);
2346 	popbuf(doc, BUFFER_SPAN);
2347 }
2348 
2349 /* parse_footnote_list • render the contents of the footnotes */
2350 static void
parse_footnote_list(hoedown_buffer * ob,hoedown_document * doc,struct footnote_list * footnotes)2351 parse_footnote_list(hoedown_buffer *ob, hoedown_document *doc, struct footnote_list *footnotes)
2352 {
2353 	hoedown_buffer *work = 0;
2354 	struct footnote_item *item;
2355 	struct footnote_ref *ref;
2356 
2357 	if (footnotes->count == 0)
2358 		return;
2359 
2360 	work = newbuf(doc, BUFFER_BLOCK);
2361 
2362 	item = footnotes->head;
2363 	while (item) {
2364 		ref = item->ref;
2365 		parse_footnote_def(work, doc, ref->num, ref->contents->data, ref->contents->size);
2366 		item = item->next;
2367 	}
2368 
2369 	if (doc->md.footnotes)
2370 		doc->md.footnotes(ob, work, &doc->data);
2371 	popbuf(doc, BUFFER_BLOCK);
2372 }
2373 
2374 /* htmlblock_is_end • check for end of HTML block : </tag>( *)\n */
2375 /*	returns tag length on match, 0 otherwise */
2376 /*	assumes data starts with "<" */
2377 static size_t
htmlblock_is_end(const char * tag,size_t tag_len,hoedown_document * doc,uint8_t * data,size_t size)2378 htmlblock_is_end(
2379 	const char *tag,
2380 	size_t tag_len,
2381 	hoedown_document *doc,
2382 	uint8_t *data,
2383 	size_t size)
2384 {
2385 	size_t i = tag_len + 3, w;
2386 
2387 	/* try to match the end tag */
2388 	/* note: we're not considering tags like "</tag >" which are still valid */
2389 	if (i > size ||
2390 		data[1] != '/' ||
2391 		strncasecmp((char *)data + 2, tag, tag_len) != 0 ||
2392 		data[tag_len + 2] != '>')
2393 		return 0;
2394 
2395 	/* rest of the line must be empty */
2396 	if ((w = is_empty(data + i, size - i)) == 0 && i < size)
2397 		return 0;
2398 
2399 	return i + w;
2400 }
2401 
2402 /* htmlblock_find_end • try to find HTML block ending tag */
2403 /*	returns the length on match, 0 otherwise */
2404 static size_t
htmlblock_find_end(const char * tag,size_t tag_len,hoedown_document * doc,uint8_t * data,size_t size)2405 htmlblock_find_end(
2406 	const char *tag,
2407 	size_t tag_len,
2408 	hoedown_document *doc,
2409 	uint8_t *data,
2410 	size_t size)
2411 {
2412 	size_t i = 0, w;
2413 
2414 	while (1) {
2415 		while (i < size && data[i] != '<') i++;
2416 		if (i >= size) return 0;
2417 
2418 		w = htmlblock_is_end(tag, tag_len, doc, data + i, size - i);
2419 		if (w) return i + w;
2420 		i++;
2421 	}
2422 }
2423 
2424 /* htmlblock_find_end_strict • try to find end of HTML block in strict mode */
2425 /*	(it must be an unindented line, and have a blank line afterwads) */
2426 /*	returns the length on match, 0 otherwise */
2427 static size_t
htmlblock_find_end_strict(const char * tag,size_t tag_len,hoedown_document * doc,uint8_t * data,size_t size)2428 htmlblock_find_end_strict(
2429 	const char *tag,
2430 	size_t tag_len,
2431 	hoedown_document *doc,
2432 	uint8_t *data,
2433 	size_t size)
2434 {
2435 	size_t i = 0, mark;
2436 
2437 	while (1) {
2438 		mark = i;
2439 		while (i < size && data[i] != '\n') i++;
2440 		if (i < size) i++;
2441 		if (i == mark) return 0;
2442 
2443 		if (data[mark] == ' ' && mark > 0) continue;
2444 		mark += htmlblock_find_end(tag, tag_len, doc, data + mark, i - mark);
2445 		if (mark == i && (is_empty(data + i, size - i) || i >= size)) break;
2446 	}
2447 
2448 	return i;
2449 }
2450 
2451 /* parse_htmlblock • parsing of inline HTML block */
2452 static size_t
parse_htmlblock(hoedown_buffer * ob,hoedown_document * doc,uint8_t * data,size_t size,int do_render)2453 parse_htmlblock(hoedown_buffer *ob, hoedown_document *doc, uint8_t *data, size_t size, int do_render)
2454 {
2455 	hoedown_buffer work = { NULL, 0, 0, 0, NULL, NULL, NULL };
2456 	size_t i, j = 0, tag_len, tag_end;
2457 	const char *curtag = NULL;
2458 
2459 	work.data = data;
2460 
2461 	/* identification of the opening tag */
2462 	if (size < 2 || data[0] != '<')
2463 		return 0;
2464 
2465 	i = 1;
2466 	while (i < size && data[i] != '>' && data[i] != ' ')
2467 		i++;
2468 
2469 	if (i < size)
2470 		curtag = hoedown_find_block_tag((char *)data + 1, (int)i - 1);
2471 
2472 	/* handling of special cases */
2473 	if (!curtag) {
2474 
2475 		/* HTML comment, laxist form */
2476 		if (size > 5 && data[1] == '!' && data[2] == '-' && data[3] == '-') {
2477 			i = 5;
2478 
2479 			while (i < size && !(data[i - 2] == '-' && data[i - 1] == '-' && data[i] == '>'))
2480 				i++;
2481 
2482 			i++;
2483 
2484 			if (i < size)
2485 				j = is_empty(data + i, size - i);
2486 
2487 			if (j) {
2488 				work.size = i + j;
2489 				if (do_render && doc->md.blockhtml)
2490 					doc->md.blockhtml(ob, &work, &doc->data);
2491 				return work.size;
2492 			}
2493 		}
2494 
2495 		/* HR, which is the only self-closing block tag considered */
2496 		if (size > 4 && (data[1] == 'h' || data[1] == 'H') && (data[2] == 'r' || data[2] == 'R')) {
2497 			i = 3;
2498 			while (i < size && data[i] != '>')
2499 				i++;
2500 
2501 			if (i + 1 < size) {
2502 				i++;
2503 				j = is_empty(data + i, size - i);
2504 				if (j) {
2505 					work.size = i + j;
2506 					if (do_render && doc->md.blockhtml)
2507 						doc->md.blockhtml(ob, &work, &doc->data);
2508 					return work.size;
2509 				}
2510 			}
2511 		}
2512 
2513 		/* no special case recognised */
2514 		return 0;
2515 	}
2516 
2517 	/* looking for a matching closing tag in strict mode */
2518 	tag_len = strlen(curtag);
2519 	tag_end = htmlblock_find_end_strict(curtag, tag_len, doc, data, size);
2520 
2521 	/* if not found, trying a second pass looking for indented match */
2522 	/* but not if tag is "ins" or "del" (following original Markdown.pl) */
2523 	if (!tag_end && strcmp(curtag, "ins") != 0 && strcmp(curtag, "del") != 0)
2524 		tag_end = htmlblock_find_end(curtag, tag_len, doc, data, size);
2525 
2526 	if (!tag_end)
2527 		return 0;
2528 
2529 	/* the end of the block has been found */
2530 	work.size = tag_end;
2531 	if (do_render && doc->md.blockhtml)
2532 		doc->md.blockhtml(ob, &work, &doc->data);
2533 
2534 	return tag_end;
2535 }
2536 
2537 static void
parse_table_row(hoedown_buffer * ob,hoedown_document * doc,uint8_t * data,size_t size,size_t columns,hoedown_table_flags * col_data,hoedown_table_flags header_flag)2538 parse_table_row(
2539 	hoedown_buffer *ob,
2540 	hoedown_document *doc,
2541 	uint8_t *data,
2542 	size_t size,
2543 	size_t columns,
2544 	hoedown_table_flags *col_data,
2545 	hoedown_table_flags header_flag)
2546 {
2547 	size_t i = 0, col, len;
2548 	hoedown_buffer *row_work = 0;
2549 
2550 	if (!doc->md.table_cell || !doc->md.table_row)
2551 		return;
2552 
2553 	row_work = newbuf(doc, BUFFER_SPAN);
2554 
2555 	if (i < size && data[i] == '|')
2556 		i++;
2557 
2558 	for (col = 0; col < columns && i < size; ++col) {
2559 		size_t cell_start, cell_end;
2560 		hoedown_buffer *cell_work;
2561 
2562 		cell_work = newbuf(doc, BUFFER_SPAN);
2563 
2564 		while (i < size && _isspace(data[i]))
2565 			i++;
2566 
2567 		cell_start = i;
2568 
2569 		len = find_emph_char(data + i, size - i, '|');
2570 
2571 		/* Two possibilities for len == 0:
2572 		   1) No more pipe char found in the current line.
2573 		   2) The next pipe is right after the current one, i.e. empty cell.
2574 		   For case 1, we skip to the end of line; for case 2 we just continue.
2575 		*/
2576 		if (len == 0 && i < size && data[i] != '|')
2577 			len = size - i;
2578 		i += len;
2579 
2580 		cell_end = i - 1;
2581 
2582 		while (cell_end > cell_start && _isspace(data[cell_end]))
2583 			cell_end--;
2584 
2585 		parse_inline(cell_work, doc, data + cell_start, 1 + cell_end - cell_start);
2586 		doc->md.table_cell(row_work, cell_work, col_data[col] | header_flag, &doc->data);
2587 
2588 		popbuf(doc, BUFFER_SPAN);
2589 		i++;
2590 	}
2591 
2592 	for (; col < columns; ++col) {
2593 		hoedown_buffer empty_cell = { 0, 0, 0, 0, NULL, NULL, NULL };
2594 		doc->md.table_cell(row_work, &empty_cell, col_data[col] | header_flag, &doc->data);
2595 	}
2596 
2597 	doc->md.table_row(ob, row_work, &doc->data);
2598 
2599 	popbuf(doc, BUFFER_SPAN);
2600 }
2601 
2602 static size_t
parse_table_header(hoedown_buffer * ob,hoedown_document * doc,uint8_t * data,size_t size,size_t * columns,hoedown_table_flags ** column_data)2603 parse_table_header(
2604 	hoedown_buffer *ob,
2605 	hoedown_document *doc,
2606 	uint8_t *data,
2607 	size_t size,
2608 	size_t *columns,
2609 	hoedown_table_flags **column_data)
2610 {
2611 	int pipes;
2612 	size_t i = 0, col, header_end, under_end;
2613 
2614 	pipes = 0;
2615 	while (i < size && data[i] != '\n')
2616 		if (data[i++] == '|')
2617 			pipes++;
2618 
2619 	if (i == size || pipes == 0)
2620 		return 0;
2621 
2622 	header_end = i;
2623 
2624 	while (header_end > 0 && _isspace(data[header_end - 1]))
2625 		header_end--;
2626 
2627 	if (data[0] == '|')
2628 		pipes--;
2629 
2630 	if (header_end && data[header_end - 1] == '|')
2631 		pipes--;
2632 
2633 	if (pipes < 0)
2634 		return 0;
2635 
2636 	*columns = pipes + 1;
2637 	*column_data = hoedown_calloc(*columns, sizeof(hoedown_table_flags));
2638 
2639 	/* Parse the header underline */
2640 	i++;
2641 	if (i < size && data[i] == '|')
2642 		i++;
2643 
2644 	under_end = i;
2645 	while (under_end < size && data[under_end] != '\n')
2646 		under_end++;
2647 
2648 	for (col = 0; col < *columns && i < under_end; ++col) {
2649 		size_t dashes = 0;
2650 
2651 		while (i < under_end && data[i] == ' ')
2652 			i++;
2653 
2654 		if (data[i] == ':') {
2655 			i++; (*column_data)[col] |= HOEDOWN_TABLE_ALIGN_LEFT;
2656 			dashes++;
2657 		}
2658 
2659 		while (i < under_end && data[i] == '-') {
2660 			i++; dashes++;
2661 		}
2662 
2663 		if (i < under_end && data[i] == ':') {
2664 			i++; (*column_data)[col] |= HOEDOWN_TABLE_ALIGN_RIGHT;
2665 			dashes++;
2666 		}
2667 
2668 		while (i < under_end && data[i] == ' ')
2669 			i++;
2670 
2671 		if (i < under_end && data[i] != '|' && data[i] != '+')
2672 			break;
2673 
2674 		if (dashes < 3)
2675 			break;
2676 
2677 		i++;
2678 	}
2679 
2680 	if (col < *columns)
2681 		return 0;
2682 
2683 	parse_table_row(
2684 		ob, doc, data,
2685 		header_end,
2686 		*columns,
2687 		*column_data,
2688 		HOEDOWN_TABLE_HEADER
2689 	);
2690 
2691 	return under_end + 1;
2692 }
2693 
2694 static size_t
parse_table(hoedown_buffer * ob,hoedown_document * doc,uint8_t * data,size_t size)2695 parse_table(
2696 	hoedown_buffer *ob,
2697 	hoedown_document *doc,
2698 	uint8_t *data,
2699 	size_t size)
2700 {
2701 	size_t i;
2702 
2703 	hoedown_buffer *work = 0;
2704 	hoedown_buffer *header_work = 0;
2705 	hoedown_buffer *body_work = 0;
2706 
2707 	size_t columns;
2708 	hoedown_table_flags *col_data = NULL;
2709 
2710 	work = newbuf(doc, BUFFER_BLOCK);
2711 	header_work = newbuf(doc, BUFFER_SPAN);
2712 	body_work = newbuf(doc, BUFFER_BLOCK);
2713 
2714 	i = parse_table_header(header_work, doc, data, size, &columns, &col_data);
2715 	if (i > 0) {
2716 
2717 		while (i < size) {
2718 			size_t row_start;
2719 			int pipes = 0;
2720 
2721 			row_start = i;
2722 
2723 			while (i < size && data[i] != '\n')
2724 				if (data[i++] == '|')
2725 					pipes++;
2726 
2727 			if (pipes == 0 || i == size) {
2728 				i = row_start;
2729 				break;
2730 			}
2731 
2732 			parse_table_row(
2733 				body_work,
2734 				doc,
2735 				data + row_start,
2736 				i - row_start,
2737 				columns,
2738 				col_data, 0
2739 			);
2740 
2741 			i++;
2742 		}
2743 
2744         if (doc->md.table_header)
2745             doc->md.table_header(work, header_work, &doc->data);
2746 
2747         if (doc->md.table_body)
2748             doc->md.table_body(work, body_work, &doc->data);
2749 
2750 		if (doc->md.table)
2751 			doc->md.table(ob, work, &doc->data, col_data, columns);
2752 	}
2753 
2754 	free(col_data);
2755 	popbuf(doc, BUFFER_SPAN);
2756 	popbuf(doc, BUFFER_BLOCK);
2757 	popbuf(doc, BUFFER_BLOCK);
2758 	return i;
2759 }
2760 
2761 static size_t
parse_abstract(hoedown_buffer * ob,hoedown_document * doc,uint8_t * data,size_t size)2762 parse_abstract(
2763 	hoedown_buffer *ob,
2764 	hoedown_document *doc,
2765 	uint8_t *data,
2766 	size_t size)
2767 {
2768 	size_t skip = 0;
2769 	while (skip < size && !startsWith("\n@/\n", (char*)data+skip))
2770 	{
2771 		skip ++;
2772 	}
2773 
2774 
2775 	if (doc->md.abstract)
2776 	{
2777 		doc->md.abstract(ob);
2778 		parse_block(ob, doc, data, skip, -1);
2779 		if (doc->md.keywords && doc->document_metadata->keywords)
2780 		{
2781 			hoedown_buffer * b = hoedown_buffer_new(1);
2782 			hoedown_buffer_puts(b, doc->document_metadata->keywords);
2783 			doc->md.keywords(ob,b,NULL);
2784 			hoedown_buffer_free(b);
2785 
2786 		}
2787 		doc->md.close(ob);
2788 	}
2789 	if (skip < size)
2790 	{
2791 		skip += 4;
2792 	}
2793 	return skip;
2794 }
2795 uint8_t *
parse_caption(hoedown_document * doc,uint8_t * data,size_t size)2796 parse_caption(hoedown_document *doc,
2797               uint8_t *data,
2798               size_t size)
2799 {
2800 	if (!data || size <= 0)
2801 		return NULL;
2802 	uint32_t i=0;
2803 	while (i < size && data[i] !='\n'){
2804 		if (data[i] == ')' && (i==0 || data[i-1] != '\\'))
2805 			break;
2806 		i++;
2807 	}
2808 	if (i) {
2809 		hoedown_buffer * buf = hoedown_buffer_new(1);
2810 		parse_inline(buf, doc, data, i);
2811 		uint8_t * tmp = malloc(sizeof(uint8_t) * (buf->size+1));
2812 		tmp[buf->size] = 0;
2813 		memcpy(tmp, buf->data, buf->size);
2814 		// clean escape chars
2815 		tmp = (uint8_t*)clean_string((char*)tmp, buf->size);
2816 		hoedown_buffer_free(buf);
2817 		return tmp;
2818 	}
2819 	return NULL;
2820 }
2821 
2822 static size_t
parse_fl(hoedown_buffer * ob,hoedown_document * doc,uint8_t * data,size_t size,float_type type)2823 parse_fl(
2824 	hoedown_buffer *ob,
2825 	hoedown_document *doc,
2826 	uint8_t *data,
2827 	size_t size,
2828     float_type type)
2829 {
2830 	size_t begin = 0;
2831 	size_t skip = 0;
2832 	float_args args = {};
2833 	args.type = type;
2834 	args.caption = NULL;
2835 
2836 	if (data[0] == '(')
2837 	{
2838 		begin ++;
2839 		while (begin < size && (data[begin] !=')' && data[begin] !='\n')){
2840 			begin ++;
2841 		}
2842 		if (begin > 2){
2843 			args.id = malloc(sizeof(char)*(begin));
2844 			args.id[begin-1] = 0;
2845 			memcpy(args.id, data+1, begin-1);
2846 		}
2847 		begin++;
2848 
2849 	}
2850 	while (skip+begin < size && !startsWith("\n@/", (char*)data+skip+begin))
2851 	{
2852 		if (startsWith("\n@caption(",(char*) data+skip+begin))
2853 		{
2854 			args.caption = (char*)parse_caption(doc, data+skip+begin+10, size-begin-skip-10);
2855 		}
2856 		skip ++;
2857 	}
2858 
2859 
2860 	if (doc->md.open_float)
2861 	{
2862 		doc->md.open_float(ob, args, &doc->data);
2863 		parse_block(ob, doc, data+begin, skip, -1);
2864 		doc->md.close_float(ob, args, &doc->data);
2865 	}
2866 	if (skip < size)
2867 	{
2868 		skip += 4;
2869 	}
2870 	return skip + begin;
2871 }
2872 
2873 static size_t
parse_eq(hoedown_buffer * ob,hoedown_document * doc,uint8_t * data,size_t size)2874 parse_eq(
2875 	hoedown_buffer *ob,
2876 	hoedown_document *doc,
2877 	uint8_t *data,
2878 	size_t size)
2879 {
2880 	size_t begin = 0;
2881 	size_t skip = 0;
2882 	float_args args = {};
2883 	args.type = EQUATION;
2884 
2885 	if (data[0] == '(')
2886 	{
2887 		begin ++;
2888 		while (begin < size && (data[begin] !=')' && data[begin] !='\n')){
2889 			begin ++;
2890 		}
2891 		args.id = malloc(sizeof(char)*(begin));
2892 		args.id[begin-1] = 0;
2893 		memcpy(args.id, data+1, begin-1);
2894 		begin++;
2895 	}
2896 	while (skip+begin < size && !startsWith("\n@/", (char*)data+skip+begin))
2897 	{
2898 		skip ++;
2899 	}
2900 
2901 	if (doc->md.opn_equation && skip)
2902 	{
2903 		doc->md.opn_equation(ob, args.id, &doc->data);
2904 		hoedown_buffer * text = hoedown_buffer_new(skip);
2905 		hoedown_buffer_put(text, data+begin, skip);
2906 		if (doc->md.eq_math)
2907 			doc->md.eq_math(ob, text, 2, &doc->data);
2908 		doc->md.cls_equation(ob, &doc->data);
2909 	}
2910 	if (skip < size)
2911 	{
2912 		skip += 4;
2913 	}
2914 	return skip + begin;
2915 }
2916 
2917 
2918 static size_t
parse_float(hoedown_buffer * ob,hoedown_document * doc,uint8_t * data,size_t size)2919 parse_float(
2920 	hoedown_buffer *ob,
2921 	hoedown_document *doc,
2922 	uint8_t *data,
2923 	size_t size)
2924 {
2925 	if (startsWith("@abstract", (char*)data) && is_separator(data[9])) {
2926 		return parse_abstract(ob, doc, data+9,size-9)+9;
2927 	}
2928 	if (startsWith("@figure", (char*)data) && is_separator(data[7])) {
2929 		return parse_fl(ob, doc, data+7, size-7, FIGURE)+7;
2930 	}
2931 	if (startsWith("@table", (char*)data) && is_separator(data[6])) {
2932 		return parse_fl(ob, doc, data+6, size-6, TABLE)+6;
2933 	}
2934 	if (startsWith("@listing", (char*)data) && is_separator(data[8])) {
2935 		return parse_fl(ob, doc, data+8, size-8, LISTING)+8;
2936 	}
2937 	if (startsWith("@equation", (char*)data) && is_separator(data[9])) {
2938 		return parse_eq(ob, doc, data+9, size-9) + 9;
2939 	}
2940 	if (startsWith("@toc", (char*)data) && is_separator(data[4]))
2941 	{
2942 		if (doc->md.toc && doc->table_of_contents)
2943 			doc->md.toc(ob, doc->table_of_contents, doc->document_metadata->numbering);
2944 		return 4;
2945 	}
2946 
2947 	return 1;
2948 }
2949 
2950 static void
parse_position(hoedown_buffer * ob,hoedown_document * doc)2951 parse_position(hoedown_buffer *ob, hoedown_document *doc){
2952 	if (doc->md.position){
2953 		doc->md.position(ob);
2954 	}
2955 }
2956 
2957 /* parse_block • parsing of one block, returning next uint8_t to parse */
2958 static void
parse_block(hoedown_buffer * ob,hoedown_document * doc,uint8_t * data,size_t size,int position)2959 parse_block(hoedown_buffer *ob, hoedown_document *doc, uint8_t *data, size_t size, int position)
2960 {
2961 	size_t beg, end, i;
2962 	uint8_t *txt_data;
2963 	beg = 0;
2964 
2965 	if (doc->work_bufs[BUFFER_SPAN].size +
2966 		doc->work_bufs[BUFFER_BLOCK].size > doc->max_nesting)
2967 		return;
2968 
2969 	while (beg < size) {
2970 		if (position >= 0 && beg >= position) {
2971 			position = -1;
2972 			parse_position(ob, doc);
2973 		}
2974 		txt_data = data + beg;
2975 		end = size - beg;
2976 
2977 		if (is_atxheader(doc, txt_data, end))
2978 			beg += parse_atxheader(ob, doc, txt_data, end);
2979 
2980 		else if (data[beg] == '<' && doc->md.blockhtml &&
2981 				(i = parse_htmlblock(ob, doc, txt_data, end, 1)) != 0)
2982 			beg += i;
2983 
2984 		else if ((i = is_empty(txt_data, end)) != 0)
2985 			beg += i;
2986 
2987 		else if (is_hrule(txt_data, end)) {
2988 			if (doc->md.hrule)
2989 				doc->md.hrule(ob, &doc->data);
2990 
2991 			while (beg < size && data[beg] != '\n')
2992 				beg++;
2993 
2994 			beg++;
2995 		}
2996 
2997 		else if ((doc->ext_flags & HOEDOWN_EXT_FENCED_CODE) != 0 &&
2998 			(i = parse_fencedcode(ob, doc, txt_data, end)) != 0)
2999 			beg += i;
3000 
3001 		else if ((doc->ext_flags & HOEDOWN_EXT_TABLES) != 0 &&
3002 			(i = parse_table(ob, doc, txt_data, end)) != 0)
3003 			beg += i;
3004 
3005 		else if (prefix_quote(txt_data, end))
3006 			beg += parse_blockquote(ob, doc, txt_data, end);
3007 
3008 		else if (!(doc->ext_flags & HOEDOWN_EXT_DISABLE_INDENTED_CODE) && prefix_code(txt_data, end))
3009 			beg += parse_blockcode(ob, doc, txt_data, end);
3010 
3011 		else if (prefix_float(txt_data, end))
3012 			beg += parse_float(ob, doc, txt_data, end);
3013 
3014 		else if (prefix_uli(txt_data, end))
3015 			beg += parse_list(ob, doc, txt_data, end, 0);
3016 
3017 		else if (prefix_oli(txt_data, end))
3018 			beg += parse_list(ob, doc, txt_data, end, HOEDOWN_LIST_ORDERED);
3019 
3020 		else
3021 			beg += parse_paragraph(ob, doc, txt_data, end);
3022 	}
3023 	if (position > 0) {
3024 		parse_position(ob, doc);
3025 	}
3026 }
3027 
3028 
3029 
3030 /*********************
3031  * REFERENCE PARSING *
3032  *********************/
3033 void load_notes(const uint8_t * text, size_t size,  char* base_folder, struct footnote_list *list);
3034 
3035 /* is_footnote • returns whether a line is a footnote definition or not */
3036 static int
is_footnote(const uint8_t * data,size_t beg,size_t end,size_t * last,char * base_folder,struct footnote_list * list)3037 is_footnote(const uint8_t *data, size_t beg, size_t end, size_t *last, char* base_folder, struct footnote_list *list)
3038 {
3039 	if (startsWith("@bib(", (char*)data+beg))
3040 		{
3041 
3042 			size_t n = 0;
3043 			size_t i = 5+beg;
3044 			while(data[i] != '\n' && i != end)
3045 			{
3046 				if (data[i]==')')
3047 					break;
3048 				n++;
3049 				i++;
3050 			}
3051 
3052 			if (n){
3053 				char * path = malloc((n+1)*sizeof(char));
3054 				path[n] = 0;
3055 				strncpy(path, (char*)data+beg+5, n);
3056 				if (is_regular_file(path, base_folder)){
3057 					size_t size = 0;
3058 					char * bib = load_file(path, base_folder, &size);
3059 					load_notes((uint8_t*)bib, size, base_folder, list);
3060 					free(bib);
3061 				}
3062 				free(path);
3063 			}
3064 
3065 	        i = beg;
3066 	        while(data[i]!='\n')
3067 		      	i ++;
3068 	        *last = i;
3069 
3070 			return 1;
3071 		}
3072 	size_t i = 0;
3073 	hoedown_buffer *contents = 0;
3074 	size_t ind = 0;
3075 	int in_empty = 0;
3076 	size_t start = 0;
3077 
3078 	size_t id_offset, id_end;
3079 
3080 	/* up to 3 optional leading spaces */
3081 	if (beg + 3 >= end) return 0;
3082 	if (data[beg] == ' ') { i = 1;
3083 	if (data[beg + 1] == ' ') { i = 2;
3084 	if (data[beg + 2] == ' ') { i = 3;
3085 	if (data[beg + 3] == ' ') return 0; } } }
3086 	i += beg;
3087 
3088 	/* id part: caret followed by anything between brackets */
3089 	if (data[i] != '[') return 0;
3090 	i++;
3091 	if (i >= end || data[i] != '^') return 0;
3092 	i++;
3093 	id_offset = i;
3094 	while (i < end && data[i] != '\n' && data[i] != '\r' && data[i] != ']')
3095 		i++;
3096 	if (i >= end || data[i] != ']') return 0;
3097 	id_end = i;
3098 
3099 	/* spacer: colon (space | tab)* newline? (space | tab)* */
3100 	i++;
3101 	if (i >= end || data[i] != ':') return 0;
3102 	i++;
3103 
3104 	/* getting content buffer */
3105 	contents = hoedown_buffer_new(64);
3106 
3107 	start = i;
3108 
3109 	/* process lines similar to a list item */
3110 	while (i < end) {
3111 		while (i < end && data[i] != '\n' && data[i] != '\r') i++;
3112 
3113 		/* process an empty line */
3114 		if (is_empty(data + start, i - start)) {
3115 			in_empty = 1;
3116 			if (i < end && (data[i] == '\n' || data[i] == '\r')) {
3117 				i++;
3118 				if (i < end && data[i] == '\n' && data[i - 1] == '\r') i++;
3119 			}
3120 			start = i;
3121 			continue;
3122 		}
3123 
3124 		/* calculating the indentation */
3125 		ind = 0;
3126 		while (ind < 4 && start + ind < end && data[start + ind] == ' ')
3127 			ind++;
3128 
3129 		/* joining only indented stuff after empty lines;
3130 		 * note that now we only require 1 space of indentation
3131 		 * to continue, just like lists */
3132 		if (ind == 0) {
3133 			if (start == id_end + 2 && data[start] == '\t') {}
3134 			else break;
3135 		}
3136 		else if (in_empty) {
3137 			hoedown_buffer_putc(contents, '\n');
3138 		}
3139 
3140 		in_empty = 0;
3141 
3142 		/* adding the line into the content buffer */
3143 		hoedown_buffer_put(contents, data + start + ind, i - start - ind);
3144 		/* add carriage return */
3145 		if (i < end) {
3146 			hoedown_buffer_putc(contents, '\n');
3147 			if (i < end && (data[i] == '\n' || data[i] == '\r')) {
3148 				i++;
3149 				if (i < end && data[i] == '\n' && data[i - 1] == '\r') i++;
3150 			}
3151 		}
3152 		start = i;
3153 	}
3154 
3155 	if (last)
3156 		*last = start;
3157 
3158 	if (list) {
3159 		struct footnote_ref *ref;
3160 		ref = create_footnote_ref(list, data + id_offset, id_end - id_offset);
3161 		if (!ref)
3162 			return 0;
3163 		if (!add_footnote_ref(list, ref)) {
3164 			free_footnote_ref(ref);
3165 			return 0;
3166 		}
3167 		ref->contents = contents;
3168 	}
3169 
3170 	return 1;
3171 }
3172 
3173 /* is_ref • returns whether a line is a reference or not */
3174 static int
is_ref(const uint8_t * data,size_t beg,size_t end,size_t * last,struct link_ref ** refs)3175 is_ref(const uint8_t *data, size_t beg, size_t end, size_t *last, struct link_ref **refs)
3176 {
3177 /*	int n; */
3178 
3179 	size_t i = 0;
3180 	size_t id_offset, id_end;
3181 	size_t link_offset, link_end;
3182 	size_t title_offset, title_end;
3183 	size_t line_end;
3184 
3185 	/* up to 3 optional leading spaces */
3186 	if (beg + 3 >= end) return 0;
3187 	if (data[beg] == ' ') { i = 1;
3188 	if (data[beg + 1] == ' ') { i = 2;
3189 	if (data[beg + 2] == ' ') { i = 3;
3190 	if (data[beg + 3] == ' ') return 0; } } }
3191 	i += beg;
3192 
3193 	/* id part: anything but a newline between brackets */
3194 	if (data[i] != '[') return 0;
3195 	i++;
3196 	id_offset = i;
3197 	while (i < end && data[i] != '\n' && data[i] != '\r' && data[i] != ']')
3198 		i++;
3199 	if (i >= end || data[i] != ']') return 0;
3200 	id_end = i;
3201 
3202 	/* spacer: colon (space | tab)* newline? (space | tab)* */
3203 	i++;
3204 	if (i >= end || data[i] != ':') return 0;
3205 	i++;
3206 	while (i < end && data[i] == ' ') i++;
3207 	if (i < end && (data[i] == '\n' || data[i] == '\r')) {
3208 		i++;
3209 		if (i < end && data[i] == '\r' && data[i - 1] == '\n') i++; }
3210 	while (i < end && data[i] == ' ') i++;
3211 	if (i >= end) return 0;
3212 
3213 	/* link: spacing-free sequence, optionally between angle brackets */
3214 	if (data[i] == '<')
3215 		i++;
3216 
3217 	link_offset = i;
3218 
3219 	while (i < end && data[i] != ' ' && data[i] != '\n' && data[i] != '\r')
3220 		i++;
3221 
3222 	if (data[i - 1] == '>') link_end = i - 1;
3223 	else link_end = i;
3224 
3225 	/* optional spacer: (space | tab)* (newline | '\'' | '"' | '(' ) */
3226 	while (i < end && data[i] == ' ') i++;
3227 	if (i < end && data[i] != '\n' && data[i] != '\r'
3228 			&& data[i] != '\'' && data[i] != '"' && data[i] != '(')
3229 		return 0;
3230 	line_end = 0;
3231 	/* computing end-of-line */
3232 	if (i >= end || data[i] == '\r' || data[i] == '\n') line_end = i;
3233 	if (i + 1 < end && data[i] == '\n' && data[i + 1] == '\r')
3234 		line_end = i + 1;
3235 
3236 	/* optional (space|tab)* spacer after a newline */
3237 	if (line_end) {
3238 		i = line_end + 1;
3239 		while (i < end && data[i] == ' ') i++; }
3240 
3241 	/* optional title: any non-newline sequence enclosed in '"()
3242 					alone on its line */
3243 	title_offset = title_end = 0;
3244 	if (i + 1 < end
3245 	&& (data[i] == '\'' || data[i] == '"' || data[i] == '(')) {
3246 		i++;
3247 		title_offset = i;
3248 		/* looking for EOL */
3249 		while (i < end && data[i] != '\n' && data[i] != '\r') i++;
3250 		if (i + 1 < end && data[i] == '\n' && data[i + 1] == '\r')
3251 			title_end = i + 1;
3252 		else	title_end = i;
3253 		/* stepping back */
3254 		i -= 1;
3255 		while (i > title_offset && data[i] == ' ')
3256 			i -= 1;
3257 		if (i > title_offset
3258 		&& (data[i] == '\'' || data[i] == '"' || data[i] == ')')) {
3259 			line_end = title_end;
3260 			title_end = i; } }
3261 
3262 	if (!line_end || link_end == link_offset)
3263 		return 0; /* garbage after the link empty link */
3264 
3265 	/* a valid ref has been found, filling-in return structures */
3266 	if (last)
3267 		*last = line_end;
3268 
3269 	if (refs) {
3270 		struct link_ref *ref;
3271 
3272 		ref = add_link_ref(refs, data + id_offset, id_end - id_offset);
3273 		if (!ref)
3274 			return 0;
3275 
3276 		ref->link = hoedown_buffer_new(link_end - link_offset);
3277 		hoedown_buffer_put(ref->link, data + link_offset, link_end - link_offset);
3278 
3279 		if (title_end > title_offset) {
3280 			ref->title = hoedown_buffer_new(title_end - title_offset);
3281 			hoedown_buffer_put(ref->title, data + title_offset, title_end - title_offset);
3282 		}
3283 	}
3284 
3285 	return 1;
3286 }
3287 
3288 
/* load_notes • walks `data` line by line and hands every line start to
 * is_footnote(), which records footnote definitions in `list`.
 * Lines that are not footnote definitions are simply skipped. */
void
load_notes(const uint8_t * data, size_t size,  char* base_folder, struct footnote_list *list)
{
	static const uint8_t UTF8_BOM[] = {0xEF, 0xBB, 0xBF};
	size_t cursor = 0;
	size_t next;

	/* a leading UTF-8 byte-order mark is not part of the text */
	if (size >= 3 && memcmp(data, UTF8_BOM, 3) == 0)
		cursor = 3;

	while (cursor < size) {
		if (!is_footnote(data, cursor, size, &next, base_folder, list)) {
			/* not a footnote: move past the line body ... */
			next = cursor;
			while (next < size && data[next] != '\n' && data[next] != '\r')
				next++;
			/* ... and past every line-break byte that follows it */
			while (next < size && (data[next] == '\n' || data[next] == '\r'))
				next++;
		}
		cursor = next;
	}
}
/* expand_tabs • copies `line` into `ob`, replacing each tab with spaces up
 * to the next multiple-of-four column.
 *
 * Columns are counted per byte that is not of the form 10xxxxxx, so the
 * result is only meaningful for valid UTF-8 without combining characters
 * (combining characters are counted as a column although they should not be). */
static void expand_tabs(hoedown_buffer *ob, const uint8_t *line, size_t size)
{
	size_t pos = 0;
	size_t column = 0;

	while (pos < size) {
		size_t run_start = pos;

		/* gather a run of non-tab bytes, counting the columns it occupies */
		for (; pos < size && line[pos] != '\t'; pos++) {
			if ((line[pos] & 0xc0) != 0x80)
				column++;
		}

		if (pos > run_start)
			hoedown_buffer_put(ob, line + run_start, pos - run_start);

		if (pos >= size)
			break;

		/* a tab always yields at least one space */
		do {
			hoedown_buffer_putc(ob, ' ');
			column++;
		} while (column % 4);

		pos++;
	}
}
3349 
3350 /**********************
3351  * EXPORTED FUNCTIONS *
3352  **********************/
3353 
3354 hoedown_document *
hoedown_document_new(const hoedown_renderer * renderer,hoedown_extensions extensions,ext_definition * user_ext,const char * base_folder,size_t max_nesting)3355 hoedown_document_new(
3356 	const hoedown_renderer *renderer,
3357 	hoedown_extensions extensions,
3358     ext_definition * user_ext,
3359     const char * base_folder,
3360 	size_t max_nesting)
3361 {
3362 	hoedown_document *doc = NULL;
3363 
3364 	assert(max_nesting > 0 && renderer);
3365 
3366 	doc = hoedown_malloc(sizeof(hoedown_document));
3367 	memcpy(&doc->md, renderer, sizeof(hoedown_renderer));
3368 
3369 	doc->extensions = user_ext;
3370 	doc->base_folder = (base_folder != NULL) ? strdup (base_folder) : NULL;
3371 
3372 	doc->counter = (h_counter){0, 0, 0};
3373 
3374 	doc->floating_references = NULL;
3375 	doc->document_metadata = NULL;
3376 	doc->table_of_contents = NULL;
3377 	doc->data.opaque = renderer->opaque;
3378 	doc->data.meta = NULL;
3379 
3380 	hoedown_stack_init(&doc->work_bufs[BUFFER_BLOCK], 4);
3381 	hoedown_stack_init(&doc->work_bufs[BUFFER_SPAN], 8);
3382 
3383 	memset(doc->active_char, 0x0, 256);
3384 
3385 	if (extensions & HOEDOWN_EXT_UNDERLINE && doc->md.underline) {
3386 		doc->active_char['_'] = MD_CHAR_EMPHASIS;
3387 	}
3388 
3389 	if (doc->md.emphasis || doc->md.double_emphasis || doc->md.triple_emphasis) {
3390 		doc->active_char['*'] = MD_CHAR_EMPHASIS;
3391 		doc->active_char['_'] = MD_CHAR_EMPHASIS;
3392 		if (extensions & HOEDOWN_EXT_STRIKETHROUGH)
3393 			doc->active_char['~'] = MD_CHAR_EMPHASIS;
3394 		if (extensions & HOEDOWN_EXT_HIGHLIGHT)
3395 			doc->active_char['='] = MD_CHAR_EMPHASIS;
3396 	}
3397 
3398 	if (doc->md.codespan)
3399 		doc->active_char['`'] = MD_CHAR_CODESPAN;
3400 
3401 	if (doc->md.linebreak)
3402 		doc->active_char['\n'] = MD_CHAR_LINEBREAK;
3403 
3404 	if (doc->md.image || doc->md.link || doc->md.footnotes || doc->md.footnote_ref) {
3405 		doc->active_char['['] = MD_CHAR_LINK;
3406 		doc->active_char['!'] = MD_CHAR_IMAGE;
3407 	}
3408 
3409 	doc->active_char['<'] = MD_CHAR_LANGLE;
3410 	doc->active_char['\\'] = MD_CHAR_ESCAPE;
3411 	doc->active_char['&'] = MD_CHAR_ENTITY;
3412 
3413 	if (extensions & HOEDOWN_EXT_AUTOLINK) {
3414 		doc->active_char[':'] = MD_CHAR_AUTOLINK_URL;
3415 		doc->active_char['@'] = MD_CHAR_AUTOLINK_EMAIL;
3416 		doc->active_char['w'] = MD_CHAR_AUTOLINK_WWW;
3417 	}
3418 
3419 	if (extensions & HOEDOWN_EXT_SUPERSCRIPT)
3420 		doc->active_char['^'] = MD_CHAR_SUPERSCRIPT;
3421 
3422 	if (extensions & HOEDOWN_EXT_QUOTE)
3423 		doc->active_char['"'] = MD_CHAR_QUOTE;
3424 
3425 	if (extensions & HOEDOWN_EXT_MATH)
3426 		doc->active_char['$'] = MD_CHAR_MATH;
3427 
3428 	doc->active_char['('] = MD_CHAR_REF;
3429 
3430 	/* Extension data */
3431 	doc->ext_flags = extensions;
3432 	doc->max_nesting = max_nesting;
3433 	doc->in_link_body = 0;
3434 
3435 	return doc;
3436 }
3437 size_t
skip_yaml(hoedown_document * doc,hoedown_buffer * ob,const uint8_t * data,size_t size)3438 skip_yaml(hoedown_document *doc, hoedown_buffer *ob, const uint8_t *data, size_t size)
3439 {
3440 	size_t skip = 0;
3441 	if (startsWith("---", (char*)data) && is_separator(data[3])){
3442 		skip += 4;
3443 		while (skip < size && !(startsWith("\n---", (char*)data+skip) &&
3444 		       (skip + 4 >= size || is_separator(data[skip+4])))) {
3445 			skip ++;
3446 		}
3447 		if (skip < size)
3448 		{
3449 			skip += 5;
3450 		}
3451 	}
3452 	return skip;
3453 }
3454 
3455 void
sub_render(hoedown_document * doc,hoedown_buffer * ob,const uint8_t * data,size_t size,int position)3456 sub_render(hoedown_document *doc, hoedown_buffer *ob, const uint8_t *data, size_t size, int position)
3457 {
3458 	static const uint8_t UTF8_BOM[] = {0xEF, 0xBB, 0xBF};
3459 
3460 	hoedown_buffer *text;
3461 	size_t beg, end;
3462 	text = hoedown_buffer_new(64);
3463 
3464 	/* Preallocate enough space for our buffer to avoid expanding while copying */
3465 	hoedown_buffer_grow(text, size);
3466 	/* first pass: looking for references, copying everything else */
3467 	beg = 0;
3468 
3469 	int footnotes_enabled = doc->ext_flags & HOEDOWN_EXT_FOOTNOTES;
3470 
3471 	/* Skip a possible UTF-8 BOM, even though the Unicode standard
3472 	 * discourages having these in UTF-8 documents */
3473 	if (size >= 3 && memcmp(data, UTF8_BOM, 3) == 0)
3474 		beg += 3;
3475 
3476 	while (beg < size) /* iterating over lines */
3477 		if (footnotes_enabled && is_footnote(data, beg, size, &end, doc->base_folder, &doc->footnotes_found))
3478 			beg = end;
3479 		else if (is_ref(data, beg, size, &end, doc->refs))
3480 			beg = end;
3481 		else { /* skipping to the next line */
3482 			end = beg;
3483 			while (end < size && data[end] != '\n' && data[end] != '\r')
3484 				end++;
3485 
3486 			/* adding the line body if present */
3487 			if (end > beg)
3488 				expand_tabs(text, data + beg, end - beg);
3489 
3490 			while (end < size && (data[end] == '\n' || data[end] == '\r')) {
3491 				/* add one \n per newline */
3492 				if (data[end] == '\n' || (end + 1 < size && data[end + 1] != '\n'))
3493 					hoedown_buffer_putc(text, '\n');
3494 				end++;
3495 			}
3496 
3497 			beg = end;
3498 		}
3499 
3500 	/* pre-grow the output buffer to minimize allocations */
3501 	hoedown_buffer_grow(ob, text->size + (text->size >> 1));
3502 
3503 	/* second pass: actual rendering */
3504 	if (doc->md.doc_header)
3505 		doc->md.doc_header(ob, 0, &doc->data);
3506 
3507 	if (text->size) {
3508 		size_t skip = skip_yaml(doc, ob, text->data, text->size);
3509 		/* adding a final newline if not already present */
3510 		if (text->data[text->size - 1] != '\n' &&  text->data[text->size - 1] != '\r')
3511 			hoedown_buffer_putc(text, '\n');
3512 
3513 		parse_block(ob, doc, text->data+skip, text->size-skip, position-skip);
3514 	}
3515 	hoedown_buffer_free(text);
3516 }
3517 
/* parse_keyword • stores the value of one front-matter `key: value` line.
 *
 * keyword: key text; spaces, newlines and tabs are removed from it in place.
 * meta:    metadata record to update.
 * data:    bytes following the ':' of the key.
 * size:    number of bytes available in `data`.
 *
 * The value runs from the first non-space byte up to the byte preceding the
 * first '\n' found at index 1 or later (index 0 itself is not tested).
 *
 * Returns the index of that '\n' (or `size` when none was found); returns 1
 * without storing anything when the scan stops immediately.
 *
 * Ownership: string-valued keys (title, keywords, style, affiliation) keep
 * the freshly allocated copy and release any value stored earlier; the copy
 * is freed again for keys that only derive a number or an enum from it. */
int parse_keyword(char * keyword, metadata * meta,  const uint8_t *data, size_t size)
{
	size_t j;
	size_t skip = 0;
	size_t len;
	int text = 0;
	char *word;

	/** clean keyword **/
	remove_char(keyword, ' ');
	remove_char(keyword, '\n');
	remove_char(keyword, '\t');

	/* find the end of the line while counting the leading spaces */
	for (j = 0; j + 1 < size && data[j + 1] != '\n'; j++) {
		if (text)
			continue;
		if (data[j] == ' ')
			skip++;
		else
			text = 1;
	}
	if (j == 0)
		return 1;

	/* copy data[skip..j]; two spare bytes are kept zeroed as before */
	len = j - skip + 1;
	word = malloc(len + 2);
	if (!word)
		return (int)(j + 1);	/* out of memory: skip this line */
	memset(word, 0, len + 2);
	memcpy(word, data + skip, len);

	if (!strcmp(keyword, "title")) {
		free(meta->title);
		meta->title = word;
	} else if (!strcmp(keyword, "author")) {
		/* NOTE(review): `word` is handed to add_string(); confirm that it
		 * keeps the pointer rather than copying it */
		meta->authors = add_string(meta->authors, word);
	} else if (!strcmp(keyword, "keywords")) {
		free(meta->keywords);
		meta->keywords = word;
	} else if (!strcmp(keyword, "style")) {
		free(meta->style);
		meta->style = word;
	} else if (!strcmp(keyword, "affiliation")) {
		free(meta->affiliation);
		meta->affiliation = word;
	} else {
		/* the remaining keys only derive a value from the text;
		 * assumes string_to_paper()/string_to_class() do not retain it */
		if (!strcmp(keyword, "numbering"))
			meta->numbering = !strcmp(word, "true");
		else if (!strcmp(keyword, "paper"))
			meta->paper_size = string_to_paper(word);
		else if (!strcmp(keyword, "class"))
			meta->doc_class = string_to_class(word);
		else if (!strcmp(keyword, "font-size"))
			meta->font_size = atoi(word);
		free(word);
	}

	return (int)(j + 1);
}
3567 
3568 void
append(reference * head,reference * next)3569 append(reference * head, reference * next)
3570 {
3571 	if (!head)
3572 		return;
3573 	if (head->next)
3574 		append(head->next, next);
3575 	else
3576 		head->next = next;
3577 }
3578 
3579 reference *
add_reference(char * id,int counter,float_type type,reference * ref)3580 add_reference(char * id, int counter, float_type type, reference * ref)
3581 {
3582 	reference * next = malloc(sizeof(reference));
3583 	next->next = NULL;
3584 	next->id = id;
3585 	next->type = type;
3586 	next->counter = counter;
3587 	if (ref)
3588 	{
3589 		append(ref, next);
3590 		return ref;
3591 	}
3592 	return next;
3593 }
3594 
3595 metadata *
parse_yaml(const uint8_t * data,size_t size)3596 parse_yaml(const uint8_t *data, size_t size)
3597 {
3598 	metadata * meta = malloc(sizeof(metadata));
3599 
3600 	meta->keywords = NULL;
3601 	meta->authors = NULL;
3602 	meta->style = NULL;
3603 	meta->title = NULL;
3604 
3605 	meta->paper_size = A4PAPER;
3606 	meta->doc_class = CLASS_ARTICLE;
3607 	meta->font_size = 10;
3608 
3609 	meta->numbering = 0;
3610 	meta->affiliation = NULL;
3611 
3612 	if (startsWith("---", (char*)data) && is_separator(data[3])){
3613 		int i = 4;
3614 		while (i < size){
3615 			if (startsWith("---\n", (char*)data+i))
3616 				break;
3617 			int j;
3618 			for (j = 0 ; j+i+1 < size && data[i+j+1] != ':' && data[i+j+1] != '\n'; j++){}
3619 			if (data[j+i+1] == ':'){
3620 				char type[j+3];
3621 				memset(type, 0, j+3);
3622 				memcpy(type, data+i, j+1);
3623 				j += parse_keyword(type, meta, data+i+j+2, size - i - j - 2);
3624 	       }
3625 
3626             i+=j+3;
3627 		}
3628 	}
3629 	return meta;
3630 }
3631 
3632 void
render_metadata(hoedown_document * doc,hoedown_buffer * ob,metadata * meta)3633 render_metadata(hoedown_document *doc, hoedown_buffer *ob, metadata * meta)
3634 {
3635 
3636 	if (meta->title != NULL && doc->md.title)
3637 	{
3638 		hoedown_buffer * b = hoedown_buffer_new(1);
3639 		hoedown_buffer_puts(b, meta->title);
3640 		doc->md.title(ob,b, meta);
3641 		hoedown_buffer_free(b);
3642 	}
3643 	if (meta->authors != NULL && doc->md.authors)
3644 	{
3645 		hoedown_buffer * b = hoedown_buffer_new(1);
3646 
3647 		doc->md.authors(ob,meta->authors);
3648 		hoedown_buffer_free(b);
3649 	}
3650 	if (meta->affiliation != NULL && doc->md.affiliation)
3651 	{
3652 		hoedown_buffer * b = hoedown_buffer_new(1);
3653 		hoedown_buffer_puts(b, meta->affiliation);
3654 		doc->md.affiliation(ob,b,NULL);
3655 		hoedown_buffer_free(b);
3656 	}
3657 
3658 }
/* find_ref • searches the list `refs` for a node whose id equals `id`.
 * On a match the node's counter is written to *counter and 1 is returned;
 * otherwise *counter is left untouched and 0 is returned. */
int find_ref(reference * refs, char*id, int *counter)
{
	reference *node;

	for (node = refs; node; node = node->next) {
		if (strcmp(node->id, id) != 0)
			continue;

		*counter = node->counter;
		return 1;
	}

	return 0;
}
3672 
3673 void
check_for_ref(hoedown_document * doc,const uint8_t * data,size_t size,html_counter * counter,float_type type)3674 check_for_ref(hoedown_document *doc, const uint8_t *data, size_t size, html_counter * counter, float_type type)
3675 {
3676 	int caption = 0;
3677 	size_t i = 0;
3678 	while (i < size && !startsWith("@/\n", (char*)data+i)){
3679 		i++;
3680 		if (startsWith("@caption(", (char*)data+i)){
3681 			caption = 1;
3682 		}
3683 	}
3684 	if (caption || type==EQUATION){
3685 		int c =0;
3686 		switch (type)
3687 		{
3688 		case EQUATION:
3689 			c = ++(counter->equation);
3690 			break;
3691 		case TABLE:
3692 			c = ++(counter->table);
3693 			break;
3694 		case LISTING:
3695 			c = ++(counter->listing);
3696 			break;
3697 		case FIGURE:
3698 			c = ++(counter->figure);
3699 			break;
3700 		}
3701 
3702 		if (data[0] == '('){
3703 			i = 1;
3704 			while (i < size && data[i] != '\n' && data[i] !=')')
3705 			{
3706 				i ++ ;
3707 			}
3708 			if (i > 1)
3709 			{
3710 				char * id = malloc((i)*sizeof(char));
3711 				memset(id, 0, i);
3712 				memcpy(id, data+1, i-1);
3713 				doc->floating_references = add_reference(id, c, type, doc->floating_references);
3714 			}
3715 		}
3716 	}
3717 }
3718 
3719 
3720 void
look_for_ref(hoedown_document * doc,const uint8_t * data,size_t size,html_counter * counter)3721 look_for_ref(hoedown_document *doc, const uint8_t *data, size_t size, html_counter * counter)
3722 {
3723 
3724 	if (startsWith("@figure", (char*)data))
3725 	{
3726 		check_for_ref(doc, data+7, size-7, counter, FIGURE);
3727 	}
3728 	if (startsWith("@table", (char*)data))
3729 	{
3730 		check_for_ref(doc, data+6, size-6,counter, TABLE);
3731 	}
3732 	if (startsWith("@listing", (char*)data))
3733 	{
3734 		check_for_ref(doc, data+8, size-8,counter,  LISTING);
3735 	}
3736 	if (startsWith("@equation", (char*)data))
3737 	{
3738 		check_for_ref(doc, data+9, size-9,counter, EQUATION);
3739 	}
3740 }
3741 
/* load_text • loads the file named by an `@include(path)` directive.
 *
 * data/size:   text starting at the directive; the first nine bytes (the
 *              length of "@include(") are skipped without being checked.
 * base_folder: passed through to is_regular_file() and load_file().
 * new_size:    set to 0 up front; filled in by load_file() on success.
 *
 * The path is everything up to the first ')' (or up to the end of the data
 * when there is none). Returns whatever load_file() returns when the path
 * names a regular file, NULL otherwise (empty path, not a regular file, or
 * out of memory). The caller releases the result with free(). */
char*
load_text(uint8_t *data, size_t size, char* base_folder, size_t * new_size)
{
	enum { DIRECTIVE_LEN = 9 };	/* strlen("@include(") */
	const uint8_t *start;
	const uint8_t *close;
	size_t n;
	char *path;
	char *buffer = NULL;

	*new_size = 0;

	if (size <= DIRECTIVE_LEN)
		return NULL;

	start = data + DIRECTIVE_LEN;
	close = memchr(start, ')', size - DIRECTIVE_LEN);
	n = close ? (size_t)(close - start) : size - DIRECTIVE_LEN;
	if (!n)
		return NULL;

	path = malloc(n + 1);
	if (!path)
		return NULL;
	memcpy(path, start, n);
	path[n] = 0;

	if (is_regular_file(path, base_folder))
		buffer = load_file(path, base_folder, new_size);

	free(path);
	return buffer;
}
3771 
3772 
3773 
3774 void
find_references(hoedown_document * doc,const uint8_t * data,size_t size,html_counter * counter)3775 find_references(hoedown_document *doc, const uint8_t *data, size_t size, html_counter * counter)
3776 {
3777 	size_t i;
3778 	for (i = 0; i < size; i++)
3779 	{
3780 		if (prefix_float((uint8_t*)data+i, size-i))
3781 		{
3782 			look_for_ref(doc, data+i, size-i, counter);
3783 		}
3784 		else if (startsWith("@include(", (char*) data+i))
3785 		{
3786 			size_t text_size;
3787 			char * text = load_text((uint8_t*)data+i, size-i, doc->base_folder, &text_size);
3788 			if (text_size && text)
3789 			{
3790 				find_references(doc,(const uint8_t*) text, text_size, counter);
3791 				free(text);
3792 			}
3793 		}
3794 	}
3795 }
3796 
3797 toc *
generate_toc(hoedown_document * doc,const uint8_t * data,size_t size,toc * parent)3798 generate_toc(hoedown_document * doc, const uint8_t * data, size_t size, toc* parent)
3799 {
3800 	if (!data || !size)
3801 		return parent;
3802 	size_t i = 0;
3803 	toc * root = parent;
3804 	toc * current = root;
3805 	char code_block = 0;
3806 
3807 	if (size > 4 && startsWith("---", (char*)data) && is_separator(data[3])){
3808 		i  = 4;
3809 		while (i < size) {
3810 			if (data[i-1] == '\n' && startsWith("---", (char*)data + i) &&  is_separator(data[i + 3])) {
3811 				i += 3;
3812 				break;
3813 			}
3814 			i++;
3815 		}
3816 
3817 	}
3818 
3819 	for (; i < size-1; i++)
3820 	{
3821 		if (i == 0 || data[i-1] == '\n')
3822 		{
3823 			if (!code_block) {
3824 				if (is_atxheader(doc, (uint8_t*)data+i, size-i))
3825 				{
3826 					size_t level = 0;
3827 					uint8_t * title = get_atxheader_info((uint8_t*)data+i, size-i, &level, NULL);
3828 					if (level <= 3 && title)
3829 					{
3830 						toc * next = malloc(sizeof(toc));
3831 						next->sibling = NULL;
3832 						next->nesting = level;
3833 						next->text = (char*) title;
3834 						if (!current) {
3835 							root = next;
3836 						} else {
3837 							current->sibling = next;
3838 						}
3839 						current = next;
3840 					}
3841 				} else if (i > 0 && is_headerline((uint8_t*)data+i, size-i)){
3842 					size_t j = i - 1;
3843 					int somechar = 0;
3844 					while (data[j - 1] != '\n') {
3845 						if (j == 0)
3846 							break;
3847 						if (!is_separator(data[j -1]))
3848 							somechar = 1;
3849 						j --;
3850 					}
3851 					if ((i - j) > 1 && somechar) {
3852 						size_t level = data[i] == '-' ? 2 : 1;
3853 						char * title = malloc(i - j - 1);
3854 						memcpy(title, data+j, i-j-2);
3855 						title[i - j - 2] = 0;
3856 
3857 						toc * next = malloc(sizeof(toc));
3858 						next->sibling = NULL;
3859 						next->nesting = level;
3860 						next->text = (char*) title;
3861 						if (!current) {
3862 							root = next;
3863 						} else {
3864 							current->sibling = next;
3865 						}
3866 						current = next;
3867 
3868 					}
3869 					/* fprintf(stderr, "(document.c: generate_toc()): Headerline not yet implemented\n"); */
3870 					//printf("Header line!\n");
3871 				} else if (is_codefence((uint8_t*)data+i, size-i, NULL, NULL)) {
3872 					code_block = data[i];
3873 				}
3874 			} else if (data[i] == code_block && is_codefence((uint8_t*)data+i, size-i, NULL, NULL)) {
3875 				code_block = 0;
3876 			}
3877 		}
3878 		if (!code_block && data[i] == '@' && startsWith("@include(", (char*)data+i))
3879 		{
3880 			size_t text_size;
3881 			char * text = load_text((uint8_t*)data+i, size-i, doc->base_folder, &text_size);
3882 			if (text_size && text)
3883 			{
3884 
3885 				toc * t = generate_toc(doc,(const uint8_t*) text, text_size, current);
3886 				if (!root && t)
3887 				{
3888 					root = t;
3889 				}
3890 				free(text);
3891 			}
3892 		}
3893 	}
3894 	return root;
3895 }
3896 
3897 
document_metadata(const uint8_t * data,size_t size)3898 metadata* document_metadata(const uint8_t *data, size_t size)
3899 {
3900 	return parse_yaml(data, size);
3901 }
3902 
3903 void
hoedown_document_render(hoedown_document * doc,hoedown_buffer * ob,const uint8_t * data,size_t size,int position)3904 hoedown_document_render(hoedown_document *doc, hoedown_buffer *ob, const uint8_t *data, size_t size, int position)
3905 {
3906 
3907 	int footnotes_enabled;
3908 
3909 	/* reset the references table */
3910 	memset(&doc->refs, 0x0, REF_TABLE_SIZE * sizeof(void *));
3911 
3912 	footnotes_enabled = doc->ext_flags & HOEDOWN_EXT_FOOTNOTES;
3913 
3914 	/* reset the footnotes lists */
3915 	if (footnotes_enabled) {
3916 		memset(&doc->footnotes_found, 0x0, sizeof(doc->footnotes_found));
3917 		memset(&doc->footnotes_used, 0x0, sizeof(doc->footnotes_used));
3918 	}
3919 	html_counter counter = {0,0,0,0};
3920 	find_references(doc, data, size, &counter);
3921 
3922 
3923 	doc->table_of_contents = generate_toc(doc, data, size, NULL);
3924 
3925 	metadata * meta = parse_yaml(data, size);
3926 	doc->document_metadata = meta;
3927 	doc->data.meta = meta;
3928 
3929 	if (doc->md.head)
3930 		doc->md.head(ob, meta, doc->extensions);
3931 	if (doc->md.begin)
3932 		doc->md.begin(ob, &doc->data);
3933 	render_metadata(doc, ob, meta);
3934 
3935 	if (doc->md.inner)
3936 		doc->md.inner(ob, &doc->data);
3937 
3938 	sub_render(doc, ob, data, size, position);
3939 	/* footnotes */
3940 	if (footnotes_enabled)
3941 		parse_footnote_list(ob, doc, &doc->footnotes_used);
3942 
3943 	if (doc->md.doc_footer)
3944 		doc->md.doc_footer(ob, 0, &doc->data);
3945 	if (doc->md.end)
3946 		doc->md.end(ob, doc->extensions, &doc->data);
3947 	/* clean-up */
3948 
3949 	free_link_refs(doc->refs);
3950 	if (footnotes_enabled) {
3951 		free_footnote_list(&doc->footnotes_found, 1);
3952 		free_footnote_list(&doc->footnotes_used, 0);
3953 	}
3954 
3955 	assert(doc->work_bufs[BUFFER_SPAN].size == 0);
3956 	assert(doc->work_bufs[BUFFER_BLOCK].size == 0);
3957 }
3958 
3959 void
hoedown_document_render_inline(hoedown_document * doc,hoedown_buffer * ob,const uint8_t * data,size_t size,int position)3960 hoedown_document_render_inline(hoedown_document *doc, hoedown_buffer *ob, const uint8_t *data, size_t size, int position)
3961 {
3962 	size_t i = 0, mark;
3963 	hoedown_buffer *text = hoedown_buffer_new(64);
3964 
3965 	/* reset the references table */
3966 	memset(&doc->refs, 0x0, REF_TABLE_SIZE * sizeof(void *));
3967 
3968 	/* first pass: expand tabs and process newlines */
3969 	hoedown_buffer_grow(text, size);
3970 	while (1) {
3971 		mark = i;
3972 		while (i < size && data[i] != '\n' && data[i] != '\r')
3973 			i++;
3974 
3975 		expand_tabs(text, data + mark, i - mark);
3976 
3977 		if (i >= size)
3978 			break;
3979 
3980 		while (i < size && (data[i] == '\n' || data[i] == '\r')) {
3981 			/* add one \n per newline */
3982 			if (data[i] == '\n' || (i + 1 < size && data[i + 1] != '\n'))
3983 				hoedown_buffer_putc(text, '\n');
3984 			i++;
3985 		}
3986 	}
3987 
3988 	/* second pass: actual rendering */
3989 	hoedown_buffer_grow(ob, text->size + (text->size >> 1));
3990 
3991 	if (doc->md.doc_header)
3992 		doc->md.doc_header(ob, 1, &doc->data);
3993 
3994 	parse_inline(ob, doc, text->data, text->size);
3995 
3996 	if (doc->md.doc_footer)
3997 		doc->md.doc_footer(ob, 1, &doc->data);
3998 
3999 	/* clean-up */
4000 	hoedown_buffer_free(text);
4001 	assert(doc->work_bufs[BUFFER_SPAN].size == 0);
4002 	assert(doc->work_bufs[BUFFER_BLOCK].size == 0);
4003 }
4004 
4005 void
free_references(reference * ref)4006 free_references(reference * ref)
4007 {
4008 	if (ref)
4009 	{
4010 		free(ref->id);
4011 		free_references(ref->next);
4012 		free(ref->next);
4013 	}
4014 }
4015 
4016 void
free_toc(toc * ToC)4017 free_toc(toc * ToC)
4018 {
4019 	if (ToC)
4020 	{
4021 		free(ToC->text);
4022 		free_toc(ToC->sibling);
4023 		free(ToC);
4024 	}
4025 }
4026 
4027 void
free_meta(metadata * meta)4028 free_meta(metadata * meta)
4029 {
4030 	if (!meta)
4031 		return;
4032 	if (meta->affiliation)
4033 		free(meta->affiliation);
4034 	if (meta->keywords)
4035 		free(meta->keywords);
4036 	if (meta->style)
4037 		free(meta->style);
4038 	if (meta->title)
4039 		free(meta->title);
4040 	free_strings(meta->authors);
4041 	free(meta);
4042 }
4043 
4044 void
hoedown_document_free(hoedown_document * doc)4045 hoedown_document_free(hoedown_document *doc)
4046 {
4047 	size_t i;
4048 
4049 	for (i = 0; i < (size_t)doc->work_bufs[BUFFER_SPAN].asize; ++i)
4050 		hoedown_buffer_free(doc->work_bufs[BUFFER_SPAN].item[i]);
4051 
4052 	for (i = 0; i < (size_t)doc->work_bufs[BUFFER_BLOCK].asize; ++i)
4053 		hoedown_buffer_free(doc->work_bufs[BUFFER_BLOCK].item[i]);
4054 
4055 	hoedown_stack_uninit(&doc->work_bufs[BUFFER_SPAN]);
4056 	hoedown_stack_uninit(&doc->work_bufs[BUFFER_BLOCK]);
4057 	free_references(doc->floating_references);
4058 	free_toc(doc->table_of_contents);
4059 	free_meta(doc->document_metadata);
4060 	if (doc->base_folder)
4061 		free(doc->base_folder);
4062 	free(doc);
4063 }
4064