1 /* markdown: a C implementation of John Gruber's Markdown markup language.
2  *
3  * Copyright (C) 2007 David L Parsons.
4  * The redistribution terms are provided in the COPYRIGHT file that must
5  * be distributed with this source code.
6  */
7 #include <stdio.h>
8 #include <string.h>
9 #include <stdarg.h>
10 #include <stdlib.h>
11 #include <time.h>
12 #include <ctype.h>
13 
14 #include "config.h"
15 
16 #include "cstring.h"
17 #include "markdown.h"
18 #include "amalloc.h"
19 
20 typedef int (*stfu)(const void*,const void*);
21 typedef void (*spanhandler)(MMIOT*,int);
22 
23 /* forward declarations */
24 static void text(MMIOT *f);
25 static Paragraph *display(Paragraph*, MMIOT*);
26 
27 /* externals from markdown.c */
28 int __mkd_footsort(Footnote *, Footnote *);
29 
30 /*
31  * push text into the generator input buffer
32  */
33 static void
push(char * bfr,int size,MMIOT * f)34 push(char *bfr, int size, MMIOT *f)
35 {
36     while ( size-- > 0 )
37 	EXPAND(f->in) = *bfr++;
38 }
39 
40 
41 /*
42  * push a character into the generator input buffer
43  */
44 static void
pushc(char c,MMIOT * f)45 pushc(char c, MMIOT *f)
46 {
47     EXPAND(f->in) = c;
48 }
49 
50 
51 /* look <i> characters ahead of the cursor.
52  */
53 static inline int
peek(MMIOT * f,int i)54 peek(MMIOT *f, int i)
55 {
56 
57     i += (f->isp-1);
58 
59     return (i >= 0) && (i < S(f->in)) ? (unsigned char)T(f->in)[i] : EOF;
60 }
61 
62 
63 /* pull a byte from the input buffer
64  */
65 static inline unsigned int
pull(MMIOT * f)66 pull(MMIOT *f)
67 {
68     return ( f->isp < S(f->in) ) ? (unsigned char)T(f->in)[f->isp++] : EOF;
69 }
70 
71 
72 /* return a pointer to the current position in the input buffer.
73  */
74 static inline char*
cursor(MMIOT * f)75 cursor(MMIOT *f)
76 {
77     return T(f->in) + f->isp;
78 }
79 
80 
81 static inline int
isthisspace(MMIOT * f,int i)82 isthisspace(MMIOT *f, int i)
83 {
84     int c = peek(f, i);
85 
86     if ( c == EOF )
87 	return 1;
88     if ( c & 0x80 )
89 	return 0;
90     return isspace(c) || (c < ' ');
91 }
92 
93 
94 static inline int
isthisalnum(MMIOT * f,int i)95 isthisalnum(MMIOT *f, int i)
96 {
97     int c = peek(f, i);
98 
99     return (c != EOF) && isalnum(c);
100 }
101 
102 
103 static inline int
isthisnonword(MMIOT * f,int i)104 isthisnonword(MMIOT *f, int i)
105 {
106     return isthisspace(f, i) || ispunct(peek(f,i));
107 }
108 
109 
/* return/set the current cursor position
 * (when setting the current cursor position we also need to flush the
 * last character written cache)
 *
 * both arguments are parenthesized in the expansion so that any
 * pointer expression (eg: &sub) and any position expression can be
 * passed safely.
 */
#define mmiotseek(f,x)	(((f)->isp = (x)), ((f)->last = 0))
#define mmiottell(f)	((f)->isp)
116 
117 
118 /* move n characters forward ( or -n characters backward) in the input buffer.
119  */
120 static void
shift(MMIOT * f,int i)121 shift(MMIOT *f, int i)
122 {
123     if (f->isp + i >= 0 )
124 	f->isp += i;
125 }
126 
127 
128 /* Qchar()
129  */
130 static void
Qchar(int c,MMIOT * f)131 Qchar(int c, MMIOT *f)
132 {
133     block *cur;
134 
135     if ( S(f->Q) == 0 ) {
136 	cur = &EXPAND(f->Q);
137 	memset(cur, 0, sizeof *cur);
138 	cur->b_type = bTEXT;
139     }
140     else
141 	cur = &T(f->Q)[S(f->Q)-1];
142 
143     EXPAND(cur->b_text) = c;
144 
145 }
146 
147 
148 /* Qstring()
149  */
150 static void
Qstring(char * s,MMIOT * f)151 Qstring(char *s, MMIOT *f)
152 {
153     while (*s)
154 	Qchar(*s++, f);
155 }
156 
157 
158 /* Qwrite()
159  */
160 static void
Qwrite(char * s,int size,MMIOT * f)161 Qwrite(char *s, int size, MMIOT *f)
162 {
163     while (size-- > 0)
164 	Qchar(*s++, f);
165 }
166 
167 
168 /* Qprintf()
169  */
170 static void
Qprintf(MMIOT * f,char * fmt,...)171 Qprintf(MMIOT *f, char *fmt, ...)
172 {
173     char bfr[80];
174     va_list ptr;
175 
176     va_start(ptr,fmt);
177     vsnprintf(bfr, sizeof bfr, fmt, ptr);
178     va_end(ptr);
179     Qstring(bfr, f);
180 }
181 
182 
183 /* Qanchor() prints out a suitable-for-id-tag version of a string
184  */
185 static void
Qanchor(struct line * p,MMIOT * f)186 Qanchor(struct line *p, MMIOT *f)
187 {
188     mkd_string_to_anchor(T(p->text), S(p->text),
189 			 (mkd_sta_function_t)Qchar, f, 1, f);
190 }
191 
192 
193 /* Qem()
194  */
195 static void
Qem(MMIOT * f,char c,int count)196 Qem(MMIOT *f, char c, int count)
197 {
198     block *p = &EXPAND(f->Q);
199 
200     memset(p, 0, sizeof *p);
201     p->b_type = (c == '*') ? bSTAR : bUNDER;
202     p->b_char = c;
203     p->b_count = count;
204 
205     memset(&EXPAND(f->Q), 0, sizeof(block));
206 }
207 
208 
209 /* generate html from a markup fragment
210  */
211 void
___mkd_reparse(char * bfr,int size,mkd_flag_t flags,MMIOT * f,char * esc)212 ___mkd_reparse(char *bfr, int size, mkd_flag_t flags, MMIOT *f, char *esc)
213 {
214     MMIOT sub;
215     struct escaped e;
216 
217     ___mkd_initmmiot(&sub, f->footnotes);
218 
219     sub.flags = f->flags | flags;
220     sub.cb = f->cb;
221     sub.ref_prefix = f->ref_prefix;
222 
223     if ( esc ) {
224 	sub.esc = &e;
225 	e.up = f->esc;
226 	e.text = esc;
227     }
228     else
229 	sub.esc = f->esc;
230 
231     push(bfr, size, &sub);
232     pushc(0, &sub);
233     S(sub.in)--;
234 
235     text(&sub);
236     ___mkd_emblock(&sub);
237 
238     Qwrite(T(sub.out), S(sub.out), f);
239     /* inherit the last character printed from the reparsed
240      * text;  this way superscripts can work when they're
241      * applied to something embedded in a link
242      */
243     f->last = sub.last;
244 
245     ___mkd_freemmiot(&sub, f->footnotes);
246 }
247 
248 
249 /*
250  * check the escape list for special cases
251  */
252 static int
escaped(MMIOT * f,char c)253 escaped(MMIOT *f, char c)
254 {
255     struct escaped *thing = f->esc;
256 
257     while ( thing ) {
258 	if ( strchr(thing->text, c) )
259 	    return 1;
260 	thing = thing->up;
261     }
262     return 0;
263 }
264 
265 
266 /*
267  * write out a url, escaping problematic characters
268  */
269 static void
puturl(char * s,int size,MMIOT * f,int display)270 puturl(char *s, int size, MMIOT *f, int display)
271 {
272     unsigned char c;
273 
274     while ( size-- > 0 ) {
275 	c = *s++;
276 
277 	if ( c == '\\' && size-- > 0 ) {
278 	    c = *s++;
279 
280 	    if ( !( ispunct(c) || isspace(c) ) )
281 		Qchar('\\', f);
282 	}
283 
284 	if ( c == '&' )
285 	    Qstring("&amp;", f);
286 	else if ( c == '<' )
287 	    Qstring("&lt;", f);
288 	else if ( c == '"' )
289 	    Qstring("%22", f);
290 	else if ( isalnum(c) || ispunct(c) || (display && isspace(c)) )
291 	    Qchar(c, f);
292 	else if ( c == MKD_EOLN )	/* untokenize hard return */
293 	    Qstring("  ", f);
294 	else
295 	    Qprintf(f, "%%%02X", c);
296     }
297 }
298 
299 
300 /* advance forward until the next character is not whitespace
301  */
302 static int
eatspace(MMIOT * f)303 eatspace(MMIOT *f)
304 {
305     int c;
306 
307     for ( ; ((c=peek(f, 1)) != EOF) && isspace(c); pull(f) )
308 	;
309     return c;
310 }
311 
312 
313 /* (match (a (nested (parenthetical (string.)))))
314  */
315 static int
parenthetical(int in,int out,MMIOT * f)316 parenthetical(int in, int out, MMIOT *f)
317 {
318     int size, indent, c;
319 
320     for ( indent=1,size=0; indent; size++ ) {
321 	if ( (c = pull(f)) == EOF )
322 	    return EOF;
323 	else if ( (c == '\\') && (peek(f,1) == out || peek(f,1) == in) ) {
324 	    ++size;
325 	    pull(f);
326 	}
327 	else if ( c == in )
328 	    ++indent;
329 	else if ( c == out )
330 	    --indent;
331     }
332     return size ? (size-1) : 0;
333 }
334 
335 
336 /* extract a []-delimited label from the input stream.
337  */
338 static int
linkylabel(MMIOT * f,Cstring * res)339 linkylabel(MMIOT *f, Cstring *res)
340 {
341     char *ptr = cursor(f);
342     int size;
343 
344     if ( (size = parenthetical('[',']',f)) != EOF ) {
345 	T(*res) = ptr;
346 	S(*res) = size;
347 	return 1;
348     }
349     return 0;
350 }
351 
352 
353 /* see if the quote-prefixed linky segment is actually a title.
354  */
355 static int
linkytitle(MMIOT * f,char quote,Footnote * ref)356 linkytitle(MMIOT *f, char quote, Footnote *ref)
357 {
358     int whence = mmiottell(f);
359     char *title = cursor(f);
360     char *e;
361     register int c;
362 
363     while ( (c = pull(f)) != EOF ) {
364 	e = cursor(f);
365 	if ( c == quote ) {
366 	    if ( (c = eatspace(f)) == ')' ) {
367 		T(ref->title) = 1+title;
368 		S(ref->title) = (e-title)-2;
369 		return 1;
370 	    }
371 	}
372     }
373     mmiotseek(f, whence);
374     return 0;
375 }
376 
377 
378 /* extract a =HHHxWWW size from the input stream
379  */
380 static int
linkysize(MMIOT * f,Footnote * ref)381 linkysize(MMIOT *f, Footnote *ref)
382 {
383     int height=0, width=0;
384     int whence = mmiottell(f);
385     int c;
386 
387     if ( isspace(peek(f,0)) ) {
388 	pull(f);	/* eat '=' */
389 
390 	for ( c = pull(f); isdigit(c); c = pull(f))
391 	    width = (width * 10) + (c - '0');
392 
393 	if ( c == 'x' ) {
394 	    for ( c = pull(f); isdigit(c); c = pull(f))
395 		height = (height*10) + (c - '0');
396 
397 	    if ( isspace(c) )
398 		c = eatspace(f);
399 
400 	    if ( (c == ')') || ((c == '\'' || c == '"') && linkytitle(f, c, ref)) ) {
401 		ref->height = height;
402 		ref->width  = width;
403 		return 1;
404 	    }
405 	}
406     }
407     mmiotseek(f, whence);
408     return 0;
409 }
410 
411 
412 /* extract a <...>-encased url from the input stream.
413  * (markdown 1.0.2b8 compatibility; older versions
414  * of markdown treated the < and > as syntactic
415  * sugar that didn't have to be there.  1.0.2b8
416  * requires a closing >, and then falls into the
417  * title or closing )
418  */
419 static int
linkybroket(MMIOT * f,int image,Footnote * p)420 linkybroket(MMIOT *f, int image, Footnote *p)
421 {
422     int c;
423     int good = 0;
424 
425     T(p->link) = cursor(f);
426     for ( S(p->link)=0; (c = pull(f)) != '>'; ++S(p->link) ) {
427 	/* pull in all input until a '>' is found, or die trying.
428 	 */
429 	if ( c == EOF )
430 	    return 0;
431 	else if ( (c == '\\') && ispunct(peek(f,2)) ) {
432 	    ++S(p->link);
433 	    pull(f);
434 	}
435     }
436 
437     c = eatspace(f);
438 
439     /* next nonspace needs to be a title, a size, or )
440      */
441     if ( ( c == '\'' || c == '"' ) && linkytitle(f,c,p) )
442 	good=1;
443     else if ( image && (c == '=') && linkysize(f,p) )
444 	good=1;
445     else
446 	good=( c == ')' );
447 
448     if ( good ) {
449 	if ( peek(f, 1) == ')' )
450 	    pull(f);
451 
452 	___mkd_tidy(&p->link);
453     }
454 
455     return good;
456 } /* linkybroket */
457 
458 
459 /* extract a (-prefixed url from the input stream.
460  * the label is either of the format `<link>`, where I
461  * extract until I find a >, or it is of the format
462  * `text`, where I extract until I reach a ')', a quote,
463  * or (if image) a '='
464  */
465 static int
linkyurl(MMIOT * f,int image,Footnote * p)466 linkyurl(MMIOT *f, int image, Footnote *p)
467 {
468     int c;
469     int mayneedtotrim=0;
470 
471     if ( (c = eatspace(f)) == EOF )
472 	return 0;
473 
474     if ( c == '<' ) {
475 	pull(f);
476 	if ( !is_flag_set(f->flags, MKD_1_COMPAT) )
477 	    return linkybroket(f,image,p);
478 	mayneedtotrim=1;
479     }
480 
481     T(p->link) = cursor(f);
482     for ( S(p->link)=0; (c = peek(f,1)) != ')'; ++S(p->link) ) {
483 	if ( c == EOF )
484 	    return 0;
485 	else if ( (c == '"' || c == '\'') && linkytitle(f, c, p) )
486 	    break;
487 	else if ( image && (c == '=') && linkysize(f, p) )
488 	    break;
489 	else if ( (c == '\\') && ispunct(peek(f,2)) ) {
490 	    ++S(p->link);
491 	    pull(f);
492 	}
493 	pull(f);
494     }
495     if ( peek(f, 1) == ')' )
496 	pull(f);
497 
498     ___mkd_tidy(&p->link);
499 
500     if ( mayneedtotrim && (T(p->link)[S(p->link)-1] == '>') )
501 	--S(p->link);
502 
503     return 1;
504 }
505 
506 
507 
/* prefixes for <automatic links>;  isautoprefix() accepts any text
 * that starts with one of these (compared without regard to case)
 */
static struct _protocol {
    char *name;		/* the prefix, trailing ':' included */
    int   nlen;		/* its length, not counting the NUL */
} protocol[] = {
#define _known(x)	{ .name = x, .nlen = (sizeof x)-1 }
    _known( "https:" ),
    _known( "http:" ),
    _known( "news:" ),
    _known( "ftp:" ),
#undef _known
};
#define NRPROTOCOLS	(sizeof protocol / sizeof protocol[0])
522 
523 
524 static int
isautoprefix(char * text,int size)525 isautoprefix(char *text, int size)
526 {
527     int i;
528     struct _protocol *p;
529 
530     for (i=0, p=protocol; i < NRPROTOCOLS; i++, p++)
531 	if ( (size >= p->nlen) && strncasecmp(text, p->name, p->nlen) == 0 )
532 	    return 1;
533     return 0;
534 }
535 
536 
/*
 * all the tag types that linkylinky can produce are
 * defined by this structure.
 */
typedef struct linkytype {
    char      *pat;	/* pseudo-protocol prefix that selects this tag, or 0 */
    int      szpat;	/* length of pat; that many bytes of the link are skipped */
    char *link_pfx;	/* tag prefix up to the link    (eg: `<a href="`)  */
    char *link_sfx;	/* what follows the link        (eg: `"`)          */
    int        WxH;	/* this tag allows width x height arguments */
    char *text_pfx;	/* text prefix                  (eg: `>`)          */
    char *text_sfx;	/* text suffix                  (eg: `</a>`)       */
    int      flags;	/* reparse flags; the tag is refused if any are already set */
    int      kind;	/* tag is url or something else? */
#define IS_URL	0x01
} linkytype;
553 
554 static linkytype imaget = { 0, 0, "<img src=\"", "\"",
555 			     1, " alt=\"", "\" />", MKD_NOIMAGE|MKD_TAGTEXT, IS_URL };
556 static linkytype linkt  = { 0, 0, "<a href=\"", "\"",
557                              0, ">", "</a>", MKD_NOLINKS, IS_URL };
558 
559 /*
560  * pseudo-protocols for [][];
561  *
562  * id: generates <a id="link">tag</a>
563  * class: generates <span class="link">tag</span>
564  * raw: just dump the link without any processing
565  */
566 static linkytype specials[] = {
567     { "id:", 3, "<span id=\"", "\"", 0, ">", "</span>", 0, 0 },
568     { "raw:", 4, 0, 0, 0, 0, 0, MKD_NOHTML, 0 },
569     { "lang:", 5, "<span lang=\"", "\"", 0, ">", "</span>", 0, 0 },
570     { "abbr:", 5, "<abbr title=\"", "\"", 0, ">", "</abbr>", 0, 0 },
571     { "class:", 6, "<span class=\"", "\"", 0, ">", "</span>", 0, 0 },
572 } ;
573 
574 #define NR(x)	(sizeof x / sizeof x[0])
575 
576 /* see if t contains one of our pseudo-protocols.
577  */
578 static linkytype *
pseudo(Cstring t)579 pseudo(Cstring t)
580 {
581     int i;
582     linkytype *r;
583 
584     for ( i=0, r=specials; i < NR(specials); i++,r++ ) {
585 	if ( (S(t) > r->szpat) && (strncasecmp(T(t), r->pat, r->szpat) == 0) )
586 	    return r;
587     }
588     return 0;
589 }
590 
591 
592 /* print out the start of an `img' or `a' tag, applying callbacks as needed.
593  */
594 static void
printlinkyref(MMIOT * f,linkytype * tag,char * link,int size)595 printlinkyref(MMIOT *f, linkytype *tag, char *link, int size)
596 {
597     char *edit;
598 
599     if ( is_flag_set(f->flags, IS_LABEL) )
600 	return;
601 
602     Qstring(tag->link_pfx, f);
603 
604     if ( tag->kind & IS_URL ) {
605 	if ( f->cb && f->cb->e_url && (edit = (*f->cb->e_url)(link, size, f->cb->e_data)) ) {
606 	    puturl(edit, strlen(edit), f, 0);
607 	    if ( f->cb->e_free ) (*f->cb->e_free)(edit, f->cb->e_data);
608 	}
609 	else
610 	    puturl(link + tag->szpat, size - tag->szpat, f, 0);
611     }
612     else
613 	___mkd_reparse(link + tag->szpat, size - tag->szpat, MKD_TAGTEXT, f, 0);
614 
615     Qstring(tag->link_sfx, f);
616 
617     if ( f->cb && f->cb->e_flags && (edit = (*f->cb->e_flags)(link, size, f->cb->e_data)) ) {
618 	Qchar(' ', f);
619 	Qstring(edit, f);
620 	if ( f->cb->e_free ) (*f->cb->e_free)(edit, f->cb->e_data);
621     }
622 } /* printlinkyref */
623 
624 
625 /* helper function for php markdown extra footnotes; allow the user to
626  * define a prefix tag instead of just `fn`
627  */
628 static char *
p_or_nothing(p)629 p_or_nothing(p)
630 MMIOT *p;
631 {
632     return p->ref_prefix ? p->ref_prefix : "fn";
633 }
634 
635 
636 /* php markdown extra/daring fireball style print footnotes
637  */
638 static int
extra_linky(MMIOT * f,Cstring text,Footnote * ref)639 extra_linky(MMIOT *f, Cstring text, Footnote *ref)
640 {
641     if ( ref->flags & REFERENCED )
642 	return 0;
643 
644     if ( f->flags & IS_LABEL )
645     	___mkd_reparse(T(text), S(text), linkt.flags, f, 0);
646     else {
647 	ref->flags |= REFERENCED;
648 	ref->refnumber = ++ f->footnotes->reference;
649 	Qprintf(f, "<sup id=\"%sref:%d\"><a href=\"#%s:%d\" rel=\"footnote\">%d</a></sup>",
650 		p_or_nothing(f), ref->refnumber,
651 		p_or_nothing(f), ref->refnumber, ref->refnumber);
652     }
653     return 1;
654 } /* extra_linky */
655 
656 
657 
658 /* check a url (or url fragment to see that it begins with a known good
659  * protocol (or no protocol at all)
660  */
661 static int
safelink(Cstring link)662 safelink(Cstring link)
663 {
664     char *p, *colon;
665 
666     if ( T(link) == 0 )	/* no link; safe */
667 	return 1;
668 
669     p = T(link);
670     if ( (colon = memchr(p, ':', S(link))) == 0 )
671 	return 1; /* no protocol specified: safe */
672 
673     if ( !isalpha(*p) )	/* protocol/method is [alpha][alnum or '+.-'] */
674 	return 1;
675     while ( ++p < colon )
676 	if ( !(isalnum(*p) || *p == '.' || *p == '+' || *p == '-') )
677 	    return 1;
678 
679     return isautoprefix(T(link), S(link));
680 }
681 
682 
683 /* print out a linky (or fail if it's Not Allowed)
684  */
685 static int
linkyformat(MMIOT * f,Cstring text,int image,Footnote * ref)686 linkyformat(MMIOT *f, Cstring text, int image, Footnote *ref)
687 {
688     linkytype *tag;
689 
690 
691     if ( image )
692 	tag = &imaget;
693     else if ( tag = pseudo(ref->link) ) {
694 	if ( is_flag_set(f->flags, MKD_NO_EXT) || is_flag_set(f->flags, MKD_SAFELINK) )
695 	    return 0;
696     }
697     else if ( is_flag_set(f->flags, MKD_SAFELINK) && !safelink(ref->link) )
698 	/* if MKD_SAFELINK, only accept links that are local or
699 	 * a well-known protocol
700 	 */
701 	return 0;
702     else
703 	tag = &linkt;
704 
705     if ( f->flags & tag->flags )
706 	return 0;
707 
708     if ( is_flag_set(f->flags, IS_LABEL) )
709 	___mkd_reparse(T(text), S(text), tag->flags, f, 0);
710     else if ( tag->link_pfx ) {
711 	printlinkyref(f, tag, T(ref->link), S(ref->link));
712 
713 	if ( tag->WxH ) {
714 	    if ( ref->height ) Qprintf(f," height=\"%d\"", ref->height);
715 	    if ( ref->width ) Qprintf(f, " width=\"%d\"", ref->width);
716 	}
717 
718 	if ( S(ref->title) ) {
719 	    Qstring(" title=\"", f);
720 	    ___mkd_reparse(T(ref->title), S(ref->title), MKD_TAGTEXT, f, 0);
721 	    Qchar('"', f);
722 	}
723 
724 	Qstring(tag->text_pfx, f);
725 	___mkd_reparse(T(text), S(text), tag->flags, f, 0);
726 	Qstring(tag->text_sfx, f);
727     }
728     else
729 	Qwrite(T(ref->link) + tag->szpat, S(ref->link) - tag->szpat, f);
730 
731     return 1;
732 } /* linkyformat */
733 
734 
735 /*
736  * process embedded links and images
737  */
738 static int
linkylinky(int image,MMIOT * f)739 linkylinky(int image, MMIOT *f)
740 {
741     int start = mmiottell(f);
742     Cstring name;
743     Footnote key, *ref;
744 
745     int status = 0;
746     int extra_footnote = 0;
747 
748     CREATE(name);
749     memset(&key, 0, sizeof key);
750 
751     if ( linkylabel(f, &name) ) {
752 	if ( peek(f,1) == '(' ) {
753 	    pull(f);
754 	    if ( linkyurl(f, image, &key) )
755 		status = linkyformat(f, name, image, &key);
756 	}
757 	else {
758 	    int goodlink, implicit_mark = mmiottell(f);
759 
760 	    if ( isspace(peek(f,1)) )
761 		pull(f);
762 
763 	    if ( peek(f,1) == '[' ) {
764 		pull(f);	/* consume leading '[' */
765 		goodlink = linkylabel(f, &key.tag);
766 	    }
767 	    else {
768 		/* new markdown implicit name syntax doesn't
769 		 * require a second []
770 		 */
771 		mmiotseek(f, implicit_mark);
772 		goodlink = !is_flag_set(f->flags, MKD_1_COMPAT);
773 
774 		if ( is_flag_set(f->flags, MKD_EXTRA_FOOTNOTE) && (!image) && S(name) && T(name)[0] == '^' )
775 		    extra_footnote = 1;
776 	    }
777 
778 	    if ( goodlink ) {
779 		if ( !S(key.tag) ) {
780 		    DELETE(key.tag);
781 		    T(key.tag) = T(name);
782 		    S(key.tag) = S(name);
783 		}
784 
785 		if ( ref = bsearch(&key, T(f->footnotes->note),
786 					 S(f->footnotes->note),
787 					 sizeof key, (stfu)__mkd_footsort) ) {
788 		    if ( extra_footnote )
789 			status = extra_linky(f,name,ref);
790 		    else
791 			status = linkyformat(f, name, image, ref);
792 		}
793 	    }
794 	}
795     }
796 
797     DELETE(name);
798     ___mkd_freefootnote(&key);
799 
800     if ( status == 0 )
801 	mmiotseek(f, start);
802 
803     return status;
804 }
805 
806 
807 /* write a character to output, doing text escapes ( & -> &amp;,
808  *                                          > -> &gt; < -> &lt; )
809  */
810 static void
cputc(int c,MMIOT * f)811 cputc(int c, MMIOT *f)
812 {
813     switch (c) {
814     case '&':   Qstring("&amp;", f); break;
815     case '>':   Qstring("&gt;", f); break;
816     case '<':   Qstring("&lt;", f); break;
817     default :   Qchar(c, f); break;
818     }
819 }
820 
821 
822 /*
823  * convert an email address to a string of nonsense
824  */
825 static void
mangle(char * s,int len,MMIOT * f)826 mangle(char *s, int len, MMIOT *f)
827 {
828     while ( len-- > 0 ) {
829 #if DEBIAN_GLITCH
830 	Qprintf(f, "&#%02d;", *((unsigned char*)(s++)) );
831 #else
832 	Qstring("&#", f);
833 	Qprintf(f, COINTOSS() ? "x%02x;" : "%02d;", *((unsigned char*)(s++)) );
834 #endif
835     }
836 }
837 
838 
839 /* nrticks() -- count up a row of tick marks
840  */
841 static int
nrticks(int offset,int tickchar,MMIOT * f)842 nrticks(int offset, int tickchar, MMIOT *f)
843 {
844     int  tick = 0;
845 
846     while ( peek(f, offset+tick) == tickchar ) tick++;
847 
848     return tick;
849 } /* nrticks */
850 
851 
852 /* matchticks() -- match a certain # of ticks, and if that fails
853  *                 match the largest subset of those ticks.
854  *
855  *                 if a subset was matched, return the # of ticks
856  *		   that were matched.
857  */
858 static int
matchticks(MMIOT * f,int tickchar,int ticks,int * endticks)859 matchticks(MMIOT *f, int tickchar, int ticks, int *endticks)
860 {
861     int size, count, c;
862     int subsize=0, subtick=0;
863 
864     *endticks = ticks;
865     for (size = 0; (c=peek(f,size+ticks)) != EOF; size ++) {
866 	if ( (c == tickchar) && ( count = nrticks(size+ticks,tickchar,f)) ) {
867 	    if ( count == ticks )
868 		return size;
869 	    else if ( count ) {
870 		if ( (count > subtick) && (count < ticks) ) {
871 		    subsize = size;
872 		    subtick = count;
873 		}
874 		size += count;
875 	    }
876 	}
877     }
878     if ( subsize ) {
879 	*endticks = subtick;
880 	return subsize;
881     }
882     return 0;
883 } /* matchticks */
884 
885 
886 /* code() -- write a string out as code. The only characters that have
887  *           special meaning in a code block are * `<' and `&' , which
888  *           are /always/ expanded to &lt; and &amp;
889  */
890 static void
code(MMIOT * f,char * s,int length)891 code(MMIOT *f, char *s, int length)
892 {
893     int i,c;
894 
895     for ( i=0; i < length; i++ )
896 	if ( (c = s[i]) == MKD_EOLN)  /* expand back to 2 spaces */
897 	    Qstring("  ", f);
898 	else if ( c == '\\' && (i < length-1) && escaped(f, s[i+1]) )
899 	    cputc(s[++i], f);
900 	else
901 	    cputc(c, f);
902 } /* code */
903 
904 /*  delspan() -- write out a chunk of text, blocking with <del>...</del>
905  */
906 static void
delspan(MMIOT * f,int size)907 delspan(MMIOT *f, int size)
908 {
909     Qstring("<del>", f);
910     ___mkd_reparse(cursor(f)-1, size, 0, f, 0);
911     Qstring("</del>", f);
912 }
913 
914 
915 /*  codespan() -- write out a chunk of text as code, trimming one
916  *                space off the front and/or back as appropriate.
917  */
918 static void
codespan(MMIOT * f,int size)919 codespan(MMIOT *f, int size)
920 {
921     int i=0;
922 
923     if ( size > 1 && peek(f, size-1) == ' ' ) --size;
924     if ( peek(f,i) == ' ' ) ++i, --size;
925 
926     Qstring("<code>", f);
927     code(f, cursor(f)+(i-1), size);
928     Qstring("</code>", f);
929 } /* codespan */
930 
931 
932 /* before letting a tag through, validate against
933  * MKD_NOLINKS and MKD_NOIMAGE
934  */
935 static int
forbidden_tag(MMIOT * f)936 forbidden_tag(MMIOT *f)
937 {
938     int c = toupper(peek(f, 1));
939 
940     if ( is_flag_set(f->flags, MKD_NOHTML) )
941 	return 1;
942 
943     if ( c == 'A' && is_flag_set(f->flags, MKD_NOLINKS) && !isthisalnum(f,2) )
944 	return 1;
945     if ( c == 'I' && is_flag_set(f->flags, MKD_NOIMAGE)
946 		  && strncasecmp(cursor(f)+1, "MG", 2) == 0
947 		  && !isthisalnum(f,4) )
948 	return 1;
949     return 0;
950 }
951 
952 
/* is <ch> something that may appear in a mail address:  an
 * alphanumeric or one of the characters in <extra>?
 *
 * The byte is classified as an unsigned char (the <ctype.h> functions
 * are undefined for negative values other than EOF), and NUL is
 * rejected up front because strchr() would otherwise match it against
 * the terminator of <extra>.
 */
static int
mailchar(char ch, const char *extra)
{
    unsigned char c = (unsigned char)ch;

    if ( c == '\0' )
        return 0;

    return isalnum(c) || (strchr(extra, c) != 0);
}


/* Check a string to see if it looks like a mail address
 * "looks like a mail address" means alphanumeric + some
 * specials, then a `@`, then alphanumeric + some specials,
 * but with a `.`
 *
 * <p> need not be NUL-terminated; exactly <size> bytes are examined.
 * The part after the `@` may not start with a `.` and has to contain
 * a `.` that isn't its last character.   Returns 1 if the whole
 * string fits that description, 0 otherwise.
 */
static int
maybe_address(char *p, int size)
{
    int ok = 0;

    for ( ; (size > 0) && mailchar(*p, "._-+*"); ++p, --size)
        ;

    if ( ! ((size > 0) && *p == '@') )
        return 0;

    --size, ++p;

    if ( size && *p == '.' ) return 0;

    for ( ; (size > 0) && mailchar(*p, "._-+"); ++p, --size )
        if ( *p == '.' && size > 1 ) ok = 1;

    return size ? 0 : ok;
}
978 
979 
980 /* The size-length token at cursor(f) is either a mailto:, an
981  * implicit mailto:, one of the approved url protocols, or just
982  * plain old text.   If it's a mailto: or an approved protocol,
983  * linkify it, otherwise say "no"
984  */
985 static int
process_possible_link(MMIOT * f,int size)986 process_possible_link(MMIOT *f, int size)
987 {
988     int address= 0;
989     int mailto = 0;
990     char *text = cursor(f);
991 
992     if ( is_flag_set(f->flags, MKD_NOLINKS) ) return 0;
993 
994     if ( (size > 7) && strncasecmp(text, "mailto:", 7) == 0 ) {
995 	/* if it says it's a mailto, it's a mailto -- who am
996 	 * I to second-guess the user?
997 	 */
998 	address = 1;
999 	mailto = 7; 	/* 7 is the length of "mailto:"; we need this */
1000     }
1001     else
1002 	address = maybe_address(text, size);
1003 
1004     if ( address ) {
1005 	Qstring("<a href=\"", f);
1006 	if ( !mailto ) {
1007 	    /* supply a mailto: protocol if one wasn't attached */
1008 	    mangle("mailto:", 7, f);
1009 	}
1010 	mangle(text, size, f);
1011 	Qstring("\">", f);
1012 	mangle(text+mailto, size-mailto, f);
1013 	Qstring("</a>", f);
1014 	return 1;
1015     }
1016     else if ( isautoprefix(text, size) ) {
1017 	printlinkyref(f, &linkt, text, size);
1018 	Qchar('>', f);
1019 	puturl(text,size,f, 1);
1020 	Qstring("</a>", f);
1021 	return 1;
1022     }
1023     return 0;
1024 } /* process_possible_link */
1025 
1026 
1027 /* a < may be just a regular character, the start of an embedded html
1028  * tag, or the start of an <automatic link>.    If it's an automatic
1029  * link, we also need to know if it's an email address because if it
1030  * is we need to mangle it in our futile attempt to cut down on the
1031  * spaminess of the rendered page.
1032  */
1033 static int
maybe_tag_or_link(MMIOT * f)1034 maybe_tag_or_link(MMIOT *f)
1035 {
1036     int c, size;
1037     int maybetag = 1;
1038 
1039     if ( is_flag_set(f->flags, MKD_TAGTEXT) )
1040 	return 0;
1041 
1042     for ( size=0; (c = peek(f, size+1)) != '>'; size++) {
1043 	if ( c == EOF )
1044 	    return 0;
1045 	else if ( c == '\\' ) {
1046 	    maybetag=0;
1047 	    if ( peek(f, size+2) != EOF )
1048 		size++;
1049 	}
1050 	else if ( isspace(c) )
1051 	    break;
1052 	else if ( ! (c == '/'
1053 		     || (is_flag_set(f->flags, MKD_GITHUBTAGS) && (c == '-' || c == '_'))
1054 		     || isalnum(c) ) )
1055 	    maybetag=0;
1056     }
1057 
1058     if ( size ) {
1059 	if ( maybetag || (size >= 3 && strncmp(cursor(f), "!--", 3) == 0) ) {
1060 
1061 	    /* It is not a html tag unless we find the closing '>' in
1062 	     * the same block.
1063 	     */
1064 	    while ( (c = peek(f, size+1)) != '>' )
1065 		if ( c == EOF )
1066 		    return 0;
1067 		else
1068 		    size++;
1069 
1070 	    if ( forbidden_tag(f) )
1071 		return 0;
1072 
1073 	    Qchar('<', f);
1074 	    while ( ((c = peek(f, 1)) != EOF) && (c != '>') )
1075 		Qchar(pull(f), f);
1076 	    return 1;
1077 	}
1078 	else if ( !isspace(c) && process_possible_link(f, size) ) {
1079 	    shift(f, size+1);
1080 	    return 1;
1081 	}
1082     }
1083 
1084     return 0;
1085 }
1086 
1087 
1088 /* autolinking means that all inline html is <a href'ified>.   A
1089  * autolink url is alphanumerics, slashes, periods, underscores,
1090  * the at sign, colon, and the % character.
1091  */
1092 static int
maybe_autolink(MMIOT * f)1093 maybe_autolink(MMIOT *f)
1094 {
1095     register int c;
1096     int size;
1097 
1098     /* greedily scan forward for the end of a legitimate link.
1099      */
1100     for ( size=0; (c=peek(f, size+1)) != EOF; size++ ) {
1101 	if ( c == '\\' ) {
1102 	     if ( peek(f, size+2) != EOF )
1103 		++size;
1104 	}
1105 	else if ( c & 0x80 )	/* HACK: ignore utf-8 extended characters */
1106 	    continue;
1107 	else if ( isspace(c) || strchr("'\"()[]{}<>`", c) || c == MKD_EOLN )
1108 	    break;
1109     }
1110 
1111     if ( (size > 1) && process_possible_link(f, size) ) {
1112 	shift(f, size);
1113 	return 1;
1114     }
1115     return 0;
1116 }
1117 
1118 
1119 /* smartyquote code that's common for single and double quotes
1120  */
1121 static int
smartyquote(int * flags,char typeofquote,MMIOT * f)1122 smartyquote(int *flags, char typeofquote, MMIOT *f)
1123 {
1124     int bit = (typeofquote == 's') ? 0x01 : 0x02;
1125 
1126     if ( bit & (*flags) ) {
1127 	if ( isthisnonword(f,1) ) {
1128 	    Qprintf(f, "&r%cquo;", typeofquote);
1129 	    (*flags) &= ~bit;
1130 	    return 1;
1131 	}
1132     }
1133     else if ( isthisnonword(f,-1) && peek(f,1) != EOF ) {
1134 	Qprintf(f, "&l%cquo;", typeofquote);
1135 	(*flags) |= bit;
1136 	return 1;
1137     }
1138     return 0;
1139 }
1140 
1141 
1142 static int
islike(MMIOT * f,char * s)1143 islike(MMIOT *f, char *s)
1144 {
1145     int len;
1146     int i;
1147 
1148     if ( s[0] == '|' ) {
1149 	if ( !isthisnonword(f, -1) )
1150 	    return 0;
1151        ++s;
1152     }
1153 
1154     if ( !(len = strlen(s)) )
1155 	return 0;
1156 
1157     if ( s[len-1] == '|' ) {
1158 	if ( !isthisnonword(f,len-1) )
1159 	    return 0;
1160 	len--;
1161     }
1162 
1163     for (i=1; i < len; i++)
1164 	if (tolower(peek(f,i)) != s[i])
1165 	    return 0;
1166     return 1;
1167 }
1168 
1169 
/* the fixed substitutions smartypants() knows about.   the table is
 * searched in order, so a longer pattern has to come before any
 * shorter pattern it starts with (--- before --).
 */
static struct smarties {
    char c0;		/* the character that triggers this entry */
    char *pat;		/* pattern for islike() */
    char *entity;	/* written as &entity; -- 0 writes nothing */
    int shift;		/* how many more input characters to skip */
} smarties[] = {
    /* contractions and possessives */
    { '\'', "'s|",      "rsquo",  0 },
    { '\'', "'t|",      "rsquo",  0 },
    { '\'', "'re|",     "rsquo",  0 },
    { '\'', "'ll|",     "rsquo",  0 },
    { '\'', "'ve|",     "rsquo",  0 },
    { '\'', "'m|",      "rsquo",  0 },
    { '\'', "'d|",      "rsquo",  0 },
    /* dashes and ellipses */
    { '-',  "---",      "mdash",  2 },
    { '-',  "--",       "ndash",  1 },
    { '.',  "...",      "hellip", 2 },
    { '.',  ". . .",    "hellip", 4 },
    /* marks */
    { '(',  "(c)",      "copy",   2 },
    { '(',  "(r)",      "reg",    2 },
    { '(',  "(tm)",     "trade",  3 },
    /* fractions */
    { '3',  "|3/4|",    "frac34", 2 },
    { '3',  "|3/4ths|", "frac34", 2 },
    { '1',  "|1/2|",    "frac12", 2 },
    { '1',  "|1/4|",    "frac14", 2 },
    { '1',  "|1/4th|",  "frac14", 2 },
    /* swallowed without writing anything */
    { '&',  "&#0;",      0,       3 },
} ;
#define NRSMART ( sizeof smarties / sizeof smarties[0] )
1198 
1199 
1200 /* Smarty-pants-style chrome for quotes, -, ellipses, and (r)(c)(tm)
1201  */
1202 static int
smartypants(int c,int * flags,MMIOT * f)1203 smartypants(int c, int *flags, MMIOT *f)
1204 {
1205     int i;
1206 
1207     if ( is_flag_set(f->flags, MKD_NOPANTS)
1208       || is_flag_set(f->flags, MKD_TAGTEXT)
1209       || is_flag_set(f->flags, IS_LABEL) )
1210 	return 0;
1211 
1212     for ( i=0; i < NRSMART; i++)
1213 	if ( (c == smarties[i].c0) && islike(f, smarties[i].pat) ) {
1214 	    if ( smarties[i].entity )
1215 		Qprintf(f, "&%s;", smarties[i].entity);
1216 	    shift(f, smarties[i].shift);
1217 	    return 1;
1218 	}
1219 
1220     switch (c) {
1221     case '<' :  return 0;
1222     case '\'':  if ( smartyquote(flags, 's', f) ) return 1;
1223 		break;
1224 
1225     case '"':	if ( smartyquote(flags, 'd', f) ) return 1;
1226 		break;
1227 
1228     case '`':   if ( peek(f, 1) == '`' ) {
1229 		    int j = 2;
1230 
1231 		    while ( (c=peek(f,j)) != EOF ) {
1232 			if ( c == '\\' )
1233 			    j += 2;
1234 			else if ( c == '`' )
1235 			    break;
1236 			else if ( c == '\'' && peek(f, j+1) == '\'' ) {
1237 			    Qstring("&ldquo;", f);
1238 			    ___mkd_reparse(cursor(f)+1, j-2, 0, f, 0);
1239 			    Qstring("&rdquo;", f);
1240 			    shift(f,j+1);
1241 			    return 1;
1242 			}
1243 			else ++j;
1244 		    }
1245 
1246 		}
1247 		break;
1248     }
1249     return 0;
1250 } /* smartypants */
1251 
1252 
1253 /* process latex with arbitrary 2-character ( $$ .. $$, \[ .. \], \( .. \)
1254  * delimiters
1255  */
1256 static int
mathhandler(MMIOT * f,int e1,int e2)1257 mathhandler(MMIOT *f, int e1, int e2)
1258 {
1259     int i = 0;
1260 
1261     while(peek(f, ++i) != EOF) {
1262         if (peek(f, i) == e1 && peek(f, i+1) == e2) {
1263 	    cputc(peek(f,-1), f);
1264 	    cputc(peek(f, 0), f);
1265 	    while ( i-- > -1 )
1266 		cputc(pull(f), f);
1267             return 1;
1268         }
1269     }
1270     return 0;
1271 }
1272 
1273 
1274 /* process a body of text encased in some sort of tick marks.   If it
1275  * works, generate the output and return 1, otherwise just return 0 and
1276  * let the caller figure it out.
1277  */
1278 static int
tickhandler(MMIOT * f,int tickchar,int minticks,int allow_space,spanhandler spanner)1279 tickhandler(MMIOT *f, int tickchar, int minticks, int allow_space, spanhandler spanner)
1280 {
1281     int endticks, size;
1282     int tick = nrticks(0, tickchar, f);
1283 
1284     if ( !allow_space && isspace(peek(f,tick)) )
1285 	return 0;
1286 
1287     if ( (tick >= minticks) && (size = matchticks(f,tickchar,tick,&endticks)) ) {
1288 	if ( endticks < tick ) {
1289 	    size += (tick - endticks);
1290 	    tick = endticks;
1291 	}
1292 
1293 	shift(f, tick);
1294 	(*spanner)(f,size);
1295 	shift(f, size+tick-1);
1296 	return 1;
1297     }
1298     return 0;
1299 }
1300 
1301 #define tag_text(f)	is_flag_set(f->flags, MKD_TAGTEXT)
1302 
1303 
1304 static void
text(MMIOT * f)1305 text(MMIOT *f)
1306 {
1307     int c, j;
1308     int rep;
1309     int smartyflags = 0;
1310 
1311     while (1) {
1312         if ( is_flag_set(f->flags, MKD_AUTOLINK) && isalpha(peek(f,1)) && !tag_text(f) )
1313 	    maybe_autolink(f);
1314 
1315         c = pull(f);
1316 
1317         if (c == EOF)
1318           break;
1319 
1320 	if ( smartypants(c, &smartyflags, f) )
1321 	    continue;
1322 	switch (c) {
1323 	case 0:     break;
1324 
1325 	case MKD_EOLN:
1326 		    Qstring(tag_text(f) ? "  " : "<br/>", f);
1327 		    break;
1328 
1329 	case '>':   if ( tag_text(f) )
1330 			Qstring("&gt;", f);
1331 		    else
1332 			Qchar(c, f);
1333 		    break;
1334 
1335 	case '"':   if ( tag_text(f) )
1336 			Qstring("&quot;", f);
1337 		    else
1338 			Qchar(c, f);
1339 		    break;
1340 
1341 	case '!':   if ( peek(f,1) == '[' ) {
1342 			pull(f);
1343 			if ( tag_text(f) || !linkylinky(1, f) )
1344 			    Qstring("![", f);
1345 		    }
1346 		    else
1347 			Qchar(c, f);
1348 		    break;
1349 
1350 	case '[':   if ( tag_text(f) || !linkylinky(0, f) )
1351 			Qchar(c, f);
1352 		    break;
1353 	/* A^B -> A<sup>B</sup> */
1354 	case '^':   if ( is_flag_set(f->flags, MKD_NOSUPERSCRIPT)
1355 			    || is_flag_set(f->flags, MKD_STRICT)
1356 			    || is_flag_set(f->flags, MKD_TAGTEXT)
1357 			    || (f->last == 0)
1358 			    || ((ispunct(f->last) || isspace(f->last))
1359 						    && f->last != ')')
1360 			    || isthisspace(f,1) )
1361 			Qchar(c,f);
1362 		    else {
1363 			char *sup = cursor(f);
1364 			int len = 0;
1365 
1366 			if ( peek(f,1) == '(' ) {
1367 			    int here = mmiottell(f);
1368 			    pull(f);
1369 
1370 			    if ( (len = parenthetical('(',')',f)) <= 0 ) {
1371 				mmiotseek(f,here);
1372 				Qchar(c, f);
1373 				break;
1374 			    }
1375 			    sup++;
1376 			}
1377 			else {
1378 			    while ( isthisalnum(f,1+len) )
1379 				++len;
1380 			    if ( !len ) {
1381 				Qchar(c,f);
1382 				break;
1383 			    }
1384 			    shift(f,len);
1385 			}
1386 			Qstring("<sup>",f);
1387 			___mkd_reparse(sup, len, 0, f, "()");
1388 			Qstring("</sup>", f);
1389 		    }
1390 		    break;
1391 	case '_':
1392 	/* Underscores don't count if they're in the middle of a word */
1393 		    if ( !(is_flag_set(f->flags, MKD_NORELAXED) || is_flag_set(f->flags, MKD_STRICT))
1394 				&& isthisalnum(f,-1) && isthisalnum(f,1) ) {
1395 			Qchar(c, f);
1396 			break;
1397 		    }
1398 	case '*':
1399 	/* Underscores & stars don't count if they're out in the middle
1400 	 * of whitespace */
1401 		    if ( isthisspace(f,-1) && isthisspace(f,1) ) {
1402 			Qchar(c, f);
1403 			break;
1404 		    }
1405 		    /* else fall into the regular old emphasis case */
1406 		    if ( tag_text(f) )
1407 			Qchar(c, f);
1408 		    else {
1409 			for (rep = 1; peek(f,1) == c; pull(f) )
1410 			    ++rep;
1411 			Qem(f,c,rep);
1412 		    }
1413 		    break;
1414 
1415 	case '~':   if ( is_flag_set(f->flags, MKD_NOSTRIKETHROUGH)
1416 			 || is_flag_set(f->flags, MKD_STRICT)
1417 			 || is_flag_set(f->flags, MKD_TAGTEXT)
1418 			 || ! tickhandler(f,c,2,0, delspan) )
1419 			Qchar(c, f);
1420 		    break;
1421 
1422 	case '`':   if ( tag_text(f) || !tickhandler(f,c,1,1,codespan) )
1423 			Qchar(c, f);
1424 		    break;
1425 
1426 	case '\\':  switch ( c = pull(f) ) {
1427 		    case '&':   Qstring("&amp;", f);
1428 				break;
1429 		    case '<':   c = peek(f,1);
1430 				if ( (c == EOF) || isspace(c) )
1431 				    Qstring("&lt;", f);
1432 				else {
1433 				    /* Markdown.pl does not escape <[nonwhite]
1434 				     * sequences */
1435 				    Qchar('\\', f);
1436 				    shift(f, -1);
1437 				}
1438 
1439 				break;
1440 		    case '^':   if ( is_flag_set(f->flags, MKD_STRICT)
1441 					|| is_flag_set(f->flags, MKD_NOSUPERSCRIPT) ) {
1442 				    Qchar('\\', f);
1443 				    shift(f,-1);
1444 				    break;
1445 				}
1446 				Qchar(c, f);
1447 				break;
1448 
1449 		    case ':': case '|':
1450 				if ( is_flag_set(f->flags, MKD_NOTABLES) ) {
1451 				    Qchar('\\', f);
1452 				    shift(f,-1);
1453 				    break;
1454 				}
1455 				Qchar(c, f);
1456 				break;
1457 
1458 		    case EOF:	Qchar('\\', f);
1459 				break;
1460 
1461 		    case '[':
1462 		    case '(':   if ( is_flag_set(f->flags, MKD_LATEX)
1463 				   && mathhandler(f, '\\', (c =='(')?')':']') )
1464 				    break;
1465 				/* else fall through to default */
1466 
1467 		    default:    if ( escaped(f,c) ||
1468 				     strchr(">#.-+{}]![*_\\()`", c) )
1469 				    Qchar(c, f);
1470 				else {
1471 				    Qchar('\\', f);
1472 				    shift(f, -1);
1473 				}
1474 				break;
1475 		    }
1476 		    break;
1477 
1478 	case '<':   if ( !maybe_tag_or_link(f) )
1479 			Qstring("&lt;", f);
1480 		    break;
1481 
1482 	case '&':   j = (peek(f,1) == '#' ) ? 2 : 1;
1483 		    while ( isthisalnum(f,j) )
1484 			++j;
1485 
1486 		    if ( peek(f,j) != ';' )
1487 			Qstring("&amp;", f);
1488 		    else
1489 			Qchar(c, f);
1490 		    break;
1491 
1492 	case '$':   if ( is_flag_set(f->flags, MKD_LATEX) && (peek(f, 1) == '$') ) {
1493 			pull(f);
1494 			if ( mathhandler(f, '$', '$') )
1495 			    break;
1496 			Qchar('$', f);
1497 		    }
1498 		    /* fall through to default */
1499 
1500 	default:    f->last = c;
1501 		    Qchar(c, f);
1502 		    break;
1503 	}
1504     }
1505     /* truncate the input string after we've finished processing it */
1506     S(f->in) = f->isp = 0;
1507 } /* text */
1508 
1509 
1510 /* print a header block
1511  */
1512 static void
printheader(Paragraph * pp,MMIOT * f)1513 printheader(Paragraph *pp, MMIOT *f)
1514 {
1515     if ( is_flag_set(f->flags, MKD_IDANCHOR) ) {
1516 	Qprintf(f, "<h%d", pp->hnumber);
1517 	if ( is_flag_set(f->flags, MKD_TOC) ) {
1518 	    Qstring(" id=\"", f);
1519 	    Qanchor(pp->text, f);
1520 	    Qchar('"', f);
1521 	}
1522 	Qchar('>', f);
1523     } else {
1524 	if ( is_flag_set(f->flags, MKD_TOC) ) {
1525 	    Qstring("<a name=\"", f);
1526 	    Qanchor(pp->text, f);
1527 	    Qstring("\"></a>\n", f);
1528 	}
1529 	Qprintf(f, "<h%d>", pp->hnumber);
1530     }
1531     push(T(pp->text->text), S(pp->text->text), f);
1532     text(f);
1533     Qprintf(f, "</h%d>", pp->hnumber);
1534 }
1535 
1536 
1537 enum e_alignments { a_NONE, a_CENTER, a_LEFT, a_RIGHT };
1538 
1539 static char* alignments[] = { "", " style=\"text-align:center;\"",
1540 				  " style=\"text-align:left;\"",
1541 				  " style=\"text-align:right;\"" };
1542 
1543 typedef STRING(int) Istring;
1544 
1545 static int
splat(Line * p,char * block,Istring align,int force,MMIOT * f)1546 splat(Line *p, char *block, Istring align, int force, MMIOT *f)
1547 {
1548     int first,
1549 	idx = p->dle,
1550 	colno = 0;
1551 
1552 
1553     ___mkd_tidy(&p->text);
1554     if ( T(p->text)[S(p->text)-1] == '|' )
1555 	--S(p->text);
1556 
1557     Qstring("<tr>\n", f);
1558     while ( idx < S(p->text) ) {
1559 	first = idx;
1560 	if ( force && (colno >= S(align)-1) )
1561 	    idx = S(p->text);
1562 	else
1563 	    while ( (idx < S(p->text)) && (T(p->text)[idx] != '|') ) {
1564 		if ( T(p->text)[idx] == '\\' )
1565 		    ++idx;
1566 		++idx;
1567 	    }
1568 
1569 	Qprintf(f, "<%s%s>",
1570 		   block,
1571 		   alignments[ (colno < S(align)) ? T(align)[colno] : a_NONE ]);
1572 	___mkd_reparse(T(p->text)+first, idx-first, 0, f, "|");
1573 	Qprintf(f, "</%s>\n", block);
1574 	idx++;
1575 	colno++;
1576     }
1577     if ( force )
1578 	while (colno < S(align) ) {
1579 	    Qprintf(f, "<%s></%s>\n", block, block);
1580 	    ++colno;
1581 	}
1582     Qstring("</tr>\n", f);
1583     return colno;
1584 }
1585 
1586 
1587 static int
printtable(Paragraph * pp,MMIOT * f)1588 printtable(Paragraph *pp, MMIOT *f)
1589 {
1590     /* header, dashes, then lines of content */
1591 
1592     Line *hdr, *dash, *body;
1593     Istring align;
1594     int hcols,start;
1595     char *p;
1596     enum e_alignments it;
1597 
1598     hdr = pp->text;
1599     dash= hdr->next;
1600     body= dash->next;
1601 
1602     if ( T(hdr->text)[hdr->dle] == '|' ) {
1603 	/* trim leading pipe off all lines
1604 	 */
1605 	Line *r;
1606 	for ( r = pp->text; r; r = r->next )
1607 	    r->dle ++;
1608     }
1609 
1610     /* figure out cell alignments */
1611 
1612     CREATE(align);
1613 
1614     for (p=T(dash->text), start=dash->dle; start < S(dash->text); ) {
1615 	char first, last;
1616 	int end;
1617 
1618 	last=first=0;
1619 	for (end=start ; (end < S(dash->text)) && p[end] != '|'; ++ end ) {
1620 	    if ( p[end] == '\\' )
1621 		++ end;
1622 	    else if ( !isspace(p[end]) ) {
1623 		if ( !first) first = p[end];
1624 		last = p[end];
1625 	    }
1626 	}
1627 	it = ( first == ':' ) ? (( last == ':') ? a_CENTER : a_LEFT)
1628 			      : (( last == ':') ? a_RIGHT : a_NONE );
1629 
1630 	EXPAND(align) = it;
1631 	start = 1+end;
1632     }
1633 
1634     Qstring("<table>\n", f);
1635     Qstring("<thead>\n", f);
1636     hcols = splat(hdr, "th", align, 0, f);
1637     Qstring("</thead>\n", f);
1638 
1639     if ( hcols < S(align) )
1640 	S(align) = hcols;
1641     else
1642 	while ( hcols > S(align) )
1643 	    EXPAND(align) = a_NONE;
1644 
1645     Qstring("<tbody>\n", f);
1646     for ( ; body; body = body->next)
1647 	splat(body, "td", align, 1, f);
1648     Qstring("</tbody>\n", f);
1649     Qstring("</table>\n", f);
1650 
1651     DELETE(align);
1652     return 1;
1653 }
1654 
1655 
1656 static int
printblock(Paragraph * pp,MMIOT * f)1657 printblock(Paragraph *pp, MMIOT *f)
1658 {
1659     static char *Begin[] = { "", "<p>", "<p style=\"text-align:center;\">"  };
1660     static char *End[]   = { "", "</p>","</p>" };
1661     Line *t = pp->text;
1662     int align = pp->align;
1663 
1664     while (t) {
1665 	if ( S(t->text) ) {
1666 	    if ( t->next && S(t->text) > 2
1667 			 && T(t->text)[S(t->text)-2] == ' '
1668 			 && T(t->text)[S(t->text)-1] == ' ' ) {
1669 		push(T(t->text), S(t->text)-2, f);
1670 		pushc(MKD_EOLN, f);
1671 		pushc('\n', f);
1672 	    }
1673 	    else {
1674 		___mkd_tidy(&t->text);
1675 		push(T(t->text), S(t->text), f);
1676 		if ( t->next )
1677 		    pushc('\n', f);
1678 	    }
1679 	}
1680 	t = t->next;
1681     }
1682     Qstring(Begin[align], f);
1683     text(f);
1684     Qstring(End[align], f);
1685     return 1;
1686 }
1687 
1688 
1689 static void
printcode(Line * t,char * lang,MMIOT * f)1690 printcode(Line *t, char *lang, MMIOT *f)
1691 {
1692     int blanks;
1693 
1694     if ( f->cb->e_codefmt ) {
1695 	/* external code block formatter;  copy the text into a buffer,
1696 	 * call the formatter to style it, then dump that styled text
1697 	 * directly to the queue
1698 	 */
1699 	char *text;
1700 	char *fmt;
1701 	int size, copy_p;
1702 	Line *p;
1703 
1704 	for (size=0, p = t; p; p = p->next )
1705 	    size += 1+S(p->text);
1706 
1707 	text = malloc(1+size);
1708 
1709 	for ( copy_p = 0; t ; t = t->next ) {
1710 	    memcpy(text+copy_p, T(t->text), S(t->text));
1711 	    copy_p += S(t->text);
1712 	    text[copy_p++] = '\n';
1713 	}
1714 	text[copy_p] = 0;
1715 
1716 	fmt = (*(f->cb->e_codefmt))(text, copy_p, (lang && lang[0]) ? lang : 0);
1717 	free(text);
1718 
1719 	if ( fmt ) {
1720 	    Qwrite(fmt, strlen(fmt), f);
1721 	    if ( f->cb->e_free )
1722 		(*(f->cb->e_free))(fmt, f->cb->e_data);
1723 	    return;
1724 	}
1725 	/* otherwise the external formatter failed and we need to
1726 	 * fall back to the traditional codeblock format
1727 	 */
1728     }
1729 
1730     Qstring("<pre><code", f);
1731     if (lang && lang[0]) {
1732       Qstring(" class=\"", f);
1733       Qstring(lang, f);
1734       Qstring("\"", f);
1735     }
1736     Qstring(">", f);
1737     for ( blanks = 0; t ; t = t->next ) {
1738 	if ( S(t->text) > t->dle ) {
1739 	    while ( blanks ) {
1740 		Qchar('\n', f);
1741 		--blanks;
1742 	    }
1743 	    code(f, T(t->text), S(t->text));
1744 	    Qchar('\n', f);
1745 	}
1746 	else blanks++;
1747     }
1748     Qstring("</code></pre>", f);
1749 }
1750 
1751 
1752 static void
printhtml(Line * t,MMIOT * f)1753 printhtml(Line *t, MMIOT *f)
1754 {
1755     int blanks;
1756 
1757     for ( blanks=0; t ; t = t->next )
1758 	if ( S(t->text) ) {
1759 	    for ( ; blanks; --blanks )
1760 		Qchar('\n', f);
1761 
1762 	    Qwrite(T(t->text), S(t->text), f);
1763 	    Qchar('\n', f);
1764 	}
1765 	else
1766 	    blanks++;
1767 }
1768 
1769 
1770 static void
htmlify_paragraphs(Paragraph * p,MMIOT * f)1771 htmlify_paragraphs(Paragraph *p, MMIOT *f)
1772 {
1773     ___mkd_emblock(f);
1774 
1775     while (( p = display(p, f) )) {
1776 	___mkd_emblock(f);
1777 	Qstring("\n\n", f);
1778     }
1779 }
1780 
1781 
#ifdef GITHUB_CHECKBOX
/* print one list item, with github-style checkbox support.
 *
 * <arguments>, when given, is written inside the <li> tag.  If <flags>
 * has GITHUB_CHECK set, the item gets class="github_checkbox" and a
 * checkbox in front of its content: a disabled <input> element when
 * CHECKBOX_AS_INPUT is nonzero, a ballot-box character entity
 * otherwise.  IS_CHECKED selects the checked form.
 */
static void
li_htmlify(Paragraph *p, char *arguments, mkd_flag_t flags, MMIOT *f)
{
    int checkbox = (flags & GITHUB_CHECK) != 0;
    int checked  = (flags & IS_CHECKED) != 0;

    ___mkd_emblock(f);

    Qprintf(f, "<li");
    if ( arguments )
        Qprintf(f, " %s", arguments);
    if ( checkbox )
        Qprintf(f, " class=\"github_checkbox\"");
    Qprintf(f, ">");

    if ( checkbox ) {
#if CHECKBOX_AS_INPUT
        Qprintf(f, "<input disabled=\"\" type=\"checkbox\"");
        if ( checked )
            Qprintf(f, " checked=\"checked\"");
        Qprintf(f, "/>");
#else
        Qprintf(f, checked ? "&#x2611;" : "&#x2610;");
#endif
    }

    htmlify_paragraphs(p, f);

    Qprintf(f, "</li>");
    ___mkd_emblock(f);
}
#endif
1812 
1813 
1814 static void
htmlify(Paragraph * p,char * block,char * arguments,MMIOT * f)1815 htmlify(Paragraph *p, char *block, char *arguments, MMIOT *f)
1816 {
1817     ___mkd_emblock(f);
1818     if ( block )
1819 	Qprintf(f, arguments ? "<%s %s>" : "<%s>", block, arguments);
1820 
1821     htmlify_paragraphs(p, f);
1822 
1823     if ( block )
1824 	 Qprintf(f, "</%s>", block);
1825     ___mkd_emblock(f);
1826 }
1827 
1828 
1829 static void
definitionlist(Paragraph * p,MMIOT * f)1830 definitionlist(Paragraph *p, MMIOT *f)
1831 {
1832     Line *tag;
1833 
1834     if ( p ) {
1835 	Qstring("<dl>\n", f);
1836 
1837 	for ( ; p ; p = p->next) {
1838 	    for ( tag = p->text; tag; tag = tag->next ) {
1839 		Qstring("<dt>", f);
1840 		___mkd_reparse(T(tag->text), S(tag->text), 0, f, 0);
1841 		Qstring("</dt>\n", f);
1842 	    }
1843 
1844 	    htmlify(p->down, "dd", p->ident, f);
1845 	    Qchar('\n', f);
1846 	}
1847 
1848 	Qstring("</dl>", f);
1849     }
1850 }
1851 
1852 
1853 static void
listdisplay(int typ,Paragraph * p,MMIOT * f)1854 listdisplay(int typ, Paragraph *p, MMIOT* f)
1855 {
1856     if ( p ) {
1857 	Qprintf(f, "<%cl", (typ==UL)?'u':'o');
1858 	if ( typ == AL )
1859 	    Qprintf(f, " type=\"a\"");
1860 	Qprintf(f, ">\n");
1861 
1862 	for ( ; p ; p = p->next ) {
1863 #ifdef GITHUB_CHECKBOX
1864 	    li_htmlify(p->down, p->ident, p->flags, f);
1865 #else
1866 	    htmlify(p->down, "li", p->ident, f);
1867 #endif
1868 	    Qchar('\n', f);
1869 	}
1870 
1871 	Qprintf(f, "</%cl>\n", (typ==UL)?'u':'o');
1872     }
1873 }
1874 
1875 
1876 /* dump out a Paragraph in the desired manner
1877  */
1878 static Paragraph*
display(Paragraph * p,MMIOT * f)1879 display(Paragraph *p, MMIOT *f)
1880 {
1881     if ( !p ) return 0;
1882 
1883     switch ( p->typ ) {
1884     case STYLE:
1885     case WHITESPACE:
1886 	break;
1887 
1888     case HTML:
1889 	printhtml(p->text, f);
1890 	break;
1891 
1892     case CODE:
1893 	printcode(p->text, p->lang, f);
1894 	break;
1895 
1896     case QUOTE:
1897 	htmlify(p->down, p->ident ? "div" : "blockquote", p->ident, f);
1898 	break;
1899 
1900     case UL:
1901     case OL:
1902     case AL:
1903 	listdisplay(p->typ, p->down, f);
1904 	break;
1905 
1906     case DL:
1907 	definitionlist(p->down, f);
1908 	break;
1909 
1910     case HR:
1911 	Qstring("<hr />", f);
1912 	break;
1913 
1914     case HDR:
1915 	printheader(p, f);
1916 	break;
1917 
1918     case TABLE:
1919 	printtable(p, f);
1920 	break;
1921 
1922     case SOURCE:
1923 	htmlify(p->down, 0, 0, f);
1924 	break;
1925 
1926     default:
1927 	printblock(p, f);
1928 	break;
1929     }
1930     return p->next;
1931 }
1932 
1933 
1934 /* dump out a list of footnotes
1935  */
1936 static void
mkd_extra_footnotes(MMIOT * m)1937 mkd_extra_footnotes(MMIOT *m)
1938 {
1939     int j, i;
1940     Footnote *t;
1941 
1942     if ( m->footnotes->reference == 0 )
1943 	return;
1944 
1945     Csprintf(&m->out, "\n<div class=\"footnotes\">\n<hr/>\n<ol>\n");
1946 
1947     for ( i=1; i <= m->footnotes->reference; i++ ) {
1948 	for ( j=0; j < S(m->footnotes->note); j++ ) {
1949 	    t = &T(m->footnotes->note)[j];
1950 	    if ( (t->refnumber == i) && (t->flags & REFERENCED) ) {
1951 		Csprintf(&m->out, "<li id=\"%s:%d\">\n",
1952 			    p_or_nothing(m), t->refnumber);
1953 		htmlify(t->text, 0, 0, m);
1954 		Csprintf(&m->out, "<a href=\"#%sref:%d\" rev=\"footnote\">&#8617;</a>",
1955 			    p_or_nothing(m), t->refnumber);
1956 		Csprintf(&m->out, "</li>\n");
1957 	    }
1958 	}
1959     }
1960     Csprintf(&m->out, "</ol>\n</div>\n");
1961 }
1962 
1963 
1964 /* return a pointer to the compiled markdown
1965  * document.
1966  */
1967 int
mkd_document(Document * p,char ** res)1968 mkd_document(Document *p, char **res)
1969 {
1970     int size;
1971 
1972     if ( p && p->compiled ) {
1973 	if ( ! p->html ) {
1974 	    htmlify(p->code, 0, 0, p->ctx);
1975 	    if ( is_flag_set(p->ctx->flags, MKD_EXTRA_FOOTNOTE) )
1976 		mkd_extra_footnotes(p->ctx);
1977 	    p->html = 1;
1978 	    size = S(p->ctx->out);
1979 
1980 	    if ( (size == 0) || T(p->ctx->out)[size-1] ) {
1981 		/* Add a null byte at the end of the generated html,
1982 		 * but pretend it doesn't exist.
1983 		 */
1984 		EXPAND(p->ctx->out) = 0;
1985 		--S(p->ctx->out);
1986 	    }
1987 	}
1988 
1989 	*res = T(p->ctx->out);
1990 	return S(p->ctx->out);
1991     }
1992     return EOF;
1993 }
1994