1 /* markdown: a C implementation of John Gruber's Markdown markup language.
2 *
3 * Copyright (C) 2007 David L Parsons.
4 * The redistribution terms are provided in the COPYRIGHT file that must
5 * be distributed with this source code.
6 */
7 #include <stdio.h>
8 #include <string.h>
9 #include <stdarg.h>
10 #include <stdlib.h>
11 #include <time.h>
12 #include <ctype.h>
13
14 #include "config.h"
15
16 #include "cstring.h"
17 #include "markdown.h"
18 #include "amalloc.h"
19
/* generic comparator signature; used to cast __mkd_footsort() into the
 * form bsearch() expects.
 */
typedef int (*stfu)(const void*,const void*);
/* callback invoked by tickhandler() to render a tick-delimited span;
 * it receives the MMIOT and the size of the span.
 */
typedef void (*spanhandler)(MMIOT*,int);

/* forward declarations */
static void text(MMIOT *f);
static Paragraph *display(Paragraph*, MMIOT*);

/* externals from markdown.c */
int __mkd_footsort(Footnote *, Footnote *);
29
30 /*
31 * push text into the generator input buffer
32 */
33 static void
push(char * bfr,int size,MMIOT * f)34 push(char *bfr, int size, MMIOT *f)
35 {
36 while ( size-- > 0 )
37 EXPAND(f->in) = *bfr++;
38 }
39
40
41 /*
42 * push a character into the generator input buffer
43 */
44 static void
pushc(char c,MMIOT * f)45 pushc(char c, MMIOT *f)
46 {
47 EXPAND(f->in) = c;
48 }
49
50
51 /* look <i> characters ahead of the cursor.
52 */
53 static inline int
peek(MMIOT * f,int i)54 peek(MMIOT *f, int i)
55 {
56
57 i += (f->isp-1);
58
59 return (i >= 0) && (i < S(f->in)) ? (unsigned char)T(f->in)[i] : EOF;
60 }
61
62
63 /* pull a byte from the input buffer
64 */
65 static inline unsigned int
pull(MMIOT * f)66 pull(MMIOT *f)
67 {
68 return ( f->isp < S(f->in) ) ? (unsigned char)T(f->in)[f->isp++] : EOF;
69 }
70
71
72 /* return a pointer to the current position in the input buffer.
73 */
74 static inline char*
cursor(MMIOT * f)75 cursor(MMIOT *f)
76 {
77 return T(f->in) + f->isp;
78 }
79
80
81 static inline int
isthisspace(MMIOT * f,int i)82 isthisspace(MMIOT *f, int i)
83 {
84 int c = peek(f, i);
85
86 if ( c == EOF )
87 return 1;
88 if ( c & 0x80 )
89 return 0;
90 return isspace(c) || (c < ' ');
91 }
92
93
94 static inline int
isthisalnum(MMIOT * f,int i)95 isthisalnum(MMIOT *f, int i)
96 {
97 int c = peek(f, i);
98
99 return (c != EOF) && isalnum(c);
100 }
101
102
103 static inline int
isthisnonword(MMIOT * f,int i)104 isthisnonword(MMIOT *f, int i)
105 {
106 return isthisspace(f, i) || ispunct(peek(f,i));
107 }
108
109
/* return/set the current cursor position
 * (when setting the current cursor position we also need to flush the
 * last character written cache)
 *
 * The macro arguments are parenthesized so that expressions such as
 * `p+1` can be passed without being re-associated by the expansion.
 */
#define mmiotseek(f,x)	(((f)->isp = (x)), ((f)->last = 0))
#define mmiottell(f)	((f)->isp)
116
117
118 /* move n characters forward ( or -n characters backward) in the input buffer.
119 */
120 static void
shift(MMIOT * f,int i)121 shift(MMIOT *f, int i)
122 {
123 if (f->isp + i >= 0 )
124 f->isp += i;
125 }
126
127
128 /* Qchar()
129 */
130 static void
Qchar(int c,MMIOT * f)131 Qchar(int c, MMIOT *f)
132 {
133 block *cur;
134
135 if ( S(f->Q) == 0 ) {
136 cur = &EXPAND(f->Q);
137 memset(cur, 0, sizeof *cur);
138 cur->b_type = bTEXT;
139 }
140 else
141 cur = &T(f->Q)[S(f->Q)-1];
142
143 EXPAND(cur->b_text) = c;
144
145 }
146
147
148 /* Qstring()
149 */
150 static void
Qstring(char * s,MMIOT * f)151 Qstring(char *s, MMIOT *f)
152 {
153 while (*s)
154 Qchar(*s++, f);
155 }
156
157
158 /* Qwrite()
159 */
160 static void
Qwrite(char * s,int size,MMIOT * f)161 Qwrite(char *s, int size, MMIOT *f)
162 {
163 while (size-- > 0)
164 Qchar(*s++, f);
165 }
166
167
168 /* Qprintf()
169 */
170 static void
Qprintf(MMIOT * f,char * fmt,...)171 Qprintf(MMIOT *f, char *fmt, ...)
172 {
173 char bfr[80];
174 va_list ptr;
175
176 va_start(ptr,fmt);
177 vsnprintf(bfr, sizeof bfr, fmt, ptr);
178 va_end(ptr);
179 Qstring(bfr, f);
180 }
181
182
183 /* Qanchor() prints out a suitable-for-id-tag version of a string
184 */
185 static void
Qanchor(struct line * p,MMIOT * f)186 Qanchor(struct line *p, MMIOT *f)
187 {
188 mkd_string_to_anchor(T(p->text), S(p->text),
189 (mkd_sta_function_t)Qchar, f, 1, f);
190 }
191
192
193 /* Qem()
194 */
195 static void
Qem(MMIOT * f,char c,int count)196 Qem(MMIOT *f, char c, int count)
197 {
198 block *p = &EXPAND(f->Q);
199
200 memset(p, 0, sizeof *p);
201 p->b_type = (c == '*') ? bSTAR : bUNDER;
202 p->b_char = c;
203 p->b_count = count;
204
205 memset(&EXPAND(f->Q), 0, sizeof(block));
206 }
207
208
209 /* generate html from a markup fragment
210 */
211 void
___mkd_reparse(char * bfr,int size,mkd_flag_t flags,MMIOT * f,char * esc)212 ___mkd_reparse(char *bfr, int size, mkd_flag_t flags, MMIOT *f, char *esc)
213 {
214 MMIOT sub;
215 struct escaped e;
216
217 ___mkd_initmmiot(&sub, f->footnotes);
218
219 sub.flags = f->flags | flags;
220 sub.cb = f->cb;
221 sub.ref_prefix = f->ref_prefix;
222
223 if ( esc ) {
224 sub.esc = &e;
225 e.up = f->esc;
226 e.text = esc;
227 }
228 else
229 sub.esc = f->esc;
230
231 push(bfr, size, &sub);
232 pushc(0, &sub);
233 S(sub.in)--;
234
235 text(&sub);
236 ___mkd_emblock(&sub);
237
238 Qwrite(T(sub.out), S(sub.out), f);
239 /* inherit the last character printed from the reparsed
240 * text; this way superscripts can work when they're
241 * applied to something embedded in a link
242 */
243 f->last = sub.last;
244
245 ___mkd_freemmiot(&sub, f->footnotes);
246 }
247
248
249 /*
250 * check the escape list for special cases
251 */
252 static int
escaped(MMIOT * f,char c)253 escaped(MMIOT *f, char c)
254 {
255 struct escaped *thing = f->esc;
256
257 while ( thing ) {
258 if ( strchr(thing->text, c) )
259 return 1;
260 thing = thing->up;
261 }
262 return 0;
263 }
264
265
266 /*
267 * write out a url, escaping problematic characters
268 */
269 static void
puturl(char * s,int size,MMIOT * f,int display)270 puturl(char *s, int size, MMIOT *f, int display)
271 {
272 unsigned char c;
273
274 while ( size-- > 0 ) {
275 c = *s++;
276
277 if ( c == '\\' && size-- > 0 ) {
278 c = *s++;
279
280 if ( !( ispunct(c) || isspace(c) ) )
281 Qchar('\\', f);
282 }
283
284 if ( c == '&' )
285 Qstring("&", f);
286 else if ( c == '<' )
287 Qstring("<", f);
288 else if ( c == '"' )
289 Qstring("%22", f);
290 else if ( isalnum(c) || ispunct(c) || (display && isspace(c)) )
291 Qchar(c, f);
292 else if ( c == MKD_EOLN ) /* untokenize hard return */
293 Qstring(" ", f);
294 else
295 Qprintf(f, "%%%02X", c);
296 }
297 }
298
299
300 /* advance forward until the next character is not whitespace
301 */
302 static int
eatspace(MMIOT * f)303 eatspace(MMIOT *f)
304 {
305 int c;
306
307 for ( ; ((c=peek(f, 1)) != EOF) && isspace(c); pull(f) )
308 ;
309 return c;
310 }
311
312
313 /* (match (a (nested (parenthetical (string.)))))
314 */
315 static int
parenthetical(int in,int out,MMIOT * f)316 parenthetical(int in, int out, MMIOT *f)
317 {
318 int size, indent, c;
319
320 for ( indent=1,size=0; indent; size++ ) {
321 if ( (c = pull(f)) == EOF )
322 return EOF;
323 else if ( (c == '\\') && (peek(f,1) == out || peek(f,1) == in) ) {
324 ++size;
325 pull(f);
326 }
327 else if ( c == in )
328 ++indent;
329 else if ( c == out )
330 --indent;
331 }
332 return size ? (size-1) : 0;
333 }
334
335
336 /* extract a []-delimited label from the input stream.
337 */
338 static int
linkylabel(MMIOT * f,Cstring * res)339 linkylabel(MMIOT *f, Cstring *res)
340 {
341 char *ptr = cursor(f);
342 int size;
343
344 if ( (size = parenthetical('[',']',f)) != EOF ) {
345 T(*res) = ptr;
346 S(*res) = size;
347 return 1;
348 }
349 return 0;
350 }
351
352
353 /* see if the quote-prefixed linky segment is actually a title.
354 */
355 static int
linkytitle(MMIOT * f,char quote,Footnote * ref)356 linkytitle(MMIOT *f, char quote, Footnote *ref)
357 {
358 int whence = mmiottell(f);
359 char *title = cursor(f);
360 char *e;
361 register int c;
362
363 while ( (c = pull(f)) != EOF ) {
364 e = cursor(f);
365 if ( c == quote ) {
366 if ( (c = eatspace(f)) == ')' ) {
367 T(ref->title) = 1+title;
368 S(ref->title) = (e-title)-2;
369 return 1;
370 }
371 }
372 }
373 mmiotseek(f, whence);
374 return 0;
375 }
376
377
378 /* extract a =HHHxWWW size from the input stream
379 */
380 static int
linkysize(MMIOT * f,Footnote * ref)381 linkysize(MMIOT *f, Footnote *ref)
382 {
383 int height=0, width=0;
384 int whence = mmiottell(f);
385 int c;
386
387 if ( isspace(peek(f,0)) ) {
388 pull(f); /* eat '=' */
389
390 for ( c = pull(f); isdigit(c); c = pull(f))
391 width = (width * 10) + (c - '0');
392
393 if ( c == 'x' ) {
394 for ( c = pull(f); isdigit(c); c = pull(f))
395 height = (height*10) + (c - '0');
396
397 if ( isspace(c) )
398 c = eatspace(f);
399
400 if ( (c == ')') || ((c == '\'' || c == '"') && linkytitle(f, c, ref)) ) {
401 ref->height = height;
402 ref->width = width;
403 return 1;
404 }
405 }
406 }
407 mmiotseek(f, whence);
408 return 0;
409 }
410
411
412 /* extract a <...>-encased url from the input stream.
413 * (markdown 1.0.2b8 compatibility; older versions
414 * of markdown treated the < and > as syntactic
415 * sugar that didn't have to be there. 1.0.2b8
416 * requires a closing >, and then falls into the
417 * title or closing )
418 */
419 static int
linkybroket(MMIOT * f,int image,Footnote * p)420 linkybroket(MMIOT *f, int image, Footnote *p)
421 {
422 int c;
423 int good = 0;
424
425 T(p->link) = cursor(f);
426 for ( S(p->link)=0; (c = pull(f)) != '>'; ++S(p->link) ) {
427 /* pull in all input until a '>' is found, or die trying.
428 */
429 if ( c == EOF )
430 return 0;
431 else if ( (c == '\\') && ispunct(peek(f,2)) ) {
432 ++S(p->link);
433 pull(f);
434 }
435 }
436
437 c = eatspace(f);
438
439 /* next nonspace needs to be a title, a size, or )
440 */
441 if ( ( c == '\'' || c == '"' ) && linkytitle(f,c,p) )
442 good=1;
443 else if ( image && (c == '=') && linkysize(f,p) )
444 good=1;
445 else
446 good=( c == ')' );
447
448 if ( good ) {
449 if ( peek(f, 1) == ')' )
450 pull(f);
451
452 ___mkd_tidy(&p->link);
453 }
454
455 return good;
456 } /* linkybroket */
457
458
459 /* extract a (-prefixed url from the input stream.
460 * the label is either of the format `<link>`, where I
461 * extract until I find a >, or it is of the format
462 * `text`, where I extract until I reach a ')', a quote,
463 * or (if image) a '='
464 */
465 static int
linkyurl(MMIOT * f,int image,Footnote * p)466 linkyurl(MMIOT *f, int image, Footnote *p)
467 {
468 int c;
469 int mayneedtotrim=0;
470
471 if ( (c = eatspace(f)) == EOF )
472 return 0;
473
474 if ( c == '<' ) {
475 pull(f);
476 if ( !is_flag_set(f->flags, MKD_1_COMPAT) )
477 return linkybroket(f,image,p);
478 mayneedtotrim=1;
479 }
480
481 T(p->link) = cursor(f);
482 for ( S(p->link)=0; (c = peek(f,1)) != ')'; ++S(p->link) ) {
483 if ( c == EOF )
484 return 0;
485 else if ( (c == '"' || c == '\'') && linkytitle(f, c, p) )
486 break;
487 else if ( image && (c == '=') && linkysize(f, p) )
488 break;
489 else if ( (c == '\\') && ispunct(peek(f,2)) ) {
490 ++S(p->link);
491 pull(f);
492 }
493 pull(f);
494 }
495 if ( peek(f, 1) == ')' )
496 pull(f);
497
498 ___mkd_tidy(&p->link);
499
500 if ( mayneedtotrim && (T(p->link)[S(p->link)-1] == '>') )
501 --S(p->link);
502
503 return 1;
504 }
505
506
507
/* prefixes for <automatic links>
 *
 * Each entry carries the prefix text and its length (excluding the
 * terminating NUL), computed at compile time by _aprotocol().
 */
static struct _protocol {
    char *name;
    int   nlen;
} protocol[] = {
#define _aprotocol(x)	{ .name = x, .nlen = (sizeof x)-1 }
    _aprotocol( "https:" ),
    _aprotocol( "http:" ),
    _aprotocol( "news:" ),
    _aprotocol( "ftp:" ),
#undef _aprotocol
};
#define NRPROTOCOLS	(sizeof protocol / sizeof protocol[0])
522
523
524 static int
isautoprefix(char * text,int size)525 isautoprefix(char *text, int size)
526 {
527 int i;
528 struct _protocol *p;
529
530 for (i=0, p=protocol; i < NRPROTOCOLS; i++, p++)
531 if ( (size >= p->nlen) && strncasecmp(text, p->name, p->nlen) == 0 )
532 return 1;
533 return 0;
534 }
535
536
/*
 * all the tag types that linkylinky can produce are
 * defined by this structure.
 */
typedef struct linkytype {
    char      *pat;	/* pseudo-protocol prefix matched by pseudo(); 0 for plain links and images */
    int      szpat;	/* length of pat; that many bytes of the link are skipped on output */
    char *link_pfx;	/* tag prefix and link pointer  (eg: `<a href="`) */
    char *link_sfx;	/* link suffix                  (eg: `"`) */
    int        WxH;	/* this tag allows width x height arguments */
    char *text_pfx;	/* text prefix                  (eg: `>`) */
    char *text_sfx;	/* text suffix                  (eg: `</a>`) */
    int      flags;	/* reparse flags; linkyformat() also rejects the tag if any are already set */
    int      kind;	/* tag is url or something else? */
#define IS_URL	0x01
} linkytype;

/* the tag used for ![images](...) */
static linkytype imaget = { 0, 0, "<img src=\"", "\"",
                             1, " alt=\"", "\" />", MKD_NOIMAGE|MKD_TAGTEXT, IS_URL };
/* the tag used for ordinary [links](...) */
static linkytype linkt  = { 0, 0, "<a href=\"", "\"",
                             0, ">", "</a>", MKD_NOLINKS, IS_URL };
558
/*
 * pseudo-protocols for [][];
 *
 * id:    generates <span id="link">tag</span>
 * raw:   just dump the link without any processing
 * lang:  generates <span lang="link">tag</span>
 * abbr:  generates <abbr title="link">tag</abbr>
 * class: generates <span class="link">tag</span>
 */
static linkytype specials[] = {
    { "id:", 3, "<span id=\"", "\"", 0, ">", "</span>", 0, 0 },
    { "raw:", 4, 0, 0, 0, 0, 0, MKD_NOHTML, 0 },
    { "lang:", 5, "<span lang=\"", "\"", 0, ">", "</span>", 0, 0 },
    { "abbr:", 5, "<abbr title=\"", "\"", 0, ">", "</abbr>", 0, 0 },
    { "class:", 6, "<span class=\"", "\"", 0, ">", "</span>", 0, 0 },
} ;

/* number of elements in a true array (not a pointer) */
#define NR(x)	(sizeof x / sizeof x[0])
575
576 /* see if t contains one of our pseudo-protocols.
577 */
578 static linkytype *
pseudo(Cstring t)579 pseudo(Cstring t)
580 {
581 int i;
582 linkytype *r;
583
584 for ( i=0, r=specials; i < NR(specials); i++,r++ ) {
585 if ( (S(t) > r->szpat) && (strncasecmp(T(t), r->pat, r->szpat) == 0) )
586 return r;
587 }
588 return 0;
589 }
590
591
592 /* print out the start of an `img' or `a' tag, applying callbacks as needed.
593 */
594 static void
printlinkyref(MMIOT * f,linkytype * tag,char * link,int size)595 printlinkyref(MMIOT *f, linkytype *tag, char *link, int size)
596 {
597 char *edit;
598
599 if ( is_flag_set(f->flags, IS_LABEL) )
600 return;
601
602 Qstring(tag->link_pfx, f);
603
604 if ( tag->kind & IS_URL ) {
605 if ( f->cb && f->cb->e_url && (edit = (*f->cb->e_url)(link, size, f->cb->e_data)) ) {
606 puturl(edit, strlen(edit), f, 0);
607 if ( f->cb->e_free ) (*f->cb->e_free)(edit, f->cb->e_data);
608 }
609 else
610 puturl(link + tag->szpat, size - tag->szpat, f, 0);
611 }
612 else
613 ___mkd_reparse(link + tag->szpat, size - tag->szpat, MKD_TAGTEXT, f, 0);
614
615 Qstring(tag->link_sfx, f);
616
617 if ( f->cb && f->cb->e_flags && (edit = (*f->cb->e_flags)(link, size, f->cb->e_data)) ) {
618 Qchar(' ', f);
619 Qstring(edit, f);
620 if ( f->cb->e_free ) (*f->cb->e_free)(edit, f->cb->e_data);
621 }
622 } /* printlinkyref */
623
624
625 /* helper function for php markdown extra footnotes; allow the user to
626 * define a prefix tag instead of just `fn`
627 */
628 static char *
p_or_nothing(p)629 p_or_nothing(p)
630 MMIOT *p;
631 {
632 return p->ref_prefix ? p->ref_prefix : "fn";
633 }
634
635
636 /* php markdown extra/daring fireball style print footnotes
637 */
638 static int
extra_linky(MMIOT * f,Cstring text,Footnote * ref)639 extra_linky(MMIOT *f, Cstring text, Footnote *ref)
640 {
641 if ( ref->flags & REFERENCED )
642 return 0;
643
644 if ( f->flags & IS_LABEL )
645 ___mkd_reparse(T(text), S(text), linkt.flags, f, 0);
646 else {
647 ref->flags |= REFERENCED;
648 ref->refnumber = ++ f->footnotes->reference;
649 Qprintf(f, "<sup id=\"%sref:%d\"><a href=\"#%s:%d\" rel=\"footnote\">%d</a></sup>",
650 p_or_nothing(f), ref->refnumber,
651 p_or_nothing(f), ref->refnumber, ref->refnumber);
652 }
653 return 1;
654 } /* extra_linky */
655
656
657
658 /* check a url (or url fragment to see that it begins with a known good
659 * protocol (or no protocol at all)
660 */
661 static int
safelink(Cstring link)662 safelink(Cstring link)
663 {
664 char *p, *colon;
665
666 if ( T(link) == 0 ) /* no link; safe */
667 return 1;
668
669 p = T(link);
670 if ( (colon = memchr(p, ':', S(link))) == 0 )
671 return 1; /* no protocol specified: safe */
672
673 if ( !isalpha(*p) ) /* protocol/method is [alpha][alnum or '+.-'] */
674 return 1;
675 while ( ++p < colon )
676 if ( !(isalnum(*p) || *p == '.' || *p == '+' || *p == '-') )
677 return 1;
678
679 return isautoprefix(T(link), S(link));
680 }
681
682
683 /* print out a linky (or fail if it's Not Allowed)
684 */
685 static int
linkyformat(MMIOT * f,Cstring text,int image,Footnote * ref)686 linkyformat(MMIOT *f, Cstring text, int image, Footnote *ref)
687 {
688 linkytype *tag;
689
690
691 if ( image )
692 tag = &imaget;
693 else if ( tag = pseudo(ref->link) ) {
694 if ( is_flag_set(f->flags, MKD_NO_EXT) || is_flag_set(f->flags, MKD_SAFELINK) )
695 return 0;
696 }
697 else if ( is_flag_set(f->flags, MKD_SAFELINK) && !safelink(ref->link) )
698 /* if MKD_SAFELINK, only accept links that are local or
699 * a well-known protocol
700 */
701 return 0;
702 else
703 tag = &linkt;
704
705 if ( f->flags & tag->flags )
706 return 0;
707
708 if ( is_flag_set(f->flags, IS_LABEL) )
709 ___mkd_reparse(T(text), S(text), tag->flags, f, 0);
710 else if ( tag->link_pfx ) {
711 printlinkyref(f, tag, T(ref->link), S(ref->link));
712
713 if ( tag->WxH ) {
714 if ( ref->height ) Qprintf(f," height=\"%d\"", ref->height);
715 if ( ref->width ) Qprintf(f, " width=\"%d\"", ref->width);
716 }
717
718 if ( S(ref->title) ) {
719 Qstring(" title=\"", f);
720 ___mkd_reparse(T(ref->title), S(ref->title), MKD_TAGTEXT, f, 0);
721 Qchar('"', f);
722 }
723
724 Qstring(tag->text_pfx, f);
725 ___mkd_reparse(T(text), S(text), tag->flags, f, 0);
726 Qstring(tag->text_sfx, f);
727 }
728 else
729 Qwrite(T(ref->link) + tag->szpat, S(ref->link) - tag->szpat, f);
730
731 return 1;
732 } /* linkyformat */
733
734
/*
 * process embedded links and images
 *
 * Called with the opening '[' already consumed.  <image> is nonzero
 * for ![...] constructs.  Handles the inline form [text](url), the
 * reference form [text][tag], and the implicit form [text].  Returns
 * nonzero if a link was written; otherwise the cursor is put back where
 * it was on entry and 0 is returned so the caller can emit the text
 * literally.
 */
static int
linkylinky(int image, MMIOT *f)
{
    int start = mmiottell(f);	/* where to rewind to on failure */
    Cstring name;
    Footnote key, *ref;

    int status = 0;
    int extra_footnote = 0;

    CREATE(name);
    memset(&key, 0, sizeof key);

    if ( linkylabel(f, &name) ) {
        if ( peek(f,1) == '(' ) {
            /* inline link: [name](url "title") */
            pull(f);
            if ( linkyurl(f, image, &key) )
                status = linkyformat(f, name, image, &key);
        }
        else {
            /* reference link: [name][tag], [name] [tag], or [name] */
            int goodlink, implicit_mark = mmiottell(f);

            /* one whitespace character is allowed between the two [] */
            if ( isspace(peek(f,1)) )
                pull(f);

            if ( peek(f,1) == '[' ) {
                pull(f);	/* consume leading '[' */
                goodlink = linkylabel(f, &key.tag);
            }
            else {
                /* new markdown implicit name syntax doesn't
                 * require a second []
                 */
                mmiotseek(f, implicit_mark);
                goodlink = !is_flag_set(f->flags, MKD_1_COMPAT);

                /* [^name] is a footnote reference when that extension is on */
                if ( is_flag_set(f->flags, MKD_EXTRA_FOOTNOTE) && (!image) && S(name) && T(name)[0] == '^' )
                    extra_footnote = 1;
            }

            if ( goodlink ) {
                /* an empty tag ([name][]) means the name is the tag */
                if ( !S(key.tag) ) {
                    DELETE(key.tag);
                    T(key.tag) = T(name);
                    S(key.tag) = S(name);
                }

                /* look the tag up among the document's footnotes;
                 * bsearch() relies on that array already being ordered
                 * by __mkd_footsort
                 */
                if ( ref = bsearch(&key, T(f->footnotes->note),
                                         S(f->footnotes->note),
                                         sizeof key, (stfu)__mkd_footsort) ) {
                    if ( extra_footnote )
                        status = extra_linky(f,name,ref);
                    else
                        status = linkyformat(f, name, image, ref);
                }
            }
        }
    }

    DELETE(name);
    ___mkd_freefootnote(&key);

    if ( status == 0 )
        mmiotseek(f, start);

    return status;
}
805
806
807 /* write a character to output, doing text escapes ( & -> &,
808 * > -> > < -> < )
809 */
810 static void
cputc(int c,MMIOT * f)811 cputc(int c, MMIOT *f)
812 {
813 switch (c) {
814 case '&': Qstring("&", f); break;
815 case '>': Qstring(">", f); break;
816 case '<': Qstring("<", f); break;
817 default : Qchar(c, f); break;
818 }
819 }
820
821
822 /*
823 * convert an email address to a string of nonsense
824 */
825 static void
mangle(char * s,int len,MMIOT * f)826 mangle(char *s, int len, MMIOT *f)
827 {
828 while ( len-- > 0 ) {
829 #if DEBIAN_GLITCH
830 Qprintf(f, "&#%02d;", *((unsigned char*)(s++)) );
831 #else
832 Qstring("&#", f);
833 Qprintf(f, COINTOSS() ? "x%02x;" : "%02d;", *((unsigned char*)(s++)) );
834 #endif
835 }
836 }
837
838
839 /* nrticks() -- count up a row of tick marks
840 */
841 static int
nrticks(int offset,int tickchar,MMIOT * f)842 nrticks(int offset, int tickchar, MMIOT *f)
843 {
844 int tick = 0;
845
846 while ( peek(f, offset+tick) == tickchar ) tick++;
847
848 return tick;
849 } /* nrticks */
850
851
852 /* matchticks() -- match a certain # of ticks, and if that fails
853 * match the largest subset of those ticks.
854 *
855 * if a subset was matched, return the # of ticks
856 * that were matched.
857 */
858 static int
matchticks(MMIOT * f,int tickchar,int ticks,int * endticks)859 matchticks(MMIOT *f, int tickchar, int ticks, int *endticks)
860 {
861 int size, count, c;
862 int subsize=0, subtick=0;
863
864 *endticks = ticks;
865 for (size = 0; (c=peek(f,size+ticks)) != EOF; size ++) {
866 if ( (c == tickchar) && ( count = nrticks(size+ticks,tickchar,f)) ) {
867 if ( count == ticks )
868 return size;
869 else if ( count ) {
870 if ( (count > subtick) && (count < ticks) ) {
871 subsize = size;
872 subtick = count;
873 }
874 size += count;
875 }
876 }
877 }
878 if ( subsize ) {
879 *endticks = subtick;
880 return subsize;
881 }
882 return 0;
883 } /* matchticks */
884
885
886 /* code() -- write a string out as code. The only characters that have
887 * special meaning in a code block are * `<' and `&' , which
888 * are /always/ expanded to < and &
889 */
890 static void
code(MMIOT * f,char * s,int length)891 code(MMIOT *f, char *s, int length)
892 {
893 int i,c;
894
895 for ( i=0; i < length; i++ )
896 if ( (c = s[i]) == MKD_EOLN) /* expand back to 2 spaces */
897 Qstring(" ", f);
898 else if ( c == '\\' && (i < length-1) && escaped(f, s[i+1]) )
899 cputc(s[++i], f);
900 else
901 cputc(c, f);
902 } /* code */
903
904 /* delspan() -- write out a chunk of text, blocking with <del>...</del>
905 */
906 static void
delspan(MMIOT * f,int size)907 delspan(MMIOT *f, int size)
908 {
909 Qstring("<del>", f);
910 ___mkd_reparse(cursor(f)-1, size, 0, f, 0);
911 Qstring("</del>", f);
912 }
913
914
915 /* codespan() -- write out a chunk of text as code, trimming one
916 * space off the front and/or back as appropriate.
917 */
918 static void
codespan(MMIOT * f,int size)919 codespan(MMIOT *f, int size)
920 {
921 int i=0;
922
923 if ( size > 1 && peek(f, size-1) == ' ' ) --size;
924 if ( peek(f,i) == ' ' ) ++i, --size;
925
926 Qstring("<code>", f);
927 code(f, cursor(f)+(i-1), size);
928 Qstring("</code>", f);
929 } /* codespan */
930
931
932 /* before letting a tag through, validate against
933 * MKD_NOLINKS and MKD_NOIMAGE
934 */
935 static int
forbidden_tag(MMIOT * f)936 forbidden_tag(MMIOT *f)
937 {
938 int c = toupper(peek(f, 1));
939
940 if ( is_flag_set(f->flags, MKD_NOHTML) )
941 return 1;
942
943 if ( c == 'A' && is_flag_set(f->flags, MKD_NOLINKS) && !isthisalnum(f,2) )
944 return 1;
945 if ( c == 'I' && is_flag_set(f->flags, MKD_NOIMAGE)
946 && strncasecmp(cursor(f)+1, "MG", 2) == 0
947 && !isthisalnum(f,4) )
948 return 1;
949 return 0;
950 }
951
952
/* Check a string to see if it looks like a mail address
 * "looks like a mail address" means alphanumeric + some
 * specials, then a `@`, then alphanumeric + some specials,
 * but with a `.`
 *
 * p, size   the candidate text (not necessarily NUL-terminated)
 * Returns 1 if it looks like an address, 0 otherwise.
 *
 * Bytes are converted to unsigned char before classification, a NUL
 * byte is never accepted (strchr() would otherwise match it against
 * the terminator of the specials list), and a negative size is
 * treated as empty.
 */
static int
maybe_address(char *p, int size)
{
    int ok = 0;
    unsigned char c;

    /* local part */
    for ( ; size > 0; ++p, --size ) {
        c = (unsigned char)*p;
        if ( c == 0 || !(isalnum(c) || strchr("._-+*", c)) )
            break;
    }

    if ( ! (size > 0 && *p == '@') )
        return 0;

    --size, ++p;

    /* the domain may not start with a dot */
    if ( size > 0 && *p == '.' )
        return 0;

    /* domain part; needs a dot that is not the very last character */
    for ( ; size > 0; ++p, --size ) {
        c = (unsigned char)*p;
        if ( c == 0 || !(isalnum(c) || strchr("._-+", c)) )
            break;
        if ( c == '.' && size > 1 )
            ok = 1;
    }

    return size ? 0 : ok;
}
978
979
980 /* The size-length token at cursor(f) is either a mailto:, an
981 * implicit mailto:, one of the approved url protocols, or just
982 * plain old text. If it's a mailto: or an approved protocol,
983 * linkify it, otherwise say "no"
984 */
985 static int
process_possible_link(MMIOT * f,int size)986 process_possible_link(MMIOT *f, int size)
987 {
988 int address= 0;
989 int mailto = 0;
990 char *text = cursor(f);
991
992 if ( is_flag_set(f->flags, MKD_NOLINKS) ) return 0;
993
994 if ( (size > 7) && strncasecmp(text, "mailto:", 7) == 0 ) {
995 /* if it says it's a mailto, it's a mailto -- who am
996 * I to second-guess the user?
997 */
998 address = 1;
999 mailto = 7; /* 7 is the length of "mailto:"; we need this */
1000 }
1001 else
1002 address = maybe_address(text, size);
1003
1004 if ( address ) {
1005 Qstring("<a href=\"", f);
1006 if ( !mailto ) {
1007 /* supply a mailto: protocol if one wasn't attached */
1008 mangle("mailto:", 7, f);
1009 }
1010 mangle(text, size, f);
1011 Qstring("\">", f);
1012 mangle(text+mailto, size-mailto, f);
1013 Qstring("</a>", f);
1014 return 1;
1015 }
1016 else if ( isautoprefix(text, size) ) {
1017 printlinkyref(f, &linkt, text, size);
1018 Qchar('>', f);
1019 puturl(text,size,f, 1);
1020 Qstring("</a>", f);
1021 return 1;
1022 }
1023 return 0;
1024 } /* process_possible_link */
1025
1026
/* a < may be just a regular character, the start of an embedded html
 * tag, or the start of an <automatic link>.    If it's an automatic
 * link, we also need to know if it's an email address because if it
 * is we need to mangle it in our futile attempt to cut down on the
 * spaminess of the rendered page.
 *
 * Called with the '<' already consumed.  Returns 1 if a tag or link was
 * written, 0 if the '<' should be treated as an ordinary character
 * (always the case when generating tag text.)
 */
static int
maybe_tag_or_link(MMIOT *f)
{
    int c, size;
    int maybetag = 1;	/* cleared once a character that can't be in a tag name is seen */

    if ( is_flag_set(f->flags, MKD_TAGTEXT) )
        return 0;

    /* measure the first word after the '<', stopping at '>' or
     * whitespace.  a backslash also skips the character after it.
     */
    for ( size=0; (c = peek(f, size+1)) != '>'; size++) {
        if ( c == EOF )
            return 0;
        else if ( c == '\\' ) {
            maybetag=0;
            if ( peek(f, size+2) != EOF )
                size++;
        }
        else if ( isspace(c) )
            break;
        else if ( ! (c == '/'
                     || (is_flag_set(f->flags, MKD_GITHUBTAGS) && (c == '-' || c == '_'))
                     || isalnum(c) ) )
            maybetag=0;
    }

    if ( size ) {
        /* either it looks like a tag name, or it's an html comment */
        if ( maybetag || (size >= 3 && strncmp(cursor(f), "!--", 3) == 0) ) {

            /* It is not a html tag unless we find the closing '>' in
             * the same block.
             */
            while ( (c = peek(f, size+1)) != '>' )
                if ( c == EOF )
                    return 0;
                else
                    size++;

            if ( forbidden_tag(f) )
                return 0;

            /* copy the tag through untouched, up to (not including) the '>' */
            Qchar('<', f);
            while ( ((c = peek(f, 1)) != EOF) && (c != '>') )
                Qchar(pull(f), f);
            return 1;
        }
        /* the word ended at '>' (not whitespace): try it as an automatic link */
        else if ( !isspace(c) && process_possible_link(f, size) ) {
            shift(f, size+1);	/* skip the link text and the closing '>' */
            return 1;
        }
    }

    return 0;
}
1086
1087
1088 /* autolinking means that all inline html is <a href'ified>. A
1089 * autolink url is alphanumerics, slashes, periods, underscores,
1090 * the at sign, colon, and the % character.
1091 */
1092 static int
maybe_autolink(MMIOT * f)1093 maybe_autolink(MMIOT *f)
1094 {
1095 register int c;
1096 int size;
1097
1098 /* greedily scan forward for the end of a legitimate link.
1099 */
1100 for ( size=0; (c=peek(f, size+1)) != EOF; size++ ) {
1101 if ( c == '\\' ) {
1102 if ( peek(f, size+2) != EOF )
1103 ++size;
1104 }
1105 else if ( c & 0x80 ) /* HACK: ignore utf-8 extended characters */
1106 continue;
1107 else if ( isspace(c) || strchr("'\"()[]{}<>`", c) || c == MKD_EOLN )
1108 break;
1109 }
1110
1111 if ( (size > 1) && process_possible_link(f, size) ) {
1112 shift(f, size);
1113 return 1;
1114 }
1115 return 0;
1116 }
1117
1118
1119 /* smartyquote code that's common for single and double quotes
1120 */
1121 static int
smartyquote(int * flags,char typeofquote,MMIOT * f)1122 smartyquote(int *flags, char typeofquote, MMIOT *f)
1123 {
1124 int bit = (typeofquote == 's') ? 0x01 : 0x02;
1125
1126 if ( bit & (*flags) ) {
1127 if ( isthisnonword(f,1) ) {
1128 Qprintf(f, "&r%cquo;", typeofquote);
1129 (*flags) &= ~bit;
1130 return 1;
1131 }
1132 }
1133 else if ( isthisnonword(f,-1) && peek(f,1) != EOF ) {
1134 Qprintf(f, "&l%cquo;", typeofquote);
1135 (*flags) |= bit;
1136 return 1;
1137 }
1138 return 0;
1139 }
1140
1141
1142 static int
islike(MMIOT * f,char * s)1143 islike(MMIOT *f, char *s)
1144 {
1145 int len;
1146 int i;
1147
1148 if ( s[0] == '|' ) {
1149 if ( !isthisnonword(f, -1) )
1150 return 0;
1151 ++s;
1152 }
1153
1154 if ( !(len = strlen(s)) )
1155 return 0;
1156
1157 if ( s[len-1] == '|' ) {
1158 if ( !isthisnonword(f,len-1) )
1159 return 0;
1160 len--;
1161 }
1162
1163 for (i=1; i < len; i++)
1164 if (tolower(peek(f,i)) != s[i])
1165 return 0;
1166 return 1;
1167 }
1168
1169
/* smartypants substitution table.
 *
 * c0      the character that triggers the check
 * pat     the pattern handed to islike(); a leading or trailing '|'
 *         demands a word boundary there
 * entity  name of the entity to write (without & and ;), or 0 to write
 *         nothing
 * shift   how many further input characters to skip after a match
 *
 * The '&' entry swallows a literal "&#0;" (4 characters: the trigger
 * plus a shift of 3) without writing anything.
 */
static struct smarties {
    char c0;
    char *pat;
    char *entity;
    int shift;
} smarties[] = {
    { '\'', "'s|",      "rsquo",  0 },
    { '\'', "'t|",      "rsquo",  0 },
    { '\'', "'re|",     "rsquo",  0 },
    { '\'', "'ll|",     "rsquo",  0 },
    { '\'', "'ve|",     "rsquo",  0 },
    { '\'', "'m|",      "rsquo",  0 },
    { '\'', "'d|",      "rsquo",  0 },
    { '-',  "---",      "mdash",  2 },
    { '-',  "--",       "ndash",  1 },
    { '.',  "...",      "hellip", 2 },
    { '.',  ". . .",    "hellip", 4 },
    { '(',  "(c)",      "copy",   2 },
    { '(',  "(r)",      "reg",    2 },
    { '(',  "(tm)",     "trade",  3 },
    { '3',  "|3/4|",    "frac34", 2 },
    { '3',  "|3/4ths|", "frac34", 2 },
    { '1',  "|1/2|",    "frac12", 2 },
    { '1',  "|1/4|",    "frac14", 2 },
    { '1',  "|1/4th|",  "frac14", 2 },
    { '&',  "&#0;",      0,       3 },
} ;
#define NRSMART ( sizeof smarties / sizeof smarties[0] )
1198
1199
1200 /* Smarty-pants-style chrome for quotes, -, ellipses, and (r)(c)(tm)
1201 */
1202 static int
smartypants(int c,int * flags,MMIOT * f)1203 smartypants(int c, int *flags, MMIOT *f)
1204 {
1205 int i;
1206
1207 if ( is_flag_set(f->flags, MKD_NOPANTS)
1208 || is_flag_set(f->flags, MKD_TAGTEXT)
1209 || is_flag_set(f->flags, IS_LABEL) )
1210 return 0;
1211
1212 for ( i=0; i < NRSMART; i++)
1213 if ( (c == smarties[i].c0) && islike(f, smarties[i].pat) ) {
1214 if ( smarties[i].entity )
1215 Qprintf(f, "&%s;", smarties[i].entity);
1216 shift(f, smarties[i].shift);
1217 return 1;
1218 }
1219
1220 switch (c) {
1221 case '<' : return 0;
1222 case '\'': if ( smartyquote(flags, 's', f) ) return 1;
1223 break;
1224
1225 case '"': if ( smartyquote(flags, 'd', f) ) return 1;
1226 break;
1227
1228 case '`': if ( peek(f, 1) == '`' ) {
1229 int j = 2;
1230
1231 while ( (c=peek(f,j)) != EOF ) {
1232 if ( c == '\\' )
1233 j += 2;
1234 else if ( c == '`' )
1235 break;
1236 else if ( c == '\'' && peek(f, j+1) == '\'' ) {
1237 Qstring("“", f);
1238 ___mkd_reparse(cursor(f)+1, j-2, 0, f, 0);
1239 Qstring("”", f);
1240 shift(f,j+1);
1241 return 1;
1242 }
1243 else ++j;
1244 }
1245
1246 }
1247 break;
1248 }
1249 return 0;
1250 } /* smartypants */
1251
1252
1253 /* process latex with arbitrary 2-character ( $$ .. $$, \[ .. \], \( .. \)
1254 * delimiters
1255 */
1256 static int
mathhandler(MMIOT * f,int e1,int e2)1257 mathhandler(MMIOT *f, int e1, int e2)
1258 {
1259 int i = 0;
1260
1261 while(peek(f, ++i) != EOF) {
1262 if (peek(f, i) == e1 && peek(f, i+1) == e2) {
1263 cputc(peek(f,-1), f);
1264 cputc(peek(f, 0), f);
1265 while ( i-- > -1 )
1266 cputc(pull(f), f);
1267 return 1;
1268 }
1269 }
1270 return 0;
1271 }
1272
1273
1274 /* process a body of text encased in some sort of tick marks. If it
1275 * works, generate the output and return 1, otherwise just return 0 and
1276 * let the caller figure it out.
1277 */
1278 static int
tickhandler(MMIOT * f,int tickchar,int minticks,int allow_space,spanhandler spanner)1279 tickhandler(MMIOT *f, int tickchar, int minticks, int allow_space, spanhandler spanner)
1280 {
1281 int endticks, size;
1282 int tick = nrticks(0, tickchar, f);
1283
1284 if ( !allow_space && isspace(peek(f,tick)) )
1285 return 0;
1286
1287 if ( (tick >= minticks) && (size = matchticks(f,tickchar,tick,&endticks)) ) {
1288 if ( endticks < tick ) {
1289 size += (tick - endticks);
1290 tick = endticks;
1291 }
1292
1293 shift(f, tick);
1294 (*spanner)(f,size);
1295 shift(f, size+tick-1);
1296 return 1;
1297 }
1298 return 0;
1299 }
1300
/* true when output is being generated for use inside a tag attribute;
 * the argument is parenthesized so any pointer expression can be passed.
 */
#define tag_text(f)	is_flag_set((f)->flags, MKD_TAGTEXT)
1302
1303
1304 static void
text(MMIOT * f)1305 text(MMIOT *f)
1306 {
1307 int c, j;
1308 int rep;
1309 int smartyflags = 0;
1310
1311 while (1) {
1312 if ( is_flag_set(f->flags, MKD_AUTOLINK) && isalpha(peek(f,1)) && !tag_text(f) )
1313 maybe_autolink(f);
1314
1315 c = pull(f);
1316
1317 if (c == EOF)
1318 break;
1319
1320 if ( smartypants(c, &smartyflags, f) )
1321 continue;
1322 switch (c) {
1323 case 0: break;
1324
1325 case MKD_EOLN:
1326 Qstring(tag_text(f) ? " " : "<br/>", f);
1327 break;
1328
1329 case '>': if ( tag_text(f) )
1330 Qstring(">", f);
1331 else
1332 Qchar(c, f);
1333 break;
1334
1335 case '"': if ( tag_text(f) )
1336 Qstring(""", f);
1337 else
1338 Qchar(c, f);
1339 break;
1340
1341 case '!': if ( peek(f,1) == '[' ) {
1342 pull(f);
1343 if ( tag_text(f) || !linkylinky(1, f) )
1344 Qstring("![", f);
1345 }
1346 else
1347 Qchar(c, f);
1348 break;
1349
1350 case '[': if ( tag_text(f) || !linkylinky(0, f) )
1351 Qchar(c, f);
1352 break;
1353 /* A^B -> A<sup>B</sup> */
1354 case '^': if ( is_flag_set(f->flags, MKD_NOSUPERSCRIPT)
1355 || is_flag_set(f->flags, MKD_STRICT)
1356 || is_flag_set(f->flags, MKD_TAGTEXT)
1357 || (f->last == 0)
1358 || ((ispunct(f->last) || isspace(f->last))
1359 && f->last != ')')
1360 || isthisspace(f,1) )
1361 Qchar(c,f);
1362 else {
1363 char *sup = cursor(f);
1364 int len = 0;
1365
1366 if ( peek(f,1) == '(' ) {
1367 int here = mmiottell(f);
1368 pull(f);
1369
1370 if ( (len = parenthetical('(',')',f)) <= 0 ) {
1371 mmiotseek(f,here);
1372 Qchar(c, f);
1373 break;
1374 }
1375 sup++;
1376 }
1377 else {
1378 while ( isthisalnum(f,1+len) )
1379 ++len;
1380 if ( !len ) {
1381 Qchar(c,f);
1382 break;
1383 }
1384 shift(f,len);
1385 }
1386 Qstring("<sup>",f);
1387 ___mkd_reparse(sup, len, 0, f, "()");
1388 Qstring("</sup>", f);
1389 }
1390 break;
1391 case '_':
1392 /* Underscores don't count if they're in the middle of a word */
1393 if ( !(is_flag_set(f->flags, MKD_NORELAXED) || is_flag_set(f->flags, MKD_STRICT))
1394 && isthisalnum(f,-1) && isthisalnum(f,1) ) {
1395 Qchar(c, f);
1396 break;
1397 }
1398 case '*':
1399 /* Underscores & stars don't count if they're out in the middle
1400 * of whitespace */
1401 if ( isthisspace(f,-1) && isthisspace(f,1) ) {
1402 Qchar(c, f);
1403 break;
1404 }
1405 /* else fall into the regular old emphasis case */
1406 if ( tag_text(f) )
1407 Qchar(c, f);
1408 else {
1409 for (rep = 1; peek(f,1) == c; pull(f) )
1410 ++rep;
1411 Qem(f,c,rep);
1412 }
1413 break;
1414
1415 case '~': if ( is_flag_set(f->flags, MKD_NOSTRIKETHROUGH)
1416 || is_flag_set(f->flags, MKD_STRICT)
1417 || is_flag_set(f->flags, MKD_TAGTEXT)
1418 || ! tickhandler(f,c,2,0, delspan) )
1419 Qchar(c, f);
1420 break;
1421
1422 case '`': if ( tag_text(f) || !tickhandler(f,c,1,1,codespan) )
1423 Qchar(c, f);
1424 break;
1425
1426 case '\\': switch ( c = pull(f) ) {
1427 case '&': Qstring("&", f);
1428 break;
1429 case '<': c = peek(f,1);
1430 if ( (c == EOF) || isspace(c) )
1431 Qstring("<", f);
1432 else {
1433 /* Markdown.pl does not escape <[nonwhite]
1434 * sequences */
1435 Qchar('\\', f);
1436 shift(f, -1);
1437 }
1438
1439 break;
1440 case '^': if ( is_flag_set(f->flags, MKD_STRICT)
1441 || is_flag_set(f->flags, MKD_NOSUPERSCRIPT) ) {
1442 Qchar('\\', f);
1443 shift(f,-1);
1444 break;
1445 }
1446 Qchar(c, f);
1447 break;
1448
1449 case ':': case '|':
1450 if ( is_flag_set(f->flags, MKD_NOTABLES) ) {
1451 Qchar('\\', f);
1452 shift(f,-1);
1453 break;
1454 }
1455 Qchar(c, f);
1456 break;
1457
1458 case EOF: Qchar('\\', f);
1459 break;
1460
1461 case '[':
1462 case '(': if ( is_flag_set(f->flags, MKD_LATEX)
1463 && mathhandler(f, '\\', (c =='(')?')':']') )
1464 break;
1465 /* else fall through to default */
1466
1467 default: if ( escaped(f,c) ||
1468 strchr(">#.-+{}]![*_\\()`", c) )
1469 Qchar(c, f);
1470 else {
1471 Qchar('\\', f);
1472 shift(f, -1);
1473 }
1474 break;
1475 }
1476 break;
1477
1478 case '<': if ( !maybe_tag_or_link(f) )
1479 Qstring("<", f);
1480 break;
1481
1482 case '&': j = (peek(f,1) == '#' ) ? 2 : 1;
1483 while ( isthisalnum(f,j) )
1484 ++j;
1485
1486 if ( peek(f,j) != ';' )
1487 Qstring("&", f);
1488 else
1489 Qchar(c, f);
1490 break;
1491
1492 case '$': if ( is_flag_set(f->flags, MKD_LATEX) && (peek(f, 1) == '$') ) {
1493 pull(f);
1494 if ( mathhandler(f, '$', '$') )
1495 break;
1496 Qchar('$', f);
1497 }
1498 /* fall through to default */
1499
1500 default: f->last = c;
1501 Qchar(c, f);
1502 break;
1503 }
1504 }
1505 /* truncate the input string after we've finished processing it */
1506 S(f->in) = f->isp = 0;
1507 } /* text */
1508
1509
1510 /* print a header block
1511 */
1512 static void
printheader(Paragraph * pp,MMIOT * f)1513 printheader(Paragraph *pp, MMIOT *f)
1514 {
1515 if ( is_flag_set(f->flags, MKD_IDANCHOR) ) {
1516 Qprintf(f, "<h%d", pp->hnumber);
1517 if ( is_flag_set(f->flags, MKD_TOC) ) {
1518 Qstring(" id=\"", f);
1519 Qanchor(pp->text, f);
1520 Qchar('"', f);
1521 }
1522 Qchar('>', f);
1523 } else {
1524 if ( is_flag_set(f->flags, MKD_TOC) ) {
1525 Qstring("<a name=\"", f);
1526 Qanchor(pp->text, f);
1527 Qstring("\"></a>\n", f);
1528 }
1529 Qprintf(f, "<h%d>", pp->hnumber);
1530 }
1531 push(T(pp->text->text), S(pp->text->text), f);
1532 text(f);
1533 Qprintf(f, "</h%d>", pp->hnumber);
1534 }
1535
1536
/* table cell alignments; each value is an index into alignments[] */
enum e_alignments {
    a_NONE,
    a_CENTER,
    a_LEFT,
    a_RIGHT
};

/* attribute text added to a <th>/<td> tag for each alignment */
static char* alignments[] = {
    [a_NONE]   = "",
    [a_CENTER] = " style=\"text-align:center;\"",
    [a_LEFT]   = " style=\"text-align:left;\"",
    [a_RIGHT]  = " style=\"text-align:right;\""
};
1542
1543 typedef STRING(int) Istring;
1544
1545 static int
splat(Line * p,char * block,Istring align,int force,MMIOT * f)1546 splat(Line *p, char *block, Istring align, int force, MMIOT *f)
1547 {
1548 int first,
1549 idx = p->dle,
1550 colno = 0;
1551
1552
1553 ___mkd_tidy(&p->text);
1554 if ( T(p->text)[S(p->text)-1] == '|' )
1555 --S(p->text);
1556
1557 Qstring("<tr>\n", f);
1558 while ( idx < S(p->text) ) {
1559 first = idx;
1560 if ( force && (colno >= S(align)-1) )
1561 idx = S(p->text);
1562 else
1563 while ( (idx < S(p->text)) && (T(p->text)[idx] != '|') ) {
1564 if ( T(p->text)[idx] == '\\' )
1565 ++idx;
1566 ++idx;
1567 }
1568
1569 Qprintf(f, "<%s%s>",
1570 block,
1571 alignments[ (colno < S(align)) ? T(align)[colno] : a_NONE ]);
1572 ___mkd_reparse(T(p->text)+first, idx-first, 0, f, "|");
1573 Qprintf(f, "</%s>\n", block);
1574 idx++;
1575 colno++;
1576 }
1577 if ( force )
1578 while (colno < S(align) ) {
1579 Qprintf(f, "<%s></%s>\n", block, block);
1580 ++colno;
1581 }
1582 Qstring("</tr>\n", f);
1583 return colno;
1584 }
1585
1586
1587 static int
printtable(Paragraph * pp,MMIOT * f)1588 printtable(Paragraph *pp, MMIOT *f)
1589 {
1590 /* header, dashes, then lines of content */
1591
1592 Line *hdr, *dash, *body;
1593 Istring align;
1594 int hcols,start;
1595 char *p;
1596 enum e_alignments it;
1597
1598 hdr = pp->text;
1599 dash= hdr->next;
1600 body= dash->next;
1601
1602 if ( T(hdr->text)[hdr->dle] == '|' ) {
1603 /* trim leading pipe off all lines
1604 */
1605 Line *r;
1606 for ( r = pp->text; r; r = r->next )
1607 r->dle ++;
1608 }
1609
1610 /* figure out cell alignments */
1611
1612 CREATE(align);
1613
1614 for (p=T(dash->text), start=dash->dle; start < S(dash->text); ) {
1615 char first, last;
1616 int end;
1617
1618 last=first=0;
1619 for (end=start ; (end < S(dash->text)) && p[end] != '|'; ++ end ) {
1620 if ( p[end] == '\\' )
1621 ++ end;
1622 else if ( !isspace(p[end]) ) {
1623 if ( !first) first = p[end];
1624 last = p[end];
1625 }
1626 }
1627 it = ( first == ':' ) ? (( last == ':') ? a_CENTER : a_LEFT)
1628 : (( last == ':') ? a_RIGHT : a_NONE );
1629
1630 EXPAND(align) = it;
1631 start = 1+end;
1632 }
1633
1634 Qstring("<table>\n", f);
1635 Qstring("<thead>\n", f);
1636 hcols = splat(hdr, "th", align, 0, f);
1637 Qstring("</thead>\n", f);
1638
1639 if ( hcols < S(align) )
1640 S(align) = hcols;
1641 else
1642 while ( hcols > S(align) )
1643 EXPAND(align) = a_NONE;
1644
1645 Qstring("<tbody>\n", f);
1646 for ( ; body; body = body->next)
1647 splat(body, "td", align, 1, f);
1648 Qstring("</tbody>\n", f);
1649 Qstring("</table>\n", f);
1650
1651 DELETE(align);
1652 return 1;
1653 }
1654
1655
1656 static int
printblock(Paragraph * pp,MMIOT * f)1657 printblock(Paragraph *pp, MMIOT *f)
1658 {
1659 static char *Begin[] = { "", "<p>", "<p style=\"text-align:center;\">" };
1660 static char *End[] = { "", "</p>","</p>" };
1661 Line *t = pp->text;
1662 int align = pp->align;
1663
1664 while (t) {
1665 if ( S(t->text) ) {
1666 if ( t->next && S(t->text) > 2
1667 && T(t->text)[S(t->text)-2] == ' '
1668 && T(t->text)[S(t->text)-1] == ' ' ) {
1669 push(T(t->text), S(t->text)-2, f);
1670 pushc(MKD_EOLN, f);
1671 pushc('\n', f);
1672 }
1673 else {
1674 ___mkd_tidy(&t->text);
1675 push(T(t->text), S(t->text), f);
1676 if ( t->next )
1677 pushc('\n', f);
1678 }
1679 }
1680 t = t->next;
1681 }
1682 Qstring(Begin[align], f);
1683 text(f);
1684 Qstring(End[align], f);
1685 return 1;
1686 }
1687
1688
1689 static void
printcode(Line * t,char * lang,MMIOT * f)1690 printcode(Line *t, char *lang, MMIOT *f)
1691 {
1692 int blanks;
1693
1694 if ( f->cb->e_codefmt ) {
1695 /* external code block formatter; copy the text into a buffer,
1696 * call the formatter to style it, then dump that styled text
1697 * directly to the queue
1698 */
1699 char *text;
1700 char *fmt;
1701 int size, copy_p;
1702 Line *p;
1703
1704 for (size=0, p = t; p; p = p->next )
1705 size += 1+S(p->text);
1706
1707 text = malloc(1+size);
1708
1709 for ( copy_p = 0; t ; t = t->next ) {
1710 memcpy(text+copy_p, T(t->text), S(t->text));
1711 copy_p += S(t->text);
1712 text[copy_p++] = '\n';
1713 }
1714 text[copy_p] = 0;
1715
1716 fmt = (*(f->cb->e_codefmt))(text, copy_p, (lang && lang[0]) ? lang : 0);
1717 free(text);
1718
1719 if ( fmt ) {
1720 Qwrite(fmt, strlen(fmt), f);
1721 if ( f->cb->e_free )
1722 (*(f->cb->e_free))(fmt, f->cb->e_data);
1723 return;
1724 }
1725 /* otherwise the external formatter failed and we need to
1726 * fall back to the traditional codeblock format
1727 */
1728 }
1729
1730 Qstring("<pre><code", f);
1731 if (lang && lang[0]) {
1732 Qstring(" class=\"", f);
1733 Qstring(lang, f);
1734 Qstring("\"", f);
1735 }
1736 Qstring(">", f);
1737 for ( blanks = 0; t ; t = t->next ) {
1738 if ( S(t->text) > t->dle ) {
1739 while ( blanks ) {
1740 Qchar('\n', f);
1741 --blanks;
1742 }
1743 code(f, T(t->text), S(t->text));
1744 Qchar('\n', f);
1745 }
1746 else blanks++;
1747 }
1748 Qstring("</code></pre>", f);
1749 }
1750
1751
1752 static void
printhtml(Line * t,MMIOT * f)1753 printhtml(Line *t, MMIOT *f)
1754 {
1755 int blanks;
1756
1757 for ( blanks=0; t ; t = t->next )
1758 if ( S(t->text) ) {
1759 for ( ; blanks; --blanks )
1760 Qchar('\n', f);
1761
1762 Qwrite(T(t->text), S(t->text), f);
1763 Qchar('\n', f);
1764 }
1765 else
1766 blanks++;
1767 }
1768
1769
1770 static void
htmlify_paragraphs(Paragraph * p,MMIOT * f)1771 htmlify_paragraphs(Paragraph *p, MMIOT *f)
1772 {
1773 ___mkd_emblock(f);
1774
1775 while (( p = display(p, f) )) {
1776 ___mkd_emblock(f);
1777 Qstring("\n\n", f);
1778 }
1779 }
1780
1781
#ifdef GITHUB_CHECKBOX
/* Emit one list item, with github-style checkbox support.
 *
 *   p          paragraphs making up the item body
 *   arguments  extra attribute text for the <li> tag, or null
 *   flags      GITHUB_CHECK marks a checkbox item, IS_CHECKED a
 *              ticked one
 *
 * A checkbox item gets class="github_checkbox" and, ahead of its body,
 * either a disabled <input> (when CHECKBOX_AS_INPUT is non-zero) or a
 * ballot-box character.
 */
static void
li_htmlify(Paragraph *p, char *arguments, mkd_flag_t flags, MMIOT *f)
{
    int is_checkbox = (flags & GITHUB_CHECK) != 0;
    int is_ticked = (flags & IS_CHECKED) != 0;

    ___mkd_emblock(f);

    /* opening tag */
    Qprintf(f, "<li");
    if ( arguments )
        Qprintf(f, " %s", arguments);
    if ( is_checkbox )
        Qprintf(f, " class=\"github_checkbox\"");
    Qprintf(f, ">");

    /* the checkbox itself */
#if CHECKBOX_AS_INPUT
    if ( is_checkbox ) {
        Qprintf(f, "<input disabled=\"\" type=\"checkbox\"");
        if ( is_ticked )
            Qprintf(f, " checked=\"checked\"");
        Qprintf(f, "/>");
    }
#else
    if ( is_checkbox ) {
        if ( is_ticked )
            Qprintf(f, "☑");
        else
            Qprintf(f, "☐");
    }
#endif

    htmlify_paragraphs(p, f);

    Qprintf(f, "</li>");
    ___mkd_emblock(f);
}
#endif
1812
1813
1814 static void
htmlify(Paragraph * p,char * block,char * arguments,MMIOT * f)1815 htmlify(Paragraph *p, char *block, char *arguments, MMIOT *f)
1816 {
1817 ___mkd_emblock(f);
1818 if ( block )
1819 Qprintf(f, arguments ? "<%s %s>" : "<%s>", block, arguments);
1820
1821 htmlify_paragraphs(p, f);
1822
1823 if ( block )
1824 Qprintf(f, "</%s>", block);
1825 ___mkd_emblock(f);
1826 }
1827
1828
1829 static void
definitionlist(Paragraph * p,MMIOT * f)1830 definitionlist(Paragraph *p, MMIOT *f)
1831 {
1832 Line *tag;
1833
1834 if ( p ) {
1835 Qstring("<dl>\n", f);
1836
1837 for ( ; p ; p = p->next) {
1838 for ( tag = p->text; tag; tag = tag->next ) {
1839 Qstring("<dt>", f);
1840 ___mkd_reparse(T(tag->text), S(tag->text), 0, f, 0);
1841 Qstring("</dt>\n", f);
1842 }
1843
1844 htmlify(p->down, "dd", p->ident, f);
1845 Qchar('\n', f);
1846 }
1847
1848 Qstring("</dl>", f);
1849 }
1850 }
1851
1852
1853 static void
listdisplay(int typ,Paragraph * p,MMIOT * f)1854 listdisplay(int typ, Paragraph *p, MMIOT* f)
1855 {
1856 if ( p ) {
1857 Qprintf(f, "<%cl", (typ==UL)?'u':'o');
1858 if ( typ == AL )
1859 Qprintf(f, " type=\"a\"");
1860 Qprintf(f, ">\n");
1861
1862 for ( ; p ; p = p->next ) {
1863 #ifdef GITHUB_CHECKBOX
1864 li_htmlify(p->down, p->ident, p->flags, f);
1865 #else
1866 htmlify(p->down, "li", p->ident, f);
1867 #endif
1868 Qchar('\n', f);
1869 }
1870
1871 Qprintf(f, "</%cl>\n", (typ==UL)?'u':'o');
1872 }
1873 }
1874
1875
1876 /* dump out a Paragraph in the desired manner
1877 */
1878 static Paragraph*
display(Paragraph * p,MMIOT * f)1879 display(Paragraph *p, MMIOT *f)
1880 {
1881 if ( !p ) return 0;
1882
1883 switch ( p->typ ) {
1884 case STYLE:
1885 case WHITESPACE:
1886 break;
1887
1888 case HTML:
1889 printhtml(p->text, f);
1890 break;
1891
1892 case CODE:
1893 printcode(p->text, p->lang, f);
1894 break;
1895
1896 case QUOTE:
1897 htmlify(p->down, p->ident ? "div" : "blockquote", p->ident, f);
1898 break;
1899
1900 case UL:
1901 case OL:
1902 case AL:
1903 listdisplay(p->typ, p->down, f);
1904 break;
1905
1906 case DL:
1907 definitionlist(p->down, f);
1908 break;
1909
1910 case HR:
1911 Qstring("<hr />", f);
1912 break;
1913
1914 case HDR:
1915 printheader(p, f);
1916 break;
1917
1918 case TABLE:
1919 printtable(p, f);
1920 break;
1921
1922 case SOURCE:
1923 htmlify(p->down, 0, 0, f);
1924 break;
1925
1926 default:
1927 printblock(p, f);
1928 break;
1929 }
1930 return p->next;
1931 }
1932
1933
1934 /* dump out a list of footnotes
1935 */
1936 static void
mkd_extra_footnotes(MMIOT * m)1937 mkd_extra_footnotes(MMIOT *m)
1938 {
1939 int j, i;
1940 Footnote *t;
1941
1942 if ( m->footnotes->reference == 0 )
1943 return;
1944
1945 Csprintf(&m->out, "\n<div class=\"footnotes\">\n<hr/>\n<ol>\n");
1946
1947 for ( i=1; i <= m->footnotes->reference; i++ ) {
1948 for ( j=0; j < S(m->footnotes->note); j++ ) {
1949 t = &T(m->footnotes->note)[j];
1950 if ( (t->refnumber == i) && (t->flags & REFERENCED) ) {
1951 Csprintf(&m->out, "<li id=\"%s:%d\">\n",
1952 p_or_nothing(m), t->refnumber);
1953 htmlify(t->text, 0, 0, m);
1954 Csprintf(&m->out, "<a href=\"#%sref:%d\" rev=\"footnote\">↩</a>",
1955 p_or_nothing(m), t->refnumber);
1956 Csprintf(&m->out, "</li>\n");
1957 }
1958 }
1959 }
1960 Csprintf(&m->out, "</ol>\n</div>\n");
1961 }
1962
1963
1964 /* return a pointer to the compiled markdown
1965 * document.
1966 */
1967 int
mkd_document(Document * p,char ** res)1968 mkd_document(Document *p, char **res)
1969 {
1970 int size;
1971
1972 if ( p && p->compiled ) {
1973 if ( ! p->html ) {
1974 htmlify(p->code, 0, 0, p->ctx);
1975 if ( is_flag_set(p->ctx->flags, MKD_EXTRA_FOOTNOTE) )
1976 mkd_extra_footnotes(p->ctx);
1977 p->html = 1;
1978 size = S(p->ctx->out);
1979
1980 if ( (size == 0) || T(p->ctx->out)[size-1] ) {
1981 /* Add a null byte at the end of the generated html,
1982 * but pretend it doesn't exist.
1983 */
1984 EXPAND(p->ctx->out) = 0;
1985 --S(p->ctx->out);
1986 }
1987 }
1988
1989 *res = T(p->ctx->out);
1990 return S(p->ctx->out);
1991 }
1992 return EOF;
1993 }
1994