1 /* smi - simple markup interpreter
2 * Copyright (C) <2014> Chris Hutchinson <portmaster bsdforge com>
3 * based on work (C) <2007, 2008> Enno boland <g s01 de>
4 *
5 * See LICENSE for terms, and usage information
6 */
7
8 #include <stdlib.h>
9 #include <stdio.h>
10 #include <stdarg.h>
11 #include <string.h>
12
/* Growth step, in elements, of every dynamically sized text buffer. */
#define BUFFERSIZE 512
/* Number of elements of a true array (not a pointer). Fully parenthesized so
 * that it can be embedded in larger expressions such as n / LENGTH(a). */
#define LENGTH(x) (sizeof(x) / sizeof((x)[0]))
/* Yields the lvalue b[i]; whenever i is a multiple of BUFFERSIZE, b is first
 * resized to hold i + BUFFERSIZE elements (eprint() terminates the program if
 * that fails). Expands to an unbraced `if` followed by an expression, so use
 * it only as a complete statement of the form `ADDC(buf, n) = value;` and
 * never as the body of an unbraced if/else. Arguments are evaluated more than
 * once and must be free of side effects. */
#define ADDC(b,i) if((i) % BUFFERSIZE == 0) \
	{ (b) = realloc((b), ((i) + BUFFERSIZE) * sizeof(*(b))); if(!(b)) eprint("Malloc failed."); } (b)[i]
17
18
/* Signature shared by all parsers. Arguments are the current position, the
 * end of the range being processed and the "new block" flag. process() treats
 * a result of 0 as "not handled", otherwise advances by the absolute value of
 * the result; a negative result additionally starts a new block. */
19 typedef int (*Parser)(const char *, const char *, int);
/* One markup rule of the lineprefix, underline and surround tables. */
20 struct Tag {
/* text that triggers the rule */
21 char *search;
/* 0: content is HTML-escaped with hprint(); non-zero: content is passed to
 * process() (dolineprefix processes it as a block when the value is >= 2) */
22 int process;
/* HTML written before and after the content */
23 char *before, *after;
24 };
25
26
/* All do*() functions follow the Parser signature: they return 0 when the
 * text at begin is not handled, otherwise the number of input bytes handled
 * (negated by block-level parsers, see process()). */
27 void eprint(const char *format, ...); /* Prints error and exits */
28 int doamp(const char *begin, const char *end, int newblock);
29 /* Parser for & */
30 int dogtlt(const char *begin, const char *end, int newblock);
31 /* Parser for < and > */
32 int dohtml(const char *begin, const char *end, int newblock);
33 /* Parser for html blocks, which are copied to stdout unchanged */
34 int dolineprefix(const char *begin, const char *end, int newblock);
35 /* Parser for line prefix tags (see lineprefix[]) */
36 int dolink(const char *begin, const char *end, int newblock);
37 /* Parser for links and images */
38 int dolist(const char *begin, const char *end, int newblock);
39 /* Parser for ordered and unordered lists */
40 int doparagraph(const char *begin, const char *end, int newblock);
41 /* Parser for paragraphs */
42 int doreplace(const char *begin, const char *end, int newblock);
43 /* Parser for simple replaces (see replace[] and insert[]) */
44 int doshortlink(const char *begin, const char *end, int newblock);
45 /* Parser for <url> and <mail@address> short links */
46 int dosurround(const char *begin, const char *end, int newblock);
47 /* Parser for surrounding tags (see surround[]) */
48 int dounderline(const char *begin, const char *end, int newblock);
49 /* Parser for underline tags (see underline[]) */
50 void hprint(const char *begin, const char *end); /* escapes HTML and prints it to stdout*/
51 void process(const char *begin, const char *end, int isblock);
52 /* Processes range between begin and end. */
53
54 Parser parsers[] = { dounderline, dohtml, dolineprefix, dolist, doparagraph,
55 dogtlt, dosurround, dolink, doshortlink, doamp, doreplace };
56 /* list of parsers; process() tries them in this order and stops at the
 * first one that returns a non-zero value */
/* input stream, set up and closed by main() */
57 FILE *source;
/* set by the -n option; when non-zero, dogtlt and dohtml are inactive and
 * process() escapes unhandled characters through hprint() */
58 unsigned int nohtml = 0;
/* Rules matched at the start of a line by dolineprefix(): the prefix is
 * stripped from each consecutive line carrying it and the collected text is
 * wrapped in before/after. A search string ending in '\n' stands for a
 * complete line; only `before` is written for it. */
59 struct Tag lineprefix[] = {
60 { " ", 0, "<pre><code>", "</code></pre>" },
61 { "\t", 0, "<pre><code>", "</code></pre>" },
62 { "> ", 2, "<blockquote>", "</blockquote>" },
63 { "###### ", 1, "<h6>", "</h6>" },
64 { "##### ", 1, "<h5>", "</h5>" },
65 { "#### ", 1, "<h4>", "</h4>" },
66 { "### ", 1, "<h3>", "</h3>" },
67 { "## ", 1, "<h2>", "</h2>" },
68 { "# ", 1, "<h1>", "</h1>" },
69 { "- - -\n", 1, "<hr />", ""},
70 };
/* Rules for dounderline(): a line followed by a line that starts with at
 * least as many copies of search[0] as the first line has characters. */
71 struct Tag underline[] = {
72 { "=", 1, "<h1>", "</h1>\n" },
73 { "-", 1, "<h2>", "</h2>\n" },
74 };
/* Rules for dosurround(): text enclosed in a pair of identical delimiters.
 * Entries are tried in order, so a longer delimiter has to be listed before
 * any delimiter that is a prefix of it. */
75 struct Tag surround[] = {
76 { "``", 0, "<code>", "</code>" },
77 { "`", 0, "<code>", "</code>" },
78 { "___", 1, "<strong><em>", "</em></strong>" },
79 { "***", 1, "<strong><em>", "</em></strong>" },
80 { "__", 1, "<strong>", "</strong>" },
81 { "**", 1, "<strong>", "</strong>" },
82 { "_", 1, "<em>", "</em>" },
83 { "*", 1, "<em>", "</em>" },
84 };
/* { input sequence, output } pairs for doreplace(): the input sequence is
 * consumed and the output written in its place. */
85 char * replace[][2] = {
86 { "\\\\", "\\" },
87 { "\\`", "`" },
88 { "\\*", "*" },
89 { "\\_", "_" },
90 { "\\{", "{" },
91 { "\\}", "}" },
92 { "\\[", "[" },
93 { "\\]", "]" },
94 { "\\(", "(" },
95 { "\\)", ")" },
96 { "\\#", "#" },
97 { "\\+", "+" },
98 { "\\-", "-" },
99 { "\\.", "." },
100 { "\\!", "!" },
101 };
/* { input sequence, output } pairs for doreplace(): the output is written in
 * front of the input sequence, which itself is not consumed. */
102 char * insert[][2] = {
103 { " \n", "<br />" },
104 };
105
/* Writes a printf-style formatted message to stderr, then terminates the
 * program with EXIT_FAILURE. Never returns. */
void
eprint(const char *format, ...) {
	va_list args;

	va_start(args, format);
	vfprintf(stderr, format, args);
	va_end(args);

	exit(EXIT_FAILURE);
}
115
116 int
doamp(const char * begin,const char * end,int newblock)117 doamp(const char *begin, const char *end, int newblock) {
118 const char *p;
119
120 if(*begin != '&')
121 return 0;
122 if(!nohtml) {
123 for(p = begin + 1; p != end && !strchr("; \\\n\t", *p); p++);
124 if(p == end || *p == ';')
125 return 0;
126 }
127 fputs("&", stdout);
128 return 1;
129 }
130
131 int
dogtlt(const char * begin,const char * end,int newblock)132 dogtlt(const char *begin, const char *end, int newblock) {
133 int brpos;
134 char c;
135
136 if(nohtml || begin + 1 >= end)
137 return 0;
138 brpos = begin[1] == '>';
139 if(!brpos && *begin != '<')
140 return 0;
141 c = begin[brpos ? 0 : 1];
142 if(!brpos && (c < 'a' || c > 'z') && (c < 'A' || c > 'Z')) {
143 fputs("<",stdout);
144 return 1;
145 }
146 else if(brpos && (c < 'a' || c > 'z') && (c < 'A' || c > 'Z') && !strchr("/\"'",c)) {
147 printf("%c>",c);
148 return 2;
149 }
150 return 0;
151 }
152
153 int
dohtml(const char * begin,const char * end,int newblock)154 dohtml(const char *begin, const char *end, int newblock) {
155 const char *p, *tag, *tagend;
156
157 if(nohtml || !newblock || *begin == '\n' || begin + 2 >= end)
158 return 0;
159 p = begin;
160 if(p[1] == '\n')
161 p++;
162 if(p[1] != '<' || strchr(" /\n\t\\", p[2]))
163 return 0;
164 tag = p + 2;
165 p += 2;
166 for(; !strchr(" >", *p); p++);
167 tagend = p;
168 while((p = strstr(p, "\n</")) && p < end) {
169 p += 3;
170 if(strncmp(p, tag, tagend - tag) == 0 && p[tagend - tag] == '>') {
171 p++;
172 fwrite(begin, sizeof(char), p - begin + tagend - tag, stdout);
173 puts("\n");
174 return -(p - begin + tagend - tag);
175 }
176 }
177 return 0;
178 }
179
180 int
dolineprefix(const char * begin,const char * end,int newblock)181 dolineprefix(const char *begin, const char *end, int newblock) {
182 unsigned int i, j, l;
183 char *buffer;
184 const char *p;
185
186 if(newblock)
187 p = begin;
188 else if(*begin == '\n')
189 p = begin + 1;
190 else
191 return 0;
192 for(i = 0; i < LENGTH(lineprefix); i++) {
193 l = strlen(lineprefix[i].search);
194 if(end - p < l)
195 continue;
196 if(strncmp(lineprefix[i].search, p, l))
197 continue;
198 if(*begin == '\n')
199 fputc('\n', stdout);
200 fputs(lineprefix[i].before, stdout);
201 if(lineprefix[i].search[l-1] == '\n') {
202 fputc('\n', stdout);
203 return l;
204 }
205 if(!(buffer = malloc(BUFFERSIZE)))
206 eprint("Malloc failed.");
207 buffer[0] = '\0';
208 for(j = 0, p += l; p < end; p++, j++) {
209 ADDC(buffer, j) = *p;
210 if(*p == '\n' && p + l < end) {
211 if(strncmp(lineprefix[i].search, p + 1, l) != 0)
212 break;
213 p += l;
214 }
215 }
216 ADDC(buffer, j) = '\0';
217 if(lineprefix[i].process)
218 process(buffer, buffer + strlen(buffer), lineprefix[i].process >= 2);
219 else
220 hprint(buffer, buffer + strlen(buffer));
221 puts(lineprefix[i].after);
222 free(buffer);
223 return -(p - begin);
224 }
225 return 0;
226 }
227
/* Parser for links "[text](url)" and images "![alt](url)".
 * For every "![" found inside the text part one further "](" is skipped, so
 * that an image may be used as link text. A link is written as
 * <a href="url">text</a> with the text run through process(); an image is
 * written as <img src="url" alt="alt" />. URL and alt text are HTML-escaped
 * with hprint().
 * Returns 0 if begin does not start a complete link, otherwise the number of
 * bytes up to and including the closing ')'. newblock is unused. */
int
dolink(const char *begin, const char *end, int newblock) {
	int isimage = 0;
	const char *text, *textend, *url, *urlend, *nested;

	if(*begin != '[') {
		if(strncmp(begin, "![", 2) != 0)
			return 0;
		isimage = 1;
	}
	text = begin + 1 + isimage;

	textend = strstr(text, "](");
	if(textend == NULL || textend > end)
		return 0;
	/* each nested image contributes its own "](": move on to the next one */
	nested = strstr(text, "![");
	while(nested != NULL && nested < end && nested < textend) {
		textend = strstr(textend + 1, "](");
		if(textend == NULL || textend > end)
			return 0;
		nested = strstr(nested + 1, "![");
	}

	url = textend + 2;
	urlend = strchr(url, ')');
	if(urlend == NULL || urlend > end)
		return 0;

	if(!isimage) {
		fputs("<a href=\"", stdout);
		hprint(url, urlend);
		fputs("\">", stdout);
		process(text, textend, 0);
		fputs("</a>", stdout);
	}
	else {
		fputs("<img src=\"", stdout);
		hprint(url, urlend);
		fputs("\" alt=\"", stdout);
		hprint(text, textend);
		fputs("\" />", stdout);
	}
	return urlend + 1 - begin;
}
266
/* Parser for lists: writes a run of list items as <ul> or <ol>, each item
 * wrapped in <li>...</li> and rendered through process().
 * A list is recognized at begin when newblock is set, or right after a
 * leading '\n' otherwise. The first item starts with '-', '*' or '+'
 * (unordered list) or with a possibly empty run of digits followed by '.'
 * (ordered list), and the marker must be followed by a space or tab.
 * Returns 0 when no list starts here, otherwise a negative value whose
 * magnitude is the number of input bytes handled. */
267 int
268 dolist(const char * begin,const char * end,int newblock)268 dolist(const char *begin, const char *end, int newblock) {
269 unsigned int i, j, indent, run, ul, isblock;
270 const char *p, *q;
271 char *buffer;
272
273 isblock = 0;
274 if(newblock)
275 p = begin;
276 else if(*begin == '\n')
277 p = begin + 1;
278 else
279 return 0;
280 q = p;
281 if(*p == '-' || *p == '*' || *p == '+')
282 ul = 1;
283 else {
284 ul = 0;
285 for(; p < end && *p >= '0' && *p <= '9'; p++);
286 if(p >= end || *p != '.')
287 return 0;
288 }
289 p++;
290 if(p >= end || !(*p == ' ' || *p == '\t'))
291 return 0;
292 for(p++; p != end && (*p == ' ' || *p == '\t'); p++);
/* width of the marker plus the blanks after it; later lines are compared
 * against this width */
293 indent = p - q;
294 if(!(buffer = malloc(BUFFERSIZE)))
295 eprint("Malloc failed.");
296 if(!newblock)
297 putchar('\n');
298 fputs(ul ? "<ul>\n" : "<ol>\n", stdout);
299 run = 1;
/* outer loop: one pass per list item; inner loop: collects the item's text
 * in buffer */
300 for(; p < end && run; p++) {
301 for(i = 0; p < end && run; p++, i++) {
302 if(*p == '\n') {
303 if(p + 1 == end)
304 break;
/* empty line: keep one newline in the item and stop the list, unless the
 * line after it continues the list (run is set again below) */
305 else if(p[1] == '\n') {
306 p++;
307 ADDC(buffer, i) = '\n';
308 i++;
309 run = 0;
310 isblock++;
311 }
/* q: start of the next line; j counts how many of its leading characters
 * form a list marker and/or blanks, up to indent */
312 q = p + 1;
313 j = 0;
314 if(ul && (*q == '-' || *q == '*' || *q == '+'))
315 j = 1;
316 else if(!ul) {
317 for(; q + j != end && q[j] >= '0' && q[j] <= '9' && j < indent; j++);
318 if(q + j == end)
319 break;
320 if(j > 0 && q[j] == '.')
321 j++;
322 else
323 j = 0;
324 }
325 if(q + indent < end)
326 for(; (q[j] == ' ' || q[j] == '\t') && j < indent; j++);
/* the next line lines up with the item text: if it starts with a blank it
 * continues the current item, otherwise it starts with a marker and begins
 * the next item */
327 if(j == indent) {
328 ADDC(buffer, i) = '\n';
329 i++;
330 p += indent;
331 run = 1;
332 if(*q == ' ' || *q == '\t')
333 p++;
334 else
335 break;
336 }
337 }
338 ADDC(buffer, i) = *p;
339 }
340 ADDC(buffer, i) = '\0';
341 fputs("<li>", stdout);
/* the item is processed as a block once empty lines were seen: more than
 * one, or exactly one with the list still running */
342 process(buffer, buffer + i, isblock > 1 || (isblock == 1 && run));
343 fputs("</li>\n", stdout);
344 }
345 fputs(ul ? "</ul>\n" : "</ol>\n", stdout);
346 free(buffer);
/* step back over the newlines at the end of the list so that they are not
 * counted as handled */
347 p--;
348 while(*(--p) == '\n');
349 return -(p - begin + 1);
350 }
351
/* Parser for paragraphs: at the start of a block, wraps the text up to the
 * next empty line (or up to end if there is none before it) in <p>...</p>
 * and renders it with process().
 * Returns 0 when not at a block start or when the text is at most one byte
 * long, otherwise the negated length of the paragraph. */
int
doparagraph(const char *begin, const char *end, int newblock) {
	const char *stop;
	long length;

	if(!newblock)
		return 0;

	stop = strstr(begin, "\n\n");
	if(stop == NULL || stop > end)
		stop = end;
	length = stop - begin;
	if(length <= 1)
		return 0;

	fputs("<p>\n", stdout);
	process(begin, stop, 0);
	fputs("</p>\n", stdout);
	return -length;
}
368
369 int
doreplace(const char * begin,const char * end,int newblock)370 doreplace(const char *begin, const char *end, int newblock) {
371 unsigned int i, l;
372
373 for(i = 0; i < LENGTH(insert); i++)
374 if(strncmp(insert[i][0], begin, strlen(insert[i][0])) == 0)
375 fputs(insert[i][1], stdout);
376 for(i = 0; i < LENGTH(replace); i++) {
377 l = strlen(replace[i][0]);
378 if(end - begin < l)
379 continue;
380 if(strncmp(replace[i][0], begin, l) == 0) {
381 fputs(replace[i][1], stdout);
382 return l;
383 }
384 }
385 return 0;
386 }
387
/* Writes the range [from, to) for use in a mail link: every ASCII byte is
 * written as a decimal character reference ("&#NN;"). Bytes >= 0x80 are
 * copied unchanged, because a character reference names a whole code point
 * and must not be applied to the single bytes of a multi-byte character. */
static void
mailprint(const char *from, const char *to) {
	const unsigned char *c;

	for(c = (const unsigned char *)from; c != (const unsigned char *)to; c++) {
		if(*c < 0x80)
			printf("&#%u;", (unsigned int)*c);
		else
			putchar(*c);
	}
}

/* Parser for short links: "<url>" and "<user@host>".
 * The text between '<' and the first '>' must not contain a blank, tab or
 * newline. It is a URL if it contains ':' or '#', and a mail address if it
 * contains '@' with no ':' or '#' before that '@'. URLs are written as
 * <a href="url">url</a>, HTML-escaped with hprint(); mail addresses get a
 * "mailto:" link with the address written as character references.
 * Returns 0 if begin does not start a short link, otherwise the number of
 * bytes up to and including '>'. newblock is unused. */
int
doshortlink(const char *begin, const char *end, int newblock) {
	const char *p;
	int ismail = 0;	/* 1: mail address, -1: URL, 0: undecided */

	if(*begin != '<')
		return 0;
	for(p = begin + 1; p != end; p++) {
		switch(*p) {
		case ' ':
		case '\t':
		case '\n':
			return 0;
		case '#':
		case ':':
			ismail = -1;
			break;
		case '@':
			if(ismail == 0)
				ismail = 1;
			break;
		case '>':
			if(ismail == 0)
				return 0;
			fputs("<a href=\"", stdout);
			if(ismail == 1) {
				/* mailto: */
				fputs("mailto:", stdout);
				mailprint(begin + 1, p);
				fputs("\">", stdout);
				mailprint(begin + 1, p);
			}
			else {
				hprint(begin + 1, p);
				fputs("\">", stdout);
				hprint(begin + 1, p);
			}
			fputs("</a>", stdout);
			return p - begin + 1;
		default:
			break;
		}
	}
	return 0;
}
433
434 int
dosurround(const char * begin,const char * end,int newblock)435 dosurround(const char *begin, const char *end, int newblock) {
436 unsigned int i, l;
437 const char *p, *start, *stop;
438
439 for(i = 0; i < LENGTH(surround); i++) {
440 l = strlen(surround[i].search);
441 if(end - begin < 2*l || strncmp(begin, surround[i].search, l) != 0)
442 continue;
443 start = begin + l;
444 p = start - 1;
445 do {
446 p = strstr(p + 1, surround[i].search);
447 } while(p && p[-1] == '\\');
448 if(!p || p >= end ||
449 !(stop = strstr(start, surround[i].search)) || stop >= end)
450 continue;
451 fputs(surround[i].before, stdout);
452 if(surround[i].process)
453 process(start, stop, 0);
454 else
455 hprint(start, stop);
456 fputs(surround[i].after, stdout);
457 return stop - begin + l;
458 }
459 return 0;
460 }
461
462 int
dounderline(const char * begin,const char * end,int newblock)463 dounderline(const char *begin, const char *end, int newblock) {
464 unsigned int i, j, l;
465 const char *p;
466
467 if(!newblock)
468 return 0;
469 p = begin;
470 for(l = 0; p + l != end && p[l] != '\n'; l++);
471 p += l + 1;
472 if(l == 0)
473 return 0;
474 for(i = 0; i < LENGTH(underline); i++) {
475 for(j = 0; p + j != end && p[j] != '\n' && p[j] == underline[i].search[0]; j++);
476 if(j >= l) {
477 fputs(underline[i].before, stdout);
478 if(underline[i].process)
479 process(begin, begin + l, 0);
480 else
481 hprint(begin, begin + l);
482 fputs(underline[i].after, stdout);
483 return -(j + p - begin);
484 }
485 }
486 return 0;
487 }
488
/* Writes the range [begin, end) to stdout with the characters that are
 * special in HTML text and double-quoted attribute values replaced by
 * character references: & -> &amp;  " -> &quot;  > -> &gt;  < -> &lt;
 * All other bytes are copied unchanged. */
void
hprint(const char *begin, const char *end) {
	const char *p;

	for(p = begin; p != end; p++) {
		switch(*p) {
		case '&':
			fputs("&amp;", stdout);
			break;
		case '"':
			fputs("&quot;", stdout);
			break;
		case '>':
			fputs("&gt;", stdout);
			break;
		case '<':
			fputs("&lt;", stdout);
			break;
		default:
			putchar(*p);
			break;
		}
	}
}
506
507 void
process(const char * begin,const char * end,int newblock)508 process(const char *begin, const char *end, int newblock) {
509 const char *p, *q;
510 int affected;
511 unsigned int i;
512
513 for(p = begin; p != end;) {
514 if(newblock)
515 while(*p == '\n')
516 if (++p == end)
517 return;
518 affected = 0;
519 for(i = 0; i < LENGTH(parsers) && affected == 0; i++)
520 affected = parsers[i](p, end, newblock);
521 p += abs(affected);
522 if(!affected) {
523 if(nohtml)
524 hprint(p, p + 1);
525 else
526 putchar(*p);
527 p++;
528 }
529 for(q = p; q != end && *q == '\n'; q++);
530 if(q == end)
531 return;
532 else if(p[0] == '\n' && p + 1 != end && p[1] == '\n')
533 newblock = 1;
534 else
535 newblock = affected < 0;
536 }
537 }
538
539 int
main(int argc,char * argv[])540 main(int argc, char *argv[]) {
541 char *buffer;
542 int s;
543 unsigned long len, bsize;
544
545 source = stdin;
546 if(argc > 1 && strcmp("-v", argv[1]) == 0)
547 eprint("Simple Markup Interpreter %s (C) Chris Hutchinson\n",VERSION);
548 else if(argc > 1 && strcmp("-h", argv[1]) == 0)
549 eprint("Usage %s [-n] [file]\n -n escape html strictly\n",argv[0]);
550 if(argc > 1 && strcmp("-n", argv[1]) == 0)
551 nohtml = 1;
552 if(argc > 1 + nohtml && strcmp("-", argv[1 + nohtml]) != 0
553 && !(source = fopen(argv[1 + nohtml],"r")))
554 eprint("Cannot open file `%s`\n",argv[1 + nohtml]);
555 bsize = 2 * BUFFERSIZE;
556 if(!(buffer = malloc(bsize)))
557 eprint("Malloc failed.");
558 len = 0;
559 while((s = fread(buffer + len, 1, BUFFERSIZE, source))) {
560 len += s;
561 if(BUFFERSIZE + len + 1 > bsize) {
562 bsize += BUFFERSIZE;
563 if(!(buffer = realloc(buffer, bsize)))
564 eprint("Malloc failed.");
565 }
566 }
567 buffer[len] = '\0';
568 process(buffer, buffer + len, 1);
569 fclose(source);
570 free(buffer);
571 return EXIT_SUCCESS;
572 }
573