1 /**********************************************************************
2 
3   markdown_lib.c - markdown in C using a PEG grammar.
4   (c) 2008 John MacFarlane (jgm at berkeley dot edu).
5 
6   This program is free software; you can redistribute it and/or modify
7   it under the terms of the GNU General Public License or the MIT
8   license.  See LICENSE for details.
9 
10   This program is distributed in the hope that it will be useful,
11   but WITHOUT ANY WARRANTY; without even the implied warranty of
12   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13   GNU General Public License for more details.
14 
15  ***********************************************************************/
16 
17 #include <stdio.h>
18 #include <stdlib.h>
19 #include <string.h>
20 #include "markdown_peg.h"
21 
22 #define TABSTOP 4
23 
24 /* preformat_text - allocate and copy text buffer while
25  * performing tab expansion. */
preformat_text(char * text)26 static GString *preformat_text(char *text) {
27     GString *buf;
28     char next_char;
29     int charstotab;
30 
31     int len = 0;
32 
33     buf = g_string_new("");
34 
35     charstotab = TABSTOP;
36     while ((next_char = *text++) != '\0') {
37         switch (next_char) {
38         case '\t':
39             while (charstotab > 0)
40                 g_string_append_c(buf, ' '), len++, charstotab--;
41             break;
42         case '\n':
43             g_string_append_c(buf, '\n'), len++, charstotab = TABSTOP;
44             break;
45         default:
46             g_string_append_c(buf, next_char), len++, charstotab--;
47         }
48         if (charstotab == 0)
49             charstotab = TABSTOP;
50     }
51     g_string_append(buf, "\n\n");
52     return(buf);
53 }
54 
55 /* print_tree - print tree of elements, for debugging only. */
print_tree(element * elt,int indent)56 static void print_tree(element * elt, int indent) {
57     int i;
58     char * key;
59     while (elt != NULL) {
60         for (i = 0; i < indent; i++)
61             fputc(' ', stderr);
62         switch (elt->key) {
63             case LIST:               key = "LIST"; break;
64             case RAW:                key = "RAW"; break;
65             case SPACE:              key = "SPACE"; break;
66             case LINEBREAK:          key = "LINEBREAK"; break;
67             case ELLIPSIS:           key = "ELLIPSIS"; break;
68             case EMDASH:             key = "EMDASH"; break;
69             case ENDASH:             key = "ENDASH"; break;
70             case APOSTROPHE:         key = "APOSTROPHE"; break;
71             case SINGLEQUOTED:       key = "SINGLEQUOTED"; break;
72             case DOUBLEQUOTED:       key = "DOUBLEQUOTED"; break;
73             case STR:                key = "STR"; break;
74             case LINK:               key = "LINK"; break;
75             case IMAGE:              key = "IMAGE"; break;
76             case CODE:               key = "CODE"; break;
77             case HTML:               key = "HTML"; break;
78             case EMPH:               key = "EMPH"; break;
79             case STRONG:             key = "STRONG"; break;
80             case PLAIN:              key = "PLAIN"; break;
81             case PARA:               key = "PARA"; break;
82             case LISTITEM:           key = "LISTITEM"; break;
83             case BULLETLIST:         key = "BULLETLIST"; break;
84             case ORDEREDLIST:        key = "ORDEREDLIST"; break;
85             case H1:                 key = "H1"; break;
86             case H2:                 key = "H2"; break;
87             case H3:                 key = "H3"; break;
88             case H4:                 key = "H4"; break;
89             case H5:                 key = "H5"; break;
90             case H6:                 key = "H6"; break;
91             case BLOCKQUOTE:         key = "BLOCKQUOTE"; break;
92             case VERBATIM:           key = "VERBATIM"; break;
93             case HTMLBLOCK:          key = "HTMLBLOCK"; break;
94             case HRULE:              key = "HRULE"; break;
95             case REFERENCE:          key = "REFERENCE"; break;
96             case NOTE:               key = "NOTE"; break;
97             default:                 key = "?";
98         }
99         if ( elt->key == STR ) {
100             fprintf(stderr, "0x%p: %s   '%s'\n", (void *)elt, key, elt->contents.str);
101         } else {
102             fprintf(stderr, "0x%p: %s\n", (void *)elt, key);
103         }
104         if (elt->children)
105             print_tree(elt->children, indent + 4);
106         elt = elt->next;
107     }
108 }
109 
110 /* process_raw_blocks - traverses an element list, replacing any RAW elements with
111  * the result of parsing them as markdown text, and recursing into the children
112  * of parent elements.  The result should be a tree of elements without any RAWs. */
process_raw_blocks(element * input,int extensions,element * references,element * notes)113 static element * process_raw_blocks(element *input, int extensions, element *references, element *notes) {
114     element *current = NULL;
115     element *last_child = NULL;
116     char *contents;
117     current = input;
118 
119     while (current != NULL) {
120         if (current->key == RAW) {
121             /* \001 is used to indicate boundaries between nested lists when there
122              * is no blank line.  We split the string by \001 and parse
123              * each chunk separately. */
124             contents = strtok(current->contents.str, "\001");
125             current->key = LIST;
126             current->children = parse_markdown(contents, extensions, references, notes);
127             last_child = current->children;
128             while ((contents = strtok(NULL, "\001"))) {
129                 while (last_child->next != NULL)
130                     last_child = last_child->next;
131                 last_child->next = parse_markdown(contents, extensions, references, notes);
132             }
133             free(current->contents.str);
134             current->contents.str = NULL;
135         }
136         if (current->children != NULL)
137             current->children = process_raw_blocks(current->children, extensions, references, notes);
138         current = current->next;
139     }
140     return input;
141 }
142 
143 /* markdown_to_gstring - convert markdown text to the output format specified.
144  * Returns a GString, which must be freed after use using g_string_free(). */
markdown_to_g_string(char * text,int extensions,int output_format)145 GString * markdown_to_g_string(char *text, int extensions, int output_format) {
146     element *result;
147     element *references;
148     element *notes;
149     GString *formatted_text;
150     GString *out;
151     out = g_string_new("");
152 
153     formatted_text = preformat_text(text);
154 
155     references = parse_references(formatted_text->str, extensions);
156     notes = parse_notes(formatted_text->str, extensions, references);
157     result = parse_markdown(formatted_text->str, extensions, references, notes);
158 
159     result = process_raw_blocks(result, extensions, references, notes);
160 
161     g_string_free(formatted_text, TRUE);
162 
163     print_element_list(out, result, output_format, extensions);
164 
165     free_element_list(result);
166     free_element_list(references);
167     return out;
168 }
169 
170 /* markdown_to_string - convert markdown text to the output format specified.
171  * Returns a null-terminated string, which must be freed after use. */
markdown_to_string(char * text,int extensions,int output_format)172 char * markdown_to_string(char *text, int extensions, int output_format) {
173     GString *out;
174     char *char_out;
175     out = markdown_to_g_string(text, extensions, output_format);
176     char_out = out->str;
177     g_string_free(out, FALSE);
178     return char_out;
179 }
180 
181 /* vim:set ts=4 sw=4: */
182