1 /**********************************************************************
2
3 markdown_lib.c - markdown in C using a PEG grammar.
4 (c) 2008 John MacFarlane (jgm at berkeley dot edu).
5
6 This program is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License or the MIT
8 license. See LICENSE for details.
9
10 This program is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 GNU General Public License for more details.
14
15 ***********************************************************************/
16
17 #include <stdio.h>
18 #include <stdlib.h>
19 #include <string.h>
20 #include "markdown_peg.h"
21
22 #define TABSTOP 4
23
24 /* preformat_text - allocate and copy text buffer while
25 * performing tab expansion. */
preformat_text(char * text)26 static GString *preformat_text(char *text) {
27 GString *buf;
28 char next_char;
29 int charstotab;
30
31 int len = 0;
32
33 buf = g_string_new("");
34
35 charstotab = TABSTOP;
36 while ((next_char = *text++) != '\0') {
37 switch (next_char) {
38 case '\t':
39 while (charstotab > 0)
40 g_string_append_c(buf, ' '), len++, charstotab--;
41 break;
42 case '\n':
43 g_string_append_c(buf, '\n'), len++, charstotab = TABSTOP;
44 break;
45 default:
46 g_string_append_c(buf, next_char), len++, charstotab--;
47 }
48 if (charstotab == 0)
49 charstotab = TABSTOP;
50 }
51 g_string_append(buf, "\n\n");
52 return(buf);
53 }
54
55 /* print_tree - print tree of elements, for debugging only. */
print_tree(element * elt,int indent)56 static void print_tree(element * elt, int indent) {
57 int i;
58 char * key;
59 while (elt != NULL) {
60 for (i = 0; i < indent; i++)
61 fputc(' ', stderr);
62 switch (elt->key) {
63 case LIST: key = "LIST"; break;
64 case RAW: key = "RAW"; break;
65 case SPACE: key = "SPACE"; break;
66 case LINEBREAK: key = "LINEBREAK"; break;
67 case ELLIPSIS: key = "ELLIPSIS"; break;
68 case EMDASH: key = "EMDASH"; break;
69 case ENDASH: key = "ENDASH"; break;
70 case APOSTROPHE: key = "APOSTROPHE"; break;
71 case SINGLEQUOTED: key = "SINGLEQUOTED"; break;
72 case DOUBLEQUOTED: key = "DOUBLEQUOTED"; break;
73 case STR: key = "STR"; break;
74 case LINK: key = "LINK"; break;
75 case IMAGE: key = "IMAGE"; break;
76 case CODE: key = "CODE"; break;
77 case HTML: key = "HTML"; break;
78 case EMPH: key = "EMPH"; break;
79 case STRONG: key = "STRONG"; break;
80 case PLAIN: key = "PLAIN"; break;
81 case PARA: key = "PARA"; break;
82 case LISTITEM: key = "LISTITEM"; break;
83 case BULLETLIST: key = "BULLETLIST"; break;
84 case ORDEREDLIST: key = "ORDEREDLIST"; break;
85 case H1: key = "H1"; break;
86 case H2: key = "H2"; break;
87 case H3: key = "H3"; break;
88 case H4: key = "H4"; break;
89 case H5: key = "H5"; break;
90 case H6: key = "H6"; break;
91 case BLOCKQUOTE: key = "BLOCKQUOTE"; break;
92 case VERBATIM: key = "VERBATIM"; break;
93 case HTMLBLOCK: key = "HTMLBLOCK"; break;
94 case HRULE: key = "HRULE"; break;
95 case REFERENCE: key = "REFERENCE"; break;
96 case NOTE: key = "NOTE"; break;
97 default: key = "?";
98 }
99 if ( elt->key == STR ) {
100 fprintf(stderr, "0x%p: %s '%s'\n", (void *)elt, key, elt->contents.str);
101 } else {
102 fprintf(stderr, "0x%p: %s\n", (void *)elt, key);
103 }
104 if (elt->children)
105 print_tree(elt->children, indent + 4);
106 elt = elt->next;
107 }
108 }
109
110 /* process_raw_blocks - traverses an element list, replacing any RAW elements with
111 * the result of parsing them as markdown text, and recursing into the children
112 * of parent elements. The result should be a tree of elements without any RAWs. */
process_raw_blocks(element * input,int extensions,element * references,element * notes)113 static element * process_raw_blocks(element *input, int extensions, element *references, element *notes) {
114 element *current = NULL;
115 element *last_child = NULL;
116 char *contents;
117 current = input;
118
119 while (current != NULL) {
120 if (current->key == RAW) {
121 /* \001 is used to indicate boundaries between nested lists when there
122 * is no blank line. We split the string by \001 and parse
123 * each chunk separately. */
124 contents = strtok(current->contents.str, "\001");
125 current->key = LIST;
126 current->children = parse_markdown(contents, extensions, references, notes);
127 last_child = current->children;
128 while ((contents = strtok(NULL, "\001"))) {
129 while (last_child->next != NULL)
130 last_child = last_child->next;
131 last_child->next = parse_markdown(contents, extensions, references, notes);
132 }
133 free(current->contents.str);
134 current->contents.str = NULL;
135 }
136 if (current->children != NULL)
137 current->children = process_raw_blocks(current->children, extensions, references, notes);
138 current = current->next;
139 }
140 return input;
141 }
142
143 /* markdown_to_gstring - convert markdown text to the output format specified.
144 * Returns a GString, which must be freed after use using g_string_free(). */
markdown_to_g_string(char * text,int extensions,int output_format)145 GString * markdown_to_g_string(char *text, int extensions, int output_format) {
146 element *result;
147 element *references;
148 element *notes;
149 GString *formatted_text;
150 GString *out;
151 out = g_string_new("");
152
153 formatted_text = preformat_text(text);
154
155 references = parse_references(formatted_text->str, extensions);
156 notes = parse_notes(formatted_text->str, extensions, references);
157 result = parse_markdown(formatted_text->str, extensions, references, notes);
158
159 result = process_raw_blocks(result, extensions, references, notes);
160
161 g_string_free(formatted_text, TRUE);
162
163 print_element_list(out, result, output_format, extensions);
164
165 free_element_list(result);
166 free_element_list(references);
167 return out;
168 }
169
170 /* markdown_to_string - convert markdown text to the output format specified.
171 * Returns a null-terminated string, which must be freed after use. */
markdown_to_string(char * text,int extensions,int output_format)172 char * markdown_to_string(char *text, int extensions, int output_format) {
173 GString *out;
174 char *char_out;
175 out = markdown_to_g_string(text, extensions, output_format);
176 char_out = out->str;
177 g_string_free(out, FALSE);
178 return char_out;
179 }
180
181 /* vim:set ts=4 sw=4: */
182