1
2 /*
3 * Copyright (c) 2003, Peter Strand <peter@zarquon.se>
4 *
5 * This source code is released for free distribution under the terms of the
6 * GNU General Public License.
7 *
8 * This module contains functions for generating tags for Haskell language
9 * files.
10 *
11 *
12 *
13 * Does not handle operators or infix definitions like:
14 * a `f` b = ...
15 *
16 */
17
18
19 /*
20 * INCLUDE FILES
21 */
22
23 #include "general.h" /* must always come first */
24
25 #include <string.h>
26
27 #include "parse.h"
28 #include "read.h"
29 #include "vstring.h"
30 #include "routines.h"
31
32
33 /*
34 * DATA DEFINITIONS
35 */
36 typedef enum {
37 K_TYPE, K_CONSTRUCTOR, K_FUNCTION, K_MODULE
38 } haskellKind;
39
40 static kindDefinition HaskellKinds [] = {
41 { true, 't', "typedef", "types" },
42 { true, 'c', "macro", "type constructors" },
43 { true, 'f', "function", "functions" },
44 { true, 'm', "namespace", "modules"}
45 };
46
47
48 typedef const unsigned char *custr;
49
50 /*
51 * FUNCTION DEFINITIONS
52 */
53
54
skip_rest_of_line(void)55 static void skip_rest_of_line(void)
56 {
57 int c;
58 do {
59 c = getcFromInputFile();
60 } while (c != EOF && c != '\n');
61 }
62
get_line(char * buf)63 static int get_line(char *buf)
64 {
65 int i = 0;
66 int c;
67 do {
68 c = getcFromInputFile();
69 buf[i++] = c;
70 } while (c != EOF && c != '\n' && i < 1000);
71 buf[i] = '\0';
72 return i;
73 }
74
get_next_char(void)75 static int get_next_char(void)
76 {
77 int c, nxt;
78 c = getcFromInputFile();
79 if (c == EOF)
80 return c;
81 nxt = getcFromInputFile();
82 if (nxt == EOF)
83 return c;
84 ungetcToInputFile(nxt);
85
86 if (c == '-' && nxt == '-') {
87 skip_rest_of_line();
88 return get_next_char();
89 }
90 if (c == '{' && nxt == '-') {
91 int last = '\0';
92 do {
93 last = c;
94 c = get_next_char();
95 } while (! (c == EOF || (last == '-' && c == '}')));
96 return get_next_char();
97 }
98 return c;
99 }
100
add_tag(const char * token,haskellKind kind,vString * name)101 static void add_tag(const char *token, haskellKind kind, vString *name)
102 {
103 int i;
104 for (i = 0; token[i] != '\0'; ++i)
105 vStringPut(name, token[i]);
106
107 makeSimpleTag(name, kind);
108 vStringClear(name);
109 }
110
isident(char c)111 static int isident(char c)
112 {
113 return isalnum(c) || c == '_' || c == '\'' || c == '$';
114 }
115
get_token(char * token,int n)116 static int get_token(char *token, int n)
117 {
118 int c = getcFromInputFile();
119 int i = n;
120 while (c != EOF && isident(c) && i < 1000) {
121 token[i] = c;
122 i++;
123 c = getcFromInputFile();
124 }
125 token[i] = '\0';
126 if (c == EOF)
127 return 0;
128 if (i != n) {
129 ungetcToInputFile(c);
130 return 1;
131 } else {
132 return 0;
133 }
134 }
135
136 enum Find_State { Find_Eq, Find_Constr, Get_Extr, Find_Extr, Find_Bar };
137
inside_datatype(vString * name)138 static int inside_datatype(vString *name)
139 {
140 enum Find_State st = Find_Eq;
141 int c;
142 char token[1001];
143
144 while (1) {
145 if (st == Find_Eq)
146 {
147 do {
148 c = get_next_char();
149 if (c == '\n') {
150 c = get_next_char();
151 if (! (c == ' ' || c == '\t')) {
152 return c;
153 }
154 }
155 } while (c != EOF && c != '=');
156 st = Find_Constr;
157 }
158 else if (st == Find_Constr)
159 {
160 do {
161 c = get_next_char();
162 } while (isspace(c));
163 if (!isupper(c)) {
164 skip_rest_of_line();
165 return '\n';
166 }
167 token[0] = c;
168 if (!get_token(token, 1))
169 return '\n';
170 add_tag(token, K_CONSTRUCTOR, name);
171 st = Find_Extr;
172 }
173 else if (st == Find_Extr)
174 {
175 c = get_next_char();
176 if (c == '{')
177 st = Get_Extr;
178 else if (c == '|')
179 st = Find_Constr;
180 else if (c == '\n') {
181 c = get_next_char();
182 if (! (c == ' ' || c == '\t')) {
183 return c;
184 }
185 }
186 else if (!isspace(c))
187 st = Find_Bar;
188 }
189 else if (st == Get_Extr)
190 {
191 do {
192 c = get_next_char();
193 } while (isspace(c));
194 if (c == EOF)
195 return c;
196 token[0] = c;
197 get_token(token, 1);
198 add_tag(token, K_FUNCTION, name);
199 do {
200 c = get_next_char();
201 if (c == '}') {
202 st = Find_Bar;
203 break;
204 }
205 } while (c != EOF && c != ',');
206 }
207 else if (st == Find_Bar)
208 {
209 do {
210 c = get_next_char();
211 if (c == '\n') {
212 c = get_next_char();
213 if (! (c == ' ' || c == '\t')) {
214 return c;
215 }
216 }
217 } while (c != EOF && c != '|');
218 st = Find_Constr;
219 }
220 }
221 return '\n';
222 }
223
findHaskellTags(int is_literate)224 static void findHaskellTags (int is_literate)
225 {
226 vString *name = vStringNew ();
227 char token[1001], arg[1001];
228 int c;
229 int in_tex_lit_code = 0;
230 c = get_next_char();
231
232 while (c != EOF)
233 {
234 if (c == '\n') {
235 c = get_next_char();
236 continue;
237 }
238
239 if (isspace(c)) {
240 skip_rest_of_line();
241 c = get_next_char();
242 continue;
243 }
244 if (is_literate && !in_tex_lit_code) {
245 if (c == '>') {
246 c = getcFromInputFile();
247 if (c == ' ') {
248 c = get_next_char();
249 if (!isident(c)) {
250 skip_rest_of_line();
251 c = get_next_char();
252 continue;
253 }
254 } else {
255 skip_rest_of_line();
256 c = get_next_char();
257 continue;
258 }
259 } else if (c == '\\') {
260 int n = get_line(token);
261 if (strncmp(token, "begin{code}", 11) == 0) {
262 in_tex_lit_code = 1;
263 c = get_next_char();
264 continue;
265 } else {
266 if (n > 0 && token[n-1] != '\n')
267 skip_rest_of_line();
268 else
269 c = get_next_char();
270 }
271 continue;
272 } else {
273 skip_rest_of_line();
274 c = get_next_char();
275 continue;
276 }
277 }
278 if (is_literate && in_tex_lit_code && c == '\\') {
279 if (strncmp(token, "end{code}", 9) == 0) {
280 in_tex_lit_code = 0;
281 c = get_next_char();
282 continue;
283 }
284 }
285 token[0] = c;
286 if (!isident(c)) {
287 skip_rest_of_line();
288 c = get_next_char();
289 continue;
290 }
291 if (!get_token(token, 1)) {
292 c = get_next_char();
293 continue;
294 }
295 do {
296 if ((c = getcFromInputFile()) == EOF)
297 return;
298 } while (c == ' ' || c == '\t');
299 arg[0] = c;
300 get_token(arg, 1);
301 if (strcmp(token, "data") == 0 || strcmp(token, "newtype") == 0) {
302 add_tag(arg, K_TYPE, name);
303 c = inside_datatype(name);
304 continue;
305 }
306 if (strcmp(token, "type") == 0)
307 add_tag(arg, K_TYPE, name);
308 else if (strcmp(token, "module") == 0)
309 add_tag(arg, K_MODULE, name);
310 else if (strcmp(token, "instance") == 0 ||
311 strcmp(token, "foreign") == 0 ||
312 strcmp(token, "import") == 0)
313 ;
314 else {
315 if (arg[0] != ':')
316 add_tag(token, K_FUNCTION, name);
317 }
318 skip_rest_of_line();
319 c = get_next_char();
320 }
321 vStringDelete(name);
322 }
323
findNormalHaskellTags(void)324 static void findNormalHaskellTags (void)
325 {
326 findHaskellTags (0);
327 }
328
findLiterateHaskellTags(void)329 static void findLiterateHaskellTags (void)
330 {
331 findHaskellTags (1);
332 }
333
HaskellParser(void)334 extern parserDefinition* HaskellParser (void)
335 {
336 static const char *const extensions [] = { "hs", NULL };
337 parserDefinition* def = parserNew ("Haskell");
338
339 def->kindTable = HaskellKinds;
340 def->kindCount = ARRAY_SIZE(HaskellKinds);
341 def->extensions = extensions;
342 def->parser = findNormalHaskellTags;
343 return def;
344 }
345
LiterateHaskellParser(void)346 extern parserDefinition* LiterateHaskellParser (void)
347 {
348 static const char *const extensions [] = { "lhs", NULL };
349 parserDefinition* def = parserNew ("Literate Haskell");
350 def->kindTable = HaskellKinds;
351 def->kindCount = ARRAY_SIZE(HaskellKinds);
352 def->extensions = extensions;
353 def->parser = findLiterateHaskellTags;
354 return def;
355 }
356