1 /*
2 * $Id: tex.c 666 2008-05-15 17:47:31Z dfishburn $
3 *
4 * Copyright (c) 2008, David Fishburn
5 *
6 * This source code is released for free distribution under the terms of the
7 * GNU General Public License.
8 *
9 * This module contains functions for generating tags for TeX language files.
10 *
11 * Tex language reference:
12 * http://en.wikibooks.org/wiki/TeX#The_Structure_of_TeX
13 */
14
15 /*
16 * INCLUDE FILES
17 */
18 #include "general.h" /* must always come first */
19 #include <ctype.h> /* to define isalpha () */
20 #include <setjmp.h>
21 #ifdef DEBUG
22 #include <stdio.h>
23 #endif
24
25 #include "debug.h"
26 #include "entry.h"
27 #include "keyword.h"
28 #include "parse.h"
29 #include "read.h"
30 #include "routines.h"
31 #include "vstring.h"
32
33 /*
34 * MACROS
35 */
/*
 *   Token predicates.  "token" is a pointer to a token structure (a
 *   tokenInfo at every call site in this file); each macro evaluates its
 *   arguments exactly once and yields a boolean.
 *
 *   The whole expansion is wrapped in parentheses so that the leading cast
 *   cannot combine with an operator written next to the macro invocation.
 */
#define isType(token,t)		((boolean) ((token)->type == (t)))
#define isKeyword(token,k)	((boolean) ((token)->keyword == (k)))
38
39 /*
40 * DATA DECLARATIONS
41 */
42
/*
 *   Codes carried through longjmp() back to the setjmp() site.
 */
typedef enum eException {
	ExceptionNone,	/* value seen on the initial, direct setjmp() return */
	ExceptionEOF	/* the tokenizer ran out of input */
} exception_t;
44
/*
 *   Identifies which recognized TeX command a keyword token holds.
 *   KEYWORD_NONE marks a token that is not one of the recognized commands.
 */
typedef enum eKeywordId {
	KEYWORD_NONE = -1,
	KEYWORD_chapter,		/* \chapter */
	KEYWORD_section,		/* \section */
	KEYWORD_subsection,		/* \subsection */
	KEYWORD_subsubsection,	/* \subsubsection */
	KEYWORD_part,			/* \part */
	KEYWORD_paragraph,		/* \paragraph */
	KEYWORD_subparagraph,	/* \subparagraph */
	KEYWORD_include			/* \include */
} keywordId;
59
60 /* Used to determine whether keyword is valid for the token language and
61 * what its ID is.
62 */
63 typedef struct sKeywordDesc {
64 const char *name;
65 keywordId id;
66 } keywordDesc;
67
/*
 *   Lexical classes a token can be given.  TOKEN_UNDEFINED is the state a
 *   token is reset to before each read and is what unrecognized input is
 *   left as.
 */
typedef enum eTokenType {
	TOKEN_UNDEFINED,
	TOKEN_CHARACTER,
	TOKEN_CLOSE_PAREN,		/* ) */
	TOKEN_COMMA,			/* , */
	TOKEN_KEYWORD,			/* backslash command found in the keyword table */
	TOKEN_OPEN_PAREN,		/* ( */
	TOKEN_IDENTIFIER,		/* plain word, or backslash command that is not a keyword */
	TOKEN_STRING,			/* text quoted with ' or " */
	TOKEN_OPEN_CURLY,		/* { */
	TOKEN_CLOSE_CURLY,		/* } */
	TOKEN_OPEN_SQUARE,		/* [ */
	TOKEN_CLOSE_SQUARE,		/* ] */
	TOKEN_QUESTION_MARK,
	TOKEN_STAR				/* * */
} tokenType;
84
85 typedef struct sTokenInfo {
86 tokenType type;
87 keywordId keyword;
88 vString * string;
89 vString * scope;
90 unsigned long lineNumber;
91 long filePosition;
92 } tokenInfo;
93
94 /*
95 * DATA DEFINITIONS
96 */
97
98 static langType Lang_js;
99
100 static jmp_buf Exception;
101
/*
 *   Tag kinds produced by this parser.  Each value is used as an index
 *   into TexKinds, so the order here must match that table; TEXTAG_COUNT
 *   is the number of kinds.
 */
typedef enum {
	TEXTAG_CHAPTER,			/* index 0 */
	TEXTAG_SECTION,			/* index 1 */
	TEXTAG_SUBSECTION,		/* index 2 */
	TEXTAG_SUBSUBSECTION,	/* index 3 */
	TEXTAG_PART,			/* index 4 */
	TEXTAG_PARAGRAPH,		/* index 5 */
	TEXTAG_SUBPARAGRAPH,	/* index 6 */
	TEXTAG_INCLUDE,			/* index 7 */
	TEXTAG_COUNT
} texKind;
113
114 static kindOption TexKinds [] = {
115 { TRUE, 'c', "chapter", "chapters" },
116 { TRUE, 's', "section", "sections" },
117 { TRUE, 'u', "subsection", "subsections" },
118 { TRUE, 'b', "subsubsection", "subsubsections" },
119 { TRUE, 'p', "part", "parts" },
120 { TRUE, 'P', "paragraph", "paragraphs" },
121 { TRUE, 'G', "subparagraph", "subparagraphs" },
122 { TRUE, 'i', "include", "includes" }
123 };
124
125 static const keywordDesc TexKeywordTable [] = {
126 /* keyword keyword ID */
127 { "chapter", KEYWORD_chapter },
128 { "section", KEYWORD_section },
129 { "subsection", KEYWORD_subsection },
130 { "subsubsection", KEYWORD_subsubsection },
131 { "part", KEYWORD_part },
132 { "paragraph", KEYWORD_paragraph },
133 { "subparagraph", KEYWORD_subparagraph },
134 { "include", KEYWORD_include }
135 };
136
137 /*
138 * FUNCTION DEFINITIONS
139 */
140
isIdentChar(const int c)141 static boolean isIdentChar (const int c)
142 {
143 return (boolean)
144 (isalpha (c) || isdigit (c) || c == '$' ||
145 c == '_' || c == '#' || c == '-' || c == '.');
146 }
147
buildTexKeywordHash(void)148 static void buildTexKeywordHash (void)
149 {
150 const size_t count = sizeof (TexKeywordTable) /
151 sizeof (TexKeywordTable [0]);
152 size_t i;
153 for (i = 0 ; i < count ; ++i)
154 {
155 const keywordDesc* const p = &TexKeywordTable [i];
156 addKeyword (p->name, Lang_js, (int) p->id);
157 }
158 }
159
newToken(void)160 static tokenInfo *newToken (void)
161 {
162 tokenInfo *const token = xMalloc (1, tokenInfo);
163
164 token->type = TOKEN_UNDEFINED;
165 token->keyword = KEYWORD_NONE;
166 token->string = vStringNew ();
167 token->scope = vStringNew ();
168 token->lineNumber = getSourceLineNumber ();
169 token->filePosition = getInputFilePosition ();
170
171 return token;
172 }
173
/*
 *   Releases a token obtained from newToken (): both of its string
 *   buffers first, then the token structure itself.
 */
static void deleteToken (tokenInfo *const token)
{
	vStringDelete (token->scope);
	vStringDelete (token->string);
	eFree (token);
}
180
181 /*
182 * Tag generation functions
183 */
184
/*
 *   Emits a tag named after the token's string, located at the token's
 *   line number and file position, with the name and letter of "kind".
 *   Nothing is emitted when that kind is disabled.
 */
static void makeConstTag (tokenInfo *const token, const texKind kind)
{
	const kindOption *const option = &TexKinds [kind];
	tagEntryInfo entry;

	if (! option->enabled)
		return;

	initTagEntry (&entry, vStringValue (token->string));

	entry.lineNumber   = token->lineNumber;
	entry.filePosition = token->filePosition;
	entry.kindName     = option->name;
	entry.kind         = option->letter;

	makeTagEntry (&entry);
}
201
/*
 *   Emits a tag for "token" if "kind" is enabled.  When the token carries
 *   a non-empty scope, the token's string is first rewritten in place to
 *   "<scope>.<string>", so the caller's token is modified.
 */
static void makeTexTag (tokenInfo *const token, texKind kind)
{
	if (! TexKinds [kind].enabled)
		return;

	if (vStringLength (token->scope) > 0)
	{
		vString *const qualified = vStringNew ();

		vStringCopy (qualified, token->scope);
		vStringCatS (qualified, ".");
		vStringCatS (qualified, vStringValue (token->string));
		vStringTerminate (qualified);

		vStringCopy (token->string, qualified);
		vStringDelete (qualified);
	}

	makeConstTag (token, kind);
}
225
226 /*
227 * Parsing functions
228 */
229
/*
 *   Collects the body of a quoted string into "string".  The opening
 *   delimiter has already been consumed by the caller; reading stops at
 *   the matching "delimiter" (which is consumed but not stored) or at end
 *   of input.
 *
 *   A backslash is dropped and the character after it is stored verbatim,
 *   so an escaped delimiter does not end the string.  If the input ends
 *   right after a backslash the string simply ends there; EOF itself is
 *   never stored.
 */
static void parseString (vString *const string, const int delimiter)
{
	int c;

	while ((c = fileGetc ()) != EOF  &&  c != delimiter)
	{
		if (c == '\\')
		{
			c = fileGetc ();	/* This may be a ' or ". */
			if (c == EOF)
				break;			/* dangling backslash at end of input */
		}
		vStringPut (string, c);
	}
	vStringTerminate (string);
}
250
251 /*
252 * Read a C identifier beginning with "firstChar" and places it into
253 * "name".
254 */
parseIdentifier(vString * const string,const int firstChar)255 static void parseIdentifier (vString *const string, const int firstChar)
256 {
257 int c = firstChar;
258 Assert (isIdentChar (c));
259 do
260 {
261 vStringPut (string, c);
262 c = fileGetc ();
263 } while (isIdentChar (c));
264
265 vStringTerminate (string);
266 if (!isspace (c))
267 fileUngetc (c); /* unget non-identifier character */
268 }
269
/*
 *   Reads the next token from the input into "token".
 *
 *   Tabs, spaces and newlines are skipped, as are '%' comments, which run
 *   to the end of the line.  The token is then classified as:
 *     - one of the punctuation types for ( ) , { } [ ] *
 *     - TOKEN_STRING for text quoted with ' or "
 *     - TOKEN_KEYWORD or TOKEN_IDENTIFIER for a backslash followed by a
 *       letter, depending on whether the word is a registered keyword
 *     - TOKEN_IDENTIFIER for a bare word made of identifier characters
 *     - TOKEN_UNDEFINED for anything else
 *
 *   At end of input this function does not return: it jumps to Exception
 *   with ExceptionEOF.
 */
static void readToken (tokenInfo *const token)
{
	int c;

	token->type    = TOKEN_UNDEFINED;
	token->keyword = KEYWORD_NONE;
	vStringClear (token->string);

	/* Find the first character that is neither whitespace nor comment. */
	for (;;)
	{
		do
		{
			c = fileGetc ();
			token->lineNumber   = getSourceLineNumber ();
			token->filePosition = getInputFilePosition ();
		}
		while (c == '\t'  ||  c == ' '  ||  c == '\n');

		if (c != '%')
			break;
		fileSkipToCharacter ('\n');		/* % are single line comments */
	}

	switch (c)
	{
		case EOF: longjmp (Exception, (int) ExceptionEOF);	break;

		case '(': token->type = TOKEN_OPEN_PAREN;	break;
		case ')': token->type = TOKEN_CLOSE_PAREN;	break;
		case ',': token->type = TOKEN_COMMA;		break;
		case '{': token->type = TOKEN_OPEN_CURLY;	break;
		case '}': token->type = TOKEN_CLOSE_CURLY;	break;
		case '[': token->type = TOKEN_OPEN_SQUARE;	break;
		case ']': token->type = TOKEN_CLOSE_SQUARE;	break;
		case '*': token->type = TOKEN_STAR;			break;

		case '\'':
		case '"':
			token->type = TOKEN_STRING;
			parseString (token->string, c);
			token->lineNumber   = getSourceLineNumber ();
			token->filePosition = getInputFilePosition ();
			break;

		case '\\':
			/*
			 * All Tex tags start with a backslash.  Only a backslash
			 * followed by a letter can be one; otherwise the character is
			 * pushed back and the token stays TOKEN_UNDEFINED.
			 */
			c = fileGetc ();
			if (isalpha (c))
			{
				parseIdentifier (token->string, c);
				token->lineNumber   = getSourceLineNumber ();
				token->filePosition = getInputFilePosition ();
				token->keyword = analyzeToken (token->string, Lang_js);
				token->type = isKeyword (token, KEYWORD_NONE)
					? TOKEN_IDENTIFIER
					: TOKEN_KEYWORD;
			}
			else
				fileUngetc (c);
			break;

		default:
			/* Anything that cannot start a word stays TOKEN_UNDEFINED. */
			if (isIdentChar (c))
			{
				parseIdentifier (token->string, c);
				token->lineNumber   = getSourceLineNumber ();
				token->filePosition = getInputFilePosition ();
				token->type = TOKEN_IDENTIFIER;
			}
			break;
	}
}
347
/*
 *   Makes "dest" a copy of "src".  The string and scope contents are
 *   copied into dest's own buffers, so the two tokens share no storage.
 */
static void copyToken (tokenInfo *const dest, tokenInfo *const src)
{
	dest->type         = src->type;
	dest->keyword      = src->keyword;
	dest->lineNumber   = src->lineNumber;
	dest->filePosition = src->filePosition;

	vStringCopy (dest->string, src->string);
	vStringCopy (dest->scope,  src->scope);
}
357
358 /*
359 * Scanning functions
360 */
361
parseTag(tokenInfo * const token,texKind kind)362 static boolean parseTag (tokenInfo *const token, texKind kind)
363 {
364 tokenInfo *const name = newToken ();
365 vString * fullname;
366 boolean useLongName = TRUE;
367
368 fullname = vStringNew ();
369 vStringClear (fullname);
370
371 /*
372 * Tex tags are of these formats:
373 * \keyword{any number of words}
374 * \keyword[short desc]{any number of words}
375 * \keyword*[short desc]{any number of words}
376 *
377 * When a keyword is found, loop through all words within
378 * the curly braces for the tag name.
379 */
380
381 if (isType (token, TOKEN_KEYWORD))
382 {
383 copyToken (name, token);
384 readToken (token);
385 }
386
387 if (isType (token, TOKEN_OPEN_SQUARE))
388 {
389 useLongName = FALSE;
390
391 readToken (token);
392 while (! isType (token, TOKEN_CLOSE_SQUARE) )
393 {
394 if (isType (token, TOKEN_IDENTIFIER))
395 {
396 if (fullname->length > 0)
397 vStringCatS (fullname, " ");
398 vStringCatS (fullname, vStringValue (token->string));
399 }
400 readToken (token);
401 }
402 vStringTerminate (fullname);
403 vStringCopy (name->string, fullname);
404 makeTexTag (name, kind);
405 }
406
407 if (isType (token, TOKEN_STAR))
408 {
409 readToken (token);
410 }
411
412 if (isType (token, TOKEN_OPEN_CURLY))
413 {
414 readToken (token);
415 while (! isType (token, TOKEN_CLOSE_CURLY) )
416 {
417 /* if (isType (token, TOKEN_IDENTIFIER) && useLongName) */
418 if (useLongName)
419 {
420 if (fullname->length > 0)
421 vStringCatS (fullname, " ");
422 vStringCatS (fullname, vStringValue (token->string));
423 }
424 readToken (token);
425 }
426 if (useLongName)
427 {
428 vStringTerminate (fullname);
429 vStringCopy (name->string, fullname);
430 makeTexTag (name, kind);
431 }
432 }
433
434 deleteToken (name);
435 vStringDelete (fullname);
436 return TRUE;
437 }
438
/*
 *   Main scanning loop: reads tokens one after another and, for every
 *   recognized keyword, parses its arguments into a tag of the matching
 *   kind.
 *
 *   The loop has no exit of its own; it ends when readToken () reaches the
 *   end of input and jumps out through Exception.
 */
static void parseTexFile (tokenInfo *const token)
{
	do
	{
		readToken (token);

		if (isType (token, TOKEN_KEYWORD))
		{
			switch (token->keyword)
			{
				case KEYWORD_chapter:
					parseTag (token, TEXTAG_CHAPTER);
					break;
				case KEYWORD_section:
					parseTag (token, TEXTAG_SECTION);
					break;
				case KEYWORD_subsection:
					/* was tagged as a subsubsection by mistake */
					parseTag (token, TEXTAG_SUBSECTION);
					break;
				case KEYWORD_subsubsection:
					parseTag (token, TEXTAG_SUBSUBSECTION);
					break;
				case KEYWORD_part:
					parseTag (token, TEXTAG_PART);
					break;
				case KEYWORD_paragraph:
					parseTag (token, TEXTAG_PARAGRAPH);
					break;
				case KEYWORD_subparagraph:
					parseTag (token, TEXTAG_SUBPARAGRAPH);
					break;
				case KEYWORD_include:
					parseTag (token, TEXTAG_INCLUDE);
					break;
				default:
					break;
			}
		}
	} while (TRUE);
}
479
initialize(const langType language)480 static void initialize (const langType language)
481 {
482 Assert (sizeof (TexKinds) / sizeof (TexKinds [0]) == TEXTAG_COUNT);
483 Lang_js = language;
484 buildTexKeywordHash ();
485 }
486
findTexTags(void)487 static void findTexTags (void)
488 {
489 tokenInfo *const token = newToken ();
490 exception_t exception;
491
492 exception = (exception_t) (setjmp (Exception));
493 while (exception == ExceptionNone)
494 parseTexFile (token);
495
496 deleteToken (token);
497 }
498
499 /* Create parser definition stucture */
TexParser(void)500 extern parserDefinition* TexParser (void)
501 {
502 static const char *const extensions [] = { "tex", NULL };
503 parserDefinition *const def = parserNew ("Tex");
504 def->extensions = extensions;
505 /*
506 * New definitions for parsing instead of regex
507 */
508 def->kinds = TexKinds;
509 def->kindCount = KIND_COUNT (TexKinds);
510 def->parser = findTexTags;
511 def->initialize = initialize;
512
513 return def;
514 }
515 /* vi:set tabstop=4 shiftwidth=4 noexpandtab: */
516