1 /*
2 * Copyright (c) 2000-2001, Jérôme Plût
3 * Copyright (c) 2006, Enrico Tröger
4 * Copyright (c) 2019, Mirco Schönfeld
5 *
6 * This source code is released for free distribution under the terms of the
7 * GNU General Public License.
8 *
9 * This module contains functions for generating tags for source files
10 * for the BibTex formatting system.
11 * https://en.wikipedia.org/wiki/BibTeX
12 */
13
14 /*
15 * INCLUDE FILES
16 */
17 #include "general.h" /* must always come first */
18 #include <ctype.h> /* to define isalpha () */
19 #include <string.h>
20
21 #include "debug.h"
22 #include "entry.h"
23 #include "keyword.h"
24 #include "parse.h"
25 #include "read.h"
26 #include "routines.h"
27 #include "vstring.h"
28
29 /*
30 * MACROS
31 */
/* Non-zero when the `type` field of the token pointed to by TOKEN equals T. */
#define isType(token,t)		((bool) ((token)->type == (t)))

/* Non-zero when the `keyword` field of the token pointed to by TOKEN equals K. */
#define isKeyword(token,k)	((bool) ((token)->keyword == (k)))

/*
 * Non-zero when C may be part of an identifier (entry type or citation key).
 *
 * Besides letters, digits, '_', '-' and '+', the characters ':', '.' and '/'
 * are accepted, because citation keys such as "doe:2020" or
 * "DBLP:conf/icse/Doe99" are common and would otherwise be cut off at the
 * first such character.
 *
 * C is evaluated several times: pass an expression without side effects.
 * C is handed to <ctype.h> functions, so it must be EOF or a value
 * representable as unsigned char.
 */
#define isIdentChar(c) \
	(isalpha (c) || isdigit (c) || (c) == '_' || (c) == '-' || (c) == '+' \
	 || (c) == ':' || (c) == '.' || (c) == '/')
36
37 /*
38 * DATA DECLARATIONS
39 */
40
/*
 * Identifiers of the BibTeX entry types recognized by this parser.
 * The enumerators are listed in the same order as the entries of bibKind.
 */
enum eKeywordId {
	KEYWORD_article,
	KEYWORD_book,
	KEYWORD_booklet,
	KEYWORD_conference,
	KEYWORD_inbook,
	KEYWORD_incollection,
	KEYWORD_inproceedings,
	KEYWORD_manual,
	KEYWORD_mastersthesis,
	KEYWORD_misc,
	KEYWORD_phdthesis,
	KEYWORD_proceedings,
	KEYWORD_string,
	KEYWORD_techreport,
	KEYWORD_unpublished
};

/* Plain int rather than the enum type, to allow KEYWORD_NONE */
typedef int keywordId;
62
/*
 * Token types.  Values 0..255 stand for the byte that was read; only the
 * ones the parser tests by name get an enumerator.  Values from 256 upwards
 * are synthetic types.
 */
enum eTokenType {
	/* single-byte token named for convenience */
	TOKEN_OPEN_CURLY = '{',

	/* synthetic types, outside the byte range */
	TOKEN_UNDEFINED = 256,
	TOKEN_KEYWORD,
	TOKEN_IDENTIFIER
};

/* Plain int, so that any byte value can be stored as a token type. */
typedef int tokenType;
72
73 typedef struct sTokenInfo {
74 tokenType type;
75 keywordId keyword;
76 vString * string;
77 unsigned long lineNumber;
78 MIOPos filePosition;
79 } tokenInfo;
80
81 /*
82 * DATA DEFINITIONS
83 */
84
85 static langType Lang_bib;
86
/*
 * Tag kinds, one per recognized entry type.  Each value is used as an index
 * into BibKinds, so the order here must match the order of that table.
 */
typedef enum {
	BIBTAG_ARTICLE,
	BIBTAG_BOOK,
	BIBTAG_BOOKLET,
	BIBTAG_CONFERENCE,
	BIBTAG_INBOOK,
	BIBTAG_INCOLLECTION,
	BIBTAG_INPROCEEDINGS,
	BIBTAG_MANUAL,
	BIBTAG_MASTERSTHESIS,
	BIBTAG_MISC,
	BIBTAG_PHDTHESIS,
	BIBTAG_PROCEEDINGS,
	BIBTAG_STRING,
	BIBTAG_TECHREPORT,
	BIBTAG_UNPUBLISHED,

	BIBTAG_COUNT	/* number of kinds; not a kind itself */
} bibKind;
105
106 static kindDefinition BibKinds [] = {
107 { true, 'a', "article", "article" },
108 { true, 'b', "book", "book" },
109 { true, 'B', "booklet", "booklet" },
110 { true, 'c', "conference", "conference" },
111 { true, 'i', "inbook", "inbook" },
112 { true, 'I', "incollection", "incollection" },
113 { true, 'j', "inproceedings", "inproceedings" },
114 { true, 'm', "manual", "manual" },
115 { true, 'M', "mastersthesis", "mastersthesis" },
116 { true, 'n', "misc", "misc" },
117 { true, 'p', "phdthesis", "phdthesis" },
118 { true, 'P', "proceedings", "proceedings" },
119 { true, 's', "string", "string" },
120 { true, 't', "techreport", "techreport" },
121 { true, 'u', "unpublished", "unpublished" }
122 };
123
124 static const keywordTable BibKeywordTable [] = {
125 /* keyword keyword ID */
126 { "article", KEYWORD_article },
127 { "book", KEYWORD_book },
128 { "booklet", KEYWORD_booklet },
129 { "conference", KEYWORD_conference },
130 { "inbook", KEYWORD_inbook },
131 { "incollection", KEYWORD_incollection },
132 { "inproceedings",KEYWORD_inproceedings },
133 { "manual", KEYWORD_manual },
134 { "mastersthesis",KEYWORD_mastersthesis },
135 { "misc", KEYWORD_misc },
136 { "phdthesis", KEYWORD_phdthesis },
137 { "proceedings", KEYWORD_proceedings },
138 { "string", KEYWORD_string },
139 { "techreport", KEYWORD_techreport },
140 { "unpublished", KEYWORD_unpublished }
141 };
142
143 /*
144 * FUNCTION DEFINITIONS
145 */
146
newToken(void)147 static tokenInfo *newToken (void)
148 {
149 tokenInfo *const token = xMalloc (1, tokenInfo);
150
151 token->type = TOKEN_UNDEFINED;
152 token->keyword = KEYWORD_NONE;
153 token->string = vStringNew ();
154 token->lineNumber = getInputLineNumber ();
155 token->filePosition = getInputFilePosition ();
156
157 return token;
158 }
159
/*
 * Releases a token obtained from newToken (): first its string, then the
 * token structure itself.  `token` must not be used afterwards.
 */
static void deleteToken (tokenInfo *const token)
{
	vString *const text = token->string;

	vStringDelete (text);
	eFree (token);
}
165
166 /*
167 * Tag generation functions
168 */
/*
 * Emits a tag of kind `kind` named after token->string, located at the
 * line number and file position stored in `token`.  Does nothing when the
 * kind is disabled in BibKinds.
 */
static void makeBibTag (tokenInfo *const token, bibKind kind)
{
	tagEntryInfo entry;

	if (! BibKinds [kind].enabled)
		return;

	initTagEntry (&entry, vStringValue (token->string), kind);

	/* report the location recorded in the token */
	entry.lineNumber   = token->lineNumber;
	entry.filePosition = token->filePosition;

	makeTagEntry (&entry);
}
183
184 /*
185 * Parsing functions
186 */
187
188 /*
189 * Read a C identifier beginning with "firstChar" and places it into
190 * "name".
191 */
parseIdentifier(vString * const string,const int firstChar)192 static void parseIdentifier (vString *const string, const int firstChar)
193 {
194 int c = firstChar;
195 Assert (isIdentChar (c));
196 do
197 {
198 vStringPut (string, c);
199 c = getcFromInputFile ();
200 } while (c != EOF && isIdentChar (c));
201 if (c != EOF)
202 ungetcToInputFile (c); /* unget non-identifier character */
203 }
204
/*
 * Reads the next token from the input into `token`.
 *
 * Tabs, spaces and newlines are skipped, and so is everything from a '%' to
 * the end of its line.  The token's line number and file position are taken
 * right after its first character has been read.
 *
 *  - '@' followed by a letter: token->string receives '@' plus the
 *    identifier; the text after the '@' is looked up with
 *    lookupCaseKeyword ().  The type is TOKEN_KEYWORD on a match and
 *    TOKEN_IDENTIFIER otherwise.
 *  - '@' followed by anything else: that character is pushed back and the
 *    type stays '@'.
 *  - an identifier character: the identifier is read, type TOKEN_IDENTIFIER.
 *  - any other byte: the type is the byte value and the string stays empty.
 *
 * Returns false when the end of input is reached, true otherwise.
 */
static bool readToken (tokenInfo *const token)
{
	int c;

	token->type = TOKEN_UNDEFINED;
	token->keyword = KEYWORD_NONE;
	vStringClear (token->string);

	do
	{
		do
			c = getcFromInputFile ();
		while (c == '\t' || c == ' ' || c == '\n');

		token->lineNumber = getInputLineNumber ();
		token->filePosition = getInputFilePosition ();
		token->type = (unsigned char) c;

		if (c == '%')
			skipToCharacterInInputFile ('\n'); /* % are single line comments */
	} while (c == '%');

	if (c == EOF)
		return false;

	if (c == '@')
	{
		/*
		 * All Bib entries start with an at symbol.  Only when a letter
		 * follows can this be an entry type.
		 */
		const int next = getcFromInputFile ();

		if (isalpha (next))
		{
			vStringPut (token->string, '@');
			parseIdentifier (token->string, next);
			/* "+ 1" skips the '@' stored above */
			token->keyword = lookupCaseKeyword (vStringValue (token->string) + 1, Lang_bib);
			token->type = isKeyword (token, KEYWORD_NONE)
				? TOKEN_IDENTIFIER
				: TOKEN_KEYWORD;
		}
		else
			ungetcToInputFile (next);
	}
	else if (isIdentChar (c))
	{
		parseIdentifier (token->string, c);
		token->type = TOKEN_IDENTIFIER;
	}

	return true;
}
263
/*
 * Copies every field of `src` into `dest`.  The string contents are copied
 * into dest's own vString, so the two tokens do not share storage.
 */
static void copyToken (tokenInfo *const dest, tokenInfo *const src)
{
	dest->type         = src->type;
	dest->keyword      = src->keyword;
	dest->lineNumber   = src->lineNumber;
	dest->filePosition = src->filePosition;
	vStringCopy (dest->string, src->string);
}
272
273 /*
274 * Scanning functions
275 */
276
/*
 * Parses the start of one entry and emits a tag for its citation key.
 *
 * Bib entries are of these formats:
 *   @article{identifier,
 *   author="John Doe"}
 *
 * On entry `token` is expected to hold the entry-type keyword.  The next
 * token should be '{' and the one after it the identifier, which becomes
 * the tag name; the tag location is that of the keyword token.
 *
 * Returns true when the caller must stop parsing: the input ended, or the
 * token following '{' was not an identifier.  Returns false otherwise,
 * including when no '{' follows the keyword (no tag is made in that case).
 */
static bool parseTag (tokenInfo *const token, bibKind kind)
{
	tokenInfo *const entry = newToken ();
	vString *const key = vStringNew ();
	bool stop = true;	/* cleared only when the end of the function is reached */

	if (isType (token, TOKEN_KEYWORD))
	{
		/* remember the keyword's location before moving on */
		copyToken (entry, token);
		if (! readToken (token))
			goto cleanup;
	}

	if (isType (token, TOKEN_OPEN_CURLY))
	{
		if (! readToken (token))
			goto cleanup;

		/* should find an identifier for bib item at first place */
		if (! isType (token, TOKEN_IDENTIFIER))
			goto cleanup;

		vStringCat (key, token->string);
		vStringStripTrailing (key);
		if (vStringLength (key) > 0)
		{
			vStringCopy (entry->string, key);
			makeBibTag (entry, kind);
		}
	}

	stop = false;

cleanup:
	deleteToken (entry);
	vStringDelete (key);
	return stop;
}
332
/*
 * Main loop: reads tokens until the input ends.  For every entry-type
 * keyword, parseTag () is called with the matching tag kind; the loop also
 * ends as soon as parseTag () returns true.  Other tokens are ignored.
 */
static void parseBibFile (tokenInfo *const token)
{
	while (readToken (token))
	{
		bibKind kind;

		if (! isType (token, TOKEN_KEYWORD))
			continue;

		switch (token->keyword)
		{
			case KEYWORD_article:       kind = BIBTAG_ARTICLE;       break;
			case KEYWORD_book:          kind = BIBTAG_BOOK;          break;
			case KEYWORD_booklet:       kind = BIBTAG_BOOKLET;       break;
			case KEYWORD_conference:    kind = BIBTAG_CONFERENCE;    break;
			case KEYWORD_inbook:        kind = BIBTAG_INBOOK;        break;
			case KEYWORD_incollection:  kind = BIBTAG_INCOLLECTION;  break;
			case KEYWORD_inproceedings: kind = BIBTAG_INPROCEEDINGS; break;
			case KEYWORD_manual:        kind = BIBTAG_MANUAL;        break;
			case KEYWORD_mastersthesis: kind = BIBTAG_MASTERSTHESIS; break;
			case KEYWORD_misc:          kind = BIBTAG_MISC;          break;
			case KEYWORD_phdthesis:     kind = BIBTAG_PHDTHESIS;     break;
			case KEYWORD_proceedings:   kind = BIBTAG_PROCEEDINGS;   break;
			case KEYWORD_string:        kind = BIBTAG_STRING;        break;
			case KEYWORD_techreport:    kind = BIBTAG_TECHREPORT;    break;
			case KEYWORD_unpublished:   kind = BIBTAG_UNPUBLISHED;   break;
			default:
				/* keyword without a tag kind: go on with the next token */
				continue;
		}

		if (parseTag (token, kind))
			break;
	}
}
399
initialize(const langType language)400 static void initialize (const langType language)
401 {
402 Lang_bib = language;
403 }
404
findBibTags(void)405 static void findBibTags (void)
406 {
407 tokenInfo *const token = newToken ();
408
409 parseBibFile (token);
410
411 deleteToken (token);
412 }
413
414 /* Create parser definition structure */
BibtexParser(void)415 extern parserDefinition* BibtexParser (void)
416 {
417 Assert (ARRAY_SIZE (BibKinds) == BIBTAG_COUNT);
418 static const char *const extensions [] = { "bib", NULL };
419 parserDefinition *const def = parserNew ("BibTeX");
420 def->extensions = extensions;
421 /*
422 * New definitions for parsing instead of regex
423 */
424 def->kindTable = BibKinds;
425 def->kindCount = ARRAY_SIZE (BibKinds);
426 def->parser = findBibTags;
427 def->initialize = initialize;
428 def->keywordTable = BibKeywordTable;
429 def->keywordCount = ARRAY_SIZE (BibKeywordTable);
430 return def;
431 }
432