1 /* mclex.c -- lexer for Windows mc files parser.
2 Copyright (C) 2007-2022 Free Software Foundation, Inc.
3
4 Written by Kai Tietz, Onevision.
5
6 This file is part of GNU Binutils.
7
8 This program is free software; you can redistribute it and/or modify
9 it under the terms of the GNU General Public License as published by
10 the Free Software Foundation; either version 3 of the License, or
11 (at your option) any later version.
12
13 This program is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
17
18 You should have received a copy of the GNU General Public License
19 along with this program; if not, write to the Free Software
20 Foundation, Inc., 51 Franklin Street - Fifth Floor, Boston, MA
21 02110-1301, USA. */
22
/* This is the lexer used by the Windows mc file parser.
   It basically just recognizes a bunch of keywords.  */
25
26 #include "sysdep.h"
27 #include "bfd.h"
28 #include "bucomm.h"
29 #include "libiberty.h"
30 #include "safe-ctype.h"
31 #include "windmc.h"
32 #include "mcparse.h"
33
34 #include <assert.h>
35
36 /* Exported globals. */
37 bool mclex_want_nl = false;
38 bool mclex_want_line = false;
39 bool mclex_want_filename = false;
40
41 /* Local globals. */
42 static unichar *input_stream = NULL;
43 static unichar *input_stream_pos = NULL;
44 static int input_line = 1;
45 static const char *input_filename = NULL;
46
47 void
mc_set_content(const unichar * src)48 mc_set_content (const unichar *src)
49 {
50 if (!src)
51 return;
52 input_stream = input_stream_pos = unichar_dup (src);
53 }
54
55 void
mc_set_inputfile(const char * name)56 mc_set_inputfile (const char *name)
57 {
58 if (! name || *name == 0)
59 input_filename = "-";
60 else
61 {
62 const char *s1 = strrchr (name, '/');
63 const char *s2 = strrchr (name, '\\');
64
65 if (! s1)
66 s1 = s2;
67 if (s1 && s2 && s1 < s2)
68 s1 = s2;
69 if (! s1)
70 s1 = name;
71 else
72 s1++;
73 s1 = xstrdup (s1);
74 input_filename = s1;
75 }
76 }
77
78 static void
show_msg(const char * kind,const char * msg,va_list argp)79 show_msg (const char *kind, const char *msg, va_list argp)
80 {
81 fprintf (stderr, "In %s at line %d: %s: ", input_filename, input_line, kind);
82 vfprintf (stderr, msg, argp);
83 fprintf (stderr, ".\n");
84 }
85
/* Emit a "warning" diagnostic.  S is a printf-style format string and the
   remaining arguments are its operands.  */

void
mc_warn (const char *s, ...)
{
  va_list args;

  va_start (args, s);
  show_msg ("warning", s, args);
  va_end (args);
}
94
/* Emit a "fatal" diagnostic and then call xexit with status 1.  S is a
   printf-style format string and the remaining arguments are its
   operands.  */

void
mc_fatal (const char *s, ...)
{
  va_list args;

  va_start (args, s);
  show_msg ("fatal", s, args);
  va_end (args);

  xexit (1);
}
104
105
/* Emit a "parser" diagnostic.  S is a printf-style format string and the
   remaining arguments are its operands.  Unlike mc_fatal this returns to
   the caller.  */

static void
mc_error (const char *s, ...)
{
  va_list args;

  va_start (args, s);
  show_msg ("parser", s, args);
  va_end (args);
}
114
/* Error callback for the generated parser: report the message S as a
   "parser" diagnostic.  */

void
yyerror (const char *s)
{
  /* S is message text, not a format: pass it as data so that a '%' inside
     it can never be interpreted as a conversion specification.  */
  mc_error ("%s", s);
}
120
121 static unichar *
get_diff(unichar * end,unichar * start)122 get_diff (unichar *end, unichar *start)
123 {
124 unichar *ret;
125 unichar save = *end;
126
127 *end = 0;
128 ret = unichar_dup (start);
129 *end = save;
130 return ret;
131 }
132
133 static rc_uint_type
parse_digit(unichar ch)134 parse_digit (unichar ch)
135 {
136 rc_uint_type base = 10, v = 0, c;
137
138 if (ch == '0')
139 {
140 base = 8;
141 switch (input_stream_pos[0])
142 {
143 case 'x': case 'X': base = 16; input_stream_pos++; break;
144 case 'o': case 'O': base = 8; input_stream_pos++; break;
145 case 'b': case 'B': base = 2; input_stream_pos++; break;
146 }
147 }
148 else
149 v = (rc_uint_type) (ch - '0');
150
151 while ((ch = input_stream_pos[0]) != 0)
152 {
153 if (ch >= 'A' && ch <= 'F')
154 c = (rc_uint_type) (ch - 'A') + 10;
155 else if (ch >= 'a' && ch <= 'f')
156 c = (rc_uint_type) (ch - 'a') + 10;
157 else if (ch >= '0' && ch <= '9')
158 c = (rc_uint_type) (ch - '0');
159 else
160 break;
161 v *= base;
162 v += c;
163 ++input_stream_pos;
164 }
165 if (input_stream_pos[0] == 'U' || input_stream_pos[0] == 'u')
166 input_stream_pos++;
167 if (input_stream_pos[0] == 'L' || input_stream_pos[0] == 'l')
168 input_stream_pos++;
169 if (input_stream_pos[0] == 'L' || input_stream_pos[0] == 'l')
170 input_stream_pos++;
171 return v;
172 }
173
174 static mc_keyword *keyword_top = NULL;
175
176 const mc_keyword *
enum_facility(int e)177 enum_facility (int e)
178 {
179 mc_keyword *h = keyword_top;
180
181 while (h != NULL)
182 {
183 while (h && strcmp (h->group_name, "facility") != 0)
184 h = h->next;
185 if (e == 0)
186 return h;
187 --e;
188 if (h)
189 h = h->next;
190 }
191 return h;
192 }
193
194 const mc_keyword *
enum_severity(int e)195 enum_severity (int e)
196 {
197 mc_keyword *h = keyword_top;
198
199 while (h != NULL)
200 {
201 while (h && strcmp (h->group_name, "severity") != 0)
202 h = h->next;
203 if (e == 0)
204 return h;
205 --e;
206 if (h)
207 h = h->next;
208 }
209 return h;
210 }
211
212 static void
mc_add_keyword_ascii(const char * sz,int rid,const char * grp,rc_uint_type nv,const char * sv)213 mc_add_keyword_ascii (const char *sz, int rid, const char *grp, rc_uint_type nv, const char *sv)
214 {
215 unichar *usz = NULL, *usv = NULL;
216 rc_uint_type usz_len;
217
218 unicode_from_codepage (&usz_len, &usz, sz, CP_ACP);
219 if (sv)
220 unicode_from_codepage (&usz_len, &usv, sv, CP_ACP);
221 mc_add_keyword (usz, rid, grp, nv, usv);
222 }
223
224 void
mc_add_keyword(unichar * usz,int rid,const char * grp,rc_uint_type nv,unichar * sv)225 mc_add_keyword (unichar *usz, int rid, const char *grp, rc_uint_type nv, unichar *sv)
226 {
227 mc_keyword *p, *c, *n;
228 size_t len = unichar_len (usz);
229
230 c = keyword_top;
231 p = NULL;
232 while (c != NULL)
233 {
234 if (c->len > len)
235 break;
236 if (c->len == len)
237 {
238 int e = memcmp (usz, c->usz, len * sizeof (unichar));
239
240 if (e < 0)
241 break;
242 if (! e)
243 {
244 if (! strcmp (grp, "keyword") || strcmp (c->group_name, grp) != 0)
245 fatal (_("Duplicate symbol entered into keyword list."));
246 c->rid = rid;
247 c->nval = nv;
248 c->sval = (!sv ? NULL : unichar_dup (sv));
249 if (! strcmp (grp, "language"))
250 {
251 const wind_language_t *lag = wind_find_language_by_id ((unsigned) nv);
252
253 if (lag == NULL)
254 fatal ("Language ident 0x%lx is not resolvable.\n", (long) nv);
255 memcpy (&c->lang_info, lag, sizeof (*lag));
256 }
257 return;
258 }
259 }
260 c = (p = c)->next;
261 }
262 n = xmalloc (sizeof (mc_keyword));
263 n->next = c;
264 n->len = len;
265 n->group_name = grp;
266 n->usz = usz;
267 n->rid = rid;
268 n->nval = nv;
269 n->sval = (!sv ? NULL : unichar_dup (sv));
270 if (! strcmp (grp, "language"))
271 {
272 const wind_language_t *lag = wind_find_language_by_id ((unsigned) nv);
273 if (lag == NULL)
274 fatal ("Language ident 0x%lx is not resolvable.\n", (long) nv);
275 memcpy (&n->lang_info, lag, sizeof (*lag));
276 }
277 if (! p)
278 keyword_top = n;
279 else
280 p->next = n;
281 }
282
283 static int
mc_token(const unichar * t,size_t len)284 mc_token (const unichar *t, size_t len)
285 {
286 static int was_init = 0;
287 mc_keyword *k;
288
289 if (! was_init)
290 {
291 was_init = 1;
292 mc_add_keyword_ascii ("OutputBase", MCOUTPUTBASE, "keyword", 0, NULL);
293 mc_add_keyword_ascii ("MessageIdTypedef", MCMESSAGEIDTYPEDEF, "keyword", 0, NULL);
294 mc_add_keyword_ascii ("SeverityNames", MCSEVERITYNAMES, "keyword", 0, NULL);
295 mc_add_keyword_ascii ("FacilityNames", MCFACILITYNAMES, "keyword", 0, NULL);
296 mc_add_keyword_ascii ("LanguageNames", MCLANGUAGENAMES, "keyword", 0, NULL);
297 mc_add_keyword_ascii ("MessageId", MCMESSAGEID, "keyword", 0, NULL);
298 mc_add_keyword_ascii ("Severity", MCSEVERITY, "keyword", 0, NULL);
299 mc_add_keyword_ascii ("Facility", MCFACILITY, "keyword", 0, NULL);
300 mc_add_keyword_ascii ("SymbolicName", MCSYMBOLICNAME, "keyword", 0, NULL);
301 mc_add_keyword_ascii ("Language", MCLANGUAGE, "keyword", 0, NULL);
302 mc_add_keyword_ascii ("Success", MCTOKEN, "severity", 0, NULL);
303 mc_add_keyword_ascii ("Informational", MCTOKEN, "severity", 1, NULL);
304 mc_add_keyword_ascii ("Warning", MCTOKEN, "severity", 2, NULL);
305 mc_add_keyword_ascii ("Error", MCTOKEN, "severity", 3, NULL);
306 mc_add_keyword_ascii ("System", MCTOKEN, "facility", 0xff, NULL);
307 mc_add_keyword_ascii ("Application", MCTOKEN, "facility", 0xfff, NULL);
308 mc_add_keyword_ascii ("English", MCTOKEN, "language", 0x409, "MSG00001");
309 }
310 k = keyword_top;
311 if (!len || !t || *t == 0)
312 return -1;
313 while (k != NULL)
314 {
315 if (k->len > len)
316 break;
317 if (k->len == len)
318 {
319 if (! memcmp (k->usz, t, len * sizeof (unichar)))
320 {
321 if (k->rid == MCTOKEN)
322 yylval.tok = k;
323 return k->rid;
324 }
325 }
326 k = k->next;
327 }
328 return -1;
329 }
330
331 /* Skip characters in input_stream_pos up to and including a newline
332 character. Returns non-zero if the newline was found, zero otherwise. */
333
334 static int
skip_until_eol(void)335 skip_until_eol (void)
336 {
337 while (input_stream_pos[0] != 0 && input_stream_pos[0] != '\n')
338 ++input_stream_pos;
339 if (input_stream_pos[0] == 0)
340 return 0;
341 if (input_stream_pos[0] == '\n')
342 {
343 ++input_stream_pos;
344 input_line += 1;
345 }
346 return 1;
347 }
348
349 int
yylex(void)350 yylex (void)
351 {
352 unichar *start_token;
353 unichar ch;
354
355 if (! input_stream_pos)
356 {
357 fatal ("Input stream not setuped.\n");
358 return -1;
359 }
360
361 if (mclex_want_line)
362 {
363 start_token = input_stream_pos;
364 if (input_stream_pos[0] == 0)
365 return -1;
366 /* PR 26082: Reject a period followed by EOF. */
367 if (input_stream_pos[0] == '.' && input_stream_pos[1] == 0)
368 return -1;
369 if (input_stream_pos[0] == '.'
370 && (input_stream_pos[1] == '\n'
371 || (input_stream_pos[1] == '\r' && input_stream_pos[2] == '\n')))
372 {
373 mclex_want_line = false;
374 return skip_until_eol () ? MCENDLINE : -1;
375 }
376 if (!skip_until_eol ())
377 return -1;
378 yylval.ustr = get_diff (input_stream_pos, start_token);
379 return MCLINE;
380 }
381
382 while ((ch = input_stream_pos[0]) <= 0x20)
383 {
384 if (ch == 0)
385 return -1;
386 ++input_stream_pos;
387 if (ch == '\n')
388 input_line += 1;
389 if (mclex_want_nl && ch == '\n')
390 {
391 mclex_want_nl = false;
392 return NL;
393 }
394 }
395 start_token = input_stream_pos;
396 ++input_stream_pos;
397 if (mclex_want_filename)
398 {
399 mclex_want_filename = false;
400 if (ch == '"')
401 {
402 start_token++;
403 while ((ch = input_stream_pos[0]) != 0)
404 {
405 if (ch == '"')
406 break;
407 ++input_stream_pos;
408 }
409 yylval.ustr = get_diff (input_stream_pos, start_token);
410 if (ch == '"')
411 ++input_stream_pos;
412 }
413 else
414 {
415 while ((ch = input_stream_pos[0]) != 0)
416 {
417 if (ch <= 0x20 || ch == ')')
418 break;
419 ++input_stream_pos;
420 }
421 yylval.ustr = get_diff (input_stream_pos, start_token);
422 }
423 return MCFILENAME;
424 }
425 switch (ch)
426 {
427 case ';':
428 ++start_token;
429 if (!skip_until_eol ())
430 return -1;
431 yylval.ustr = get_diff (input_stream_pos, start_token);
432 return MCCOMMENT;
433 case '=':
434 return '=';
435 case '(':
436 return '(';
437 case ')':
438 return ')';
439 case '+':
440 return '+';
441 case ':':
442 return ':';
443 case '0': case '1': case '2': case '3': case '4':
444 case '5': case '6': case '7': case '8': case '9':
445 yylval.ival = parse_digit (ch);
446 return MCNUMBER;
447 default:
448 if (ch >= 0x40)
449 {
450 int ret;
451 while (input_stream_pos[0] >= 0x40 || (input_stream_pos[0] >= '0' && input_stream_pos[0] <= '9'))
452 ++input_stream_pos;
453 ret = mc_token (start_token, (size_t) (input_stream_pos - start_token));
454 if (ret != -1)
455 return ret;
456 yylval.ustr = get_diff (input_stream_pos, start_token);
457 return MCIDENT;
458 }
459 mc_error ("illegal character 0x%x.", ch);
460 }
461 return -1;
462 }
463