1 /* mclex.c -- lexer for Windows mc files parser. 2 Copyright 2007 3 Free Software Foundation, Inc. 4 5 Written by Kai Tietz, Onevision. 6 7 This file is part of GNU Binutils. 8 9 This program is free software; you can redistribute it and/or modify 10 it under the terms of the GNU General Public License as published by 11 the Free Software Foundation; either version 3 of the License, or 12 (at your option) any later version. 13 14 This program is distributed in the hope that it will be useful, 15 but WITHOUT ANY WARRANTY; without even the implied warranty of 16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 17 GNU General Public License for more details. 18 19 You should have received a copy of the GNU General Public License 20 along with this program; if not, write to the Free Software 21 Foundation, Inc., 51 Franklin Street - Fifth Floor, Boston, MA 22 02110-1301, USA. */ 23 24 /* This is a lexer used by the Windows rc file parser. 25 It basically just recognized a bunch of keywords. */ 26 27 #include "sysdep.h" 28 #include "bfd.h" 29 #include "bucomm.h" 30 #include "libiberty.h" 31 #include "safe-ctype.h" 32 #include "windmc.h" 33 #include "mcparse.h" 34 35 #include <assert.h> 36 37 /* Exported globals. */ 38 bfd_boolean mclex_want_nl = FALSE; 39 bfd_boolean mclex_want_line = FALSE; 40 bfd_boolean mclex_want_filename = FALSE; 41 42 /* Local globals. */ 43 static unichar *input_stream = NULL; 44 static unichar *input_stream_pos = NULL; 45 static int input_line = 1; 46 static const char *input_filename = NULL; 47 48 void 49 mc_set_content (const unichar *src) 50 { 51 if (!src) 52 return; 53 input_stream = input_stream_pos = unichar_dup (src); 54 } 55 56 void 57 mc_set_inputfile (const char *name) 58 { 59 if (! name || *name == 0) 60 input_filename = "-"; 61 else 62 { 63 const char *s1 = strrchr (name, '/'); 64 const char *s2 = strrchr (name, '\\'); 65 66 if (! s1) 67 s1 = s2; 68 if (s1 && s2 && s1 < s2) 69 s1 = s2; 70 if (! s1) 71 s1 = name; 72 else 73 s1++; 74 s1 = xstrdup (s1); 75 input_filename = s1; 76 } 77 } 78 79 static void 80 show_msg (const char *kind, const char *msg, va_list argp) 81 { 82 fprintf (stderr, "In %s at line %d: %s: ", input_filename, input_line, kind); 83 vfprintf (stderr, msg, argp); 84 fprintf (stderr, ".\n"); 85 } 86 87 void 88 mc_warn (const char *s, ...) 89 { 90 va_list argp; 91 va_start (argp, s); 92 show_msg ("warning", s, argp); 93 va_end (argp); 94 } 95 96 void 97 mc_fatal (const char *s, ...) 98 { 99 va_list argp; 100 va_start (argp, s); 101 show_msg ("fatal", s, argp); 102 va_end (argp); 103 xexit (1); 104 } 105 106 107 int 108 yyerror (const char *s, ...) 109 { 110 va_list argp; 111 va_start (argp, s); 112 show_msg ("parser", s, argp); 113 va_end (argp); 114 return 1; 115 } 116 117 static unichar * 118 get_diff (unichar *end, unichar *start) 119 { 120 unichar *ret; 121 unichar save = *end; 122 123 *end = 0; 124 ret = unichar_dup (start); 125 *end = save; 126 return ret; 127 } 128 129 static rc_uint_type 130 parse_digit (unichar ch) 131 { 132 rc_uint_type base = 10, v = 0, c; 133 134 if (ch == '0') 135 { 136 base = 8; 137 switch (input_stream_pos[0]) 138 { 139 case 'x': case 'X': base = 16; input_stream_pos++; break; 140 case 'o': case 'O': base = 8; input_stream_pos++; break; 141 case 'b': case 'B': base = 2; input_stream_pos++; break; 142 } 143 } 144 else 145 v = (rc_uint_type) (ch - '0'); 146 147 while ((ch = input_stream_pos[0]) != 0) 148 { 149 if (ch >= 'A' && ch <= 'F') 150 c = (rc_uint_type) (ch - 'A') + 10; 151 else if (ch >= 'a' && ch <= 'f') 152 c = (rc_uint_type) (ch - 'a') + 10; 153 else if (ch >= '0' && ch <= '9') 154 c = (rc_uint_type) (ch - '0'); 155 else 156 break; 157 v *= base; 158 v += c; 159 ++input_stream_pos; 160 } 161 if (input_stream_pos[0] == 'U' || input_stream_pos[0] == 'u') 162 input_stream_pos++; 163 if (input_stream_pos[0] == 'L' || input_stream_pos[0] == 'l') 164 input_stream_pos++; 165 if (input_stream_pos[0] == 'L' || input_stream_pos[0] == 'l') 166 input_stream_pos++; 167 return v; 168 } 169 170 static mc_keyword *keyword_top = NULL; 171 172 const mc_keyword * 173 enum_facility (int e) 174 { 175 mc_keyword *h = keyword_top; 176 177 while (h != NULL) 178 { 179 while (h && strcmp (h->group_name, "facility") != 0) 180 h = h->next; 181 if (e == 0) 182 return h; 183 --e; 184 if (h) 185 h = h->next; 186 } 187 return h; 188 } 189 190 const mc_keyword * 191 enum_severity (int e) 192 { 193 mc_keyword *h = keyword_top; 194 195 while (h != NULL) 196 { 197 while (h && strcmp (h->group_name, "severity") != 0) 198 h = h->next; 199 if (e == 0) 200 return h; 201 --e; 202 if (h) 203 h = h->next; 204 } 205 return h; 206 } 207 208 static void 209 mc_add_keyword_ascii (const char *sz, int rid, const char *grp, rc_uint_type nv, const char *sv) 210 { 211 unichar *usz, *usv = NULL; 212 rc_uint_type usz_len; 213 214 unicode_from_codepage (&usz_len, &usz, sz, CP_ACP); 215 if (sv) 216 unicode_from_codepage (&usz_len, &usv, sv, CP_ACP); 217 mc_add_keyword (usz, rid, grp, nv, usv); 218 } 219 220 void 221 mc_add_keyword (unichar *usz, int rid, const char *grp, rc_uint_type nv, unichar *sv) 222 { 223 mc_keyword *p, *c, *n; 224 size_t len = unichar_len (usz); 225 226 c = keyword_top; 227 p = NULL; 228 while (c != NULL) 229 { 230 if (c->len > len) 231 break; 232 if (c->len == len) 233 { 234 int e = memcmp (usz, c->usz, len * sizeof (unichar)); 235 236 if (e < 0) 237 break; 238 if (! e) 239 { 240 if (! strcmp (grp, "keyword") || strcmp (c->group_name, grp) != 0) 241 fatal (_("Duplicate symbol entered into keyword list.")); 242 c->rid = rid; 243 c->nval = nv; 244 c->sval = (!sv ? NULL : unichar_dup (sv)); 245 if (! strcmp (grp, "language")) 246 { 247 const wind_language_t *lag = wind_find_language_by_id ((unsigned) nv); 248 249 if (lag == NULL) 250 fatal ("Language ident 0x%lx is not resolvable.\n", (long) nv); 251 memcpy (&c->lang_info, lag, sizeof (*lag)); 252 } 253 return; 254 } 255 } 256 c = (p = c)->next; 257 } 258 n = xmalloc (sizeof (mc_keyword)); 259 n->next = c; 260 n->len = len; 261 n->group_name = grp; 262 n->usz = usz; 263 n->rid = rid; 264 n->nval = nv; 265 n->sval = (!sv ? NULL : unichar_dup (sv)); 266 if (! strcmp (grp, "language")) 267 { 268 const wind_language_t *lag = wind_find_language_by_id ((unsigned) nv); 269 if (lag == NULL) 270 fatal ("Language ident 0x%lx is not resolvable.\n", (long) nv); 271 memcpy (&n->lang_info, lag, sizeof (*lag)); 272 } 273 if (! p) 274 keyword_top = n; 275 else 276 p->next = n; 277 } 278 279 static int 280 mc_token (const unichar *t, size_t len) 281 { 282 static int was_init = 0; 283 mc_keyword *k; 284 285 if (! was_init) 286 { 287 was_init = 1; 288 mc_add_keyword_ascii ("OutputBase", MCOUTPUTBASE, "keyword", 0, NULL); 289 mc_add_keyword_ascii ("MessageIdTypedef", MCMESSAGEIDTYPEDEF, "keyword", 0, NULL); 290 mc_add_keyword_ascii ("SeverityNames", MCSEVERITYNAMES, "keyword", 0, NULL); 291 mc_add_keyword_ascii ("FacilityNames", MCFACILITYNAMES, "keyword", 0, NULL); 292 mc_add_keyword_ascii ("LanguageNames", MCLANGUAGENAMES, "keyword", 0, NULL); 293 mc_add_keyword_ascii ("MessageId", MCMESSAGEID, "keyword", 0, NULL); 294 mc_add_keyword_ascii ("Severity", MCSEVERITY, "keyword", 0, NULL); 295 mc_add_keyword_ascii ("Facility", MCFACILITY, "keyword", 0, NULL); 296 mc_add_keyword_ascii ("SymbolicName", MCSYMBOLICNAME, "keyword", 0, NULL); 297 mc_add_keyword_ascii ("Language", MCLANGUAGE, "keyword", 0, NULL); 298 mc_add_keyword_ascii ("Success", MCTOKEN, "severity", 0, NULL); 299 mc_add_keyword_ascii ("Informational", MCTOKEN, "severity", 1, NULL); 300 mc_add_keyword_ascii ("Warning", MCTOKEN, "severity", 2, NULL); 301 mc_add_keyword_ascii ("Error", MCTOKEN, "severity", 3, NULL); 302 mc_add_keyword_ascii ("System", MCTOKEN, "facility", 0xff, NULL); 303 mc_add_keyword_ascii ("Application", MCTOKEN, "facility", 0xfff, NULL); 304 mc_add_keyword_ascii ("English", MCTOKEN, "language", 0x409, "MSG00001"); 305 } 306 k = keyword_top; 307 if (!len || !t || *t == 0) 308 return -1; 309 while (k != NULL) 310 { 311 if (k->len > len) 312 break; 313 if (k->len == len) 314 { 315 if (! memcmp (k->usz, t, len * sizeof (unichar))) 316 { 317 if (k->rid == MCTOKEN) 318 yylval.tok = k; 319 return k->rid; 320 } 321 } 322 k = k->next; 323 } 324 return -1; 325 } 326 327 int 328 yylex (void) 329 { 330 unichar *start_token; 331 unichar ch; 332 333 if (! input_stream_pos) 334 { 335 fatal ("Input stream not setuped.\n"); 336 return -1; 337 } 338 if (mclex_want_line) 339 { 340 start_token = input_stream_pos; 341 if (input_stream_pos[0] == '.' 342 && (input_stream_pos[1] == '\n' 343 || (input_stream_pos[1] == '\r' && input_stream_pos[2] == '\n'))) 344 { 345 mclex_want_line = FALSE; 346 while (input_stream_pos[0] != 0 && input_stream_pos[0] != '\n') 347 ++input_stream_pos; 348 if (input_stream_pos[0] == '\n') 349 ++input_stream_pos; 350 return MCENDLINE; 351 } 352 while (input_stream_pos[0] != 0 && input_stream_pos[0] != '\n') 353 ++input_stream_pos; 354 if (input_stream_pos[0] == '\n') 355 ++input_stream_pos; 356 yylval.ustr = get_diff (input_stream_pos, start_token); 357 return MCLINE; 358 } 359 while ((ch = input_stream_pos[0]) <= 0x20) 360 { 361 if (ch == 0) 362 return -1; 363 ++input_stream_pos; 364 if (ch == '\n') 365 input_line += 1; 366 if (mclex_want_nl && ch == '\n') 367 { 368 mclex_want_nl = FALSE; 369 return NL; 370 } 371 } 372 start_token = input_stream_pos; 373 ++input_stream_pos; 374 if (mclex_want_filename) 375 { 376 mclex_want_filename = FALSE; 377 if (ch == '"') 378 { 379 start_token++; 380 while ((ch = input_stream_pos[0]) != 0) 381 { 382 if (ch == '"') 383 break; 384 ++input_stream_pos; 385 } 386 yylval.ustr = get_diff (input_stream_pos, start_token); 387 if (ch == '"') 388 ++input_stream_pos; 389 } 390 else 391 { 392 while ((ch = input_stream_pos[0]) != 0) 393 { 394 if (ch <= 0x20 || ch == ')') 395 break; 396 ++input_stream_pos; 397 } 398 yylval.ustr = get_diff (input_stream_pos, start_token); 399 } 400 return MCFILENAME; 401 } 402 switch (ch) 403 { 404 case ';': 405 ++start_token; 406 while (input_stream_pos[0] != '\n' && input_stream_pos[0] != 0) 407 ++input_stream_pos; 408 if (input_stream_pos[0] == '\n') 409 input_stream_pos++; 410 yylval.ustr = get_diff (input_stream_pos, start_token); 411 return MCCOMMENT; 412 case '=': 413 return '='; 414 case '(': 415 return '('; 416 case ')': 417 return ')'; 418 case '+': 419 return '+'; 420 case ':': 421 return ':'; 422 case '0': case '1': case '2': case '3': case '4': 423 case '5': case '6': case '7': case '8': case '9': 424 yylval.ival = parse_digit (ch); 425 return MCNUMBER; 426 default: 427 if (ch >= 0x40) 428 { 429 int ret; 430 while (input_stream_pos[0] >= 0x40 || (input_stream_pos[0] >= '0' && input_stream_pos[0] <= '9')) 431 ++input_stream_pos; 432 ret = mc_token (start_token, (size_t) (input_stream_pos - start_token)); 433 if (ret != -1) 434 return ret; 435 yylval.ustr = get_diff (input_stream_pos, start_token); 436 return MCIDENT; 437 } 438 yyerror ("illegal character 0x%x.", ch); 439 } 440 return -1; 441 } 442