1 /* Symbol table manager for Bison.
2 
3    Copyright (C) 1984, 1989, 2000-2002, 2004-2015, 2018-2021 Free
4    Software Foundation, Inc.
5 
6    This file is part of Bison, the GNU Compiler Compiler.
7 
8    This program is free software: you can redistribute it and/or modify
9    it under the terms of the GNU General Public License as published by
10    the Free Software Foundation, either version 3 of the License, or
11    (at your option) any later version.
12 
13    This program is distributed in the hope that it will be useful,
14    but WITHOUT ANY WARRANTY; without even the implied warranty of
15    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
16    GNU General Public License for more details.
17 
18    You should have received a copy of the GNU General Public License
19    along with this program.  If not, see <https://www.gnu.org/licenses/>.  */
20 
21 #include <config.h>
22 #include "symtab.h"
23 
24 #include "system.h"
25 
26 #include <assure.h>
27 #include <fstrcmp.h>
28 #include <hash.h>
29 #include <quote.h>
30 
31 #include "complain.h"
32 #include "getargs.h"
33 #include "gram.h"
34 #include "intprops.h"
35 
36 /** Undefined token code.  */
37 #define CODE_UNDEFINED (-1)
38 
39 /* Undefined symbol number.  */
40 #define NUMBER_UNDEFINED (-1)
41 
42 
43 static struct hash_table *symbol_table = NULL;
44 static struct hash_table *semantic_type_table = NULL;
45 
46 /*----------------------------------------------------------------.
47 | Symbols sorted by tag.  Allocated by table_sort, after which no |
48 | more symbols should be created.                                 |
49 `----------------------------------------------------------------*/
50 
51 static symbol **symbols_sorted = NULL;
52 static semantic_type **semantic_types_sorted = NULL;
53 
54 
55 /*------------------------.
56 | Distinguished symbols.  |
57 `------------------------*/
58 
59 symbol *errtoken = NULL;
60 symbol *undeftoken = NULL;
61 symbol *eoftoken = NULL;
62 symbol *acceptsymbol = NULL;
63 symbol *startsymbol = NULL;
64 location startsymbol_loc;
65 
66 /* Precedence relation graph. */
67 static symgraph **prec_nodes;
68 
69 /* Store which associativity is used.  */
70 static bool *used_assoc = NULL;
71 
72 bool tag_seen = false;
73 
74 
75 /* Whether SYM was defined by the user.  */
76 
77 static bool
symbol_is_user_defined(symbol * sym)78 symbol_is_user_defined (symbol *sym)
79 {
80   const bool eof_is_user_defined
81     = !eoftoken->alias || STRNEQ (eoftoken->alias->tag, "$end");
82   return sym->tag[0] != '$'
83     && (eof_is_user_defined || (sym != eoftoken && sym->alias != errtoken))
84     && sym != errtoken && sym->alias != errtoken
85     && sym != undeftoken && sym->alias != undeftoken;
86 }
87 
88 
89 /*--------------------------.
90 | Create a new sym_content. |
91 `--------------------------*/
92 
93 static sym_content *
sym_content_new(symbol * s)94 sym_content_new (symbol *s)
95 {
96   sym_content *res = xmalloc (sizeof *res);
97 
98   res->symbol = s;
99 
100   res->type_name = NULL;
101   res->type_loc = empty_loc;
102   for (int i = 0; i < CODE_PROPS_SIZE; ++i)
103     code_props_none_init (&res->props[i]);
104 
105   res->number = NUMBER_UNDEFINED;
106   res->prec_loc = empty_loc;
107   res->prec = 0;
108   res->assoc = undef_assoc;
109   res->code = CODE_UNDEFINED;
110 
111   res->class = unknown_sym;
112   res->status = undeclared;
113 
114   return res;
115 }
116 
117 /*---------------------------------.
118 | Create a new symbol, named TAG.  |
119 `---------------------------------*/
120 
121 static symbol *
symbol_new(uniqstr tag,location loc)122 symbol_new (uniqstr tag, location loc)
123 {
124   symbol *res = xmalloc (sizeof *res);
125   uniqstr_assert (tag);
126 
127   /* If the tag is not a string (starts with a double quote), check
128      that it is valid for Yacc. */
129   if (tag[0] != '\"' && tag[0] != '\'' && strchr (tag, '-'))
130     complain (&loc, Wyacc,
131               _("POSIX Yacc forbids dashes in symbol names: %s"), tag);
132 
133   res->tag = tag;
134   res->location = loc;
135   res->translatable = false;
136   res->location_of_lhs = false;
137   res->alias = NULL;
138   res->content = sym_content_new (res);
139   res->is_alias = false;
140   return res;
141 }
142 
143 /*--------------------.
144 | Free a sym_content. |
145 `--------------------*/
146 
147 static void
sym_content_free(sym_content * sym)148 sym_content_free (sym_content *sym)
149 {
150   free (sym);
151 }
152 
153 
154 /*---------------------------------------------------------.
155 | Free a symbol and its associated content if appropriate. |
156 `---------------------------------------------------------*/
157 
158 static void
symbol_free(void * ptr)159 symbol_free (void *ptr)
160 {
161   symbol *sym = (symbol *)ptr;
162   if (!sym->is_alias)
163     sym_content_free (sym->content);
164   free (sym);
165 
166 }
167 
168 /* If needed, swap first and second so that first has the earliest
169    location (according to location_cmp).
170 
171    Many symbol features (e.g., token codes) are not assigned during
172    parsing, but in a second step, via a traversal of the symbol table
173    sorted on tag.
174 
175    However, error messages make more sense if we keep the first
176    declaration first.
177 */
178 
179 static void
symbols_sort(const symbol ** first,const symbol ** second)180 symbols_sort (const symbol **first, const symbol **second)
181 {
182   if (0 < location_cmp ((*first)->location, (*second)->location))
183     {
184       const symbol* tmp = *first;
185       *first = *second;
186       *second = tmp;
187     }
188 }
189 
190 /* Likewise, for locations.  */
191 
192 static void
locations_sort(location * first,location * second)193 locations_sort (location *first, location *second)
194 {
195   if (0 < location_cmp (*first, *second))
196     {
197       location tmp = *first;
198       *first = *second;
199       *second = tmp;
200     }
201 }
202 
203 char const *
code_props_type_string(code_props_type kind)204 code_props_type_string (code_props_type kind)
205 {
206   switch (kind)
207     {
208     case destructor:
209       return "%destructor";
210     case printer:
211       return "%printer";
212     }
213   abort ();
214 }
215 
216 
217 /*----------------------------------------.
218 | Create a new semantic type, named TAG.  |
219 `----------------------------------------*/
220 
221 static semantic_type *
semantic_type_new(uniqstr tag,const location * loc)222 semantic_type_new (uniqstr tag, const location *loc)
223 {
224   semantic_type *res = xmalloc (sizeof *res);
225 
226   uniqstr_assert (tag);
227   res->tag = tag;
228   res->location = loc ? *loc : empty_loc;
229   res->status = undeclared;
230   for (int i = 0; i < CODE_PROPS_SIZE; ++i)
231     code_props_none_init (&res->props[i]);
232 
233   return res;
234 }
235 
236 
237 /*-----------------.
238 | Print a symbol.  |
239 `-----------------*/
240 
241 #define SYMBOL_INT_ATTR_PRINT(Attr)                     \
242   if (s->content)                                       \
243     fprintf (f, " %s = %d", #Attr, s->content->Attr)
244 
245 #define SYMBOL_STR_ATTR_PRINT(Attr)                     \
246   if (s->content && s->content->Attr)                   \
247     fprintf (f, " %s { %s }", #Attr, s->content->Attr)
248 
249 #define SYMBOL_CODE_PRINT(Attr)                                         \
250   if (s->content && s->content->props[Attr].code)                       \
251     fprintf (f, " %s { %s }", #Attr, s->content->props[Attr].code)
252 
253 void
symbol_print(symbol const * s,FILE * f)254 symbol_print (symbol const *s, FILE *f)
255 {
256   if (s)
257     {
258       symbol_class c = s->content->class;
259       fprintf (f, "%s: %s",
260                c == unknown_sym    ? "unknown"
261                : c == pct_type_sym ? "%type"
262                : c == token_sym    ? "token"
263                : c == nterm_sym    ? "nterm"
264                : NULL, /* abort.  */
265                s->tag);
266       putc (' ', f);
267       location_print (s->location, f);
268       SYMBOL_INT_ATTR_PRINT (code);
269       SYMBOL_INT_ATTR_PRINT (number);
270       SYMBOL_STR_ATTR_PRINT (type_name);
271       SYMBOL_CODE_PRINT (destructor);
272       SYMBOL_CODE_PRINT (printer);
273     }
274   else
275     fputs ("<NULL>", f);
276 }
277 
278 #undef SYMBOL_ATTR_PRINT
279 #undef SYMBOL_CODE_PRINT
280 
281 
282 /*----------------------------------.
283 | Whether S is a valid identifier.  |
284 `----------------------------------*/
285 
286 static bool
is_identifier(uniqstr s)287 is_identifier (uniqstr s)
288 {
289   static char const alphanum[26 + 26 + 1 + 10] =
290     "abcdefghijklmnopqrstuvwxyz"
291     "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
292     "_"
293     "0123456789";
294   if (!s || ! memchr (alphanum, *s, sizeof alphanum - 10))
295     return false;
296   for (++s; *s; ++s)
297     if (! memchr (alphanum, *s, sizeof alphanum))
298       return false;
299   return true;
300 }
301 
302 
303 /*-----------------------------------------------.
304 | Get the identifier associated to this symbol.  |
305 `-----------------------------------------------*/
306 
307 uniqstr
symbol_id_get(symbol const * sym)308 symbol_id_get (symbol const *sym)
309 {
310   if (sym->alias)
311     sym = sym->alias;
312   return is_identifier (sym->tag) ? sym->tag : 0;
313 }
314 
315 
316 /*------------------------------------------------------------------.
317 | Complain that S's WHAT is redeclared at SECOND, and was first set |
318 | at FIRST.                                                         |
319 `------------------------------------------------------------------*/
320 
321 static void
complain_symbol_redeclared(symbol * s,const char * what,location first,location second)322 complain_symbol_redeclared (symbol *s, const char *what, location first,
323                             location second)
324 {
325   locations_sort (&first, &second);
326   complain (&second, complaint, _("%s redeclaration for %s"), what, s->tag);
327   subcomplain (&first, complaint, _("previous declaration"));
328 }
329 
330 static void
complain_semantic_type_redeclared(semantic_type * s,const char * what,location first,location second)331 complain_semantic_type_redeclared (semantic_type *s, const char *what, location first,
332                                    location second)
333 {
334   locations_sort (&first, &second);
335   complain (&second, complaint, _("%s redeclaration for <%s>"), what, s->tag);
336   subcomplain (&first, complaint, _("previous declaration"));
337 }
338 
339 static void
complain_class_redeclared(symbol * sym,symbol_class class,location second)340 complain_class_redeclared (symbol *sym, symbol_class class, location second)
341 {
342   complain (&second, complaint,
343             class == token_sym
344             ? _("symbol %s redeclared as a token")
345             : _("symbol %s redeclared as a nonterminal"), sym->tag);
346   if (!location_empty (sym->location))
347     subcomplain (&sym->location, complaint, _("previous definition"));
348 }
349 
350 static const symbol *
symbol_from_uniqstr_fuzzy(const uniqstr key)351 symbol_from_uniqstr_fuzzy (const uniqstr key)
352 {
353   aver (symbols_sorted);
354 #define FSTRCMP_THRESHOLD 0.6
355   double best_similarity = FSTRCMP_THRESHOLD;
356   const symbol *res = NULL;
357   size_t count = hash_get_n_entries (symbol_table);
358   for (int i = 0; i < count; ++i)
359     {
360       symbol *sym = symbols_sorted[i];
361       if (STRNEQ (key, sym->tag)
362           && (sym->content->status == declared
363               || sym->content->status == undeclared))
364         {
365           double similarity = fstrcmp_bounded (key, sym->tag, best_similarity);
366           if (best_similarity < similarity)
367             {
368               res = sym;
369               best_similarity = similarity;
370             }
371         }
372     }
373   return res;
374 }
375 
376 static void
complain_symbol_undeclared(const symbol * sym)377 complain_symbol_undeclared (const symbol *sym)
378 {
379   assert (sym->content->status != declared);
380   const symbol *best = symbol_from_uniqstr_fuzzy (sym->tag);
381   if (best)
382     {
383       complain (&sym->location,
384                 sym->content->status == needed ? complaint : Wother,
385                 _("symbol %s is used, but is not defined as a token"
386                   " and has no rules; did you mean %s?"),
387                 quote_n (0, sym->tag),
388                 quote_n (1, best->tag));
389       if (feature_flag & feature_caret)
390         location_caret_suggestion (sym->location, best->tag, stderr);
391     }
392   else
393     complain (&sym->location,
394               sym->content->status == needed ? complaint : Wother,
395               _("symbol %s is used, but is not defined as a token"
396                 " and has no rules"),
397               quote (sym->tag));
398 }
399 
400 void
symbol_location_as_lhs_set(symbol * sym,location loc)401 symbol_location_as_lhs_set (symbol *sym, location loc)
402 {
403   if (!sym->location_of_lhs)
404     {
405       sym->location = loc;
406       sym->location_of_lhs = true;
407     }
408 }
409 
410 
411 /*-----------------------------------------------------------------.
412 | Set the TYPE_NAME associated with SYM.  Does nothing if passed 0 |
413 | as TYPE_NAME.                                                    |
414 `-----------------------------------------------------------------*/
415 
416 void
symbol_type_set(symbol * sym,uniqstr type_name,location loc)417 symbol_type_set (symbol *sym, uniqstr type_name, location loc)
418 {
419   if (type_name)
420     {
421       tag_seen = true;
422       if (sym->content->type_name)
423         complain_symbol_redeclared (sym, "%type",
424                                     sym->content->type_loc, loc);
425       else
426         {
427           uniqstr_assert (type_name);
428           sym->content->type_name = type_name;
429           sym->content->type_loc = loc;
430         }
431     }
432 }
433 
434 /*--------------------------------------------------------.
435 | Set the DESTRUCTOR or PRINTER associated with the SYM.  |
436 `--------------------------------------------------------*/
437 
438 void
symbol_code_props_set(symbol * sym,code_props_type kind,code_props const * code)439 symbol_code_props_set (symbol *sym, code_props_type kind,
440                        code_props const *code)
441 {
442   if (sym->content->props[kind].code)
443     complain_symbol_redeclared (sym, code_props_type_string (kind),
444                                 sym->content->props[kind].location,
445                                 code->location);
446   else
447     sym->content->props[kind] = *code;
448 }
449 
450 
451 /*-----------------------------------------------------.
452 | Set the DESTRUCTOR or PRINTER associated with TYPE.  |
453 `-----------------------------------------------------*/
454 
455 void
semantic_type_code_props_set(semantic_type * type,code_props_type kind,code_props const * code)456 semantic_type_code_props_set (semantic_type *type,
457                               code_props_type kind,
458                               code_props const *code)
459 {
460   if (type->props[kind].code)
461     complain_semantic_type_redeclared (type, code_props_type_string (kind),
462                                        type->props[kind].location,
463                                        code->location);
464   else
465     type->props[kind] = *code;
466 }
467 
468 /*---------------------------------------------------.
469 | Get the computed %destructor or %printer for SYM.  |
470 `---------------------------------------------------*/
471 
472 code_props *
symbol_code_props_get(symbol * sym,code_props_type kind)473 symbol_code_props_get (symbol *sym, code_props_type kind)
474 {
475   /* Per-symbol code props.  */
476   if (sym->content->props[kind].code)
477     return &sym->content->props[kind];
478 
479   /* Per-type code props.  */
480   if (sym->content->type_name)
481     {
482       code_props *code =
483         &semantic_type_get (sym->content->type_name, NULL)->props[kind];
484       if (code->code)
485         return code;
486     }
487 
488   /* Apply default code props's only to user-defined symbols.  */
489   if (symbol_is_user_defined (sym))
490     {
491       code_props *code = &semantic_type_get (sym->content->type_name ? "*" : "",
492                                              NULL)->props[kind];
493       if (code->code)
494         return code;
495     }
496   return &code_props_none;
497 }
498 
499 /*-----------------------------------------------------------------.
500 | Set the PRECEDENCE associated with SYM.  Does nothing if invoked |
501 | with UNDEF_ASSOC as ASSOC.                                       |
502 `-----------------------------------------------------------------*/
503 
504 void
symbol_precedence_set(symbol * sym,int prec,assoc a,location loc)505 symbol_precedence_set (symbol *sym, int prec, assoc a, location loc)
506 {
507   if (a != undef_assoc)
508     {
509       sym_content *s = sym->content;
510       if (s->prec)
511         complain_symbol_redeclared (sym, assoc_to_string (a),
512                                     s->prec_loc, loc);
513       else
514         {
515           s->prec = prec;
516           s->assoc = a;
517           s->prec_loc = loc;
518         }
519     }
520 
521   /* Only terminals have a precedence. */
522   symbol_class_set (sym, token_sym, loc, false);
523 }
524 
525 
526 /*------------------------------------.
527 | Set the CLASS associated with SYM.  |
528 `------------------------------------*/
529 
530 static void
complain_pct_type_on_token(location * loc)531 complain_pct_type_on_token (location *loc)
532 {
533   complain (loc, Wyacc,
534             _("POSIX yacc reserves %%type to nonterminals"));
535 }
536 
537 void
symbol_class_set(symbol * sym,symbol_class class,location loc,bool declaring)538 symbol_class_set (symbol *sym, symbol_class class, location loc, bool declaring)
539 {
540   aver (class != unknown_sym);
541   sym_content *s = sym->content;
542   if (class == pct_type_sym)
543     {
544       if (s->class == token_sym)
545         complain_pct_type_on_token (&loc);
546       else if (s->class == unknown_sym)
547         s->class = class;
548     }
549   else if (s->class != unknown_sym && s->class != pct_type_sym
550            && s->class != class)
551     complain_class_redeclared (sym, class, loc);
552   else
553     {
554       if (class == token_sym && s->class == pct_type_sym)
555         complain_pct_type_on_token (&sym->location);
556 
557       s->class = class;
558 
559       if (declaring)
560         {
561           if (s->status == declared)
562             {
563               complain (&loc, Wother,
564                         _("symbol %s redeclared"), sym->tag);
565               subcomplain (&sym->location, Wother,
566                            _("previous declaration"));
567             }
568           else
569             {
570               sym->location = loc;
571               s->status = declared;
572             }
573         }
574     }
575 }
576 
577 
578 /*----------------------------.
579 | Set the token code of SYM.  |
580 `----------------------------*/
581 
582 void
symbol_code_set(symbol * sym,int code,location loc)583 symbol_code_set (symbol *sym, int code, location loc)
584 {
585   int *codep = &sym->content->code;
586   if (sym->content->class != token_sym)
587     complain (&loc, complaint,
588               _("nonterminals cannot be given a token code"));
589   else if (*codep != CODE_UNDEFINED
590            && *codep != code)
591     complain (&loc, complaint, _("redefining code of token %s"),
592               sym->tag);
593   else if (code == INT_MAX)
594     complain (&loc, complaint, _("code of token %s too large"),
595               sym->tag);
596   else
597     {
598       *codep = code;
599       /* User defined $end token? */
600       if (code == 0 && !eoftoken)
601         {
602           eoftoken = sym->content->symbol;
603           eoftoken->content->number = 0;
604         }
605     }
606 }
607 
608 
609 /*----------------------------------------------------------.
610 | If SYM is not defined, report an error, and consider it a |
611 | nonterminal.                                              |
612 `----------------------------------------------------------*/
613 
614 static void
symbol_check_defined(symbol * sym)615 symbol_check_defined (symbol *sym)
616 {
617   sym_content *s = sym->content;
618   if (s->class == unknown_sym || s->class == pct_type_sym)
619     {
620       complain_symbol_undeclared (sym);
621       s->class = nterm_sym;
622     }
623 
624   if (s->number == NUMBER_UNDEFINED)
625     s->number = s->class == token_sym ? ntokens++ : nnterms++;
626 
627   if (s->class == token_sym
628       && sym->tag[0] == '"'
629       && !sym->is_alias)
630     complain (&sym->location, Wdangling_alias,
631               _("string literal %s not attached to a symbol"),
632               sym->tag);
633 
634   for (int i = 0; i < 2; ++i)
635     symbol_code_props_get (sym, i)->is_used = true;
636 
637   /* Set the semantic type status associated to the current symbol to
638      'declared' so that we could check semantic types unnecessary uses. */
639   if (s->type_name)
640     {
641       semantic_type *sem_type = semantic_type_get (s->type_name, NULL);
642       if (sem_type)
643         sem_type->status = declared;
644     }
645 }
646 
647 static void
semantic_type_check_defined(semantic_type * sem_type)648 semantic_type_check_defined (semantic_type *sem_type)
649 {
650   /* <*> and <> do not have to be "declared".  */
651   if (sem_type->status == declared
652       || !*sem_type->tag
653       || STREQ (sem_type->tag, "*"))
654     {
655       for (int i = 0; i < 2; ++i)
656         if (sem_type->props[i].kind != CODE_PROPS_NONE
657             && ! sem_type->props[i].is_used)
658           complain (&sem_type->location, Wother,
659                     _("useless %s for type <%s>"),
660                     code_props_type_string (i), sem_type->tag);
661     }
662   else
663     complain (&sem_type->location, Wother,
664               _("type <%s> is used, but is not associated to any symbol"),
665               sem_type->tag);
666 }
667 
668 /*-------------------------------------------------------------------.
669 | Merge the properties (precedence, associativity, etc.) of SYM, and |
670 | its string-named alias STR; check consistency.                     |
671 `-------------------------------------------------------------------*/
672 
673 static void
symbol_merge_properties(symbol * sym,symbol * str)674 symbol_merge_properties (symbol *sym, symbol *str)
675 {
676   if (str->content->type_name != sym->content->type_name)
677     {
678       if (str->content->type_name)
679         symbol_type_set (sym,
680                          str->content->type_name, str->content->type_loc);
681       else
682         symbol_type_set (str,
683                          sym->content->type_name, sym->content->type_loc);
684     }
685 
686 
687   for (int i = 0; i < CODE_PROPS_SIZE; ++i)
688     if (str->content->props[i].code)
689       symbol_code_props_set (sym, i, &str->content->props[i]);
690     else if (sym->content->props[i].code)
691       symbol_code_props_set (str, i, &sym->content->props[i]);
692 
693   if (sym->content->prec || str->content->prec)
694     {
695       if (str->content->prec)
696         symbol_precedence_set (sym, str->content->prec, str->content->assoc,
697                                str->content->prec_loc);
698       else
699         symbol_precedence_set (str, sym->content->prec, sym->content->assoc,
700                                sym->content->prec_loc);
701     }
702 }
703 
704 void
symbol_make_alias(symbol * sym,symbol * str,location loc)705 symbol_make_alias (symbol *sym, symbol *str, location loc)
706 {
707   if (sym->content->class != token_sym)
708     complain (&loc, complaint,
709               _("nonterminals cannot be given a string alias"));
710   else if (str->alias)
711     complain (&loc, Wother,
712               _("symbol %s used more than once as a literal string"), str->tag);
713   else if (sym->alias)
714     complain (&loc, Wother,
715               _("symbol %s given more than one literal string"), sym->tag);
716   else
717     {
718       symbol_merge_properties (sym, str);
719       sym_content_free (str->content);
720       str->content = sym->content;
721       str->content->symbol = str;
722       str->is_alias = true;
723       str->alias = sym;
724       sym->alias = str;
725     }
726 }
727 
728 
729 /*-------------------------------------------------------------------.
730 | Assign a symbol number, and write the definition of the token name |
731 | into FDEFINES.  Put in SYMBOLS.                                    |
732 `-------------------------------------------------------------------*/
733 
734 static void
symbol_pack(symbol * sym)735 symbol_pack (symbol *sym)
736 {
737   aver (sym->content->number != NUMBER_UNDEFINED);
738   if (sym->content->class == nterm_sym)
739     sym->content->number += ntokens;
740 
741   symbols[sym->content->number] = sym->content->symbol;
742 }
743 
744 static void
complain_code_redeclared(int num,const symbol * first,const symbol * second)745 complain_code_redeclared (int num, const symbol *first, const symbol *second)
746 {
747   symbols_sort (&first, &second);
748   complain (&second->location, complaint,
749             _("code %d reassigned to token %s"),
750             num, second->tag);
751   subcomplain (&first->location, complaint,
752                _("previous declaration for %s"),
753                first->tag);
754 }
755 
756 /*-------------------------------------------------.
757 | Put SYM in TOKEN_TRANSLATIONS if it is a token.  |
758 `-------------------------------------------------*/
759 
760 static void
symbol_translation(const symbol * sym)761 symbol_translation (const symbol *sym)
762 {
763   if (sym->content->class == token_sym && !sym->is_alias)
764     {
765       /* A token whose translation has already been set? */
766       if (token_translations[sym->content->code]
767           != undeftoken->content->number)
768         complain_code_redeclared
769           (sym->content->code,
770            symbols[token_translations[sym->content->code]], sym);
771       else
772         token_translations[sym->content->code]
773           = sym->content->number;
774     }
775 }
776 
777 
778 /*---------------------------------------.
779 | Symbol and semantic type hash tables.  |
780 `---------------------------------------*/
781 
782 /* Initial capacity of symbol and semantic type hash table.  */
783 #define HT_INITIAL_CAPACITY 257
784 
785 static inline bool
hash_compare_symbol(const symbol * m1,const symbol * m2)786 hash_compare_symbol (const symbol *m1, const symbol *m2)
787 {
788   /* Since tags are unique, we can compare the pointers themselves.  */
789   return UNIQSTR_EQ (m1->tag, m2->tag);
790 }
791 
792 static inline bool
hash_compare_semantic_type(const semantic_type * m1,const semantic_type * m2)793 hash_compare_semantic_type (const semantic_type *m1, const semantic_type *m2)
794 {
795   /* Since names are unique, we can compare the pointers themselves.  */
796   return UNIQSTR_EQ (m1->tag, m2->tag);
797 }
798 
799 static bool
hash_symbol_comparator(void const * m1,void const * m2)800 hash_symbol_comparator (void const *m1, void const *m2)
801 {
802   return hash_compare_symbol (m1, m2);
803 }
804 
805 static bool
hash_semantic_type_comparator(void const * m1,void const * m2)806 hash_semantic_type_comparator (void const *m1, void const *m2)
807 {
808   return hash_compare_semantic_type (m1, m2);
809 }
810 
811 static inline size_t
hash_symbol(const symbol * m,size_t tablesize)812 hash_symbol (const symbol *m, size_t tablesize)
813 {
814   /* Since tags are unique, we can hash the pointer itself.  */
815   return ((uintptr_t) m->tag) % tablesize;
816 }
817 
818 static inline size_t
hash_semantic_type(const semantic_type * m,size_t tablesize)819 hash_semantic_type (const semantic_type *m, size_t tablesize)
820 {
821   /* Since names are unique, we can hash the pointer itself.  */
822   return ((uintptr_t) m->tag) % tablesize;
823 }
824 
825 static size_t
hash_symbol_hasher(void const * m,size_t tablesize)826 hash_symbol_hasher (void const *m, size_t tablesize)
827 {
828   return hash_symbol (m, tablesize);
829 }
830 
831 static size_t
hash_semantic_type_hasher(void const * m,size_t tablesize)832 hash_semantic_type_hasher (void const *m, size_t tablesize)
833 {
834   return hash_semantic_type (m, tablesize);
835 }
836 
837 /*-------------------------------.
838 | Create the symbol hash table.  |
839 `-------------------------------*/
840 
841 void
symbols_new(void)842 symbols_new (void)
843 {
844   symbol_table = hash_xinitialize (HT_INITIAL_CAPACITY,
845                                    NULL,
846                                    hash_symbol_hasher,
847                                    hash_symbol_comparator,
848                                    symbol_free);
849 
850   /* Construct the acceptsymbol symbol. */
851   acceptsymbol = symbol_get ("$accept", empty_loc);
852   acceptsymbol->content->class = nterm_sym;
853   acceptsymbol->content->number = nnterms++;
854 
855   /* Construct the YYerror/"error" token */
856   errtoken = symbol_get ("YYerror", empty_loc);
857   errtoken->content->class = token_sym;
858   errtoken->content->number = ntokens++;
859   {
860     symbol *alias = symbol_get ("error", empty_loc);
861     symbol_class_set (alias, token_sym, empty_loc, false);
862     symbol_make_alias (errtoken, alias, empty_loc);
863   }
864 
865   /* Construct the YYUNDEF/"$undefined" token that represents all
866      undefined literal tokens.  It is always symbol number 2.  */
867   undeftoken = symbol_get ("YYUNDEF", empty_loc);
868   undeftoken->content->class = token_sym;
869   undeftoken->content->number = ntokens++;
870   {
871     symbol *alias = symbol_get ("$undefined", empty_loc);
872     symbol_class_set (alias, token_sym, empty_loc, false);
873     symbol_make_alias (undeftoken, alias, empty_loc);
874   }
875 
876   semantic_type_table = hash_xinitialize (HT_INITIAL_CAPACITY,
877                                           NULL,
878                                           hash_semantic_type_hasher,
879                                           hash_semantic_type_comparator,
880                                           free);
881 }
882 
883 
884 /*----------------------------------------------------------------.
885 | Find the symbol named KEY, and return it.  If it does not exist |
886 | yet, create it.                                                 |
887 `----------------------------------------------------------------*/
888 
889 symbol *
symbol_from_uniqstr(const uniqstr key,location loc)890 symbol_from_uniqstr (const uniqstr key, location loc)
891 {
892   symbol probe;
893 
894   probe.tag = key;
895   symbol *res = hash_lookup (symbol_table, &probe);
896 
897   if (!res)
898     {
899       /* First insertion in the hash. */
900       aver (!symbols_sorted);
901       res = symbol_new (key, loc);
902       hash_xinsert (symbol_table, res);
903     }
904   return res;
905 }
906 
907 
908 /*-----------------------------------------------------------------------.
909 | Find the semantic type named KEY, and return it.  If it does not exist |
910 | yet, create it.                                                        |
911 `-----------------------------------------------------------------------*/
912 
913 semantic_type *
semantic_type_from_uniqstr(const uniqstr key,const location * loc)914 semantic_type_from_uniqstr (const uniqstr key, const location *loc)
915 {
916   semantic_type probe;
917 
918   probe.tag = key;
919   semantic_type *res = hash_lookup (semantic_type_table, &probe);
920 
921   if (!res)
922     {
923       /* First insertion in the hash. */
924       res = semantic_type_new (key, loc);
925       hash_xinsert (semantic_type_table, res);
926     }
927   return res;
928 }
929 
930 
931 /*----------------------------------------------------------------.
932 | Find the symbol named KEY, and return it.  If it does not exist |
933 | yet, create it.                                                 |
934 `----------------------------------------------------------------*/
935 
936 symbol *
symbol_get(const char * key,location loc)937 symbol_get (const char *key, location loc)
938 {
939   return symbol_from_uniqstr (uniqstr_new (key), loc);
940 }
941 
942 
943 /*-----------------------------------------------------------------------.
944 | Find the semantic type named KEY, and return it.  If it does not exist |
945 | yet, create it.                                                        |
946 `-----------------------------------------------------------------------*/
947 
948 semantic_type *
semantic_type_get(const char * key,const location * loc)949 semantic_type_get (const char *key, const location *loc)
950 {
951   return semantic_type_from_uniqstr (uniqstr_new (key), loc);
952 }
953 
954 
955 /*------------------------------------------------------------------.
956 | Generate a dummy nonterminal, whose name cannot conflict with the |
957 | user's names.                                                     |
958 `------------------------------------------------------------------*/
959 
960 symbol *
dummy_symbol_get(location loc)961 dummy_symbol_get (location loc)
962 {
963   /* Incremented for each generated symbol.  */
964   static int dummy_count = 0;
965   char buf[32];
966   int len = snprintf (buf, sizeof buf, "$@%d", ++dummy_count);
967   assure (len < sizeof buf);
968   symbol *sym = symbol_get (buf, loc);
969   sym->content->class = nterm_sym;
970   return sym;
971 }
972 
973 bool
symbol_is_dummy(symbol const * sym)974 symbol_is_dummy (symbol const *sym)
975 {
976   return sym->tag[0] == '@' || (sym->tag[0] == '$' && sym->tag[1] == '@');
977 }
978 
979 /*-------------------.
980 | Free the symbols.  |
981 `-------------------*/
982 
983 void
symbols_free(void)984 symbols_free (void)
985 {
986   hash_free (symbol_table);
987   hash_free (semantic_type_table);
988   free (symbols);
989   free (symbols_sorted);
990   free (semantic_types_sorted);
991 }
992 
993 
994 static int
symbol_cmp(void const * a,void const * b)995 symbol_cmp (void const *a, void const *b)
996 {
997   return location_cmp ((*(symbol * const *)a)->location,
998                        (*(symbol * const *)b)->location);
999 }
1000 
1001 /* Store in *SORTED an array of pointers to the symbols contained in
1002    TABLE, sorted by order of appearance (i.e., by location). */
1003 
1004 static void
table_sort(struct hash_table * table,symbol *** sorted)1005 table_sort (struct hash_table *table, symbol ***sorted)
1006 {
1007   aver (!*sorted);
1008   size_t count = hash_get_n_entries (table);
1009   *sorted = xnmalloc (count + 1, sizeof **sorted);
1010   hash_get_entries (table, (void**)*sorted, count);
1011   qsort (*sorted, count, sizeof **sorted, symbol_cmp);
1012   (*sorted)[count] = NULL;
1013 }
1014 
1015 /*--------------------------------------------------------------.
1016 | Check that all the symbols are defined.  Report any undefined |
1017 | symbols and consider them nonterminals.                       |
1018 `--------------------------------------------------------------*/
1019 
1020 void
symbols_check_defined(void)1021 symbols_check_defined (void)
1022 {
1023   table_sort (symbol_table, &symbols_sorted);
1024   /* semantic_type, like symbol, starts with a 'tag' field and then a
1025      'location' field.  And here we only deal with arrays/hashes of
1026      pointers, sizeof is not an issue.
1027 
1028      So instead of implementing table_sort (and symbol_cmp) once for
1029      each type, let's lie a bit to the typing system, and treat
1030      'semantic_type' as if it were 'symbol'. */
1031   table_sort (semantic_type_table, (symbol ***) &semantic_types_sorted);
1032 
1033   for (int i = 0; symbols_sorted[i]; ++i)
1034     symbol_check_defined (symbols_sorted[i]);
1035   for (int i = 0; semantic_types_sorted[i]; ++i)
1036     semantic_type_check_defined (semantic_types_sorted[i]);
1037 }
1038 
1039 /*------------------------------------------------------------------.
1040 | Set TOKEN_TRANSLATIONS.  Check that no two symbols share the same |
1041 | number.                                                           |
1042 `------------------------------------------------------------------*/
1043 
1044 static void
symbols_token_translations_init(void)1045 symbols_token_translations_init (void)
1046 {
1047   bool code_256_available_p = true;
1048 
1049   /* Find the highest token code, and whether 256, the POSIX preferred
1050      token code for the error token, is used.  */
1051   max_code = 0;
1052   for (int i = 0; i < ntokens; ++i)
1053     {
1054       sym_content *sym = symbols[i]->content;
1055       if (sym->code != CODE_UNDEFINED)
1056         {
1057           if (sym->code > max_code)
1058             max_code = sym->code;
1059           if (sym->code == 256)
1060             code_256_available_p = false;
1061         }
1062     }
1063 
1064   /* If 256 is not used, assign it to error, to follow POSIX.  */
1065   if (code_256_available_p
1066       && errtoken->content->code == CODE_UNDEFINED)
1067     errtoken->content->code = 256;
1068 
1069   /* Set the missing codes. */
1070   if (max_code < 256)
1071     max_code = 256;
1072 
1073   for (int i = 0; i < ntokens; ++i)
1074     {
1075       sym_content *sym = symbols[i]->content;
1076       if (sym->code == CODE_UNDEFINED)
1077         {
1078           IGNORE_TYPE_LIMITS_BEGIN
1079           if (INT_ADD_WRAPV (max_code, 1, &max_code))
1080             complain (NULL, fatal, _("token number too large"));
1081           IGNORE_TYPE_LIMITS_END
1082           sym->code = max_code;
1083         }
1084       if (sym->code > max_code)
1085         max_code = sym->code;
1086     }
1087 
1088   token_translations = xnmalloc (max_code + 1,
1089                                  sizeof *token_translations);
1090 
1091   /* Initialize all entries for literal tokens to the internal token
1092      number for $undefined, which represents all invalid inputs.  */
1093   for (int i = 0; i < max_code + 1; ++i)
1094     token_translations[i] = undeftoken->content->number;
1095   for (int i = 0; symbols_sorted[i]; ++i)
1096     symbol_translation (symbols_sorted[i]);
1097 }
1098 
1099 
1100 /* Whether some symbol requires internationalization.  */
1101 static bool
has_translations(void)1102 has_translations (void)
1103 {
1104   for (const void *entry = hash_get_first (symbol_table);
1105        entry;
1106        entry = hash_get_next (symbol_table, entry))
1107     {
1108       const symbol *sym = (const symbol *) entry;
1109       if (sym->translatable)
1110         return true;
1111     }
1112   return false;
1113 }
1114 
1115 /*----------------------------------------------------------------.
1116 | Assign symbol numbers, and write definition of token names into |
1117 | FDEFINES.  Set up vectors SYMBOL_TABLE, TAGS of symbols.        |
1118 `----------------------------------------------------------------*/
1119 
1120 void
symbols_pack(void)1121 symbols_pack (void)
1122 {
1123   symbols = xcalloc (nsyms, sizeof *symbols);
1124   for (int i = 0; symbols_sorted[i]; ++i)
1125     symbol_pack (symbols_sorted[i]);
1126 
1127   /* Aliases leave empty slots in symbols, so remove them.  */
1128   {
1129     int nsyms_old = nsyms;
1130     for (int writei = 0, readi = 0; readi < nsyms_old; readi += 1)
1131       {
1132         if (symbols[readi] == NULL)
1133           {
1134             nsyms -= 1;
1135             ntokens -= 1;
1136           }
1137         else
1138           {
1139             symbols[writei] = symbols[readi];
1140             symbols[writei]->content->number = writei;
1141             writei += 1;
1142           }
1143       }
1144   }
1145   symbols = xnrealloc (symbols, nsyms, sizeof *symbols);
1146 
1147   symbols_token_translations_init ();
1148 
1149   if (startsymbol->content->class == unknown_sym)
1150     complain (&startsymbol_loc, fatal,
1151               _("the start symbol %s is undefined"),
1152               startsymbol->tag);
1153   else if (startsymbol->content->class == token_sym)
1154     complain (&startsymbol_loc, fatal,
1155               _("the start symbol %s is a token"),
1156               startsymbol->tag);
1157 
1158   // If some user tokens are internationalized, the internal ones
1159   // should be too.
1160   if (has_translations ())
1161     {
1162       const bool eof_is_user_defined
1163         = !eoftoken->alias || STRNEQ (eoftoken->alias->tag, "$end");
1164       if (!eof_is_user_defined)
1165         eoftoken->alias->translatable = true;
1166       undeftoken->alias->translatable = true;
1167       errtoken->alias->translatable = true;
1168     }
1169 }
1170 
1171 /*---------------------------------.
1172 | Initialize relation graph nodes. |
1173 `---------------------------------*/
1174 
1175 static void
init_prec_nodes(void)1176 init_prec_nodes (void)
1177 {
1178   prec_nodes = xcalloc (nsyms, sizeof *prec_nodes);
1179   for (int i = 0; i < nsyms; ++i)
1180     {
1181       prec_nodes[i] = xmalloc (sizeof *prec_nodes[i]);
1182       symgraph *s = prec_nodes[i];
1183       s->id = i;
1184       s->succ = 0;
1185       s->pred = 0;
1186     }
1187 }
1188 
1189 /*----------------.
1190 | Create a link.  |
1191 `----------------*/
1192 
1193 static symgraphlink *
symgraphlink_new(graphid id,symgraphlink * next)1194 symgraphlink_new (graphid id, symgraphlink *next)
1195 {
1196   symgraphlink *res = xmalloc (sizeof *res);
1197   res->id = id;
1198   res->next = next;
1199   return res;
1200 }
1201 
1202 
1203 /*------------------------------------------------------------------.
1204 | Register the second symbol of the precedence relation, and return |
1205 | whether this relation is new.  Use only in register_precedence.   |
1206 `------------------------------------------------------------------*/
1207 
1208 static bool
register_precedence_second_symbol(symgraphlink ** first,graphid sym)1209 register_precedence_second_symbol (symgraphlink **first, graphid sym)
1210 {
1211   if (!*first || sym < (*first)->id)
1212     *first = symgraphlink_new (sym, *first);
1213   else
1214     {
1215       symgraphlink *slist = *first;
1216 
1217       while (slist->next && slist->next->id <= sym)
1218         slist = slist->next;
1219 
1220       if (slist->id == sym)
1221         /* Relation already present. */
1222         return false;
1223 
1224       slist->next = symgraphlink_new (sym, slist->next);
1225     }
1226   return true;
1227 }
1228 
1229 /*------------------------------------------------------------------.
1230 | Register a new relation between symbols as used. The first symbol |
1231 | has a greater precedence than the second one.                     |
1232 `------------------------------------------------------------------*/
1233 
1234 void
register_precedence(graphid first,graphid snd)1235 register_precedence (graphid first, graphid snd)
1236 {
1237   if (!prec_nodes)
1238     init_prec_nodes ();
1239   register_precedence_second_symbol (&(prec_nodes[first]->succ), snd);
1240   register_precedence_second_symbol (&(prec_nodes[snd]->pred), first);
1241 }
1242 
1243 
1244 /*---------------------------------------.
1245 | Deep clear a linked / adjacency list). |
1246 `---------------------------------------*/
1247 
1248 static void
linkedlist_free(symgraphlink * node)1249 linkedlist_free (symgraphlink *node)
1250 {
1251   if (node)
1252     {
1253       while (node->next)
1254         {
1255           symgraphlink *tmp = node->next;
1256           free (node);
1257           node = tmp;
1258         }
1259       free (node);
1260     }
1261 }
1262 
1263 /*----------------------------------------------.
1264 | Clear and destroy association tracking table. |
1265 `----------------------------------------------*/
1266 
1267 static void
assoc_free(void)1268 assoc_free (void)
1269 {
1270   for (int i = 0; i < nsyms; ++i)
1271     {
1272       linkedlist_free (prec_nodes[i]->pred);
1273       linkedlist_free (prec_nodes[i]->succ);
1274       free (prec_nodes[i]);
1275     }
1276   free (prec_nodes);
1277 }
1278 
1279 /*---------------------------------------.
1280 | Initialize association tracking table. |
1281 `---------------------------------------*/
1282 
1283 static void
init_assoc(void)1284 init_assoc (void)
1285 {
1286   used_assoc = xcalloc (nsyms, sizeof *used_assoc);
1287   for (graphid i = 0; i < nsyms; ++i)
1288     used_assoc[i] = false;
1289 }
1290 
1291 /*------------------------------------------------------------------.
1292 | Test if the associativity for the symbols is defined and useless. |
1293 `------------------------------------------------------------------*/
1294 
1295 static inline bool
is_assoc_useless(symbol * s)1296 is_assoc_useless (symbol *s)
1297 {
1298   return s
1299       && s->content->assoc != undef_assoc
1300       && s->content->assoc != precedence_assoc
1301       && !used_assoc[s->content->number];
1302 }
1303 
1304 /*-------------------------------.
1305 | Register a used associativity. |
1306 `-------------------------------*/
1307 
1308 void
register_assoc(graphid i,graphid j)1309 register_assoc (graphid i, graphid j)
1310 {
1311   if (!used_assoc)
1312     init_assoc ();
1313   used_assoc[i] = true;
1314   used_assoc[j] = true;
1315 }
1316 
1317 /*--------------------------------------------------.
1318 | Print a warning for unused precedence relations.  |
1319 `--------------------------------------------------*/
1320 
1321 void
print_precedence_warnings(void)1322 print_precedence_warnings (void)
1323 {
1324   if (!prec_nodes)
1325     init_prec_nodes ();
1326   if (!used_assoc)
1327     init_assoc ();
1328   for (int i = 0; i < nsyms; ++i)
1329     {
1330       symbol *s = symbols[i];
1331       if (s
1332           && s->content->prec != 0
1333           && !prec_nodes[i]->pred
1334           && !prec_nodes[i]->succ)
1335         {
1336           if (is_assoc_useless (s))
1337             complain (&s->content->prec_loc, Wprecedence,
1338                       _("useless precedence and associativity for %s"), s->tag);
1339           else if (s->content->assoc == precedence_assoc)
1340             complain (&s->content->prec_loc, Wprecedence,
1341                       _("useless precedence for %s"), s->tag);
1342         }
1343       else if (is_assoc_useless (s))
1344         complain (&s->content->prec_loc, Wprecedence,
1345                   _("useless associativity for %s, use %%precedence"), s->tag);
1346     }
1347   free (used_assoc);
1348   assoc_free ();
1349 }
1350