1 /*
2   Copyright 2002-2004 John Plevyak, All Rights Reserved
3 */
4 
5 #include "d.h"
6 
7 #define INITIAL_ALLITEMS 3359
8 
9 #define item_hash(_i) \
10   (((uint)(_i)->rule->index << 8) + ((uint)((_i)->kind != ELEM_END ? (_i)->index : (_i)->rule->elems.n)))
11 
insert_item(State * s,Elem * e)12 static int insert_item(State *s, Elem *e) {
13   Item *i = e;
14   if (set_add(&s->items_hash, i)) {
15     vec_add(&s->items, i);
16     return 1;
17   }
18   return 0;
19 }
20 
itemcmp(const void * ai,const void * aj)21 static int itemcmp(const void *ai, const void *aj) {
22   uint i = item_hash(*(Item **)ai);
23   uint j = item_hash(*(Item **)aj);
24   return (i > j) ? 1 : ((i < j) ? -1 : 0);
25 }
26 
new_state()27 static State *new_state() {
28   State *s = MALLOC(sizeof(State));
29   memset(s, 0, sizeof(State));
30   return s;
31 }
32 
free_state(State * s)33 static void free_state(State *s) {
34   vec_free(&s->items);
35   vec_free(&s->items_hash);
36   FREE(s);
37 }
38 
maybe_add_state(Grammar * g,State * s)39 static State *maybe_add_state(Grammar *g, State *s) {
40   uint i, j;
41 
42   for (i = 0; i < g->states.n; i++) {
43     if (s->hash == g->states.v[i]->hash && s->items.n == g->states.v[i]->items.n) {
44       for (j = 0; j < s->items.n; j++)
45         if (s->items.v[j] != g->states.v[i]->items.v[j]) goto Lcont;
46       free_state(s);
47       return g->states.v[i];
48     Lcont:;
49     }
50   }
51   s->index = g->states.n;
52   vec_add(&g->states, s);
53   return s;
54 }
55 
next_elem(Item * i)56 static Elem *next_elem(Item *i) {
57   if (i->index + 1 >= i->rule->elems.n)
58     return i->rule->end;
59   else
60     return i->rule->elems.v[i->index + 1];
61 }
62 
build_closure(Grammar * g,State * s)63 static State *build_closure(Grammar *g, State *s) {
64   uint j, k;
65 
66   for (j = 0; j < s->items.n; j++) {
67     Item *i = s->items.v[j];
68     Elem *e = i;
69     if (e->kind == ELEM_NTERM) {
70       Production *pp = e->e.nterm;
71       for (k = 0; k < e->e.nterm->rules.n; k++)
72         insert_item(s, pp->rules.v[k]->elems.v ? pp->rules.v[k]->elems.v[0] : pp->rules.v[k]->end);
73     }
74   }
75   qsort(s->items.v, s->items.n, sizeof(Item *), itemcmp);
76   s->hash = 0;
77   for (j = 0; j < s->items.n; j++) s->hash += item_hash(s->items.v[j]);
78   return maybe_add_state(g, s);
79 }
80 
clone_elem(Elem * e)81 static Elem *clone_elem(Elem *e) {
82   Elem *ee = MALLOC(sizeof(*ee));
83   memcpy(ee, e, sizeof(*ee));
84   return ee;
85 }
86 
add_goto(State * s,State * ss,Elem * e)87 static void add_goto(State *s, State *ss, Elem *e) {
88   Goto *g = MALLOC(sizeof(Goto));
89   g->state = ss;
90   g->elem = clone_elem(e);
91   vec_add(&s->gotos, g);
92 }
93 
build_state_for(Grammar * g,State * s,Elem * e)94 static void build_state_for(Grammar *g, State *s, Elem *e) {
95   uint j;
96   Item *i;
97   State *ss = NULL;
98 
99   for (j = 0; j < s->items.n; j++) {
100     i = s->items.v[j];
101     if (i->kind != ELEM_END && i->kind == e->kind && i->e.term_or_nterm == e->e.term_or_nterm) {
102       if (!ss) ss = new_state();
103       insert_item(ss, next_elem(i));
104     }
105   }
106   if (ss) add_goto(s, build_closure(g, ss), e);
107 }
108 
build_new_states(Grammar * g)109 static void build_new_states(Grammar *g) {
110   uint i, j;
111   State *s;
112   Elem e;
113 
114   for (i = 0; i < g->states.n; i++) {
115     s = g->states.v[i];
116     for (j = 0; j < g->terminals.n; j++) {
117       e.kind = ELEM_TERM;
118       e.e.term = g->terminals.v[j];
119       build_state_for(g, s, &e);
120     }
121     for (j = 0; j < g->productions.n; j++) {
122       e.kind = ELEM_NTERM;
123       e.e.nterm = g->productions.v[j];
124       build_state_for(g, s, &e);
125     }
126   }
127 }
128 
build_states_for_each_production(Grammar * g)129 static void build_states_for_each_production(Grammar *g) {
130   uint i;
131   for (i = 0; i < g->productions.n; i++)
132     if (!g->productions.v[i]->internal && g->productions.v[i]->elem) {
133       State *s = new_state();
134       insert_item(s, g->productions.v[i]->elem);
135       g->productions.v[i]->state = build_closure(g, s);
136     }
137 }
138 
elem_symbol(Grammar * g,Elem * e)139 uint elem_symbol(Grammar *g, Elem *e) {
140   if (e->kind == ELEM_NTERM)
141     return e->e.nterm->index;
142   else
143     return g->productions.n + e->e.term->index;
144 }
145 
gotocmp(const void * aa,const void * bb)146 static int gotocmp(const void *aa, const void *bb) {
147   Goto *a = *(Goto **)aa, *b = *(Goto **)bb;
148   int i = a->state->index, j = b->state->index;
149   return ((i > j) ? 1 : ((i < j) ? -1 : 0));
150 }
151 
sort_Gotos(Grammar * g)152 static void sort_Gotos(Grammar *g) {
153   uint i;
154 
155   for (i = 0; i < g->states.n; i++) {
156     VecGoto *vg = &g->states.v[i]->gotos;
157     qsort(vg->v, vg->n, sizeof(Goto *), gotocmp);
158   }
159 }
160 
build_LR_sets(Grammar * g)161 static void build_LR_sets(Grammar *g) {
162   State *s = new_state();
163   insert_item(s, g->productions.v[0]->rules.v[0]->elems.v[0]);
164   build_closure(g, s);
165   build_states_for_each_production(g);
166   build_new_states(g);
167   sort_Gotos(g);
168 }
169 
new_Action(Grammar * g,int akind,Term * aterm,Rule * arule,State * astate)170 static Action *new_Action(Grammar *g, int akind, Term *aterm, Rule *arule, State *astate) {
171   Action *a = MALLOC(sizeof(Action));
172   memset(a, 0, sizeof(Action));
173   a->kind = akind;
174   a->term = aterm;
175   a->rule = arule;
176   a->state = astate;
177   a->index = g->action_count++;
178   vec_add(&g->actions, a);
179   return a;
180 }
181 
free_Action(Action * a)182 void free_Action(Action *a) {
183   if (a->temp_string) FREE(a->temp_string);
184   FREE(a);
185 }
186 
add_action(Grammar * g,State * s,uint akind,Term * aterm,Rule * arule,State * astate)187 static void add_action(Grammar *g, State *s, uint akind, Term *aterm, Rule *arule, State *astate) {
188   uint i;
189   Action *a;
190 
191   if (akind == ACTION_REDUCE) {
192     /* eliminate duplicates */
193     for (i = 0; i < s->reduce_actions.n; i++)
194       if (s->reduce_actions.v[i]->rule == arule) return;
195     a = new_Action(g, akind, aterm, arule, astate);
196     vec_add(&s->reduce_actions, a);
197   } else {
198     /* eliminate duplicates */
199     for (i = 0; i < s->shift_actions.n; i++)
200       if (s->shift_actions.v[i]->term == aterm && s->shift_actions.v[i]->state == astate &&
201           s->shift_actions.v[i]->kind == akind)
202         return;
203     a = new_Action(g, akind, aterm, arule, astate);
204     vec_add(&s->shift_actions, a);
205   }
206 }
207 
init_LR(Grammar * g)208 static void init_LR(Grammar *g) { g->action_count = 0; }
209 
actioncmp(const void * aa,const void * bb)210 static int actioncmp(const void *aa, const void *bb) {
211   Action *a = *(Action **)aa, *b = *(Action **)bb;
212   uint i, j;
213   if (a->kind == ACTION_SHIFT_TRAILING)
214     i = a->term->index + 11000000;
215   else if (a->kind == ACTION_SHIFT)
216     i = a->term->index + 1000000;
217   else
218     i = a->rule->index;
219   if (b->kind == ACTION_SHIFT_TRAILING)
220     j = b->term->index + 11000000;
221   else if (b->kind == ACTION_SHIFT)
222     j = b->term->index + 1000000;
223   else
224     j = b->rule->index;
225   return ((i > j) ? 1 : ((i < j) ? -1 : 0));
226 }
227 
sort_VecAction(VecAction * v)228 void sort_VecAction(VecAction *v) { qsort(v->v, v->n, sizeof(Action *), actioncmp); }
229 
build_actions(Grammar * g)230 static void build_actions(Grammar *g) {
231   uint x, y, z;
232   State *s;
233   Elem *e;
234 
235   for (x = 0; x < g->states.n; x++) {
236     s = g->states.v[x];
237     for (y = 0; y < s->items.n; y++) {
238       e = s->items.v[y];
239       if (e->kind != ELEM_END) {
240         if (e->kind == ELEM_TERM) {
241           for (z = 0; z < s->gotos.n; z++) {
242             if (s->gotos.v[z]->elem->e.term == e->e.term)
243               add_action(g, s, ACTION_SHIFT, e->e.term, 0, s->gotos.v[z]->state);
244           }
245         }
246       } else if (e->rule->prod->index)
247         add_action(g, s, ACTION_REDUCE, NULL, e->rule, 0);
248       else
249         s->accept = 1;
250     }
251     sort_VecAction(&s->shift_actions);
252     sort_VecAction(&s->reduce_actions);
253   }
254 }
255 
goto_State(State * s,Elem * e)256 State *goto_State(State *s, Elem *e) {
257   uint i;
258   for (i = 0; i < s->gotos.n; i++)
259     if (s->gotos.v[i]->elem->e.term_or_nterm == e->e.term_or_nterm) return s->gotos.v[i]->state;
260   return NULL;
261 }
262 
new_Hint(uint d,State * s,Rule * r)263 static Hint *new_Hint(uint d, State *s, Rule *r) {
264   Hint *h = MALLOC(sizeof(*h));
265   h->depth = d;
266   h->state = s;
267   h->rule = r;
268   return h;
269 }
270 
hintcmp(const void * ai,const void * aj)271 static int hintcmp(const void *ai, const void *aj) {
272   Hint *i = *(Hint **)ai;
273   Hint *j = *(Hint **)aj;
274   return (i->depth > j->depth)
275              ? 1
276              : ((i->depth < j->depth)
277                     ? -1
278                     : ((i->rule->index > j->rule->index) ? 1 : ((i->rule->index < j->rule->index) ? -1 : 0)));
279 }
280 
build_right_epsilon_hints(Grammar * g)281 static void build_right_epsilon_hints(Grammar *g) {
282   uint x, y, z;
283   State *s, *ss;
284   Elem *e;
285   Rule *r;
286 
287   for (x = 0; x < g->states.n; x++) {
288     s = g->states.v[x];
289     for (y = 0; y < s->items.n; y++) {
290       e = s->items.v[y];
291       r = e->rule;
292       if (e->kind != ELEM_END) {
293         for (z = e->index; z < r->elems.n; z++) {
294           if ((r->elems.v[z]->kind != ELEM_NTERM || !r->elems.v[z]->e.nterm->nullable)) goto Lnext;
295         }
296         ss = s;
297         for (z = e->index; z < r->elems.n; z++) ss = goto_State(ss, r->elems.v[z]);
298         if (ss && r->elems.n)
299           vec_add(&s->right_epsilon_hints, new_Hint(r->elems.n - e->index - 1, ss, r));
300         else /* ignore for states_for_each_productions */
301           ;
302       }
303     Lnext:;
304     }
305     if (s->right_epsilon_hints.n > 1)
306       qsort(s->right_epsilon_hints.v, s->right_epsilon_hints.n, sizeof(Hint *), hintcmp);
307   }
308 }
309 
build_error_recovery(Grammar * g)310 static void build_error_recovery(Grammar *g) {
311   uint i, j, k, depth;
312   State *s;
313   Rule *r, *rr;
314   Elem *e, *ee;
315 
316   for (i = 0; i < g->states.n; i++) {
317     s = g->states.v[i];
318     for (j = 0; j < s->items.n; j++) {
319       r = s->items.v[j]->rule;
320       if (r->elems.n > 1 && r->elems.v[r->elems.n - 1]->kind == ELEM_TERM &&
321           r->elems.v[r->elems.n - 1]->e.term->kind == TERM_STRING) {
322         depth = s->items.v[j]->index;
323         e = r->elems.v[r->elems.n - 1];
324         for (k = 0; k < s->error_recovery_hints.n; k++) {
325           rr = s->error_recovery_hints.v[k]->rule;
326           ee = rr->elems.v[rr->elems.n - 1];
327           if (e->e.term->string_len == ee->e.term->string_len && !strcmp(e->e.term->string, ee->e.term->string)) {
328             if (s->error_recovery_hints.v[k]->depth > depth) s->error_recovery_hints.v[k]->depth = depth;
329             goto Ldone;
330           }
331         }
332         vec_add(&s->error_recovery_hints, new_Hint(depth, NULL, r));
333       Ldone:;
334       }
335     }
336     qsort(s->error_recovery_hints.v, s->error_recovery_hints.n, sizeof(Hint *), hintcmp);
337   }
338 }
339 
build_LR_tables(Grammar * g)340 void build_LR_tables(Grammar *g) {
341   init_LR(g);
342   build_LR_sets(g);
343   build_actions(g);
344   build_right_epsilon_hints(g);
345   build_error_recovery(g);
346 }
347