1 /*
2   Copyright 2002-2008 John Plevyak, All Rights Reserved
3 */
4 
5 #include "d.h"
6 
7 /* tunables */
8 #define DEFAULT_COMMIT_ACTIONS_INTERVAL 100
9 #define PNODE_HASH_INITIAL_SIZE_INDEX 10
10 #define SNODE_HASH_INITIAL_SIZE_INDEX 8
11 #define ERROR_RECOVERY_QUEUE_SIZE 10000
12 
13 #define LATEST(_p, _pn)                              \
14   do {                                               \
15     while ((_pn)->latest != (_pn)->latest->latest) { \
16       PNode *t = (_pn)->latest->latest;              \
17       ref_pn(t);                                     \
18       unref_pn((_p), (_pn)->latest);                 \
19       (_pn)->latest = t;                             \
20     }                                                \
21     (_pn) = (_pn)->latest;                           \
22   } while (0)
23 
24 #ifndef USE_GC
25 static void free_SNode(struct Parser *p, struct SNode *s);
26 #define ref_pn(_pn)    \
27   do {                 \
28     (_pn)->refcount++; \
29   } while (0)
30 #define ref_sn(_sn)    \
31   do {                 \
32     (_sn)->refcount++; \
33   } while (0)
34 #define unref_pn(_p, _pn)                        \
35   do {                                           \
36     if (!--(_pn)->refcount) free_PNode(_p, _pn); \
37   } while (0)
38 #define unref_sn(_p, _sn)                        \
39   do {                                           \
40     if (!--(_sn)->refcount) free_SNode(_p, _sn); \
41   } while (0)
42 #else
43 #define ref_pn(_pn)
44 #define ref_sn(_sn)
45 #define unref_pn(_p, _pn)
46 #define unref_sn(_p, _sn)
47 #endif
48 
49 typedef Stack(struct PNode *) StackPNode;
50 typedef Stack(struct SNode *) StackSNode;
51 typedef Stack(int) StackInt;
52 
53 static int exhaustive_parse(Parser *p, int state);
54 static void free_PNode(Parser *p, PNode *pn);
55 
print_paren(Parser * pp,PNode * p)56 void print_paren(Parser *pp, PNode *p) {
57   uint i;
58   char *c;
59   LATEST(pp, p);
60   if (!p->error_recovery) {
61     if (p->children.n) {
62       if (p->children.n > 1) printf("(");
63       for (i = 0; i < p->children.n; i++) print_paren(pp, p->children.v[i]);
64       if (p->children.n > 1) printf(")");
65     } else if (p->parse_node.start_loc.s != p->parse_node.end_skip) {
66       printf(" ");
67       for (c = p->parse_node.start_loc.s; c < p->parse_node.end_skip; c++) printf("%c", *c);
68       printf(" ");
69     }
70   }
71 }
72 
xprint_paren(Parser * pp,PNode * p)73 void xprint_paren(Parser *pp, PNode *p) {
74   uint i;
75   char *c;
76   LATEST(pp, p);
77   if (!p->error_recovery) {
78     printf("[%p %s]", (void *)p, pp->t->symbols[p->parse_node.symbol].name);
79     if (p->children.n) {
80       printf("(");
81       for (i = 0; i < p->children.n; i++) xprint_paren(pp, p->children.v[i]);
82       printf(")");
83     } else if (p->parse_node.start_loc.s != p->parse_node.end_skip) {
84       printf(" ");
85       for (c = p->parse_node.start_loc.s; c < p->parse_node.end_skip; c++) printf("%c", *c);
86       printf(" ");
87     }
88     if (p->ambiguities) {
89       printf(" |OR| ");
90       xprint_paren(pp, p->ambiguities);
91     }
92   }
93 }
94 
xPP(Parser * pp,PNode * p)95 void xPP(Parser *pp, PNode *p) {
96   xprint_paren(pp, p);
97   printf("\n");
98 }
PP(Parser * pp,PNode * p)99 void PP(Parser *pp, PNode *p) {
100   print_paren(pp, p);
101   printf("\n");
102 }
103 
104 #define D_ParseNode_to_PNode(_apn) ((PNode *)(D_PN(_apn, -(intptr_t) & ((PNode *)(NULL))->parse_node)))
105 
106 #define PNode_to_D_ParseNode(_apn) ((D_ParseNode *)&((PNode *)(_apn))->parse_node)
107 
d_get_child(D_ParseNode * apn,int child)108 D_ParseNode *d_get_child(D_ParseNode *apn, int child) {
109   PNode *pn = D_ParseNode_to_PNode(apn);
110   if (child < 0 || (uint)child >= pn->children.n) return NULL;
111   return &pn->children.v[child]->parse_node;
112 }
113 
d_get_number_of_children(D_ParseNode * apn)114 int d_get_number_of_children(D_ParseNode *apn) {
115   PNode *pn = D_ParseNode_to_PNode(apn);
116   return pn->children.n;
117 }
118 
d_find_in_tree(D_ParseNode * apn,int symbol)119 D_ParseNode *d_find_in_tree(D_ParseNode *apn, int symbol) {
120   PNode *pn = D_ParseNode_to_PNode(apn);
121   D_ParseNode *res;
122   uint i;
123 
124   if (pn->parse_node.symbol == symbol) return apn;
125   for (i = 0; i < pn->children.n; i++)
126     if ((res = d_find_in_tree(&pn->children.v[i]->parse_node, symbol))) return res;
127   return NULL;
128 }
129 
d_ws_before(D_Parser * ap,D_ParseNode * apn)130 char *d_ws_before(D_Parser *ap, D_ParseNode *apn) {
131   PNode *pn = D_ParseNode_to_PNode(apn);
132   (void)ap;
133   return pn->ws_before;
134 }
135 
d_ws_after(D_Parser * ap,D_ParseNode * apn)136 char *d_ws_after(D_Parser *ap, D_ParseNode *apn) {
137   PNode *pn = D_ParseNode_to_PNode(apn);
138   (void)ap;
139   return pn->ws_after;
140 }
141 
142 #define SNODE_HASH(_s, _sc, _g) ((((uintptr_t)(_s)) << 12) + (((uintptr_t)(_sc))) + ((uintptr_t)(_g)))
143 
find_SNode(Parser * p,uint state,D_Scope * sc,void * g)144 SNode *find_SNode(Parser *p, uint state, D_Scope *sc, void *g) {
145   SNodeHash *ph = &p->snode_hash;
146   SNode *sn;
147   uint h = SNODE_HASH(state, sc, g);
148   if (ph->v)
149     for (sn = ph->v[h % ph->m]; sn; sn = sn->bucket_next)
150       if (sn->state - p->t->state == state && sn->initial_scope == sc && sn->initial_globals == g) return sn;
151   return NULL;
152 }
153 
insert_SNode_internal(Parser * p,SNode * sn)154 void insert_SNode_internal(Parser *p, SNode *sn) {
155   SNodeHash *ph = &p->snode_hash;
156   uint h = SNODE_HASH(sn->state - p->t->state, sn->initial_scope, sn->initial_globals), i;
157   SNode *t;
158 
159   if (ph->n + 1 > ph->m) {
160     SNode **v = ph->v;
161     uint m = ph->m;
162     ph->i++;
163     ph->m = d_prime2[ph->i];
164     ph->v = (SNode **)MALLOC(ph->m * sizeof(*ph->v));
165     memset(ph->v, 0, ph->m * sizeof(*ph->v));
166     for (i = 0; i < m; i++)
167       while ((t = v[i])) {
168         v[i] = v[i]->bucket_next;
169         insert_SNode_internal(p, t);
170       }
171     FREE(v);
172   }
173   sn->bucket_next = ph->v[h % ph->m];
174   assert(sn->bucket_next != sn);
175   ph->v[h % ph->m] = sn;
176   ph->n++;
177 }
178 
insert_SNode(Parser * p,SNode * sn)179 static void insert_SNode(Parser *p, SNode *sn) {
180   insert_SNode_internal(p, sn);
181   ref_sn(sn);
182   sn->all_next = p->snode_hash.all;
183   p->snode_hash.all = sn;
184 }
185 
new_SNode(Parser * p,D_State * state,d_loc_t * loc,D_Scope * sc,void * g)186 static SNode *new_SNode(Parser *p, D_State *state, d_loc_t *loc, D_Scope *sc, void *g) {
187   SNode *sn = p->free_snodes;
188   if (!sn)
189     sn = MALLOC(sizeof *sn);
190   else
191     p->free_snodes = sn->all_next;
192   sn->depth = 0;
193   vec_clear(&sn->zns);
194 #ifndef USE_GC
195   sn->refcount = 0;
196 #endif
197   sn->all_next = 0;
198   p->states++;
199   sn->state = state;
200   sn->initial_scope = sc;
201   sn->initial_globals = g;
202   sn->last_pn = NULL;
203   sn->loc = *loc;
204   insert_SNode(p, sn);
205   if (sn->state->accept) {
206     if (!p->accept) {
207       ref_sn(sn);
208       p->accept = sn;
209     } else if (sn->loc.s > p->accept->loc.s) {
210       ref_sn(sn);
211       unref_sn(p, p->accept);
212       p->accept = sn;
213     }
214   }
215   return sn;
216 }
217 
new_ZNode(Parser * p,PNode * pn)218 static ZNode *new_ZNode(Parser *p, PNode *pn) {
219   ZNode *z = p->free_znodes;
220   if (!z)
221     z = MALLOC(sizeof *z);
222   else
223     p->free_znodes = znode_next(z);
224   z->pn = pn;
225   ref_pn(pn);
226   vec_clear(&z->sns);
227   return z;
228 }
229 
free_PNode(Parser * p,PNode * pn)230 static void free_PNode(Parser *p, PNode *pn) {
231   PNode *amb;
232   uint i;
233   if (p->user.free_node_fn) p->user.free_node_fn(&pn->parse_node);
234   for (i = 0; i < pn->children.n; i++) unref_pn(p, pn->children.v[i]);
235   vec_free(&pn->children);
236   if ((amb = pn->ambiguities)) {
237     pn->ambiguities = NULL;
238     unref_pn(p, amb);
239   }
240   if (pn->latest != pn) unref_pn(p, pn->latest);
241 #ifdef USE_FREELISTS
242   pn->all_next = p->free_pnodes;
243   p->free_pnodes = pn;
244 #else
245   FREE(pn);
246 #endif
247 #ifdef TRACK_PNODES
248   if (pn->xprev)
249     pn->xprev->xnext = pn->xnext;
250   else
251     p->xall = pn->xnext;
252   if (pn->xnext) pn->xnext->xprev = pn->xprev;
253   pn->xprev = NULL;
254   pn->xnext = NULL;
255 #endif
256 }
257 
258 #ifndef USE_GC
free_ZNode(Parser * p,ZNode * z,SNode * s)259 static void free_ZNode(Parser *p, ZNode *z, SNode *s) {
260   uint i;
261   unref_pn(p, z->pn);
262   for (i = 0; i < z->sns.n; i++)
263     if (s != z->sns.v[i]) unref_sn(p, z->sns.v[i]);
264   vec_free(&z->sns);
265 #ifdef USE_FREELISTS
266   znode_next(z) = p->free_znodes;
267   p->free_znodes = z;
268 #else
269   FREE(z);
270 #endif
271 }
272 
free_SNode(Parser * p,struct SNode * s)273 static void free_SNode(Parser *p, struct SNode *s) {
274   uint i;
275   for (i = 0; i < s->zns.n; i++)
276     if (s->zns.v[i]) free_ZNode(p, s->zns.v[i], s);
277   vec_free(&s->zns);
278   if (s->last_pn) unref_pn(p, s->last_pn);
279 #ifdef USE_FREELISTS
280   s->all_next = p->free_snodes;
281   p->free_snodes = s;
282 #else
283   FREE(s);
284 #endif
285 }
286 #else
287 #define free_ZNode(_p, _z, _s)
288 #endif
289 
290 #define PNODE_HASH(_si, _ei, _s, _sc, _g) \
291   ((((uintptr_t)_si) << 8) + (((uintptr_t)_ei) << 16) + (((uintptr_t)_s)) + (((uintptr_t)_sc)) + (((uintptr_t)_g)))
292 
find_PNode(Parser * p,char * start,char * end_skip,int symbol,D_Scope * sc,void * g,uint * hash)293 PNode *find_PNode(Parser *p, char *start, char *end_skip, int symbol, D_Scope *sc, void *g, uint *hash) {
294   PNodeHash *ph = &p->pnode_hash;
295   PNode *pn;
296   uint h = PNODE_HASH(start, end_skip, symbol, sc, g);
297   *hash = h;
298   if (ph->v)
299     for (pn = ph->v[h % ph->m]; pn; pn = pn->bucket_next)
300       if (pn->hash == h && pn->parse_node.symbol == symbol && pn->parse_node.start_loc.s == start &&
301           pn->parse_node.end_skip == end_skip && pn->initial_scope == sc && pn->initial_globals == g) {
302         LATEST(p, pn);
303         return pn;
304       }
305   return NULL;
306 }
307 
insert_PNode_internal(Parser * p,PNode * pn)308 void insert_PNode_internal(Parser *p, PNode *pn) {
309   PNodeHash *ph = &p->pnode_hash;
310   uint h = PNODE_HASH(pn->parse_node.start_loc.s, pn->parse_node.end_skip, pn->parse_node.symbol, pn->initial_scope,
311                       pn->initial_globals),
312        i;
313   PNode *t;
314 
315   if (ph->n + 1 > ph->m) {
316     PNode **v = ph->v;
317     uint m = ph->m;
318     ph->i++;
319     ph->m = d_prime2[ph->i];
320     ph->v = (PNode **)MALLOC(ph->m * sizeof(*ph->v));
321     memset(ph->v, 0, ph->m * sizeof(*ph->v));
322     for (i = 0; i < m; i++)
323       while ((t = v[i])) {
324         v[i] = v[i]->bucket_next;
325         insert_PNode_internal(p, t);
326       }
327     FREE(v);
328   }
329   pn->bucket_next = ph->v[h % ph->m];
330   ph->v[h % ph->m] = pn;
331   ph->n++;
332 }
333 
insert_PNode(Parser * p,PNode * pn)334 static void insert_PNode(Parser *p, PNode *pn) {
335   insert_PNode_internal(p, pn);
336   ref_pn(pn);
337   pn->all_next = p->pnode_hash.all;
338   p->pnode_hash.all = pn;
339 }
340 
free_old_nodes(Parser * p)341 static void free_old_nodes(Parser *p) {
342   uint i, h;
343   PNode *pn = p->pnode_hash.all, *tpn, **lpn;
344   SNode *sn = p->snode_hash.all, *tsn, **lsn;
345   while (sn) {
346     h = SNODE_HASH(sn->state - p->t->state, sn->initial_scope, sn->initial_globals);
347     lsn = &p->snode_hash.v[h % p->snode_hash.m];
348     tsn = sn;
349     sn = sn->all_next;
350     while (*lsn != tsn) lsn = &(*lsn)->bucket_next;
351     *lsn = (*lsn)->bucket_next;
352   }
353   sn = p->snode_hash.last_all;
354   p->snode_hash.last_all = 0;
355   while (sn) {
356     tsn = sn;
357     sn = sn->all_next;
358     unref_sn(p, tsn);
359   }
360   p->snode_hash.last_all = p->snode_hash.all;
361   p->snode_hash.all = NULL;
362   while (pn) {
363     for (i = 0; i < pn->children.n; i++) {
364       while (pn->children.v[i] != pn->children.v[i]->latest) {
365         tpn = pn->children.v[i]->latest;
366         ref_pn(tpn);
367         unref_pn(p, pn->children.v[i]);
368         pn->children.v[i] = tpn;
369       }
370     }
371     h = PNODE_HASH(pn->parse_node.start_loc.s, pn->parse_node.end_skip, pn->parse_node.symbol, pn->initial_scope,
372                    pn->initial_globals);
373     lpn = &p->pnode_hash.v[h % p->pnode_hash.m];
374     tpn = pn;
375     pn = pn->all_next;
376     while (*lpn != tpn) lpn = &(*lpn)->bucket_next;
377     *lpn = (*lpn)->bucket_next;
378     unref_pn(p, tpn);
379   }
380   p->pnode_hash.n = 0;
381   p->pnode_hash.all = NULL;
382 }
383 
alloc_parser_working_data(Parser * p)384 static void alloc_parser_working_data(Parser *p) {
385   p->pnode_hash.i = PNODE_HASH_INITIAL_SIZE_INDEX;
386   p->pnode_hash.m = d_prime2[p->pnode_hash.i];
387   p->pnode_hash.v = (PNode **)MALLOC(p->pnode_hash.m * sizeof(*p->pnode_hash.v));
388   memset(p->pnode_hash.v, 0, p->pnode_hash.m * sizeof(*p->pnode_hash.v));
389   p->snode_hash.i = SNODE_HASH_INITIAL_SIZE_INDEX;
390   p->snode_hash.m = d_prime2[p->snode_hash.i];
391   p->snode_hash.v = (SNode **)MALLOC(p->snode_hash.m * sizeof(*p->snode_hash.v));
392   memset(p->snode_hash.v, 0, p->snode_hash.m * sizeof(*p->snode_hash.v));
393   p->nshift_results = 0;
394   p->ncode_shifts = 0;
395 }
396 
free_parser_working_data(Parser * p)397 static void free_parser_working_data(Parser *p) {
398   uint i;
399 
400   free_old_nodes(p);
401   free_old_nodes(p); /* to catch SNodes saved for error repair */
402   if (p->pnode_hash.v) FREE(p->pnode_hash.v);
403   if (p->snode_hash.v) FREE(p->snode_hash.v);
404   memset(&p->pnode_hash, 0, sizeof(p->pnode_hash));
405   memset(&p->snode_hash, 0, sizeof(p->snode_hash));
406   while (p->reductions_todo) {
407     Reduction *r = p->free_reductions->next;
408     unref_sn(p, p->reductions_todo->snode);
409     FREE(p->free_reductions);
410     p->free_reductions = r;
411   }
412   while (p->shifts_todo) {
413     Shift *s = p->free_shifts->next;
414     unref_sn(p, p->shifts_todo->snode);
415     FREE(p->free_shifts);
416     p->free_shifts = s;
417   }
418   while (p->free_reductions) {
419     Reduction *r = p->free_reductions->next;
420     FREE(p->free_reductions);
421     p->free_reductions = r;
422   }
423   while (p->free_shifts) {
424     Shift *s = p->free_shifts->next;
425     FREE(p->free_shifts);
426     p->free_shifts = s;
427   }
428   while (p->free_pnodes) {
429     PNode *pn = p->free_pnodes->all_next;
430     FREE(p->free_pnodes);
431     p->free_pnodes = pn;
432   }
433   while (p->free_znodes) {
434     ZNode *zn = znode_next(p->free_znodes);
435     FREE(p->free_znodes);
436     p->free_znodes = zn;
437   }
438   while (p->free_snodes) {
439     SNode *sn = p->free_snodes->all_next;
440     FREE(p->free_snodes);
441     p->free_snodes = sn;
442   }
443   for (i = 0; i < p->error_reductions.n; i++) FREE(p->error_reductions.v[i]);
444   vec_free(&p->error_reductions);
445   if (p->whitespace_parser) free_parser_working_data(p->whitespace_parser);
446   FREE(p->shift_results);
447   p->shift_results = NULL;
448   p->nshift_results = 0;
449   FREE(p->code_shifts);
450   p->code_shifts = NULL;
451   p->ncode_shifts = 0;
452 }
453 
znode_depth(ZNode * z)454 static int znode_depth(ZNode *z) {
455   uint i, d = 0;
456   if (!z) return INT_MAX;
457   for (i = 0; i < z->sns.n; i++) d = d < z->sns.v[i]->depth ? z->sns.v[i]->depth : d;
458   return d;
459 }
460 
add_Reduction(Parser * p,ZNode * z,SNode * sn,D_Reduction * reduction)461 static Reduction *add_Reduction(Parser *p, ZNode *z, SNode *sn, D_Reduction *reduction) {
462   Reduction *x, **l = &p->reductions_todo;
463   uint d = znode_depth(z), dd;
464   for (x = p->reductions_todo; x; l = &x->next, x = x->next) {
465     if (sn->loc.s < x->snode->loc.s) break;
466     dd = znode_depth(x->znode);
467     if ((sn->loc.s == x->snode->loc.s && d >= dd)) {
468       if (d == dd)
469         while (x) {
470           if (sn == x->snode && z == x->znode && reduction == x->reduction) return NULL;
471           x = x->next;
472         }
473       break;
474     }
475   }
476   {
477     Reduction *r = p->free_reductions;
478     if (!r)
479       r = MALLOC(sizeof *r);
480     else
481       p->free_reductions = r->next;
482     r->znode = z;
483     r->snode = sn;
484     r->new_snode = NULL;
485     ref_sn(sn);
486     r->reduction = reduction;
487     r->next = *l;
488     *l = r;
489     return r;
490   }
491 }
492 
add_Shift(Parser * p,SNode * snode)493 static void add_Shift(Parser *p, SNode *snode) {
494   Shift *x, **l = &p->shifts_todo;
495   Shift *s = p->free_shifts;
496   if (!s)
497     s = MALLOC(sizeof *s);
498   else
499     p->free_shifts = s->next;
500   s->snode = snode;
501   ref_sn(s->snode);
502   for (x = p->shifts_todo; x; l = &x->next, x = x->next)
503     if (snode->loc.s <= x->snode->loc.s) break;
504   s->next = *l;
505   *l = s;
506 }
507 
add_SNode(Parser * p,D_State * state,d_loc_t * loc,D_Scope * sc,void * g)508 static SNode *add_SNode(Parser *p, D_State *state, d_loc_t *loc, D_Scope *sc, void *g) {
509   uint i;
510   SNode *sn = find_SNode(p, state - p->t->state, sc, g);
511   if (sn) return sn;
512   sn = new_SNode(p, state, loc, sc, g);
513   if (sn->state->shifts) add_Shift(p, sn);
514   for (i = 0; i < sn->state->reductions.n; i++)
515     if (!sn->state->reductions.v[i]->nelements) add_Reduction(p, 0, sn, sn->state->reductions.v[i]);
516   return sn;
517 }
518 
reduce_actions(Parser * p,PNode * pn,D_Reduction * r)519 static int reduce_actions(Parser *p, PNode *pn, D_Reduction *r) {
520   uint i, height = 0;
521   PNode *c;
522 
523   for (i = 0; i < pn->children.n; i++) {
524     c = pn->children.v[i];
525     if (c->op_assoc) {
526       pn->assoc = c->op_assoc;
527       pn->priority = c->op_priority;
528     }
529     if (c->height >= height) height = c->height + 1;
530   }
531   pn->op_assoc = r->op_assoc;
532   pn->op_priority = r->op_priority;
533   pn->height = height;
534   if (r->rule_assoc) {
535     pn->assoc = r->rule_assoc;
536     pn->priority = r->rule_priority;
537   }
538   if (r->speculative_code)
539     return r->speculative_code(pn, (void **)&pn->children.v[0], pn->children.n,
540                                (intptr_t) & ((PNode *)(NULL))->parse_node, (D_Parser *)p);
541   return 0;
542 }
543 
544 #define x 666 /* impossible */
545 static int child_table[4][3][6] = {{
546                                        /* binary parent, child on left */
547                                        /* priority of child vs parent, or = with child|parent associativity
548                                           > < =LL =LR =RL =RR
549                                         */
550                                        {1, 0, 1, 1, 0, 0}, /* binary child */
551                                        {1, 1, 1, 1, x, x}, /* left unary child */
552                                        {1, 0, x, x, 1, 1}  /* right unary child */
553                                    },
554                                    {
555                                        /* binary parent, child on right */
556                                        {1, 0, 0, 0, 1, 1}, /* binary child */
557                                        {1, 0, 1, 1, x, x}, /* left unary child */
558                                        {1, 1, x, x, 1, 1}  /* right unary child */
559                                    },
560                                    {
561                                        /* left unary parent */
562                                        {1, 0, 0, x, 0, x}, /* binary child */
563                                        {1, 1, 1, x, x, x}, /* left unary child */
564                                        {1, 0, x, x, 1, x}  /* right unary child */
565                                    },
566                                    {
567                                        /* right unary parent */
568                                        {1, 0, x, 0, x, 0}, /* binary child */
569                                        {1, 0, x, 1, x, x}, /* left unary child */
570                                        {1, 1, x, x, x, 1}  /* right unary child */
571                                    }};
572 #undef x
573 
574 /* returns 1 if legal for child reduction and illegal for child shift */
check_child(int ppri,AssocKind passoc,int cpri,AssocKind cassoc,int left,int right)575 static int check_child(int ppri, AssocKind passoc, int cpri, AssocKind cassoc, int left, int right) {
576   uint p = IS_BINARY_NARY_ASSOC(passoc) ? (right ? 1 : 0) : (passoc == ASSOC_UNARY_LEFT ? 2 : 3);
577   uint c = IS_BINARY_NARY_ASSOC(cassoc) ? 0 : (cassoc == ASSOC_UNARY_LEFT ? 1 : 2);
578   uint r =
579       cpri > ppri ? 0 : (cpri < ppri ? 1 : (2 + ((IS_RIGHT_ASSOC(cassoc) ? 2 : 0) + (IS_RIGHT_ASSOC(passoc) ? 1 : 0))));
580   (void)left;
581   return child_table[p][c][r];
582 }
583 
584 /* check assoc/priority legality, 0 is OK, -1 is bad */
check_assoc_priority(PNode * pn0,PNode * pn1,PNode * pn2)585 static int check_assoc_priority(PNode *pn0, PNode *pn1, PNode *pn2) {
586   if (!IS_UNARY_BINARY_ASSOC(pn0->op_assoc)) {
587     if (IS_UNARY_BINARY_ASSOC(pn1->op_assoc)) { /* second token is operator */
588       /* check expression pn0 (child of pn1) */
589       if (pn0->assoc) {
590         if (!check_child(pn1->op_priority, pn1->op_assoc, pn0->priority, pn0->assoc, 0, 1)) return -1;
591       }
592     }
593   } else { /* pn0 is an operator */
594     if (pn1->op_assoc) {
595       /* check pn0 (child of operator pn1) */
596       if (!check_child(pn1->op_priority, pn1->op_assoc, pn0->op_priority, pn0->op_assoc, 0, 1)) return -1;
597     } else if (pn2) {
598       /* check pn0 (child of operator pn2) */
599       if (pn2->op_assoc && !check_child(pn2->op_priority, pn2->op_assoc, pn0->op_priority, pn0->op_assoc, 0, 1))
600         return -1;
601     }
602     /* check expression pn1 (child of pn0)  */
603     if (pn1->assoc) {
604       if (!check_child(pn0->op_priority, pn0->op_assoc, pn1->priority, pn1->assoc, 1, 0)) return -1;
605     }
606   }
607   return 0;
608 }
609 
610 /* check to see if a path is legal with respect to
611    the associativity and priority of its operators */
check_path_priorities_internal(VecZNode * path)612 static int check_path_priorities_internal(VecZNode *path) {
613   uint i = 0, j, k, jj, kk, one = 0;
614   ZNode *z, *zz, *zzz;
615   PNode *pn0, *pn1;
616 
617   if (path->n < i + 1) return 0;
618   pn0 = path->v[i]->pn;
619   if (!pn0->op_assoc) { /* deal with top expression directly */
620     i = 1;
621     if (path->n < i + 1) return 0;
622     pn1 = path->v[i]->pn;
623     if (!pn1->op_assoc) return 0;
624     if (pn0->assoc) {
625       if (!check_child(pn1->op_priority, pn1->op_assoc, pn0->priority, pn0->assoc, 0, 1)) return -1;
626     }
627     pn0 = pn1;
628   }
629   if (path->n > i + 1) { /* entirely in the path */
630     pn1 = path->v[i + 1]->pn;
631     if (path->n > i + 2)
632       return check_assoc_priority(pn0, pn1, path->v[i + 2]->pn);
633     else { /* one level from the stack beyond the path */
634       z = path->v[i + 1];
635       for (k = 0; k < z->sns.n; k++)
636         for (j = 0; j < z->sns.v[k]->zns.n; j++) {
637           one = 1;
638           zz = z->sns.v[k]->zns.v[j];
639           if (zz && !check_assoc_priority(pn0, pn1, zz->pn)) return 0;
640         }
641       if (!one) return check_assoc_priority(pn0, pn1, NULL);
642     }
643   } else { /* two levels from the stack beyond the path */
644     z = path->v[i];
645     for (k = 0; k < z->sns.n; k++)
646       for (j = 0; j < z->sns.v[k]->zns.n; j++) {
647         zz = z->sns.v[k]->zns.v[j];
648         if (zz)
649           for (kk = 0; kk < zz->sns.n; kk++)
650             for (jj = 0; jj < zz->sns.v[kk]->zns.n; jj++) {
651               one = 1;
652               zzz = zz->sns.v[kk]->zns.v[jj];
653               if (zzz && !check_assoc_priority(pn0, zz->pn, zzz->pn)) return 0;
654             }
655       }
656     return 0;
657   }
658   return -1;
659 }
660 
661 /* avoid cases without operator priorities */
662 #define check_path_priorities(_p) \
663   ((_p)->n > 1 && ((_p)->v[0]->pn->op_assoc || (_p)->v[1]->pn->op_assoc) && check_path_priorities_internal(_p))
664 
compare_priorities(int xpri[],int xn,int ypri[],int yn)665 static int compare_priorities(int xpri[], int xn, int ypri[], int yn) {
666   int i = 0;
667 
668   while (i < xn && i < yn) {
669     if (xpri[i] > ypri[i]) return -1;
670     if (xpri[i] < ypri[i]) return 1;
671     i++;
672   }
673   return 0;
674 }
675 
intreverse(int * xp,int n)676 static void intreverse(int *xp, int n) {
677   int *a = xp, *b = xp + n - 1;
678   while (a < b) {
679     int t = *a;
680     *a = *b;
681     *b = t;
682     a++;
683     b--;
684   }
685 }
686 
687 /* sort by deepest, then by location */
priority_insert(StackPNode * psx,PNode * x)688 static void priority_insert(StackPNode *psx, PNode *x) {
689   PNode *t, **start, **cur;
690 
691   stack_push(psx, x);
692   start = psx->start;
693   cur = psx->cur;
694   for (; cur > start + 1; cur--) {
695     if (cur[-1]->height > cur[-2]->height) continue;
696     if (cur[-1]->height == cur[-2]->height && cur[-1]->parse_node.start_loc.s > cur[-2]->parse_node.start_loc.s)
697       continue;
698     t = cur[-1];
699     cur[-1] = cur[-2];
700     cur[-2] = t;
701   }
702 }
703 
get_exp_all(Parser * p,PNode * x,StackInt * psx)704 static void get_exp_all(Parser *p, PNode *x, StackInt *psx) {
705   uint i;
706 
707   if (x->assoc) stack_push(psx, x->priority);
708   for (i = 0; i < x->children.n; i++) {
709     PNode *pn = x->children.v[i];
710     LATEST(p, pn);
711     get_exp_all(p, pn, psx);
712   }
713 }
714 
get_exp_one(Parser * p,PNode * x,StackPNode * psx,StackInt * isx)715 static void get_exp_one(Parser *p, PNode *x, StackPNode *psx, StackInt *isx) {
716   uint i;
717 
718   LATEST(p, x);
719   if (!IS_NARY_ASSOC(x->assoc))
720     priority_insert(psx, x);
721   else {
722     stack_push(isx, x->priority);
723     for (i = 0; i < x->children.n; i++)
724       if (x->children.v[i]->assoc) get_exp_one(p, x->children.v[i], psx, isx);
725   }
726 }
727 
get_exp_one_down(Parser * p,PNode * x,StackPNode * psx,StackInt * isx)728 static void get_exp_one_down(Parser *p, PNode *x, StackPNode *psx, StackInt *isx) {
729   uint i;
730 
731   LATEST(p, x);
732   stack_push(isx, x->priority);
733   for (i = 0; i < x->children.n; i++)
734     if (x->children.v[i]->assoc) get_exp_one(p, x->children.v[i], psx, isx);
735 }
736 
737 /* get the set of priorities for unshared nodes,
738    eliminating shared subtrees via priority queues */
get_unshared_priorities(Parser * p,StackPNode * psx,StackPNode * psy,StackInt * isx,StackInt * isy)739 static void get_unshared_priorities(Parser *p, StackPNode *psx, StackPNode *psy, StackInt *isx, StackInt *isy) {
740   StackPNode *psr;
741   PNode *t;
742 
743   while (1) {
744     if (is_stack_empty(psx)) {
745       psr = psy;
746       break;
747     } else if (is_stack_empty(psy)) {
748       psr = psx;
749       break;
750     }
751     if (stack_head(psx)->height > stack_head(psy)->height)
752       psr = psx;
753     else if (stack_head(psx)->height < stack_head(psy)->height)
754       psr = psy;
755     else if (stack_head(psx) > stack_head(psy))
756       psr = psx;
757     else if (stack_head(psx) < stack_head(psy))
758       psr = psy;
759     else {
760       (void)stack_pop(psx);
761       (void)stack_pop(psy);
762       continue;
763     }
764     t = stack_pop(psr);
765     if (psr == psx)
766       get_exp_one_down(p, t, psx, isx);
767     else
768       get_exp_one_down(p, t, psy, isy);
769   }
770   while (!is_stack_empty(psr)) {
771     t = stack_pop(psr);
772     if (psr == psx)
773       get_exp_all(p, t, isx);
774     else
775       get_exp_all(p, t, isy);
776   }
777   return;
778 }
779 
780 /* compare the priorities of operators in two trees
781    while eliminating common subtrees for efficiency.
782 */
cmp_priorities(Parser * p,PNode * x,PNode * y)783 static int cmp_priorities(Parser *p, PNode *x, PNode *y) {
784   StackPNode psx, psy;
785   StackInt isx, isy;
786   int r;
787 
788   stack_clear(&psx);
789   stack_clear(&psy);
790   stack_clear(&isx);
791   stack_clear(&isy);
792   get_exp_one(p, x, &psx, &isx);
793   get_exp_one(p, y, &psy, &isy);
794   get_unshared_priorities(p, &psx, &psy, &isx, &isy);
795   intreverse(isx.start, stack_depth(&isx));
796   intreverse(isy.start, stack_depth(&isy));
797   r = compare_priorities(isx.start, stack_depth(&isx), isy.start, stack_depth(&isy));
798   stack_free(&psx);
799   stack_free(&psy);
800   stack_free(&isx);
801   stack_free(&isy);
802   return r;
803 }
804 
get_all(Parser * p,PNode * x,VecPNode * vx)805 static void get_all(Parser *p, PNode *x, VecPNode *vx) {
806   uint i;
807   if (set_add(vx, x)) {
808     for (i = 0; i < x->children.n; i++) {
809       PNode *pn = x->children.v[i];
810       LATEST(p, pn);
811       get_all(p, pn, vx);
812     }
813   }
814 }
815 
get_unshared_pnodes(Parser * p,PNode * x,PNode * y,VecPNode * pvx,VecPNode * pvy)816 static void get_unshared_pnodes(Parser *p, PNode *x, PNode *y, VecPNode *pvx, VecPNode *pvy) {
817   uint i;
818   VecPNode vx, vy;
819   vec_clear(&vx);
820   vec_clear(&vy);
821   LATEST(p, x);
822   LATEST(p, y);
823   get_all(p, x, &vx);
824   get_all(p, y, &vy);
825   for (i = 0; i < vx.n; i++)
826     if (vx.v[i] && !set_find(&vy, vx.v[i])) vec_add(pvx, vx.v[i]);
827   for (i = 0; i < vy.n; i++)
828     if (vy.v[i] && !set_find(&vx, vy.v[i])) vec_add(pvy, vy.v[i]);
829   vec_free(&vx);
830   vec_free(&vy);
831 }
832 
greedycmp(const void * ax,const void * ay)833 static int greedycmp(const void *ax, const void *ay) {
834   PNode *x = *(PNode **)ax;
835   PNode *y = *(PNode **)ay;
836   /* first by start */
837   if (x->parse_node.start_loc.s < y->parse_node.start_loc.s) return -1;
838   if (x->parse_node.start_loc.s > y->parse_node.start_loc.s) return 1;
839   /* second by symbol */
840   if (x->parse_node.symbol < y->parse_node.symbol) return -1;
841   if (x->parse_node.symbol > y->parse_node.symbol) return 1;
842   /* third by length */
843   if (x->parse_node.end < y->parse_node.end) return -1;
844   if (x->parse_node.end > y->parse_node.end) return 1;
845   return 0;
846 }
847 
848 #define RET(_x)   \
849   do {            \
850     ret = (_x);   \
851     goto Lreturn; \
852   } while (0)
853 
cmp_greediness(Parser * p,PNode * x,PNode * y)854 static int cmp_greediness(Parser *p, PNode *x, PNode *y) {
855   uint ix = 0, iy = 0;
856   int ret = 0;
857 
858   VecPNode pvx, pvy;
859   vec_clear(&pvx);
860   vec_clear(&pvy);
861   get_unshared_pnodes(p, x, y, &pvx, &pvy);
862   /* set_to_vec(&pvx); set_to_vec(&pvy); */
863   qsort(pvx.v, pvx.n, sizeof(PNode *), greedycmp);
864   qsort(pvy.v, pvy.n, sizeof(PNode *), greedycmp);
865   while (1) {
866     if (pvx.n <= ix || pvy.n <= iy) RET(0);
867     x = pvx.v[ix];
868     y = pvy.v[iy];
869     if (x == y) {
870       ix++;
871       iy++;
872     } else if (x->parse_node.start_loc.s < y->parse_node.start_loc.s)
873       ix++;
874     else if (x->parse_node.start_loc.s > y->parse_node.start_loc.s)
875       iy++;
876     else if (x->parse_node.symbol < y->parse_node.symbol)
877       ix++;
878     else if (x->parse_node.symbol > y->parse_node.symbol)
879       iy++;
880     else if (x->parse_node.end > y->parse_node.end)
881       RET(-1);
882     else if (x->parse_node.end < y->parse_node.end)
883       RET(1);
884     else if (x->children.n < y->children.n)
885       RET(-1);
886     else if (x->children.n > y->children.n)
887       RET(1);
888     else {
889       ix++;
890       iy++;
891     }
892   }
893 Lreturn:
894   vec_free(&pvx);
895   vec_free(&pvy);
896   return ret;
897 }
898 
resolve_amb_greedy(D_Parser * dp,int n,D_ParseNode ** v)899 int resolve_amb_greedy(D_Parser *dp, int n, D_ParseNode **v) {
900   int i, result, selected_node = 0;
901 
902   for (i = 1; i < n; i++) {
903     result = cmp_greediness((Parser *)dp, D_ParseNode_to_PNode(v[i]), D_ParseNode_to_PNode(v[selected_node]));
904     if (result < 0 ||
905         (result == 0 && D_ParseNode_to_PNode(v[i])->height < D_ParseNode_to_PNode(v[selected_node])->height))
906       selected_node = i;
907   }
908   return selected_node;
909 }
910 
911 /* return -1 for x, 1 for y and 0 if they are ambiguous */
cmp_pnodes(Parser * p,PNode * x,PNode * y)912 static int cmp_pnodes(Parser *p, PNode *x, PNode *y) {
913   uint r = 0;
914   if (x->assoc && y->assoc) {
915     if ((r = cmp_priorities(p, x, y))) return r;
916   }
917   if (!p->user.dont_use_greediness_for_disambiguation)
918     if ((r = cmp_greediness(p, x, y))) return r;
919   if (!p->user.dont_use_height_for_disambiguation) {
920     if (x->height < y->height) return -1;
921     if (x->height > y->height) return 1;
922   }
923   return r;
924 }
925 
make_PNode(Parser * p,uint hash,int symbol,d_loc_t * start_loc,char * e,PNode * pn,D_Reduction * r,VecZNode * path,D_Shift * sh,D_Scope * scope)926 static PNode *make_PNode(Parser *p, uint hash, int symbol, d_loc_t *start_loc, char *e, PNode *pn, D_Reduction *r,
927                          VecZNode *path, D_Shift *sh, D_Scope *scope) {
928   int i;
929   uint l = sizeof(PNode) - sizeof(d_voidp) + p->user.sizeof_user_parse_node;
930   PNode *new_pn = p->free_pnodes;
931   if (!new_pn)
932     new_pn = MALLOC(l);
933   else
934     p->free_pnodes = new_pn->all_next;
935   p->pnodes++;
936   memset(new_pn, 0, l);
937 #ifdef TRACK_PNODES
938   new_pn->xnext = p->xall;
939   if (p->xall) p->xall->xprev = new_pn;
940   p->xall = new_pn;
941 #endif
942   new_pn->hash = hash;
943   new_pn->parse_node.symbol = symbol;
944   new_pn->parse_node.start_loc = *start_loc;
945   new_pn->ws_before = start_loc->ws;
946   if (!r || !path) /* end of last parse node of path for non-epsilon reductions */
947     new_pn->parse_node.end = e;
948   else
949     new_pn->parse_node.end = pn->parse_node.end;
950   new_pn->parse_node.end_skip = e;
951   new_pn->shift = sh;
952   new_pn->reduction = r;
953   new_pn->parse_node.scope = pn->parse_node.scope;
954   new_pn->initial_scope = scope;
955   new_pn->parse_node.globals = pn->parse_node.globals;
956   new_pn->initial_globals = new_pn->parse_node.globals;
957   new_pn->parse_node.white_space = pn->parse_node.white_space;
958   new_pn->latest = new_pn;
959   new_pn->ws_after = e;
960   if (sh) {
961     new_pn->op_assoc = sh->op_assoc;
962     new_pn->op_priority = sh->op_priority;
963     if (sh->speculative_code && sh->action_index != -1) {
964       D_Reduction dummy;
965       memset(&dummy, 0, sizeof(dummy));
966       dummy.action_index = sh->action_index;
967       new_pn->reduction = &dummy;
968       if (sh->speculative_code(new_pn, (void **)&new_pn->children.v[0], new_pn->children.n,
969                                (intptr_t) & ((PNode *)(NULL))->parse_node, (D_Parser *)p)) {
970         free_PNode(p, new_pn);
971         return NULL;
972       }
973       new_pn->reduction = NULL;
974     }
975   } else if (r) {
976     if (path)
977       for (i = path->n - 1; i >= 0; i--) {
978         PNode *latest = path->v[i]->pn;
979         LATEST(p, latest);
980         ref_pn(latest);
981         vec_add(&new_pn->children, latest);
982       }
983     if (reduce_actions(p, new_pn, r)) {
984       free_PNode(p, new_pn);
985       return NULL;
986     }
987     if (path && path->n > 1) {
988       uint n = path->n, i;
989       for (i = 0; i < n; i += n - 1) {
990         PNode *child = new_pn->children.v[i];
991         if (child->assoc && new_pn->assoc &&
992             !check_child(new_pn->priority, new_pn->assoc, child->priority, child->assoc, i == 0, i == n - 1)) {
993           free_PNode(p, new_pn);
994           return NULL;
995         }
996       }
997     }
998   }
999   return new_pn;
1000 }
1001 
PNode_equal(Parser * p,PNode * pn,D_Reduction * r,VecZNode * path,D_Shift * sh)1002 static int PNode_equal(Parser *p, PNode *pn, D_Reduction *r, VecZNode *path, D_Shift *sh) {
1003   uint i, n = pn->children.n;
1004   if (sh) return sh == pn->shift;
1005   if (r != pn->reduction) return 0;
1006   if (!path && !n) return 1;
1007   if (n == path->n) {
1008     for (i = 0; i < n; i++) {
1009       PNode *x = pn->children.v[i], *y = path->v[n - i - 1]->pn;
1010       LATEST(p, x);
1011       LATEST(p, y);
1012       if (x != y) return 0;
1013     }
1014     return 1;
1015   }
1016   return 0;
1017 }
1018 
1019 /* find/create PNode */
add_PNode(Parser * p,int symbol,d_loc_t * start_loc,char * e,PNode * pn,D_Reduction * r,VecZNode * path,D_Shift * sh)1020 static PNode *add_PNode(Parser *p, int symbol, d_loc_t *start_loc, char *e, PNode *pn, D_Reduction *r, VecZNode *path,
1021                         D_Shift *sh) {
1022   D_Scope *scope = equiv_D_Scope(pn->parse_node.scope);
1023   uint hash;
1024   PNode *old_pn = find_PNode(p, start_loc->s, e, symbol, scope, pn->parse_node.globals, &hash), *new_pn;
1025   if (old_pn) {
1026     PNode *amb = 0;
1027     if (PNode_equal(p, old_pn, r, path, sh)) return old_pn;
1028     for (amb = old_pn->ambiguities; amb; amb = amb->ambiguities) {
1029       if (PNode_equal(p, amb, r, path, sh)) return old_pn;
1030     }
1031   }
1032   new_pn = make_PNode(p, hash, symbol, start_loc, e, pn, r, path, sh, scope);
1033   if (!old_pn) {
1034     old_pn = new_pn;
1035     if (!new_pn) return NULL;
1036     insert_PNode(p, new_pn);
1037     goto Lreturn;
1038   }
1039   if (!new_pn) goto Lreturn;
1040   p->compares++;
1041   switch (cmp_pnodes(p, new_pn, old_pn)) {
1042     case 0:
1043       ref_pn(new_pn);
1044       new_pn->ambiguities = old_pn->ambiguities;
1045       old_pn->ambiguities = new_pn;
1046       break;
1047     case -1:
1048       insert_PNode(p, new_pn);
1049       LATEST(p, old_pn);
1050       ref_pn(new_pn);
1051       old_pn->latest = new_pn;
1052       old_pn = new_pn;
1053       break;
1054     case 1:
1055       free_PNode(p, new_pn);
1056       break;
1057   }
1058 Lreturn:
1059   return old_pn;
1060 }
1061 
1062 /* The set of znodes associated with a state can be very large
1063    because of cascade reductions (for example, large expression trees).
1064    Use an adaptive data structure starting with a short list and
1065    then falling back to a direct map hash table.
1066 */
1067 
1068 static void set_add_znode(VecZNode *v, ZNode *z);
1069 
set_union_znode(VecZNode * v,VecZNode * vv)1070 static void set_union_znode(VecZNode *v, VecZNode *vv) {
1071   uint i;
1072   for (i = 0; i < vv->n; i++)
1073     if (vv->v[i]) set_add_znode(v, vv->v[i]);
1074 }
1075 
set_find_znode(VecZNode * v,PNode * pn)1076 static ZNode *set_find_znode(VecZNode *v, PNode *pn) {
1077   uint i, j, n = v->n, h;
1078   if (n <= INTEGRAL_VEC_SIZE) {
1079     for (i = 0; i < n; i++)
1080       if (v->v[i]->pn == pn) return v->v[i];
1081     return NULL;
1082   }
1083   h = ((uintptr_t)pn) % n;
1084   for (i = h, j = 0; i < v->n && j < SET_MAX_SEQUENTIAL; i = ((i + 1) % n), j++) {
1085     if (!v->v[i])
1086       return NULL;
1087     else if (v->v[i]->pn == pn)
1088       return v->v[i];
1089   }
1090   return NULL;
1091 }
1092 
set_add_znode_hash(VecZNode * v,ZNode * z)1093 static void set_add_znode_hash(VecZNode *v, ZNode *z) {
1094   uint i, j, n = v->n;
1095   VecZNode vv;
1096   vec_clear(&vv);
1097   if (n) {
1098     uint h = ((uintptr_t)z->pn) % n;
1099     for (i = h, j = 0; i < v->n && j < SET_MAX_SEQUENTIAL; i = ((i + 1) % n), j++) {
1100       if (!v->v[i]) {
1101         v->v[i] = z;
1102         return;
1103       }
1104     }
1105   }
1106   if (!n) {
1107     vv.v = NULL;
1108     v->i = INITIAL_SET_SIZE_INDEX;
1109   } else {
1110     vv.v = (void *)v->v;
1111     vv.n = v->n;
1112     v->i = v->i + 2;
1113   }
1114   v->n = d_prime2[v->i];
1115   v->v = MALLOC(v->n * sizeof(void *));
1116   memset(v->v, 0, v->n * sizeof(void *));
1117   if (vv.v) {
1118     set_union_znode(v, &vv);
1119     FREE(vv.v);
1120   }
1121   set_add_znode(v, z);
1122 }
1123 
set_add_znode(VecZNode * v,ZNode * z)1124 static void set_add_znode(VecZNode *v, ZNode *z) {
1125   uint i, n = v->n;
1126   VecZNode vv;
1127   vec_clear(&vv);
1128   if (n < INTEGRAL_VEC_SIZE) {
1129     vec_add(v, z);
1130     return;
1131   }
1132   if (n == INTEGRAL_VEC_SIZE) {
1133     vv = *v;
1134     vec_clear(v);
1135     for (i = 0; i < n; i++) set_add_znode_hash(v, vv.v[i]);
1136   }
1137   set_add_znode_hash(v, z);
1138 }
1139 
1140 #define GOTO_STATE(_p, _pn, _ps) ((_p)->t->goto_table[(_pn)->parse_node.symbol - (_ps)->state->goto_table_offset] - 1)
goto_PNode(Parser * p,d_loc_t * loc,PNode * pn,SNode * ps)1141 static SNode *goto_PNode(Parser *p, d_loc_t *loc, PNode *pn, SNode *ps) {
1142   SNode *new_ps, *pre_ps;
1143   ZNode *z = NULL;
1144   D_State *state;
1145   uint i, j, k, state_index;
1146 
1147   if (!IS_BIT_SET(ps->state->goto_valid, pn->parse_node.symbol)) return NULL;
1148   state_index = GOTO_STATE(p, pn, ps);
1149   state = &p->t->state[state_index];
1150   new_ps = add_SNode(p, state, loc, pn->parse_node.scope, pn->parse_node.globals);
1151   if (new_ps->last_pn) unref_pn(p, new_ps->last_pn);
1152   ref_pn(pn);
1153   new_ps->last_pn = pn;
1154 
1155   DBG(printf("goto %d (%s) -> %d %p\n", (int)(ps->state - p->t->state), p->t->symbols[pn->parse_node.symbol].name,
1156              state_index, new_ps));
1157   if (ps != new_ps && new_ps->depth < ps->depth + 1) new_ps->depth = ps->depth + 1;
1158   /* find/create ZNode */
1159   z = set_find_znode(&new_ps->zns, pn);
1160   if (!z) { /* not found */
1161     set_add_znode(&new_ps->zns, (z = new_ZNode(p, pn)));
1162     for (j = 0; j < new_ps->state->reductions.n; j++)
1163       if (new_ps->state->reductions.v[j]->nelements) add_Reduction(p, z, new_ps, new_ps->state->reductions.v[j]);
1164     if (!pn->shift)
1165       for (j = 0; j < new_ps->state->right_epsilon_hints.n; j++) {
1166         D_RightEpsilonHint *h = &new_ps->state->right_epsilon_hints.v[j];
1167         pre_ps = find_SNode(p, h->preceeding_state, new_ps->initial_scope, new_ps->initial_globals);
1168         if (!pre_ps) continue;
1169         for (k = 0; k < pre_ps->zns.n; k++)
1170           if (pre_ps->zns.v[k]) {
1171             Reduction *r = add_Reduction(p, pre_ps->zns.v[k], pre_ps, h->reduction);
1172             if (r) {
1173               r->new_snode = new_ps;
1174               r->new_depth = h->depth;
1175             }
1176           }
1177       }
1178   }
1179   for (i = 0; i < z->sns.n; i++)
1180     if (z->sns.v[i] == ps) break;
1181   if (i >= z->sns.n) { /* not found */
1182     vec_add(&z->sns, ps);
1183     if (new_ps != ps) ref_sn(ps);
1184   }
1185   return new_ps;
1186 }
1187 
parse_whitespace(D_Parser * ap,d_loc_t * loc,void ** p_globals)1188 void parse_whitespace(D_Parser *ap, d_loc_t *loc, void **p_globals) {
1189   Parser *pp = ((Parser *)ap)->whitespace_parser;
1190   (void)p_globals;
1191   pp->start = loc->s;
1192   if (!exhaustive_parse(pp, ((Parser *)ap)->t->whitespace_state)) {
1193     if (pp->accept) {
1194       uint old_col = loc->col, old_line = loc->line;
1195       *loc = pp->accept->loc;
1196       if (loc->line == 1) loc->col = old_col + loc->col;
1197       loc->line = old_line + (pp->accept->loc.line - 1);
1198       unref_sn(pp, pp->accept);
1199       pp->accept = NULL;
1200     }
1201   }
1202 }
1203 
shift_all(Parser * p,char * pos)1204 static void shift_all(Parser *p, char *pos) {
1205   uint i, j, nshifts = 0;
1206   int ncode = 0;
1207   d_loc_t loc, skip_loc;
1208   D_WhiteSpaceFn skip_fn = NULL;
1209   PNode *new_pn;
1210   D_State *state;
1211   ShiftResult *r;
1212   Shift *saved_s = p->shifts_todo, *s = saved_s, *ss;
1213 
1214   loc = s->snode->loc;
1215   skip_loc.s = NULL;
1216 
1217   for (; (s = p->shifts_todo) && s->snode->loc.s == pos;) {
1218     if (p->nshift_results - nshifts < p->t->nsymbols * 2) {
1219       p->nshift_results = nshifts + p->t->nsymbols * 3;
1220       p->shift_results = REALLOC(p->shift_results, p->nshift_results * sizeof(ShiftResult));
1221     }
1222     p->shifts_todo = p->shifts_todo->next;
1223     p->scans++;
1224     state = s->snode->state;
1225     if (state->scanner_code) {
1226       if (p->ncode_shifts < ncode + 1) {
1227         p->ncode_shifts = ncode + 2;
1228         p->code_shifts = REALLOC(p->code_shifts, p->ncode_shifts * sizeof(D_Shift));
1229       }
1230       p->code_shifts[ncode].shift_kind = D_SCAN_ALL;
1231       p->code_shifts[ncode].term_priority = 0;
1232       p->code_shifts[ncode].op_assoc = 0;
1233       p->code_shifts[ncode].action_index = 0;
1234       p->code_shifts[ncode].speculative_code = 0;
1235       p->shift_results[nshifts].loc = loc;
1236       if ((state->scanner_code(&p->shift_results[nshifts].loc, &p->code_shifts[ncode].symbol,
1237                                &p->code_shifts[ncode].term_priority, &p->code_shifts[ncode].op_assoc,
1238                                &p->code_shifts[ncode].op_priority))) {
1239         p->shift_results[nshifts].snode = s->snode;
1240         p->shift_results[nshifts++].shift = &p->code_shifts[ncode++];
1241       }
1242     }
1243     if (state->scanner_table) {
1244       uint n = scan_buffer(&loc, state, &p->shift_results[nshifts]);
1245       for (i = 0; i < n; i++) p->shift_results[nshifts + i].snode = s->snode;
1246       nshifts += n;
1247     }
1248   }
1249   for (i = 0; i < nshifts; i++) {
1250     r = &p->shift_results[i];
1251     if (!r->shift) continue;
1252     if (r->shift->shift_kind == D_SCAN_TRAILING) {
1253       uint symbol = r->shift->symbol;
1254       SNode *sn = r->snode;
1255       r->shift = 0;
1256       for (j = i + 1; j < nshifts; j++) {
1257         if (p->shift_results[j].shift && sn == p->shift_results[j].snode &&
1258             symbol == p->shift_results[j].shift->symbol) {
1259           r->shift = p->shift_results[j].shift;
1260           p->shift_results[j].shift = 0;
1261         }
1262       }
1263     }
1264     if (r->shift && r->shift->term_priority) {
1265       /* potentially n^2 but typically small */
1266       for (j = 0; j < nshifts; j++) {
1267         if (!p->shift_results[j].shift) continue;
1268         if (r->loc.s == p->shift_results[j].loc.s && j != i) {
1269           if (r->shift->term_priority < p->shift_results[j].shift->term_priority) {
1270             r->shift = 0;
1271             break;
1272           }
1273           if (r->shift->term_priority > p->shift_results[j].shift->term_priority) p->shift_results[j].shift = 0;
1274         }
1275       }
1276     }
1277   }
1278   for (i = 0; i < nshifts; i++) {
1279     r = &p->shift_results[i];
1280     if (!r->shift) continue;
1281     p->shifts++;
1282     DBG(printf("shift %d %p %d (%s)\n", (int)(r->snode->state - p->t->state), r->snode, r->shift->symbol,
1283                p->t->symbols[r->shift->symbol].name));
1284     new_pn = add_PNode(p, r->shift->symbol, &r->snode->loc, r->loc.s, r->snode->last_pn, NULL, NULL, r->shift);
1285     if (new_pn) {
1286       if (!skip_loc.s || skip_loc.s != r->loc.s || skip_fn != new_pn->parse_node.white_space) {
1287         skip_loc = r->loc;
1288         skip_fn = new_pn->parse_node.white_space;
1289         new_pn->parse_node.white_space((D_Parser *)p, &skip_loc, (void **)&new_pn->parse_node.globals);
1290         skip_loc.ws = r->loc.s;
1291         new_pn->ws_before = loc.ws;
1292         new_pn->ws_after = skip_loc.s;
1293       }
1294       goto_PNode(p, &skip_loc, new_pn, r->snode);
1295     }
1296   }
1297   for (s = saved_s; s && s->snode->loc.s == pos;) {
1298     ss = s;
1299     s = s->next;
1300     unref_sn(p, ss->snode);
1301     ss->next = p->free_shifts;
1302     p->free_shifts = ss;
1303   }
1304 }
1305 
1306 static VecZNode path1; /* static first path for speed */
1307 
new_VecZNode(VecVecZNode * paths,int n,int parent)1308 static VecZNode *new_VecZNode(VecVecZNode *paths, int n, int parent) {
1309   int i;
1310   VecZNode *pv;
1311 
1312   if (!paths->n)
1313     pv = &path1;
1314   else
1315     pv = MALLOC(sizeof *pv);
1316   vec_clear(pv);
1317   if (parent >= 0)
1318     for (i = 0; i < n; i++) vec_add(pv, paths->v[parent]->v[i]);
1319   return pv;
1320 }
1321 
build_paths_internal(ZNode * z,VecVecZNode * paths,int parent,int n,int n_to_go)1322 static void build_paths_internal(ZNode *z, VecVecZNode *paths, int parent, int n, int n_to_go) {
1323   uint j, k, l;
1324 
1325   vec_add(paths->v[parent], z);
1326   if (n_to_go <= 1) return;
1327   for (k = 0; k < z->sns.n; k++)
1328     for (j = 0, l = 0; j < z->sns.v[k]->zns.n; j++) {
1329       if (z->sns.v[k]->zns.v[j]) {
1330         if (k + l) {
1331           vec_add(paths, new_VecZNode(paths, n - (n_to_go - 1), parent));
1332           parent = paths->n - 1;
1333         }
1334         build_paths_internal(z->sns.v[k]->zns.v[j], paths, parent, n, n_to_go - 1);
1335         l++;
1336       }
1337     }
1338 }
1339 
build_paths(ZNode * z,VecVecZNode * paths,int nchildren_to_go)1340 static void build_paths(ZNode *z, VecVecZNode *paths, int nchildren_to_go) {
1341   if (!nchildren_to_go) return;
1342   vec_add(paths, new_VecZNode(paths, 0, -1));
1343   build_paths_internal(z, paths, 0, nchildren_to_go, nchildren_to_go);
1344 }
1345 
free_paths(VecVecZNode * paths)1346 static void free_paths(VecVecZNode *paths) {
1347   uint i;
1348   for (i = 0; i < paths->n; i++) {
1349     vec_free(paths->v[i]);
1350     if (paths->v[i] != &path1) FREE(paths->v[i]);
1351   }
1352   vec_free(paths);
1353 }
1354 
reduce_one(Parser * p,Reduction * r)1355 static void reduce_one(Parser *p, Reduction *r) {
1356   SNode *sn = r->snode;
1357   PNode *pn, *last_pn;
1358   ZNode *first_z;
1359   uint i, j, n = r->reduction->nelements;
1360   VecVecZNode paths;
1361   VecZNode *path;
1362 
1363   if (!r->znode) { /* epsilon reduction */
1364     if ((pn = add_PNode(p, r->reduction->symbol, &sn->loc, sn->loc.s, sn->last_pn, r->reduction, 0, 0)))
1365       goto_PNode(p, &sn->loc, pn, sn);
1366   } else {
1367     DBG(printf("reduce %d %p %d\n", (int)(r->snode->state - p->t->state), sn, n));
1368     vec_clear(&paths);
1369     build_paths(r->znode, &paths, n);
1370     for (i = 0; i < paths.n; i++) {
1371       path = paths.v[i];
1372       if (r->new_snode) { /* prune paths by new right epsilon node */
1373         for (j = 0; j < path->v[r->new_depth]->sns.n; j++)
1374           if (path->v[r->new_depth]->sns.v[j] == r->new_snode) break;
1375         if (j >= path->v[r->new_depth]->sns.n) continue;
1376       }
1377       if (check_path_priorities(path)) continue;
1378       p->reductions++;
1379       last_pn = path->v[0]->pn;
1380       first_z = path->v[n - 1];
1381       pn = add_PNode(p, r->reduction->symbol, &first_z->pn->parse_node.start_loc, sn->loc.s, last_pn, r->reduction,
1382                      path, NULL);
1383       if (pn)
1384         for (j = 0; j < first_z->sns.n; j++) goto_PNode(p, &sn->loc, pn, first_z->sns.v[j]);
1385     }
1386     free_paths(&paths);
1387   }
1388   unref_sn(p, sn);
1389   r->next = p->free_reductions;
1390   p->free_reductions = r;
1391 }
1392 
VecSNode_equal(VecSNode * vsn1,VecSNode * vsn2)1393 static int VecSNode_equal(VecSNode *vsn1, VecSNode *vsn2) {
1394   uint i, j;
1395   if (vsn1->n != vsn2->n) return 0;
1396   for (i = 0; i < vsn1->n; i++) {
1397     for (j = 0; j < vsn2->n; j++) {
1398       if (vsn1->v[i] == vsn2->v[j]) break;
1399     }
1400     if (j >= vsn2->n) return 0;
1401   }
1402   return 1;
1403 }
1404 
binary_op_ZNode(SNode * sn)1405 static ZNode *binary_op_ZNode(SNode *sn) {
1406   ZNode *z;
1407   if (sn->zns.n != 1) return NULL;
1408   z = sn->zns.v[0];
1409   if (z->pn->op_assoc == ASSOC_UNARY_RIGHT) {
1410     if (z->sns.n != 1) return NULL;
1411     sn = z->sns.v[0];
1412     if (sn->zns.n != 1) return NULL;
1413     z = sn->zns.v[0];
1414   }
1415   if (!IS_BINARY_ASSOC(z->pn->op_assoc)) return NULL;
1416   return z;
1417 }
1418 
1419 #ifdef D_DEBUG
1420 
1421 static const char *spaces =
1422     "                                                                                                  ";
print_stack(Parser * p,SNode * s,int indent)1423 static void print_stack(Parser *p, SNode *s, int indent) {
1424   uint i, j;
1425 
1426   printf("%d", (int)(s->state - p->t->state));
1427   indent += 2;
1428   for (i = 0; i < s->zns.n; i++) {
1429     if (!s->zns.v[i]) continue;
1430     if (s->zns.n > 1) printf("\n%s[", &spaces[99 - indent]);
1431     printf("(%s:", p->t->symbols[s->zns.v[i]->pn->parse_node.symbol].name);
1432     print_paren(p, s->zns.v[i]->pn);
1433     printf(")");
1434     for (j = 0; j < s->zns.v[i]->sns.n; j++) {
1435       if (s->zns.v[i]->sns.n > 1) printf("\n%s[", &spaces[98 - indent]);
1436       print_stack(p, s->zns.v[i]->sns.v[j], indent);
1437       if (s->zns.v[i]->sns.n > 1) printf("]");
1438     }
1439     if (s->zns.n > 1) printf("]");
1440   }
1441 }
1442 #endif
1443 
1444 /* compare two stacks with operators on top of identical substacks
1445    eliminating the stack with the lower priority binary operator
1446    - used to eliminate unnecessary stacks created by the
1447      (empty) application binary operator
1448 */
cmp_stacks(Parser * p)1449 static void cmp_stacks(Parser *p) {
1450   char *pos;
1451   Shift *a, *b, **al, **bl;
1452   ZNode *az, *bz;
1453 
1454   pos = p->shifts_todo->snode->loc.s;
1455   DBG({
1456     uint i = 0;
1457     for (al = &p->shifts_todo, a = *al; a && a->snode->loc.s == pos; al = &a->next, a = a->next) {
1458       if (++i < 2) printf("\n");
1459       print_stack(p, a->snode, 0);
1460       printf("\n");
1461     }
1462   });
1463   for (al = &p->shifts_todo, a = *al; a && a->snode->loc.s == pos; al = &a->next, a = a->next) {
1464     if (!(az = binary_op_ZNode(a->snode))) continue;
1465     for (bl = &a->next, b = a->next; b && b->snode->loc.s == pos; bl = &b->next, b = b->next) {
1466       if (!(bz = binary_op_ZNode(b->snode))) continue;
1467       if (!VecSNode_equal(&az->sns, &bz->sns)) continue;
1468       if ((a->snode->state->reduces_to != b->snode->state - p->t->state) &&
1469           (b->snode->state->reduces_to != a->snode->state - p->t->state))
1470         continue;
1471       if (az->pn->op_priority > bz->pn->op_priority) {
1472         DBG(printf("DELETE "); print_stack(p, b->snode, 0); printf("\n"));
1473         *bl = b->next;
1474         unref_sn(p, b->snode);
1475         FREE(b);
1476         b = *bl;
1477         break;
1478       }
1479       if (az->pn->op_priority < bz->pn->op_priority) {
1480         DBG(printf("DELETE "); print_stack(p, a->snode, 0); printf("\n"));
1481         *al = a->next;
1482         unref_sn(p, a->snode);
1483         FREE(a);
1484         a = *al;
1485         goto Lbreak2;
1486       }
1487     }
1488   Lbreak2:;
1489   }
1490 }
1491 
free_ParseTreeBelow(Parser * p,PNode * pn)1492 static void free_ParseTreeBelow(Parser *p, PNode *pn) {
1493   uint i;
1494   PNode *amb;
1495 
1496   for (i = 0; i < pn->children.n; i++) unref_pn(p, pn->children.v[i]);
1497   vec_free(&pn->children);
1498   if ((amb = pn->ambiguities)) {
1499     pn->ambiguities = NULL;
1500     free_PNode(p, amb);
1501   }
1502 }
1503 
free_D_ParseTreeBelow(D_Parser * p,D_ParseNode * dpn)1504 void free_D_ParseTreeBelow(D_Parser *p, D_ParseNode *dpn) { free_ParseTreeBelow((Parser *)p, DPN_TO_PN(dpn)); }
1505 
ambiguity_count_fn(D_Parser * pp,int n,D_ParseNode ** v)1506 D_ParseNode *ambiguity_count_fn(D_Parser *pp, int n, D_ParseNode **v) {
1507   Parser *p = (Parser *)pp;
1508   p->ambiguities += n - 1;
1509   return v[0];
1510 }
1511 
ambiguity_abort_fn(D_Parser * pp,int n,D_ParseNode ** v)1512 D_ParseNode *ambiguity_abort_fn(D_Parser *pp, int n, D_ParseNode **v) {
1513   int i;
1514   if (d_verbose_level) {
1515     for (i = 0; i < n; i++) {
1516       print_paren((Parser *)pp, D_ParseNode_to_PNode(v[i]));
1517       printf("\n");
1518     }
1519   }
1520   d_fail("unresolved ambiguity line %d file %s", v[0]->start_loc.line, v[0]->start_loc.pathname);
1521   return v[0];
1522 }
1523 
final_actionless(PNode * pn)1524 static int final_actionless(PNode *pn) {
1525   uint i;
1526   if (pn->reduction && pn->reduction->final_code) return 0;
1527   for (i = 0; i < pn->children.n; i++)
1528     if (!final_actionless(pn->children.v[i])) return 0;
1529   return 1;
1530 }
1531 
resolve_ambiguities(Parser * p,PNode * pn)1532 static PNode *resolve_ambiguities(Parser *p, PNode *pn) {
1533   PNode *amb;
1534   D_ParseNode *res;
1535   uint efa;
1536   Vec(D_ParseNode *) pns;
1537 
1538   vec_clear(&pns);
1539   efa = is_epsilon_PNode(pn) && final_actionless(pn);
1540   vec_add(&pns, &pn->parse_node);
1541   for (amb = pn->ambiguities; amb; amb = amb->ambiguities) {
1542     uint i, found = 0;
1543     LATEST(p, amb);
1544     if (!p->user.dont_merge_epsilon_trees)
1545       if (efa && is_epsilon_PNode(amb) && final_actionless(amb)) continue;
1546     for (i = 0; i < pns.n; i++)
1547       if (pns.v[i] == &amb->parse_node) found = 1;
1548     if (!found) vec_add(&pns, &amb->parse_node);
1549   }
1550   if (pns.n == 1) {
1551     res = pns.v[0];
1552     goto Ldone;
1553   }
1554   res = p->user.ambiguity_fn((D_Parser *)p, pns.n, pns.v);
1555 Ldone:
1556   vec_free(&pns);
1557   return D_ParseNode_to_PNode(res);
1558 }
1559 
fixup_internal_symbol(Parser * p,PNode * pn,int ichild)1560 static void fixup_internal_symbol(Parser *p, PNode *pn, int ichild) {
1561   PNode *child = pn->children.v[ichild];
1562   int j, n, pnn;
1563   n = child->children.n, pnn = pn->children.n;
1564   if (pn == child) d_fail("circular parse: unable to fixup internal symbol");
1565   if (n == 0) {
1566     for (j = ichild; j < pnn - 1; j++) pn->children.v[j] = pn->children.v[j + 1];
1567     pn->children.n--;
1568   } else if (n == 1) {
1569     ref_pn(child->children.v[0]);
1570     pn->children.v[ichild] = child->children.v[0];
1571   } else {
1572     for (j = 0; j < n - 1; j++) /* expand children vector */
1573       vec_add(&pn->children, NULL);
1574     for (j = pnn - 1; j >= ichild + 1; j--) /* move to new places */
1575       pn->children.v[j - 1 + n] = pn->children.v[j];
1576     for (j = 0; j < n; j++) {
1577       ref_pn(child->children.v[j]);
1578       pn->children.v[ichild + j] = child->children.v[j];
1579     }
1580   }
1581   unref_pn(p, child);
1582 }
1583 
1584 #define is_symbol_internal_or_EBNF(_p, _pn)                                \
1585   ((_p)->t->symbols[(_pn)->parse_node.symbol].kind == D_SYMBOL_INTERNAL || \
1586    (_p)->t->symbols[(_pn)->parse_node.symbol].kind == D_SYMBOL_EBNF)
1587 #define is_symbol_internal(_p, _pn) ((_p)->t->symbols[(_pn)->parse_node.symbol].kind == D_SYMBOL_INTERNAL)
1588 #define is_unreduced_epsilon_PNode(_pn) (is_epsilon_PNode(_pn) && ((_pn)->reduction && (_pn)->reduction->final_code))
1589 
commit_tree(Parser * p,PNode * pn)1590 static PNode *commit_tree(Parser *p, PNode *pn) {
1591   uint i, fixup_ebnf = 0, fixup = 0, internal = 0;
1592   LATEST(p, pn);
1593   if (pn->evaluated) return pn;
1594   if (!is_unreduced_epsilon_PNode(pn)) pn->evaluated = 1;
1595   if (pn->ambiguities) pn = resolve_ambiguities(p, pn);
1596   fixup_ebnf = p->user.fixup_EBNF_productions;
1597   internal = is_symbol_internal_or_EBNF(p, pn);
1598   fixup = !p->user.dont_fixup_internal_productions && internal;
1599   for (i = 0; i < pn->children.n; i++) {
1600     PNode *tpn = commit_tree(p, pn->children.v[i]);
1601     if (tpn != pn->children.v[i]) {
1602       ref_pn(tpn);
1603       unref_pn(p, pn->children.v[i]);
1604       pn->children.v[i] = tpn;
1605     }
1606     if (fixup &&
1607         (fixup_ebnf ? is_symbol_internal_or_EBNF(p, pn->children.v[i]) : is_symbol_internal(p, pn->children.v[i]))) {
1608       fixup_internal_symbol(p, pn, i);
1609       i -= 1;
1610       continue;
1611     }
1612   }
1613   if (pn->reduction) DBG(printf("commit %p (%s)\n", pn, p->t->symbols[pn->parse_node.symbol].name));
1614   if (pn->reduction && pn->reduction->final_code)
1615     pn->reduction->final_code(pn, (void **)&pn->children.v[0], pn->children.n,
1616                               (intptr_t) & ((PNode *)(NULL))->parse_node, (D_Parser *)p);
1617   if (pn->evaluated) {
1618     if (!p->user.save_parse_tree && !internal) free_ParseTreeBelow(p, pn);
1619   }
1620   return pn;
1621 }
1622 
commit_stack(Parser * p,SNode * sn)1623 static int commit_stack(Parser *p, SNode *sn) {
1624   int res = 0;
1625   PNode *tpn;
1626 
1627   if (sn->zns.n != 1) return -1;
1628   if (sn->zns.v[0]->sns.n > 1) return -2;
1629   if (is_unreduced_epsilon_PNode(sn->zns.v[0]->pn)) /* wait till reduced */
1630     return -3;
1631   if (sn->zns.v[0]->sns.n)
1632     if ((res = commit_stack(p, sn->zns.v[0]->sns.v[0])) < 0) return res;
1633   tpn = commit_tree(p, sn->zns.v[0]->pn);
1634   if (tpn != sn->zns.v[0]->pn) {
1635     ref_pn(tpn);
1636     unref_pn(p, sn->zns.v[0]->pn);
1637     sn->zns.v[0]->pn = tpn;
1638   }
1639   return res;
1640 }
1641 
find_substr(const char * str,const char * s)1642 static const char *find_substr(const char *str, const char *s) {
1643   uint len = strlen(s);
1644   if (len == 1) {
1645     while (*str && *str != *s) str++;
1646     if (*str == *s) return str + 1;
1647   } else
1648     while (*str) {
1649       if (!strncmp(s, str, len)) return str + len;
1650       str++;
1651     }
1652   return NULL;
1653 }
1654 
syntax_error_report_fn(struct D_Parser * ap)1655 static void syntax_error_report_fn(struct D_Parser *ap) {
1656   Parser *p = (Parser *)ap;
1657   char *fn = d_dup_pathname_str(p->user.loc.pathname);
1658   char *after = 0;
1659   ZNode *z = p->snode_hash.last_all ? p->snode_hash.last_all->zns.v[0] : 0;
1660   while (z && z->pn->parse_node.start_loc.s == z->pn->parse_node.end)
1661     z = (z->sns.v && z->sns.v[0]->zns.v) ? z->sns.v[0]->zns.v[0] : 0;
1662   if (z && z->pn->parse_node.start_loc.s != z->pn->parse_node.end)
1663     after = dup_str(z->pn->parse_node.start_loc.s, z->pn->parse_node.end);
1664   if (after)
1665     fprintf(stderr, "%s:%d: syntax error after '%s'\n", fn, p->user.loc.line, after);
1666   else
1667     fprintf(stderr, "%s:%d: syntax error\n", fn, p->user.loc.line);
1668   if (after) FREE(after);
1669   FREE(fn);
1670 }
1671 
update_line(const char * s,const char * e,int * line)1672 static void update_line(const char *s, const char *e, int *line) {
1673   for (; s < e; s++)
1674     if (*s == '\n') (*line)++;
1675 }
1676 
error_recovery(Parser * p)1677 static int error_recovery(Parser *p) {
1678   SNode *sn, *best_sn = NULL;
1679   const char *best_s = NULL, *ss, *s;
1680   uint i, j, head = 0, tail = 0, res = 1;
1681   D_ErrorRecoveryHint *best_er = NULL;
1682   SNode **q = 0;
1683   PNode *best_pn = NULL;
1684 
1685   if (!p->snode_hash.last_all) return res;
1686   p->user.loc = p->snode_hash.last_all->loc;
1687   if (!p->user.error_recovery) return res;
1688   q = MALLOC(ERROR_RECOVERY_QUEUE_SIZE * sizeof(SNode *));
1689   if (p->user.loc.line > p->last_syntax_error_line) {
1690     p->last_syntax_error_line = p->user.loc.line;
1691     p->user.syntax_errors++;
1692     p->user.syntax_error_fn((D_Parser *)p);
1693   }
1694   for (sn = p->snode_hash.last_all; sn; sn = sn->all_next) {
1695     if (tail < ERROR_RECOVERY_QUEUE_SIZE - 1) q[tail++] = sn;
1696   }
1697   s = p->snode_hash.last_all->loc.s;
1698   while (tail > head) {
1699     sn = q[head++];
1700     if (sn->state->error_recovery_hints.n) {
1701       for (i = 0; i < sn->state->error_recovery_hints.n; i++) {
1702         D_ErrorRecoveryHint *er = &sn->state->error_recovery_hints.v[i];
1703         if ((ss = find_substr(s, er->string))) {
1704           if (!best_sn || ss < best_s ||
1705               (best_sn && ss == best_s &&
1706                (best_sn->depth < sn->depth || (best_sn->depth == sn->depth && best_er->depth < er->depth)))) {
1707             best_sn = sn;
1708             best_s = ss;
1709             best_er = er;
1710           }
1711         }
1712       }
1713     }
1714     for (i = 0; i < sn->zns.n; i++)
1715       if (sn->zns.v[i])
1716         for (j = 0; j < sn->zns.v[i]->sns.n; j++) {
1717           if (tail < ERROR_RECOVERY_QUEUE_SIZE - 1) q[tail++] = sn->zns.v[i]->sns.v[j];
1718         }
1719   }
1720   if (best_sn) {
1721     D_Reduction *rr = MALLOC(sizeof(*rr));
1722     Reduction *r = MALLOC(sizeof(*r));
1723     d_loc_t best_loc = p->user.loc;
1724     PNode *new_pn;
1725     SNode *new_sn;
1726     ZNode *z;
1727 
1728     memset(rr, 0, sizeof(*rr));
1729     vec_add(&p->error_reductions, rr);
1730     rr->nelements = best_er->depth + 1;
1731     rr->symbol = best_er->symbol;
1732     update_line(best_loc.s, best_s, &best_loc.line);
1733     best_loc.s = (char *)best_s;
1734     for (i = 0; i < best_sn->zns.n; i++)
1735       if (best_sn->zns.v[i]) {
1736         best_pn = best_sn->zns.v[i]->pn;
1737         break;
1738       }
1739     best_pn->parse_node.white_space((D_Parser *)p, &best_loc, (void **)&best_pn->parse_node.globals);
1740     new_pn = add_PNode(p, 0, &p->user.loc, best_loc.s, best_pn, 0, 0, 0);
1741     new_sn = new_SNode(p, best_sn->state, &best_loc, new_pn->initial_scope, new_pn->initial_globals);
1742     ref_pn(new_pn);
1743     new_sn->last_pn = new_pn;
1744     set_add_znode(&new_sn->zns, (z = new_ZNode(p, new_pn)));
1745     vec_add(&z->sns, best_sn);
1746     ref_sn(best_sn);
1747     r->znode = z;
1748     ref_sn(new_sn);
1749     r->snode = new_sn;
1750     r->reduction = rr;
1751     r->new_snode = NULL;
1752     r->next = NULL;
1753     free_old_nodes(p);
1754     free_old_nodes(p);
1755     reduce_one(p, r);
1756     for (i = 0; i < p->snode_hash.m; i++)
1757       for (sn = p->snode_hash.v[i]; sn; sn = sn->bucket_next)
1758         for (j = 0; j < sn->zns.n; j++)
1759           if ((z = sn->zns.v[j]))
1760             if (z->pn->reduction == rr) {
1761               z->pn->evaluated = 1;
1762               z->pn->error_recovery = 1;
1763             }
1764     if (p->shifts_todo || p->reductions_todo) res = 0;
1765   }
1766   FREE(q);
1767   return res;
1768 }
1769 
1770 #define PASS_CODE_FOUND(_p, _pn) \
1771   ((_pn)->reduction && (uint)(_pn)->reduction->npass_code > (_p)->index && (_pn)->reduction->pass_code[(_p)->index])
1772 
pass_call(Parser * p,D_Pass * pp,PNode * pn)1773 static void pass_call(Parser *p, D_Pass *pp, PNode *pn) {
1774   if (PASS_CODE_FOUND(pp, pn))
1775     pn->reduction->pass_code[pp->index](pn, (void **)&pn->children.v[0], pn->children.n,
1776                                         (intptr_t) & ((PNode *)(NULL))->parse_node, (D_Parser *)p);
1777 }
1778 
pass_preorder(Parser * p,D_Pass * pp,PNode * pn)1779 static void pass_preorder(Parser *p, D_Pass *pp, PNode *pn) {
1780   uint found = PASS_CODE_FOUND(pp, pn), i;
1781   pass_call(p, pp, pn);
1782   if ((pp->kind & D_PASS_FOR_ALL) || ((pp->kind & D_PASS_FOR_UNDEFINED) && !found))
1783     for (i = 0; i < pn->children.n; i++) pass_preorder(p, pp, pn->children.v[i]);
1784 }
1785 
pass_postorder(Parser * p,D_Pass * pp,PNode * pn)1786 static void pass_postorder(Parser *p, D_Pass *pp, PNode *pn) {
1787   uint found = PASS_CODE_FOUND(pp, pn), i;
1788   if ((pp->kind & D_PASS_FOR_ALL) || ((pp->kind & D_PASS_FOR_UNDEFINED) && !found))
1789     for (i = 0; i < pn->children.n; i++) pass_postorder(p, pp, pn->children.v[i]);
1790   pass_call(p, pp, pn);
1791 }
1792 
d_pass(D_Parser * ap,D_ParseNode * apn,int pass_number)1793 void d_pass(D_Parser *ap, D_ParseNode *apn, int pass_number) {
1794   PNode *pn = D_ParseNode_to_PNode(apn);
1795   Parser *p = (Parser *)ap;
1796   D_Pass *pp;
1797 
1798   if (pass_number >= (int)p->t->npasses) d_fail("bad pass number: %d\n", pass_number);
1799   pp = &p->t->passes[pass_number];
1800   if (pp->kind & D_PASS_MANUAL)
1801     pass_call(p, pp, pn);
1802   else if (pp->kind & D_PASS_PRE_ORDER)
1803     pass_preorder(p, pp, pn);
1804   else if (pp->kind & D_PASS_POST_ORDER)
1805     pass_postorder(p, pp, pn);
1806 }
1807 
exhaustive_parse(Parser * p,int state)1808 static int exhaustive_parse(Parser *p, int state) {
1809   Reduction *r;
1810   char *pos, *epos = NULL;
1811   PNode *pn, tpn;
1812   SNode *sn;
1813   ZNode *z;
1814   int progress = 0, ready = 0;
1815   d_loc_t loc;
1816 
1817   pos = p->user.loc.ws = p->user.loc.s = p->start;
1818   loc = p->user.loc;
1819   p->user.initial_white_space_fn((D_Parser *)p, &loc, &p->user.initial_globals);
1820   /* initial state */
1821   sn = add_SNode(p, &p->t->state[state], &loc, p->top_scope, p->user.initial_globals);
1822   memset(&tpn, 0, sizeof(tpn));
1823   tpn.parse_node.white_space = p->user.initial_white_space_fn;
1824   tpn.parse_node.globals = p->user.initial_globals;
1825   tpn.initial_scope = tpn.parse_node.scope = p->top_scope;
1826   tpn.parse_node.end = loc.s;
1827   if (sn->last_pn) unref_pn(p, sn->last_pn);
1828   pn = add_PNode(p, 0, &loc, loc.s, &tpn, 0, 0, 0);
1829   ref_pn(pn);
1830   sn->last_pn = pn;
1831   set_add_znode(&sn->zns, (z = new_ZNode(p, pn)));
1832   while (1) {
1833     /* reduce all */
1834     while (p->reductions_todo) {
1835       pos = p->reductions_todo->snode->loc.s;
1836       if (p->shifts_todo && p->shifts_todo->snode->loc.s < pos) break;
1837       if (pos > epos) {
1838         epos = pos;
1839         free_old_nodes(p);
1840       }
1841       for (; (r = p->reductions_todo) && r->snode->loc.s == pos;) {
1842         p->reductions_todo = p->reductions_todo->next;
1843         reduce_one(p, r);
1844       }
1845     }
1846     /* done? */
1847     if (!p->shifts_todo) {
1848       if (p->accept && (p->accept->loc.s == p->end || p->user.partial_parses))
1849         return 0;
1850       else {
1851         if (error_recovery(p)) return 1;
1852         continue;
1853       }
1854     } else if (!p->user.dont_compare_stacks && p->shifts_todo->next)
1855       cmp_stacks(p);
1856     /* shift all */
1857     pos = p->shifts_todo->snode->loc.s;
1858     if (pos > epos) {
1859       epos = pos;
1860       free_old_nodes(p);
1861     }
1862     progress++;
1863     ready = progress > p->user.commit_actions_interval;
1864     if (ready && !p->shifts_todo->next && !p->reductions_todo) {
1865       commit_stack(p, p->shifts_todo->snode);
1866       ready = progress = 0;
1867     }
1868     shift_all(p, pos);
1869     if (ready && p->reductions_todo && !p->reductions_todo->next) {
1870       commit_stack(p, p->reductions_todo->snode);
1871       progress = 0;
1872     }
1873   }
1874 }
1875 
1876 /* doesn't include nl */
1877 char _wspace[256] = {
1878     0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0,
1879     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0 /* zero padded */
1880 };
1881 
1882 #define wspace(_x) (_wspace[(unsigned char)_x])
1883 
white_space(D_Parser * p,d_loc_t * loc,void ** p_user_globals)1884 static void white_space(D_Parser *p, d_loc_t *loc, void **p_user_globals) {
1885   uint rec = 0;
1886   char *s = loc->s, *scol = 0;
1887   (void)p;
1888   (void)p_user_globals;
1889 
1890   if (*s == '#' && loc->col == 0) {
1891   Ldirective : {
1892     char *save = s;
1893     s++;
1894     while (wspace(*s)) s++;
1895     if (!strncmp("line", s, 4)) {
1896       if (wspace(s[4])) {
1897         s += 5;
1898         while (wspace(*s)) s++;
1899       }
1900     }
1901     if (isdigit_(*s)) {
1902       loc->line = atoi(s) - 1;
1903       while (isdigit_(*s)) s++;
1904       while (wspace(*s)) s++;
1905       if (*s == '"') loc->pathname = s;
1906     } else {
1907       s = save;
1908       goto Ldone;
1909     }
1910   }
1911     while (*s && *s != '\n') s++;
1912   }
1913 Lmore:
1914   while (wspace(*s)) s++;
1915   if (*s == '\n') {
1916     loc->line++;
1917     scol = s + 1;
1918     s++;
1919     if (*s == '#')
1920       goto Ldirective;
1921     else
1922       goto Lmore;
1923   }
1924   if (s[0] == '/') {
1925     if (s[1] == '/') {
1926       while (*s && *s != '\n') {
1927         s++;
1928       }
1929       goto Lmore;
1930     }
1931     if (s[1] == '*') {
1932       s += 2;
1933     LnestComment:
1934       rec++;
1935     LmoreComment:
1936       while (*s) {
1937         if (s[0] == '*' && s[1] == '/') {
1938           s += 2;
1939           rec--;
1940           if (!rec) goto Lmore;
1941           goto LmoreComment;
1942         }
1943         if (s[0] == '/' && s[1] == '*') {
1944           s += 2;
1945           goto LnestComment;
1946         }
1947         if (*s == '\n') {
1948           loc->line++;
1949           scol = s + 1;
1950         }
1951         s++;
1952       }
1953     }
1954   }
1955 Ldone:
1956   if (scol)
1957     loc->col = s - scol;
1958   else
1959     loc->col += s - loc->s;
1960   loc->s = s;
1961   return;
1962 }
1963 
null_white_space(D_Parser * p,d_loc_t * loc,void ** p_globals)1964 void null_white_space(D_Parser *p, d_loc_t *loc, void **p_globals) {
1965   (void)p;
1966   (void)loc;
1967   (void)p_globals;
1968 }
1969 
new_D_Parser(D_ParserTables * t,int sizeof_ParseNode_User)1970 D_Parser *new_D_Parser(D_ParserTables *t, int sizeof_ParseNode_User) {
1971   Parser *p = MALLOC(sizeof(Parser));
1972   memset(p, 0, sizeof(Parser));
1973   p->t = t;
1974   p->user.loc.line = 1;
1975   p->user.sizeof_user_parse_node = sizeof_ParseNode_User;
1976   p->user.commit_actions_interval = DEFAULT_COMMIT_ACTIONS_INTERVAL;
1977   p->user.syntax_error_fn = syntax_error_report_fn;
1978   p->user.ambiguity_fn = ambiguity_abort_fn;
1979   p->user.error_recovery = 1;
1980   p->user.save_parse_tree = t->save_parse_tree;
1981   if (p->t->default_white_space)
1982     p->user.initial_white_space_fn = p->t->default_white_space;
1983   else if (p->t->whitespace_state)
1984     p->user.initial_white_space_fn = parse_whitespace;
1985   else
1986     p->user.initial_white_space_fn = white_space;
1987   return (D_Parser *)p;
1988 }
1989 
free_D_Parser(D_Parser * ap)1990 void free_D_Parser(D_Parser *ap) {
1991   Parser *p = (Parser *)ap;
1992   if (p->top_scope && !p->user.initial_scope) free_D_Scope(p->top_scope, 0);
1993   if (p->whitespace_parser) free_D_Parser((D_Parser *)p->whitespace_parser);
1994   FREE(ap);
1995 }
1996 
free_D_ParseNode(D_Parser * p,D_ParseNode * dpn)1997 void free_D_ParseNode(D_Parser *p, D_ParseNode *dpn) {
1998   if (dpn != NO_DPN) {
1999     unref_pn((Parser *)p, DPN_TO_PN(dpn));
2000     free_parser_working_data((Parser *)p);
2001   }
2002 #ifdef TRACK_PNODES
2003   if (((Parser *)p)->xall) printf("tracked pnodes\n");
2004 #endif
2005 }
2006 
copy_user_configurables(Parser * pp,Parser * p)2007 static void copy_user_configurables(Parser *pp, Parser *p) {
2008   memcpy(((char *)&pp->user.start_state) + sizeof(pp->user.start_state),
2009          ((char *)&p->user.start_state) + sizeof(p->user.start_state),
2010          ((char *)&pp->user.syntax_errors - (char *)&pp->user.start_state));
2011 }
2012 
new_subparser(Parser * p)2013 Parser *new_subparser(Parser *p) {
2014   Parser *pp = (Parser *)new_D_Parser(p->t, p->user.sizeof_user_parse_node);
2015   copy_user_configurables(pp, p);
2016   pp->end = p->end;
2017   pp->pinterface1 = p->pinterface1;
2018   alloc_parser_working_data(pp);
2019   return pp;
2020 }
2021 
initialize_whitespace_parser(Parser * p)2022 static void initialize_whitespace_parser(Parser *p) {
2023   if (p->t->whitespace_state) {
2024     p->whitespace_parser = new_subparser(p);
2025     p->whitespace_parser->user.initial_white_space_fn = null_white_space;
2026     p->whitespace_parser->user.error_recovery = 0;
2027     p->whitespace_parser->user.partial_parses = 1;
2028     p->whitespace_parser->user.free_node_fn = p->user.free_node_fn;
2029   }
2030 }
2031 
free_whitespace_parser(Parser * p)2032 static void free_whitespace_parser(Parser *p) {
2033   if (p->whitespace_parser) {
2034     free_D_Parser((D_Parser *)p->whitespace_parser);
2035     p->whitespace_parser = 0;
2036   }
2037 }
2038 
handle_top_level_ambiguities(Parser * p,SNode * sn)2039 static PNode *handle_top_level_ambiguities(Parser *p, SNode *sn) {
2040   uint i;
2041   ZNode *z = 0;
2042   PNode *pn = NULL, *last = NULL, *x;
2043   for (i = 0; i < sn->zns.n; i++) {
2044     if (sn->zns.v[i]) {
2045       x = sn->zns.v[i]->pn;
2046       LATEST(p, x);
2047       if (!pn) {
2048         z = sn->zns.v[i];
2049         pn = x;
2050       } else {
2051         if (x != pn && !x->ambiguities && x != last) {
2052           x->ambiguities = pn->ambiguities;
2053           ref_pn(x);
2054           pn->ambiguities = x;
2055           if (!last) last = x;
2056         }
2057         free_ZNode(p, sn->zns.v[i], sn);
2058       }
2059     }
2060   }
2061   sn->zns.v[0] = z;
2062   sn->zns.n = 1;
2063   sn->zns.i = 0;
2064   return pn;
2065 }
2066 
dparse(D_Parser * ap,char * buf,int buf_len)2067 D_ParseNode *dparse(D_Parser *ap, char *buf, int buf_len) {
2068   uint r;
2069   Parser *p = (Parser *)ap;
2070   SNode *sn;
2071   PNode *pn;
2072   D_ParseNode *res = NULL;
2073 
2074   p->states = p->scans = p->shifts = p->reductions = p->compares = 0;
2075   p->start = buf;
2076   p->end = buf + buf_len;
2077 
2078   initialize_whitespace_parser(p);
2079   alloc_parser_working_data(p);
2080   if (p->user.initial_scope)
2081     p->top_scope = p->user.initial_scope;
2082   else {
2083     if (p->top_scope) free_D_Scope(p->top_scope, 0);
2084     p->top_scope = new_D_Scope(NULL);
2085     p->top_scope->kind = D_SCOPE_SEQUENTIAL;
2086   }
2087   r = exhaustive_parse(p, p->user.start_state);
2088   if (!r) {
2089     sn = p->accept;
2090     if (sn->zns.n != 1)
2091       pn = handle_top_level_ambiguities(p, sn);
2092     else
2093       pn = sn->zns.v[0]->pn;
2094     pn = commit_tree(p, pn);
2095     if (d_verbose_level) {
2096       printf(
2097           "%d states %d scans %d shifts %d reductions "
2098           "%d compares %d ambiguities\n",
2099           p->states, p->scans, p->shifts, p->reductions, p->compares, p->ambiguities);
2100       if (p->user.save_parse_tree) {
2101         if (d_verbose_level > 1)
2102           xprint_paren(p, pn);
2103         else
2104           print_paren(p, pn);
2105         printf("\n");
2106       }
2107     }
2108     if (p->user.save_parse_tree) {
2109       ref_pn(pn);
2110       res = &pn->parse_node;
2111     } else
2112       res = NO_DPN;
2113     unref_sn(p, p->accept);
2114     p->accept = NULL;
2115   } else
2116     p->accept = NULL;
2117   free_parser_working_data(p);
2118   free_whitespace_parser(p);
2119   return res;
2120 }
2121