1 /*
2  * This file is part of DGD, https://github.com/dworkin/dgd
3  * Copyright (C) 1993-2010 Dworkin B.V.
4  * Copyright (C) 2010,2012 DGD Authors (see the commit log for details)
5  *
6  * This program is free software: you can redistribute it and/or modify
7  * it under the terms of the GNU Affero General Public License as
8  * published by the Free Software Foundation, either version 3 of the
9  * License, or (at your option) any later version.
10  *
11  * This program is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14  * GNU Affero General Public License for more details.
15  *
16  * You should have received a copy of the GNU Affero General Public License
17  * along with this program.  If not, see <http://www.gnu.org/licenses/>.
18  */
19 
20 # include "dgd.h"
21 # include "srp.h"
22 
23 typedef struct _item_ {
24     char *ref;			/* pointer to rule in grammar */
25     unsigned short ruleno;	/* rule number */
26     unsigned short offset;	/* offset in rule */
27     struct _item_ *next;	/* next in linked list */
28 } item;
29 
30 # define ITCHUNKSZ	32
31 
32 typedef struct _itchunk_ {
33     int chunksz;		/* size of this chunk */
34     item *flist;		/* list of free items */
35     struct _itchunk_ *next;	/* next in linked list */
36     item it[ITCHUNKSZ];		/* chunk of items */
37 } itchunk;
38 
39 /*
40  * NAME:	item->new()
41  * DESCRIPTION:	create a new item
42  */
it_new(itchunk ** c,char * ref,unsigned short ruleno,unsigned short offset,item * next)43 static item *it_new(itchunk **c, char *ref, unsigned short ruleno, unsigned short offset, item *next)
44 {
45     item *it;
46 
47     if (*c == (itchunk *) NULL ||
48 	((*c)->flist == (item *) NULL && (*c)->chunksz == ITCHUNKSZ)) {
49 	itchunk *x;
50 
51 	x = ALLOC(itchunk, 1);
52 	x->flist = (*c != (itchunk *) NULL) ? (*c)->flist : (item *) NULL;
53 	x->next = *c;
54 	*c = x;
55 	x->chunksz = 0;
56     }
57     if ((*c)->flist != (item *) NULL) {
58 	it = (*c)->flist;
59 	(*c)->flist = it->next;
60     } else {
61 	it = &(*c)->it[(*c)->chunksz++];
62     }
63 
64     it->ref = ref;
65     it->ruleno = ruleno;
66     it->offset = offset;
67     it->next = next;
68 
69     return it;
70 }
71 
72 /*
73  * NAME:	item->del()
74  * DESCRIPTION:	delete an item
75  */
it_del(itchunk * c,item * it)76 static item *it_del(itchunk *c, item *it)
77 {
78     item *next;
79 
80     next = it->next;
81     it->next = c->flist;
82     c->flist = it;
83     return next;
84 }
85 
86 /*
87  * NAME:	item->clear()
88  * DESCRIPTION:	free all items in memory
89  */
it_clear(itchunk * c)90 static void it_clear(itchunk *c)
91 {
92     itchunk *f;
93 
94     while (c != (itchunk *) NULL) {
95 	f = c;
96 	c = c->next;
97 	FREE(f);
98     }
99 }
100 
101 /*
102  * NAME:	item->add()
103  * DESCRIPTION:	add an item to a set
104  */
it_add(itchunk ** c,item ** ri,char * ref,unsigned short ruleno,unsigned short offset,bool sort)105 static void it_add(itchunk **c, item **ri, char *ref, unsigned short ruleno, unsigned short offset, bool sort)
106 {
107     /*
108      * add item to set
109      */
110     if (offset == UCHAR(ref[0]) << 1) {
111 	offset = UCHAR(ref[1]);	/* skip possible function at the end */
112     }
113 
114     if (sort) {
115 	while (*ri != (item *) NULL &&
116 	       ((*ri)->ref < ref ||
117 		((*ri)->ref == ref && (*ri)->offset <= offset))) {
118 	    if ((*ri)->ref == ref && (*ri)->offset == offset) {
119 		return;	/* already in set */
120 	    }
121 	    ri = &(*ri)->next;
122 	}
123     } else {
124 	while (*ri != (item *) NULL) {
125 	    if ((*ri)->ref == ref && (*ri)->offset == offset) {
126 		return;	/* already in set */
127 	    }
128 	    ri = &(*ri)->next;
129 	}
130     }
131 
132     *ri = it_new(c, ref, ruleno, offset, *ri);
133 }
134 
135 /*
136  * NAME:	item->load()
137  * DESCRIPTION:	load an item
138  */
it_load(itchunk ** c,unsigned short n,char ** buf,char * grammar)139 static item *it_load(itchunk **c, unsigned short n, char **buf, char *grammar)
140 {
141     char *p;
142     item **ri;
143     item *it;
144     char *ref;
145     unsigned short ruleno;
146 
147     ri = &it;
148     p = *buf;
149     do {
150 	ref = grammar + (UCHAR(p[0]) << 8) + UCHAR(p[1]);
151 	p += 2;
152 	ruleno = (UCHAR(p[0]) << 8) + UCHAR(p[1]);
153 	p += 2;
154 	*ri = it_new(c, ref, ruleno, UCHAR(*p++), (item *) NULL);
155 	ri = &(*ri)->next;
156     } while (--n != 0);
157     *buf = p;
158 
159     return it;
160 }
161 
162 /*
163  * NAME:	item->save()
164  * DESCRIPTION:	save an item
165  */
it_save(item * it,char * buf,char * grammar)166 static char *it_save(item *it, char *buf, char *grammar)
167 {
168     int offset;
169 
170     while (it != (item *) NULL) {
171 	offset = it->ref - grammar;
172 	*buf++ = offset >> 8;
173 	*buf++ = offset;
174 	*buf++ = it->ruleno >> 8;
175 	*buf++ = it->ruleno;
176 	*buf++ = it->offset;
177 	it = it->next;
178     }
179 
180     return buf;
181 }
182 
183 
184 typedef struct {
185     item *items;		/* rules and offsets */
186     union {
187 	char e[4];		/* 1 */
188 	char *a;		/* > 1 */
189     } reds;			/* reductions */
190     unsigned short nitem;	/* # items */
191     short nred;			/* # reductions, -1 if unexpanded */
192     Int shoffset;		/* offset for shifts */
193     Int gtoffset;		/* offset for gotos */
194     unsigned short shcheck;	/* shift offset check */
195     unsigned short next;	/* next in linked list */
196     bool alloc;			/* reductions allocated? */
197 } srpstate;
198 
199 # define REDA(state)   (((state)->nred == 1) ? \
200 			(state)->reds.e : (state)->reds.a)
201 # define UNEXPANDED	-1
202 # define NOSHIFT	((Int) 0xff800000L)
203 
204 /*
205  * NAME:	srpstate->hash()
206  * DESCRIPTION:	put a new state in the hash table, or return an old one
207  */
ss_hash(unsigned short * htab,Uint htabsize,srpstate * states,unsigned short idx)208 static unsigned short ss_hash(unsigned short *htab, Uint htabsize, srpstate *states, unsigned short idx)
209 {
210     Uint h;
211     srpstate *newstate;
212     item *it, *it2;
213     srpstate *s;
214     unsigned short *sr;
215 
216     /* hash on items */
217     newstate = &states[idx];
218     h = 0;
219     for (it = newstate->items; it != (item *) NULL; it = it->next) {
220 	h ^= (uintptr_t) it->ref;
221 	h = (h >> 3) ^ (h << 29) ^ it->offset;
222     }
223 
224     /* check state hash table */
225     sr = &htab[(Uint) h % htabsize];
226     s = &states[*sr];
227     while (s != states) {
228 	it = newstate->items;
229 	it2 = s->items;
230 	while (it != (item *) NULL && it2 != (item *) NULL &&
231 	       it->ref == it2->ref && it->offset == it2->offset) {
232 	    it = it->next;
233 	    it2 = it2->next;
234 	}
235 	if (it == it2) {
236 	    return *sr;	/* state already exists */
237 	}
238 	sr = &s->next;
239 	s = &states[*sr];
240     }
241 
242     newstate->next = *sr;
243     return *sr = idx;
244 }
245 
246 /*
247  * NAME:	srpstate->load()
248  * DESCRIPTION:	load a srpstate
249  */
ss_load(char * buf,char ** rbuf,srpstate * state)250 static char *ss_load(char *buf, char **rbuf, srpstate *state)
251 {
252     state->items = (item *) NULL;
253     state->nitem = (UCHAR(buf[0]) << 8) + UCHAR(buf[1]);
254     buf += 2;
255     state->nred = (UCHAR(buf[0]) << 8) + UCHAR(buf[1]);
256     buf += 2;
257     state->shoffset = ((Int) SCHAR(buf[0]) << 16) + (UCHAR(buf[1]) << 8) +
258 		      UCHAR(buf[2]);
259     buf += 3;
260     state->gtoffset = ((Int) SCHAR(buf[0]) << 16) + (UCHAR(buf[1]) << 8) +
261 		      UCHAR(buf[2]);
262     buf += 3;
263     state->shcheck = (UCHAR(buf[0]) << 8) + UCHAR(buf[1]);
264     buf += 2;
265 
266     if (state->nred > 0) {
267 	if (state->nred == 1) {
268 	    memcpy(state->reds.e, *rbuf, 4);
269 	} else {
270 	    state->reds.a = *rbuf;
271 	}
272 	*rbuf += 4 * state->nred;
273     }
274     state->alloc = FALSE;
275 
276     return buf;
277 }
278 
279 /*
280  * NAME:	srpstate->save()
281  * DESCRIPTION:	save a srpstate
282  */
ss_save(srpstate * state,char * buf,char ** rbuf)283 static char *ss_save(srpstate *state, char *buf, char **rbuf)
284 {
285     *buf++ = state->nitem >> 8;
286     *buf++ = state->nitem;
287     *buf++ = state->nred >> 8;
288     *buf++ = state->nred;
289     *buf++ = state->shoffset >> 16;
290     *buf++ = state->shoffset >> 8;
291     *buf++ = state->shoffset;
292     *buf++ = state->gtoffset >> 16;
293     *buf++ = state->gtoffset >> 8;
294     *buf++ = state->gtoffset;
295     *buf++ = state->shcheck >> 8;
296     *buf++ = state->shcheck;
297 
298     if (state->nred > 0) {
299 	memcpy(*rbuf, REDA(state), state->nred * 4);
300 	*rbuf += state->nred * 4;
301     }
302 
303     return buf;
304 }
305 
306 
307 typedef struct _shlink_ {
308     Int shifts;			/* offset in shift table */
309     struct _shlink_ *next;	/* next in linked list */
310 } shlink;
311 
312 # define SLCHUNKSZ	64
313 
314 typedef struct _slchunk_ {
315     int chunksz;		/* size of chunk */
316     struct _slchunk_ *next;	/* next in linked list */
317     shlink sl[SLCHUNKSZ];	/* shlinks */
318 } slchunk;
319 
320 /*
321  * NAME:	shlink->hash()
322  * DESCRIPTION:	put a new shlink in the hash table, or return an old one
323  */
sl_hash(shlink ** htab,Uint htabsize,slchunk ** c,char * shtab,char * shifts,Uint n)324 static shlink *sl_hash(shlink **htab, Uint htabsize, slchunk **c, char *shtab, char *shifts, Uint n)
325 {
326     unsigned long h;
327     Uint i;
328     shlink **ssl, *sl;
329 
330     /* search in hash table */
331     shifts += 5;
332     n -= 5;
333     h = 0;
334     for (i = n; i > 0; --i) {
335 	h = (h >> 3) ^ (h << 29) ^ UCHAR(*shifts++);
336     }
337     shifts -= n;
338     ssl = &htab[h % htabsize];
339     while (*ssl != (shlink *) NULL) {
340 	if (memcmp(shtab + (*ssl)->shifts + 5, shifts, n) == 0) {
341 	    /* seen this one before */
342 	    return *ssl;
343 	}
344 	ssl = &(*ssl)->next;
345     }
346 
347     if (*c == (slchunk *) NULL || (*c)->chunksz == SLCHUNKSZ) {
348 	slchunk *x;
349 
350 	x = ALLOC(slchunk, 1);
351 	x->next = *c;
352 	*c = x;
353 	x->chunksz = 0;
354     }
355     sl = &(*c)->sl[(*c)->chunksz++];
356     sl->next = *ssl;
357     *ssl = sl;
358     sl->shifts = NOSHIFT;
359 
360     return sl;
361 }
362 
363 /*
364  * NAME:	shlink->clear()
365  * DESCRIPTION:	clean up shlinks
366  */
sl_clear(slchunk * c)367 static void sl_clear(slchunk *c)
368 {
369     slchunk *f;
370 
371     while (c != (slchunk *) NULL) {
372 	f = c;
373 	c = c->next;
374 	FREE(f);
375     }
376 }
377 
378 
379 struct _srp_ {
380     char *grammar;		/* grammar */
381     unsigned short nsstring;	/* # of source grammar strings */
382     unsigned short ntoken;	/* # of tokens (regexp & string) */
383     unsigned short nprod;	/* # of nonterminals */
384 
385     Uint nred;			/* # of reductions */
386     Uint nitem;			/* # of items */
387     Uint srpsize;		/* size of shift/reduce parser */
388     Uint tmpsize;		/* size of temporary data */
389     bool modified;		/* srp needs saving */
390     bool allocated;		/* srp allocated */
391     char *srpstr;		/* srp string */
392     char *tmpstr;		/* tmp string */
393 
394     unsigned short nstates;	/* number of states */
395     unsigned short nexpanded;	/* number of expanded states */
396     Uint sttsize;		/* state table size */
397     Uint sthsize;		/* state hash table size */
398     srpstate *states;		/* state array */
399     unsigned short *sthtab;	/* state hash table */
400 
401     itchunk *itc;		/* item chunk */
402 
403     Uint gap;			/* first gap in packed mapping */
404     Uint spread;		/* max spread in packed mapping */
405     Uint mapsize;		/* packed mapping size */
406     char *data;			/* packed shifts */
407     char *check;		/* packed check for shift validity */
408     bool alloc;			/* data and check allocated separately? */
409 
410     slchunk *slc;		/* shlink chunk */
411     Uint nshift;		/* number of shifts (from/to pairs) */
412     Uint shtsize;		/* shift table size */
413     Uint shhsize;		/* shift hash table size */
414     char *shtab;		/* shift (from/to) table */
415     shlink **shhtab;		/* shift hash table */
416 };
417 
418 # define SRP_VERSION	1
419 
420 /*
421  * NAME:	srp->new()
422  * DESCRIPTION:	create new shift/reduce parser
423  */
srp_new(char * grammar)424 srp *srp_new(char *grammar)
425 {
426     srp *lr;
427     char *p;
428     Uint nrule;
429 
430     lr = ALLOC(srp, 1);
431 
432     /* grammar info */
433     lr->grammar = grammar;
434     lr->nsstring = (UCHAR(grammar[9]) << 8) + UCHAR(grammar[10]);
435     lr->ntoken = lr->nsstring +
436 		 ((UCHAR(grammar[5]) + UCHAR(grammar[11])) << 8) +
437 		 UCHAR(grammar[6]) + UCHAR(grammar[12]);
438     lr->nprod = (UCHAR(grammar[13]) << 8) + UCHAR(grammar[14]);
439     nrule = (UCHAR(grammar[15]) << 8) + UCHAR(grammar[16]);
440 
441     /* sizes */
442     lr->srpstr = (char *) NULL;
443     lr->tmpstr = (char *) NULL;
444     lr->nred = lr->nitem = 0;
445     lr->srpsize = 14 + 12 + 4;	/* srp header + 1 state + data/check overhead */
446     lr->tmpsize = 7 + 5;	/* tmp header + 1 item */
447     lr->modified = TRUE;
448     lr->allocated = FALSE;
449 
450     /* states */
451     lr->nstates = 1;
452     lr->nexpanded = 0;
453     lr->sttsize = nrule << 1;
454     lr->sthsize = nrule << 2;
455     lr->states = ALLOC(srpstate, lr->sttsize);
456     lr->sthtab = ALLOC(unsigned short, lr->sthsize);
457     memset(lr->sthtab, '\0', lr->sthsize * sizeof(unsigned short));
458     lr->itc = (itchunk *) NULL;
459 
460     /* state 0 */
461     p = grammar + 17 + ((lr->ntoken + lr->nsstring) << 1);
462     p = grammar + (UCHAR(p[0]) << 8) + UCHAR(p[1]);
463     lr->states[0].items = it_new(&lr->itc, p + 2, lr->ntoken, 0, (item *) NULL);
464     lr->states[0].nitem = 1;
465     lr->states[0].nred = UNEXPANDED;
466     lr->states[0].shoffset = NOSHIFT;
467     lr->states[0].gtoffset = 0;
468     lr->states[0].shcheck = 0;
469     lr->states[0].alloc = FALSE;
470 
471     /* packed mapping for shift */
472     lr->gap = lr->spread = 0;
473     lr->mapsize = (Uint) (lr->ntoken + lr->nprod) << 2;
474     lr->data = ALLOC(char, lr->mapsize);
475     memset(lr->data, '\0', lr->mapsize);
476     lr->check = ALLOC(char, lr->mapsize);
477     memset(lr->check, '\xff', lr->mapsize);
478     lr->alloc = TRUE;
479 
480     /* shift hash table */
481     lr->slc = (slchunk *) NULL;
482     lr->nshift = 0;
483     lr->shtsize = lr->mapsize;
484     lr->shhsize = nrule << 2;
485     lr->shtab = ALLOC(char, lr->shtsize);
486     lr->shhtab = ALLOC(shlink*, lr->shhsize);
487     memset(lr->shhtab, '\0', lr->shhsize * sizeof(shlink*));
488 
489     return lr;
490 }
491 
492 /*
493  * NAME:	srp->del()
494  * DESCRIPTION:	delete shift/reduce parser
495  */
srp_del(srp * lr)496 void srp_del(srp *lr)
497 {
498     unsigned short i;
499     srpstate *state;
500 
501     if (lr->allocated) {
502 	FREE(lr->srpstr);
503     }
504     it_clear(lr->itc);
505     if (lr->sthtab != (unsigned short *) NULL) {
506 	FREE(lr->sthtab);
507     }
508     for (i = lr->nstates, state = lr->states; i > 0; --i, state++) {
509 	if (state->alloc) {
510 	    FREE(state->reds.a);
511 	}
512     }
513     FREE(lr->states);
514     if (lr->alloc) {
515 	FREE(lr->data);
516 	FREE(lr->check);
517     }
518     sl_clear(lr->slc);
519     if (lr->shtab != (char *) NULL) {
520 	FREE(lr->shtab);
521     }
522     if (lr->shhtab != (shlink **) NULL) {
523 	FREE(lr->shhtab);
524     }
525     FREE(lr);
526 }
527 
528 /*
529  * format of SRP permanent data:
530  *
531  * header	[1]		version number
532  *		[x][y]		# states
533  *		[x][y]		# expanded states
534  *		[x][y][z]	# reductions
535  *		[x][y][z]	initial gap in packed mapping
536  *		[x][y][z]	max spread of packed mapping
537  *
538  * state	[x][y]		# items				}
539  *		[x][y]		# reductions			}
540  *		[x][y][z]	shift offset in packed mapping	} ...
541  *		[x][y][z]	goto offset in packed mapping	}
542  *		[x][y]		shift check			}
543  *
544  * reduction	[x][y]		rule offset in grammar		} ...
545  *		[x][y]		rule number			}
546  *
547  * mapping	[...]		data table (spread + 2)
548  *		[...]		check table (spread + 2)
549  *
550  *
551  * format of SRP temporary data:
552  *
553  * header	[0]		version number
554  *		[x][y][z]	# items
555  *		[x][y][z]	shift table size
556  *
557  * item		[x][y]		rule offset in grammar		}
558  *		[x][y]		rule number			} ...
559  *		[x]		offset in rule			}
560  *
561  * shift	[...]		shift table
562  */
563 
564 /*
565  * NAME:	srp->load()
566  * DESCRIPTION:	load a shift/reduce parser from string
567  */
srp_load(char * grammar,char * str,Uint len)568 srp *srp_load(char *grammar, char *str, Uint len)
569 {
570     srp *lr;
571     char *buf;
572     Uint i;
573     srpstate *state;
574     char *rbuf;
575 
576     if (UCHAR(str[0]) != SRP_VERSION) {
577 	return srp_new(grammar);
578     }
579 
580     lr = ALLOC(srp, 1);
581 
582     /* grammar info */
583     lr->grammar = grammar;
584     lr->nsstring = (UCHAR(grammar[9]) << 8) + UCHAR(grammar[10]);
585     lr->ntoken = lr->nsstring +
586 		 ((UCHAR(grammar[5]) + UCHAR(grammar[11])) << 8) +
587 		 UCHAR(grammar[6]) + UCHAR(grammar[12]);
588     lr->nprod = (UCHAR(grammar[13]) << 8) + UCHAR(grammar[14]);
589 
590     lr->srpstr = buf = str;
591 
592     /* header */
593     lr->nstates = (UCHAR(buf[1]) << 8) + UCHAR(buf[2]);
594     lr->nexpanded = (UCHAR(buf[3]) << 8) + UCHAR(buf[4]);
595     lr->nred = ((Uint) UCHAR(buf[5]) << 16) + (UCHAR(buf[6]) << 8) +
596 	       UCHAR(buf[7]);
597     lr->gap = ((Uint) UCHAR(buf[8]) << 16) + (UCHAR(buf[9]) << 8) +
598 	      UCHAR(buf[10]);
599     lr->spread = ((Uint) UCHAR(buf[11]) << 16) + (UCHAR(buf[12]) << 8) +
600 		 UCHAR(buf[13]);
601     buf += 14;
602 
603     /* states */
604     lr->sttsize = lr->nstates + 1;
605     lr->sthsize = 0;
606     lr->states = ALLOC(srpstate, lr->sttsize);
607     lr->sthtab = (unsigned short *) NULL;
608     lr->itc = (itchunk *) NULL;
609 
610     /* load states */
611     rbuf = buf + lr->nstates * 12;
612     for (i = lr->nstates, state = lr->states; i > 0; --i, state++) {
613 	buf = ss_load(buf, &rbuf, state);
614     }
615     buf = rbuf;
616 
617     /* load packed mapping */
618     lr->mapsize = lr->spread + 2;
619     lr->data = buf;
620     buf += lr->spread + 2;
621     lr->check = buf;
622     buf += lr->spread + 2;
623     lr->alloc = FALSE;
624 
625     lr->tmpstr = buf;
626 
627     /* sizes */
628     lr->nitem = 0;
629     lr->srpsize = (intptr_t) buf - (intptr_t) str;
630     lr->tmpsize = len - lr->srpsize;
631     lr->modified = lr->allocated = FALSE;
632 
633     /* shift hash table */
634     lr->slc = (slchunk *) NULL;
635     lr->nshift = 0;
636     lr->shtsize = 0;
637     lr->shhsize = 0;
638     lr->shtab = (char *) NULL;
639     lr->shhtab = (shlink **) NULL;
640 
641     return lr;
642 }
643 
644 /*
645  * NAME:	srp->loadtmp()
646  * DESCRIPTION:	load the temporary data for a shift/reduce parser
647  */
srp_loadtmp(srp * lr)648 static void srp_loadtmp(srp *lr)
649 {
650     Uint i, n;
651     srpstate *state;
652     char *p;
653     Uint nrule;
654     char *buf;
655 
656     nrule = (UCHAR(lr->grammar[15]) << 8) + UCHAR(lr->grammar[16]);
657 
658     buf = lr->tmpstr;
659     lr->nitem = ((Uint) UCHAR(buf[1]) << 16) + (UCHAR(buf[2]) << 8) +
660 		UCHAR(buf[3]);
661     lr->nshift = ((Uint) UCHAR(buf[4]) << 16) + (UCHAR(buf[5]) << 8) +
662 		 UCHAR(buf[6]);
663     buf += 7;
664 
665     /* states */
666     lr->sthsize = nrule << 2;
667     lr->sthtab = ALLOC(unsigned short, lr->sthsize);
668     memset(lr->sthtab, '\0', lr->sthsize * sizeof(unsigned short));
669     for (i = 0, state = lr->states; i < lr->nstates; i++, state++) {
670 	if (state->nitem != 0) {
671 	    state->items = it_load(&lr->itc, state->nitem, &buf, lr->grammar);
672 	}
673 	ss_hash(lr->sthtab, lr->sthsize, lr->states, (unsigned short) i);
674     }
675 
676     /* shifts */
677     lr->shtsize = lr->nshift * 2;
678     lr->shhsize = nrule << 2;
679     lr->shtab = ALLOC(char, lr->shtsize);
680     memcpy(lr->shtab, buf, lr->nshift);
681     lr->shhtab = ALLOC(shlink*, lr->shhsize);
682     memset(lr->shhtab, '\0', lr->shhsize * sizeof(shlink*));
683     for (i = 0, p = buf; i != lr->nshift; i += n, p += n) {
684 	n = (Uint) 4 * ((UCHAR(p[5]) << 8) + UCHAR(p[6])) + 7;
685 	sl_hash(lr->shhtab, lr->shhsize, &lr->slc, lr->shtab, p, n)->shifts =
686 						(intptr_t) p - (intptr_t) buf;
687     }
688 }
689 
690 /*
691  * NAME:	srp->save()
692  * DESCRIPTION:	save a shift/reduce parser to string
693  */
srp_save(srp * lr,char ** str,Uint * len)694 bool srp_save(srp *lr, char **str, Uint *len)
695 {
696     char *buf;
697     unsigned short i;
698     srpstate *state;
699     char *rbuf;
700 
701     if (!lr->modified) {
702 	*str = lr->srpstr;
703 	*len = lr->srpsize + lr->tmpsize;
704 	return FALSE;
705     }
706 
707     if (lr->nstates == lr->nexpanded) {
708 	lr->tmpsize = 0;
709     }
710     if (lr->allocated) {
711 	FREE(lr->srpstr);
712     }
713     lr->srpstr = buf = *str = ALLOC(char, *len = lr->srpsize + lr->tmpsize);
714 
715     /* header */
716     *buf++ = SRP_VERSION;
717     *buf++ = lr->nstates >> 8;
718     *buf++ = lr->nstates;
719     *buf++ = lr->nexpanded >> 8;
720     *buf++ = lr->nexpanded;
721     *buf++ = lr->nred >> 16;
722     *buf++ = lr->nred >> 8;
723     *buf++ = lr->nred;
724     *buf++ = lr->gap >> 16;
725     *buf++ = lr->gap >> 8;
726     *buf++ = lr->gap;
727     *buf++ = lr->spread >> 16;
728     *buf++ = lr->spread >> 8;
729     *buf++ = lr->spread;
730 
731     /* save states */
732     rbuf = buf + lr->nstates * 12;
733     for (i = lr->nstates, state = lr->states; i > 0; --i, state++) {
734 	buf = ss_save(state, buf, &rbuf);
735     }
736     buf = rbuf;
737 
738     /* save packed mapping */
739     memcpy(buf, lr->data, lr->spread + 2);
740     buf += lr->spread + 2;
741     memcpy(buf, lr->check, lr->spread + 2);
742     buf += lr->spread + 2;
743 
744     lr->modified = FALSE;
745     lr->allocated = TRUE;
746     if (lr->tmpsize == 0) {
747 	/* no tmp data */
748 	return TRUE;
749     }
750 
751     lr->tmpstr = buf;
752 
753     /* tmp header */
754     *buf++ = 0;
755     *buf++ = lr->nitem >> 16;
756     *buf++ = lr->nitem >> 8;
757     *buf++ = lr->nitem;
758     *buf++ = lr->nshift >> 16;
759     *buf++ = lr->nshift >> 8;
760     *buf++ = lr->nshift;
761 
762     /* save items */
763     for (i = lr->nstates, state = lr->states; i > 0; --i, state++) {
764 	buf = it_save(state->items, buf, lr->grammar);
765     }
766 
767     /* shift data */
768     memcpy(buf, lr->shtab, lr->nshift);
769 
770     return TRUE;
771 }
772 
773 /*
774  * NAME:	srp->pack()
775  * DESCRIPTION:	add a new set of shifts and gotos to the packed mapping
776  */
srp_pack(srp * lr,unsigned short * check,unsigned short * from,unsigned short * to,unsigned short n)777 static Int srp_pack(srp *lr, unsigned short *check, unsigned short *from, unsigned short *to, unsigned short n)
778 {
779     Uint i, j;
780     char *p;
781     char *shifts;
782     shlink *sl;
783     Uint range, *offstab;
784     Int offset;
785 
786     /*
787      * check hash table
788      */
789     shifts = ALLOCA(char, j = (Uint) 4 * n + 7);
790     p = shifts + 5;
791     *p++ = n >> 8;
792     *p++ = n;
793     for (i = 0; i < n; i++) {
794 	*p++ = from[i] >> 8;
795 	*p++ = from[i];
796 	*p++ = to[i] >> 8;
797 	*p++ = to[i];
798     }
799     sl = sl_hash(lr->shhtab, lr->shhsize, &lr->slc, lr->shtab, shifts, j);
800     if (sl->shifts != NOSHIFT) {
801 	/* same as before */
802 	AFREE(shifts);
803 	p = lr->shtab + sl->shifts;
804 	*check = (UCHAR(p[0]) << 8) + UCHAR(p[1]);
805 	return ((Int) SCHAR(p[2]) << 16) + ((UCHAR(p[3]) << 8) + UCHAR(p[4]));
806     }
807 
808     /* not in hash table */
809     if (lr->nshift + j > lr->shtsize) {
810 	/* grow shift table */
811 	i = (lr->nshift + j) * 2;
812 	lr->shtab = REALLOC(lr->shtab, char, lr->shtsize, i);
813 	lr->shtsize = i;
814     }
815     sl->shifts = lr->nshift;
816     lr->nshift += j;
817     lr->tmpsize += j;
818     memcpy(lr->shtab + sl->shifts, shifts, j);
819     AFREE(shifts);
820 
821     /* create offset table */
822     offstab = ALLOCA(Uint, n);
823     for (i = 0; i < n; i++) {
824 	offstab[i] = from[i] * 2;
825     }
826     j = offset = offstab[0];
827     for (i = 1; i < n; i++) {
828 	offstab[i] -= j;
829 	j += offstab[i];
830     }
831     range = j - offset + 2;
832 
833     /*
834      * add from/to pairs to packed mapping
835      */
836     for (i = lr->gap, p = &lr->check[i];
837 	 UCHAR(p[0]) != 0xff || UCHAR(p[1]) != 0xff;
838 	 i += 2, p += 2) ;
839     lr->gap = i;
840 
841 next:
842     if (i + range >= lr->mapsize) {
843 	/* grow tables */
844 	j = (i + range) << 1;
845 	if (lr->alloc) {
846 	    lr->data = REALLOC(lr->data, char, lr->mapsize, j);
847 	    lr->check = REALLOC(lr->check, char, lr->mapsize, j);
848 	} else {
849 	    char *table;
850 
851 	    table = ALLOC(char, j);
852 	    memcpy(table, lr->data, lr->mapsize);
853 	    lr->data = table;
854 	    table = ALLOC(char, j);
855 	    memcpy(table, lr->check, lr->mapsize);
856 	    lr->check = table;
857 	    lr->alloc = TRUE;
858 	}
859 	memset(lr->data + lr->mapsize, '\0', j - lr->mapsize);
860 	memset(lr->check + lr->mapsize, '\xff', j - lr->mapsize);
861 	lr->mapsize = j;
862     }
863 
864     /* match each symbol with free slot */
865     for (j = 1; j < n; j++) {
866 	p += offstab[j];
867 	if (UCHAR(p[0]) != 0xff || UCHAR(p[1]) != 0xff) {
868 	    p = &lr->check[i];
869 	    do {
870 		i += 2;
871 		p += 2;
872 	    } while (UCHAR(p[0]) != 0xff || UCHAR(p[1]) != 0xff);
873 	    goto next;
874 	}
875     }
876     AFREE(offstab);
877 
878     /* free slots found: adjust spread */
879     if (i + range > lr->spread) {
880 	lr->srpsize += 2 * (i + range - lr->spread);
881 	lr->spread = i + range;
882     }
883 
884     /* add to packed mapping */
885     offset = i - offset;
886     do {
887 	j = from[--n] * 2 + offset;
888 	p = &lr->data[j];
889 	*p++ = to[n] >> 8;
890 	*p = to[n];
891 	p = &lr->check[j];
892 	*p++ = *check >> 8;
893 	*p = *check;
894     } while (n != 0);
895 
896     p = lr->shtab + sl->shifts;
897     offset /= 2;
898     *p++ = *check >> 8;
899     *p++ = *check;
900     *p++ = offset >> 16;
901     *p++ = offset >> 8;
902     *p = offset;
903     return offset;
904 }
905 
906 /*
907  * NAME:	cmp()
908  * DESCRIPTION:	compare two unsigned shorts
909  */
cmp(cvoid * sh1,cvoid * sh2)910 static int cmp(cvoid *sh1, cvoid *sh2)
911 {
912     return (*(unsigned short *) sh1 < *(unsigned short *) sh2) ?
913 	    -1 : (*(unsigned short *) sh1 == *(unsigned short *) sh2) ? 0 : 1;
914 }
915 
916 /*
917  * NAME:	srp->expand()
918  * DESCRIPTION:	expand a state
919  */
srp_expand(srp * lr,srpstate * state)920 static srpstate *srp_expand(srp *lr, srpstate *state)
921 {
922     unsigned short i, n;
923     char *p;
924     item *it;
925     item **itemtab, *next;
926     unsigned short *tokens, *symbols, *targets;
927     srpstate *newstate;
928     unsigned short nred, nshift, ngoto;
929 
930     lr->modified = TRUE;
931     if (state - lr->states == 1) {
932 	/* final state */
933 	state->nred = 0;
934 	lr->nexpanded++;
935 	return state;
936     }
937 
938     if (lr->sthtab == (unsigned short *) NULL) {
939 	srp_loadtmp(lr);	/* load tmp info */
940     }
941 
942     n = lr->ntoken + lr->nprod;
943     itemtab = ALLOCA(item*, n);
944     memset(itemtab, '\0', n * sizeof(item*));
945     symbols = ALLOCA(unsigned short, n);
946     targets = ALLOCA(unsigned short, n);
947     tokens = ALLOCA(unsigned short, lr->ntoken);
948     nred = nshift = ngoto = 0;
949 
950     /*
951      * compute closure of kernel item set
952      */
953     for (it = state->items; it != (item *) NULL; it = it->next) {
954 	i = it->offset;
955 	p = it->ref + 1;
956 	if (i == UCHAR(*p++)) {
957 	    /* end of production */
958 	    nred++;
959 	} else {
960 	    p += i;
961 	    n = (UCHAR(p[0]) << 8) + UCHAR(p[1]);
962 	    if (n >= lr->ntoken) {
963 		p = lr->grammar + 17 + ((n + lr->nsstring) << 1);
964 		p = lr->grammar + (UCHAR(p[0]) << 8) + UCHAR(p[1]);
965 		for (i = (UCHAR(p[0]) << 8) + UCHAR(p[1]), p += 2; i > 0; --i) {
966 		    it_add(&lr->itc, &state->items, p, n, 0, FALSE);
967 		    p += UCHAR(p[1]) + 2;
968 		}
969 	    }
970 	}
971     }
972 
973     state->nred = nred;
974     if (nred != 0) {
975 	if (nred > 1) {
976 	    state->reds.a = ALLOC(char, (Uint) nred << 2);
977 	    state->alloc = TRUE;
978 	}
979 	lr->nred += nred;
980 	lr->srpsize += (Uint) nred << 2;
981 	nred = 0;
982     }
983 
984     /*
985      * compute reductions and shifts
986      */
987     if (state == lr->states) {
988 	symbols[ngoto++] = lr->ntoken;
989     }
990     for (it = state->items; it != (item *) NULL; it = it->next) {
991 	p = it->ref;
992 	if (it->offset == UCHAR(p[1])) {
993 	    /* reduction */
994 	    n = p - lr->grammar;
995 	    p = &REDA(state)[(Uint) nred++ << 2];
996 	    *p++ = n >> 8;
997 	    *p++ = n;
998 	    *p++ = it->ruleno >> 8;
999 	    *p = it->ruleno;
1000 	} else {
1001 	    /* shift/goto */
1002 	    p += 2 + it->offset;
1003 	    n = (UCHAR(p[0]) << 8) + UCHAR(p[1]);
1004 	    if (itemtab[n] == (item *) NULL) {
1005 		if (n < lr->ntoken) {
1006 		    tokens[nshift++] = n;
1007 		} else {
1008 		    symbols[ngoto++] = n;
1009 		}
1010 	    }
1011 	    it_add(&lr->itc, &itemtab[n], it->ref, it->ruleno, it->offset + 2,
1012 		   TRUE);
1013 	}
1014     }
1015 
1016     /*
1017      * delete non-kernel items
1018      */
1019     for (it = state->items, i = state->nitem; --i > 0; it = it->next) ;
1020     next = it->next;
1021     it->next = (item *) NULL;
1022     for (it = next; it != (item *) NULL; it = it_del(lr->itc, it)) ;
1023 
1024     /*
1025      * sort and merge token and goto tables
1026      */
1027     qsort(symbols, ngoto, sizeof(unsigned short), cmp);
1028     memcpy(symbols + ngoto, tokens, nshift * sizeof(unsigned short));
1029     AFREE(tokens);
1030     tokens = symbols + ngoto;
1031     qsort(tokens, nshift, sizeof(unsigned short), cmp);
1032 
1033     /*
1034      * create target table
1035      */
1036     for (i = 0; i < nshift + ngoto; i++) {
1037 	newstate = &lr->states[lr->nstates];
1038 	newstate->items = itemtab[symbols[i]];
1039 
1040 	n = ss_hash(lr->sthtab, lr->sthsize, lr->states,
1041 		    (unsigned short) lr->nstates);
1042 	targets[i] = n;
1043 	if (n == lr->nstates) {
1044 	    /*
1045 	     * new state
1046 	     */
1047 	    n = 0;
1048 	    for (it = newstate->items; it != (item *) NULL; it = it->next) {
1049 		n++;
1050 	    }
1051 	    lr->srpsize += 12;
1052 	    lr->nitem += n;
1053 	    lr->tmpsize += (Uint) 5 * n;
1054 	    newstate->nitem = n;
1055 	    newstate->nred = UNEXPANDED;
1056 	    newstate->shoffset = NOSHIFT;
1057 	    newstate->gtoffset = 0;
1058 	    newstate->shcheck = 0;
1059 	    newstate->alloc = FALSE;
1060 
1061 	    if (++lr->nstates == lr->sttsize) {
1062 		unsigned short save;
1063 
1064 		/* grow table */
1065 		save = state - lr->states;
1066 		lr->states = REALLOC(lr->states, srpstate, lr->nstates,
1067 				     lr->sttsize <<= 1);
1068 		state = lr->states + save;
1069 	    }
1070 	}
1071     }
1072 
1073     /*
1074      * add shifts and gotos to packed mapping
1075      */
1076     if (nshift != 0) {
1077 	state->shcheck = state - lr->states;
1078 	state->shoffset = srp_pack(lr, &state->shcheck, tokens, targets + ngoto,
1079 				   nshift);
1080     }
1081     if (ngoto != 0) {
1082 	unsigned short dummy;
1083 
1084 	dummy = -258;
1085 	state->gtoffset = srp_pack(lr, &dummy, symbols, targets, ngoto);
1086     }
1087     AFREE(targets);
1088     AFREE(symbols);
1089     AFREE(itemtab);
1090 
1091     lr->nexpanded++;
1092     return state;
1093 }
1094 
1095 /*
1096  * NAME:	srp->check()
1097  * DESCRIPTION:	fetch reductions for a given state, possibly first expanding it
1098  */
srp_check(srp * lr,unsigned int num,unsigned short * nredp,char ** redp)1099 short srp_check(srp *lr, unsigned int num, unsigned short *nredp, char **redp)
1100 {
1101     srpstate *state;
1102 
1103     state = &lr->states[num];
1104     if (state->nred < 0) {
1105 	state = srp_expand(lr, state);
1106 	if (lr->srpsize + lr->tmpsize > (Uint) MAX_AUTOMSZ * USHRT_MAX) {
1107 	    unsigned short save;
1108 
1109 	    /*
1110 	     * too much temporary data: attempt to expand all states
1111 	     */
1112 	    save = state - lr->states;
1113 	    for (state = lr->states; lr->nstates != lr->nexpanded; state++) {
1114 		if (lr->nstates > SHRT_MAX ||
1115 		    lr->srpsize > (Uint) MAX_AUTOMSZ * USHRT_MAX) {
1116 		    return -1;	/* too big */
1117 		}
1118 		if (state->nred < 0) {
1119 		    state = srp_expand(lr, state);
1120 		}
1121 	    }
1122 	    state = &lr->states[save];
1123 	}
1124 	if (lr->nstates > SHRT_MAX ||
1125 	    lr->srpsize > (Uint) MAX_AUTOMSZ * USHRT_MAX) {
1126 	    return -1;	/* too big */
1127 	}
1128     }
1129 
1130     *nredp = state->nred;
1131     *redp = REDA(state);
1132     return lr->nstates;
1133 }
1134 
1135 /*
1136  * NAME:	srp->shift()
1137  * DESCRIPTION:	shift to a new state, if possible
1138  */
srp_shift(srp * lr,unsigned int num,unsigned int token)1139 short srp_shift(srp *lr, unsigned int num, unsigned int token)
1140 {
1141     Int n;
1142     char *p;
1143     srpstate *state;
1144 
1145     state = &lr->states[num];
1146     n = state->shoffset;
1147     if (n != NOSHIFT) {
1148 	n = (n + (Int) token) * 2;
1149 	if (n >= 0 && n < lr->mapsize) {
1150 	    p = &lr->check[n];
1151 	    if ((UCHAR(p[0]) << 8) + UCHAR(p[1]) == state->shcheck) {
1152 		/* shift works: return new state */
1153 		p = &lr->data[n];
1154 		return (UCHAR(p[0]) << 8) + UCHAR(p[1]);
1155 	    }
1156 	}
1157     }
1158 
1159     /* shift failed */
1160     return -1;
1161 }
1162 
1163 /*
1164  * NAME:	srp->goto()
1165  * DESCRIPTION:	goto a new state
1166  */
srp_goto(srp * lr,unsigned int num,unsigned int symb)1167 short srp_goto(srp *lr, unsigned int num, unsigned int symb)
1168 {
1169     char *p;
1170 
1171     p = &lr->data[(lr->states[num].gtoffset + symb) * 2];
1172     return (UCHAR(p[0]) << 8) + UCHAR(p[1]);
1173 }
1174