1 /*
2 * This file is part of DGD, https://github.com/dworkin/dgd
3 * Copyright (C) 1993-2010 Dworkin B.V.
4 * Copyright (C) 2010,2012 DGD Authors (see the commit log for details)
5 *
6 * This program is free software: you can redistribute it and/or modify
7 * it under the terms of the GNU Affero General Public License as
8 * published by the Free Software Foundation, either version 3 of the
9 * License, or (at your option) any later version.
10 *
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU Affero General Public License for more details.
15 *
16 * You should have received a copy of the GNU Affero General Public License
17 * along with this program. If not, see <http://www.gnu.org/licenses/>.
18 */
19
20 # include "dgd.h"
21 # include "srp.h"
22
23 typedef struct _item_ {
24 char *ref; /* pointer to rule in grammar */
25 unsigned short ruleno; /* rule number */
26 unsigned short offset; /* offset in rule */
27 struct _item_ *next; /* next in linked list */
28 } item;
29
30 # define ITCHUNKSZ 32
31
32 typedef struct _itchunk_ {
33 int chunksz; /* size of this chunk */
34 item *flist; /* list of free items */
35 struct _itchunk_ *next; /* next in linked list */
36 item it[ITCHUNKSZ]; /* chunk of items */
37 } itchunk;
38
39 /*
40 * NAME: item->new()
41 * DESCRIPTION: create a new item
42 */
it_new(itchunk ** c,char * ref,unsigned short ruleno,unsigned short offset,item * next)43 static item *it_new(itchunk **c, char *ref, unsigned short ruleno, unsigned short offset, item *next)
44 {
45 item *it;
46
47 if (*c == (itchunk *) NULL ||
48 ((*c)->flist == (item *) NULL && (*c)->chunksz == ITCHUNKSZ)) {
49 itchunk *x;
50
51 x = ALLOC(itchunk, 1);
52 x->flist = (*c != (itchunk *) NULL) ? (*c)->flist : (item *) NULL;
53 x->next = *c;
54 *c = x;
55 x->chunksz = 0;
56 }
57 if ((*c)->flist != (item *) NULL) {
58 it = (*c)->flist;
59 (*c)->flist = it->next;
60 } else {
61 it = &(*c)->it[(*c)->chunksz++];
62 }
63
64 it->ref = ref;
65 it->ruleno = ruleno;
66 it->offset = offset;
67 it->next = next;
68
69 return it;
70 }
71
72 /*
73 * NAME: item->del()
74 * DESCRIPTION: delete an item
75 */
it_del(itchunk * c,item * it)76 static item *it_del(itchunk *c, item *it)
77 {
78 item *next;
79
80 next = it->next;
81 it->next = c->flist;
82 c->flist = it;
83 return next;
84 }
85
86 /*
87 * NAME: item->clear()
88 * DESCRIPTION: free all items in memory
89 */
it_clear(itchunk * c)90 static void it_clear(itchunk *c)
91 {
92 itchunk *f;
93
94 while (c != (itchunk *) NULL) {
95 f = c;
96 c = c->next;
97 FREE(f);
98 }
99 }
100
101 /*
102 * NAME: item->add()
103 * DESCRIPTION: add an item to a set
104 */
it_add(itchunk ** c,item ** ri,char * ref,unsigned short ruleno,unsigned short offset,bool sort)105 static void it_add(itchunk **c, item **ri, char *ref, unsigned short ruleno, unsigned short offset, bool sort)
106 {
107 /*
108 * add item to set
109 */
110 if (offset == UCHAR(ref[0]) << 1) {
111 offset = UCHAR(ref[1]); /* skip possible function at the end */
112 }
113
114 if (sort) {
115 while (*ri != (item *) NULL &&
116 ((*ri)->ref < ref ||
117 ((*ri)->ref == ref && (*ri)->offset <= offset))) {
118 if ((*ri)->ref == ref && (*ri)->offset == offset) {
119 return; /* already in set */
120 }
121 ri = &(*ri)->next;
122 }
123 } else {
124 while (*ri != (item *) NULL) {
125 if ((*ri)->ref == ref && (*ri)->offset == offset) {
126 return; /* already in set */
127 }
128 ri = &(*ri)->next;
129 }
130 }
131
132 *ri = it_new(c, ref, ruleno, offset, *ri);
133 }
134
135 /*
136 * NAME: item->load()
137 * DESCRIPTION: load an item
138 */
it_load(itchunk ** c,unsigned short n,char ** buf,char * grammar)139 static item *it_load(itchunk **c, unsigned short n, char **buf, char *grammar)
140 {
141 char *p;
142 item **ri;
143 item *it;
144 char *ref;
145 unsigned short ruleno;
146
147 ri = ⁢
148 p = *buf;
149 do {
150 ref = grammar + (UCHAR(p[0]) << 8) + UCHAR(p[1]);
151 p += 2;
152 ruleno = (UCHAR(p[0]) << 8) + UCHAR(p[1]);
153 p += 2;
154 *ri = it_new(c, ref, ruleno, UCHAR(*p++), (item *) NULL);
155 ri = &(*ri)->next;
156 } while (--n != 0);
157 *buf = p;
158
159 return it;
160 }
161
162 /*
163 * NAME: item->save()
164 * DESCRIPTION: save an item
165 */
it_save(item * it,char * buf,char * grammar)166 static char *it_save(item *it, char *buf, char *grammar)
167 {
168 int offset;
169
170 while (it != (item *) NULL) {
171 offset = it->ref - grammar;
172 *buf++ = offset >> 8;
173 *buf++ = offset;
174 *buf++ = it->ruleno >> 8;
175 *buf++ = it->ruleno;
176 *buf++ = it->offset;
177 it = it->next;
178 }
179
180 return buf;
181 }
182
183
184 typedef struct {
185 item *items; /* rules and offsets */
186 union {
187 char e[4]; /* 1 */
188 char *a; /* > 1 */
189 } reds; /* reductions */
190 unsigned short nitem; /* # items */
191 short nred; /* # reductions, -1 if unexpanded */
192 Int shoffset; /* offset for shifts */
193 Int gtoffset; /* offset for gotos */
194 unsigned short shcheck; /* shift offset check */
195 unsigned short next; /* next in linked list */
196 bool alloc; /* reductions allocated? */
197 } srpstate;
198
199 # define REDA(state) (((state)->nred == 1) ? \
200 (state)->reds.e : (state)->reds.a)
201 # define UNEXPANDED -1
202 # define NOSHIFT ((Int) 0xff800000L)
203
204 /*
205 * NAME: srpstate->hash()
206 * DESCRIPTION: put a new state in the hash table, or return an old one
207 */
ss_hash(unsigned short * htab,Uint htabsize,srpstate * states,unsigned short idx)208 static unsigned short ss_hash(unsigned short *htab, Uint htabsize, srpstate *states, unsigned short idx)
209 {
210 Uint h;
211 srpstate *newstate;
212 item *it, *it2;
213 srpstate *s;
214 unsigned short *sr;
215
216 /* hash on items */
217 newstate = &states[idx];
218 h = 0;
219 for (it = newstate->items; it != (item *) NULL; it = it->next) {
220 h ^= (uintptr_t) it->ref;
221 h = (h >> 3) ^ (h << 29) ^ it->offset;
222 }
223
224 /* check state hash table */
225 sr = &htab[(Uint) h % htabsize];
226 s = &states[*sr];
227 while (s != states) {
228 it = newstate->items;
229 it2 = s->items;
230 while (it != (item *) NULL && it2 != (item *) NULL &&
231 it->ref == it2->ref && it->offset == it2->offset) {
232 it = it->next;
233 it2 = it2->next;
234 }
235 if (it == it2) {
236 return *sr; /* state already exists */
237 }
238 sr = &s->next;
239 s = &states[*sr];
240 }
241
242 newstate->next = *sr;
243 return *sr = idx;
244 }
245
246 /*
247 * NAME: srpstate->load()
248 * DESCRIPTION: load a srpstate
249 */
ss_load(char * buf,char ** rbuf,srpstate * state)250 static char *ss_load(char *buf, char **rbuf, srpstate *state)
251 {
252 state->items = (item *) NULL;
253 state->nitem = (UCHAR(buf[0]) << 8) + UCHAR(buf[1]);
254 buf += 2;
255 state->nred = (UCHAR(buf[0]) << 8) + UCHAR(buf[1]);
256 buf += 2;
257 state->shoffset = ((Int) SCHAR(buf[0]) << 16) + (UCHAR(buf[1]) << 8) +
258 UCHAR(buf[2]);
259 buf += 3;
260 state->gtoffset = ((Int) SCHAR(buf[0]) << 16) + (UCHAR(buf[1]) << 8) +
261 UCHAR(buf[2]);
262 buf += 3;
263 state->shcheck = (UCHAR(buf[0]) << 8) + UCHAR(buf[1]);
264 buf += 2;
265
266 if (state->nred > 0) {
267 if (state->nred == 1) {
268 memcpy(state->reds.e, *rbuf, 4);
269 } else {
270 state->reds.a = *rbuf;
271 }
272 *rbuf += 4 * state->nred;
273 }
274 state->alloc = FALSE;
275
276 return buf;
277 }
278
279 /*
280 * NAME: srpstate->save()
281 * DESCRIPTION: save a srpstate
282 */
ss_save(srpstate * state,char * buf,char ** rbuf)283 static char *ss_save(srpstate *state, char *buf, char **rbuf)
284 {
285 *buf++ = state->nitem >> 8;
286 *buf++ = state->nitem;
287 *buf++ = state->nred >> 8;
288 *buf++ = state->nred;
289 *buf++ = state->shoffset >> 16;
290 *buf++ = state->shoffset >> 8;
291 *buf++ = state->shoffset;
292 *buf++ = state->gtoffset >> 16;
293 *buf++ = state->gtoffset >> 8;
294 *buf++ = state->gtoffset;
295 *buf++ = state->shcheck >> 8;
296 *buf++ = state->shcheck;
297
298 if (state->nred > 0) {
299 memcpy(*rbuf, REDA(state), state->nred * 4);
300 *rbuf += state->nred * 4;
301 }
302
303 return buf;
304 }
305
306
307 typedef struct _shlink_ {
308 Int shifts; /* offset in shift table */
309 struct _shlink_ *next; /* next in linked list */
310 } shlink;
311
312 # define SLCHUNKSZ 64
313
314 typedef struct _slchunk_ {
315 int chunksz; /* size of chunk */
316 struct _slchunk_ *next; /* next in linked list */
317 shlink sl[SLCHUNKSZ]; /* shlinks */
318 } slchunk;
319
320 /*
321 * NAME: shlink->hash()
322 * DESCRIPTION: put a new shlink in the hash table, or return an old one
323 */
sl_hash(shlink ** htab,Uint htabsize,slchunk ** c,char * shtab,char * shifts,Uint n)324 static shlink *sl_hash(shlink **htab, Uint htabsize, slchunk **c, char *shtab, char *shifts, Uint n)
325 {
326 unsigned long h;
327 Uint i;
328 shlink **ssl, *sl;
329
330 /* search in hash table */
331 shifts += 5;
332 n -= 5;
333 h = 0;
334 for (i = n; i > 0; --i) {
335 h = (h >> 3) ^ (h << 29) ^ UCHAR(*shifts++);
336 }
337 shifts -= n;
338 ssl = &htab[h % htabsize];
339 while (*ssl != (shlink *) NULL) {
340 if (memcmp(shtab + (*ssl)->shifts + 5, shifts, n) == 0) {
341 /* seen this one before */
342 return *ssl;
343 }
344 ssl = &(*ssl)->next;
345 }
346
347 if (*c == (slchunk *) NULL || (*c)->chunksz == SLCHUNKSZ) {
348 slchunk *x;
349
350 x = ALLOC(slchunk, 1);
351 x->next = *c;
352 *c = x;
353 x->chunksz = 0;
354 }
355 sl = &(*c)->sl[(*c)->chunksz++];
356 sl->next = *ssl;
357 *ssl = sl;
358 sl->shifts = NOSHIFT;
359
360 return sl;
361 }
362
363 /*
364 * NAME: shlink->clear()
365 * DESCRIPTION: clean up shlinks
366 */
sl_clear(slchunk * c)367 static void sl_clear(slchunk *c)
368 {
369 slchunk *f;
370
371 while (c != (slchunk *) NULL) {
372 f = c;
373 c = c->next;
374 FREE(f);
375 }
376 }
377
378
379 struct _srp_ {
380 char *grammar; /* grammar */
381 unsigned short nsstring; /* # of source grammar strings */
382 unsigned short ntoken; /* # of tokens (regexp & string) */
383 unsigned short nprod; /* # of nonterminals */
384
385 Uint nred; /* # of reductions */
386 Uint nitem; /* # of items */
387 Uint srpsize; /* size of shift/reduce parser */
388 Uint tmpsize; /* size of temporary data */
389 bool modified; /* srp needs saving */
390 bool allocated; /* srp allocated */
391 char *srpstr; /* srp string */
392 char *tmpstr; /* tmp string */
393
394 unsigned short nstates; /* number of states */
395 unsigned short nexpanded; /* number of expanded states */
396 Uint sttsize; /* state table size */
397 Uint sthsize; /* state hash table size */
398 srpstate *states; /* state array */
399 unsigned short *sthtab; /* state hash table */
400
401 itchunk *itc; /* item chunk */
402
403 Uint gap; /* first gap in packed mapping */
404 Uint spread; /* max spread in packed mapping */
405 Uint mapsize; /* packed mapping size */
406 char *data; /* packed shifts */
407 char *check; /* packed check for shift validity */
408 bool alloc; /* data and check allocated separately? */
409
410 slchunk *slc; /* shlink chunk */
411 Uint nshift; /* number of shifts (from/to pairs) */
412 Uint shtsize; /* shift table size */
413 Uint shhsize; /* shift hash table size */
414 char *shtab; /* shift (from/to) table */
415 shlink **shhtab; /* shift hash table */
416 };
417
418 # define SRP_VERSION 1
419
420 /*
421 * NAME: srp->new()
422 * DESCRIPTION: create new shift/reduce parser
423 */
srp_new(char * grammar)424 srp *srp_new(char *grammar)
425 {
426 srp *lr;
427 char *p;
428 Uint nrule;
429
430 lr = ALLOC(srp, 1);
431
432 /* grammar info */
433 lr->grammar = grammar;
434 lr->nsstring = (UCHAR(grammar[9]) << 8) + UCHAR(grammar[10]);
435 lr->ntoken = lr->nsstring +
436 ((UCHAR(grammar[5]) + UCHAR(grammar[11])) << 8) +
437 UCHAR(grammar[6]) + UCHAR(grammar[12]);
438 lr->nprod = (UCHAR(grammar[13]) << 8) + UCHAR(grammar[14]);
439 nrule = (UCHAR(grammar[15]) << 8) + UCHAR(grammar[16]);
440
441 /* sizes */
442 lr->srpstr = (char *) NULL;
443 lr->tmpstr = (char *) NULL;
444 lr->nred = lr->nitem = 0;
445 lr->srpsize = 14 + 12 + 4; /* srp header + 1 state + data/check overhead */
446 lr->tmpsize = 7 + 5; /* tmp header + 1 item */
447 lr->modified = TRUE;
448 lr->allocated = FALSE;
449
450 /* states */
451 lr->nstates = 1;
452 lr->nexpanded = 0;
453 lr->sttsize = nrule << 1;
454 lr->sthsize = nrule << 2;
455 lr->states = ALLOC(srpstate, lr->sttsize);
456 lr->sthtab = ALLOC(unsigned short, lr->sthsize);
457 memset(lr->sthtab, '\0', lr->sthsize * sizeof(unsigned short));
458 lr->itc = (itchunk *) NULL;
459
460 /* state 0 */
461 p = grammar + 17 + ((lr->ntoken + lr->nsstring) << 1);
462 p = grammar + (UCHAR(p[0]) << 8) + UCHAR(p[1]);
463 lr->states[0].items = it_new(&lr->itc, p + 2, lr->ntoken, 0, (item *) NULL);
464 lr->states[0].nitem = 1;
465 lr->states[0].nred = UNEXPANDED;
466 lr->states[0].shoffset = NOSHIFT;
467 lr->states[0].gtoffset = 0;
468 lr->states[0].shcheck = 0;
469 lr->states[0].alloc = FALSE;
470
471 /* packed mapping for shift */
472 lr->gap = lr->spread = 0;
473 lr->mapsize = (Uint) (lr->ntoken + lr->nprod) << 2;
474 lr->data = ALLOC(char, lr->mapsize);
475 memset(lr->data, '\0', lr->mapsize);
476 lr->check = ALLOC(char, lr->mapsize);
477 memset(lr->check, '\xff', lr->mapsize);
478 lr->alloc = TRUE;
479
480 /* shift hash table */
481 lr->slc = (slchunk *) NULL;
482 lr->nshift = 0;
483 lr->shtsize = lr->mapsize;
484 lr->shhsize = nrule << 2;
485 lr->shtab = ALLOC(char, lr->shtsize);
486 lr->shhtab = ALLOC(shlink*, lr->shhsize);
487 memset(lr->shhtab, '\0', lr->shhsize * sizeof(shlink*));
488
489 return lr;
490 }
491
492 /*
493 * NAME: srp->del()
494 * DESCRIPTION: delete shift/reduce parser
495 */
srp_del(srp * lr)496 void srp_del(srp *lr)
497 {
498 unsigned short i;
499 srpstate *state;
500
501 if (lr->allocated) {
502 FREE(lr->srpstr);
503 }
504 it_clear(lr->itc);
505 if (lr->sthtab != (unsigned short *) NULL) {
506 FREE(lr->sthtab);
507 }
508 for (i = lr->nstates, state = lr->states; i > 0; --i, state++) {
509 if (state->alloc) {
510 FREE(state->reds.a);
511 }
512 }
513 FREE(lr->states);
514 if (lr->alloc) {
515 FREE(lr->data);
516 FREE(lr->check);
517 }
518 sl_clear(lr->slc);
519 if (lr->shtab != (char *) NULL) {
520 FREE(lr->shtab);
521 }
522 if (lr->shhtab != (shlink **) NULL) {
523 FREE(lr->shhtab);
524 }
525 FREE(lr);
526 }
527
528 /*
529 * format of SRP permanent data:
530 *
531 * header [1] version number
532 * [x][y] # states
533 * [x][y] # expanded states
534 * [x][y][z] # reductions
535 * [x][y][z] initial gap in packed mapping
536 * [x][y][z] max spread of packed mapping
537 *
538 * state [x][y] # items }
539 * [x][y] # reductions }
540 * [x][y][z] shift offset in packed mapping } ...
541 * [x][y][z] goto offset in packed mapping }
542 * [x][y] shift check }
543 *
544 * reduction [x][y] rule offset in grammar } ...
545 * [x][y] rule number }
546 *
547 * mapping [...] data table (spread + 2)
548 * [...] check table (spread + 2)
549 *
550 *
551 * format of SRP temporary data:
552 *
553 * header [0] version number
554 * [x][y][z] # items
555 * [x][y][z] shift table size
556 *
557 * item [x][y] rule offset in grammar }
558 * [x][y] rule number } ...
559 * [x] offset in rule }
560 *
561 * shift [...] shift table
562 */
563
564 /*
565 * NAME: srp->load()
566 * DESCRIPTION: load a shift/reduce parser from string
567 */
srp_load(char * grammar,char * str,Uint len)568 srp *srp_load(char *grammar, char *str, Uint len)
569 {
570 srp *lr;
571 char *buf;
572 Uint i;
573 srpstate *state;
574 char *rbuf;
575
576 if (UCHAR(str[0]) != SRP_VERSION) {
577 return srp_new(grammar);
578 }
579
580 lr = ALLOC(srp, 1);
581
582 /* grammar info */
583 lr->grammar = grammar;
584 lr->nsstring = (UCHAR(grammar[9]) << 8) + UCHAR(grammar[10]);
585 lr->ntoken = lr->nsstring +
586 ((UCHAR(grammar[5]) + UCHAR(grammar[11])) << 8) +
587 UCHAR(grammar[6]) + UCHAR(grammar[12]);
588 lr->nprod = (UCHAR(grammar[13]) << 8) + UCHAR(grammar[14]);
589
590 lr->srpstr = buf = str;
591
592 /* header */
593 lr->nstates = (UCHAR(buf[1]) << 8) + UCHAR(buf[2]);
594 lr->nexpanded = (UCHAR(buf[3]) << 8) + UCHAR(buf[4]);
595 lr->nred = ((Uint) UCHAR(buf[5]) << 16) + (UCHAR(buf[6]) << 8) +
596 UCHAR(buf[7]);
597 lr->gap = ((Uint) UCHAR(buf[8]) << 16) + (UCHAR(buf[9]) << 8) +
598 UCHAR(buf[10]);
599 lr->spread = ((Uint) UCHAR(buf[11]) << 16) + (UCHAR(buf[12]) << 8) +
600 UCHAR(buf[13]);
601 buf += 14;
602
603 /* states */
604 lr->sttsize = lr->nstates + 1;
605 lr->sthsize = 0;
606 lr->states = ALLOC(srpstate, lr->sttsize);
607 lr->sthtab = (unsigned short *) NULL;
608 lr->itc = (itchunk *) NULL;
609
610 /* load states */
611 rbuf = buf + lr->nstates * 12;
612 for (i = lr->nstates, state = lr->states; i > 0; --i, state++) {
613 buf = ss_load(buf, &rbuf, state);
614 }
615 buf = rbuf;
616
617 /* load packed mapping */
618 lr->mapsize = lr->spread + 2;
619 lr->data = buf;
620 buf += lr->spread + 2;
621 lr->check = buf;
622 buf += lr->spread + 2;
623 lr->alloc = FALSE;
624
625 lr->tmpstr = buf;
626
627 /* sizes */
628 lr->nitem = 0;
629 lr->srpsize = (intptr_t) buf - (intptr_t) str;
630 lr->tmpsize = len - lr->srpsize;
631 lr->modified = lr->allocated = FALSE;
632
633 /* shift hash table */
634 lr->slc = (slchunk *) NULL;
635 lr->nshift = 0;
636 lr->shtsize = 0;
637 lr->shhsize = 0;
638 lr->shtab = (char *) NULL;
639 lr->shhtab = (shlink **) NULL;
640
641 return lr;
642 }
643
644 /*
645 * NAME: srp->loadtmp()
646 * DESCRIPTION: load the temporary data for a shift/reduce parser
647 */
srp_loadtmp(srp * lr)648 static void srp_loadtmp(srp *lr)
649 {
650 Uint i, n;
651 srpstate *state;
652 char *p;
653 Uint nrule;
654 char *buf;
655
656 nrule = (UCHAR(lr->grammar[15]) << 8) + UCHAR(lr->grammar[16]);
657
658 buf = lr->tmpstr;
659 lr->nitem = ((Uint) UCHAR(buf[1]) << 16) + (UCHAR(buf[2]) << 8) +
660 UCHAR(buf[3]);
661 lr->nshift = ((Uint) UCHAR(buf[4]) << 16) + (UCHAR(buf[5]) << 8) +
662 UCHAR(buf[6]);
663 buf += 7;
664
665 /* states */
666 lr->sthsize = nrule << 2;
667 lr->sthtab = ALLOC(unsigned short, lr->sthsize);
668 memset(lr->sthtab, '\0', lr->sthsize * sizeof(unsigned short));
669 for (i = 0, state = lr->states; i < lr->nstates; i++, state++) {
670 if (state->nitem != 0) {
671 state->items = it_load(&lr->itc, state->nitem, &buf, lr->grammar);
672 }
673 ss_hash(lr->sthtab, lr->sthsize, lr->states, (unsigned short) i);
674 }
675
676 /* shifts */
677 lr->shtsize = lr->nshift * 2;
678 lr->shhsize = nrule << 2;
679 lr->shtab = ALLOC(char, lr->shtsize);
680 memcpy(lr->shtab, buf, lr->nshift);
681 lr->shhtab = ALLOC(shlink*, lr->shhsize);
682 memset(lr->shhtab, '\0', lr->shhsize * sizeof(shlink*));
683 for (i = 0, p = buf; i != lr->nshift; i += n, p += n) {
684 n = (Uint) 4 * ((UCHAR(p[5]) << 8) + UCHAR(p[6])) + 7;
685 sl_hash(lr->shhtab, lr->shhsize, &lr->slc, lr->shtab, p, n)->shifts =
686 (intptr_t) p - (intptr_t) buf;
687 }
688 }
689
690 /*
691 * NAME: srp->save()
692 * DESCRIPTION: save a shift/reduce parser to string
693 */
srp_save(srp * lr,char ** str,Uint * len)694 bool srp_save(srp *lr, char **str, Uint *len)
695 {
696 char *buf;
697 unsigned short i;
698 srpstate *state;
699 char *rbuf;
700
701 if (!lr->modified) {
702 *str = lr->srpstr;
703 *len = lr->srpsize + lr->tmpsize;
704 return FALSE;
705 }
706
707 if (lr->nstates == lr->nexpanded) {
708 lr->tmpsize = 0;
709 }
710 if (lr->allocated) {
711 FREE(lr->srpstr);
712 }
713 lr->srpstr = buf = *str = ALLOC(char, *len = lr->srpsize + lr->tmpsize);
714
715 /* header */
716 *buf++ = SRP_VERSION;
717 *buf++ = lr->nstates >> 8;
718 *buf++ = lr->nstates;
719 *buf++ = lr->nexpanded >> 8;
720 *buf++ = lr->nexpanded;
721 *buf++ = lr->nred >> 16;
722 *buf++ = lr->nred >> 8;
723 *buf++ = lr->nred;
724 *buf++ = lr->gap >> 16;
725 *buf++ = lr->gap >> 8;
726 *buf++ = lr->gap;
727 *buf++ = lr->spread >> 16;
728 *buf++ = lr->spread >> 8;
729 *buf++ = lr->spread;
730
731 /* save states */
732 rbuf = buf + lr->nstates * 12;
733 for (i = lr->nstates, state = lr->states; i > 0; --i, state++) {
734 buf = ss_save(state, buf, &rbuf);
735 }
736 buf = rbuf;
737
738 /* save packed mapping */
739 memcpy(buf, lr->data, lr->spread + 2);
740 buf += lr->spread + 2;
741 memcpy(buf, lr->check, lr->spread + 2);
742 buf += lr->spread + 2;
743
744 lr->modified = FALSE;
745 lr->allocated = TRUE;
746 if (lr->tmpsize == 0) {
747 /* no tmp data */
748 return TRUE;
749 }
750
751 lr->tmpstr = buf;
752
753 /* tmp header */
754 *buf++ = 0;
755 *buf++ = lr->nitem >> 16;
756 *buf++ = lr->nitem >> 8;
757 *buf++ = lr->nitem;
758 *buf++ = lr->nshift >> 16;
759 *buf++ = lr->nshift >> 8;
760 *buf++ = lr->nshift;
761
762 /* save items */
763 for (i = lr->nstates, state = lr->states; i > 0; --i, state++) {
764 buf = it_save(state->items, buf, lr->grammar);
765 }
766
767 /* shift data */
768 memcpy(buf, lr->shtab, lr->nshift);
769
770 return TRUE;
771 }
772
773 /*
774 * NAME: srp->pack()
775 * DESCRIPTION: add a new set of shifts and gotos to the packed mapping
776 */
srp_pack(srp * lr,unsigned short * check,unsigned short * from,unsigned short * to,unsigned short n)777 static Int srp_pack(srp *lr, unsigned short *check, unsigned short *from, unsigned short *to, unsigned short n)
778 {
779 Uint i, j;
780 char *p;
781 char *shifts;
782 shlink *sl;
783 Uint range, *offstab;
784 Int offset;
785
786 /*
787 * check hash table
788 */
789 shifts = ALLOCA(char, j = (Uint) 4 * n + 7);
790 p = shifts + 5;
791 *p++ = n >> 8;
792 *p++ = n;
793 for (i = 0; i < n; i++) {
794 *p++ = from[i] >> 8;
795 *p++ = from[i];
796 *p++ = to[i] >> 8;
797 *p++ = to[i];
798 }
799 sl = sl_hash(lr->shhtab, lr->shhsize, &lr->slc, lr->shtab, shifts, j);
800 if (sl->shifts != NOSHIFT) {
801 /* same as before */
802 AFREE(shifts);
803 p = lr->shtab + sl->shifts;
804 *check = (UCHAR(p[0]) << 8) + UCHAR(p[1]);
805 return ((Int) SCHAR(p[2]) << 16) + ((UCHAR(p[3]) << 8) + UCHAR(p[4]));
806 }
807
808 /* not in hash table */
809 if (lr->nshift + j > lr->shtsize) {
810 /* grow shift table */
811 i = (lr->nshift + j) * 2;
812 lr->shtab = REALLOC(lr->shtab, char, lr->shtsize, i);
813 lr->shtsize = i;
814 }
815 sl->shifts = lr->nshift;
816 lr->nshift += j;
817 lr->tmpsize += j;
818 memcpy(lr->shtab + sl->shifts, shifts, j);
819 AFREE(shifts);
820
821 /* create offset table */
822 offstab = ALLOCA(Uint, n);
823 for (i = 0; i < n; i++) {
824 offstab[i] = from[i] * 2;
825 }
826 j = offset = offstab[0];
827 for (i = 1; i < n; i++) {
828 offstab[i] -= j;
829 j += offstab[i];
830 }
831 range = j - offset + 2;
832
833 /*
834 * add from/to pairs to packed mapping
835 */
836 for (i = lr->gap, p = &lr->check[i];
837 UCHAR(p[0]) != 0xff || UCHAR(p[1]) != 0xff;
838 i += 2, p += 2) ;
839 lr->gap = i;
840
841 next:
842 if (i + range >= lr->mapsize) {
843 /* grow tables */
844 j = (i + range) << 1;
845 if (lr->alloc) {
846 lr->data = REALLOC(lr->data, char, lr->mapsize, j);
847 lr->check = REALLOC(lr->check, char, lr->mapsize, j);
848 } else {
849 char *table;
850
851 table = ALLOC(char, j);
852 memcpy(table, lr->data, lr->mapsize);
853 lr->data = table;
854 table = ALLOC(char, j);
855 memcpy(table, lr->check, lr->mapsize);
856 lr->check = table;
857 lr->alloc = TRUE;
858 }
859 memset(lr->data + lr->mapsize, '\0', j - lr->mapsize);
860 memset(lr->check + lr->mapsize, '\xff', j - lr->mapsize);
861 lr->mapsize = j;
862 }
863
864 /* match each symbol with free slot */
865 for (j = 1; j < n; j++) {
866 p += offstab[j];
867 if (UCHAR(p[0]) != 0xff || UCHAR(p[1]) != 0xff) {
868 p = &lr->check[i];
869 do {
870 i += 2;
871 p += 2;
872 } while (UCHAR(p[0]) != 0xff || UCHAR(p[1]) != 0xff);
873 goto next;
874 }
875 }
876 AFREE(offstab);
877
878 /* free slots found: adjust spread */
879 if (i + range > lr->spread) {
880 lr->srpsize += 2 * (i + range - lr->spread);
881 lr->spread = i + range;
882 }
883
884 /* add to packed mapping */
885 offset = i - offset;
886 do {
887 j = from[--n] * 2 + offset;
888 p = &lr->data[j];
889 *p++ = to[n] >> 8;
890 *p = to[n];
891 p = &lr->check[j];
892 *p++ = *check >> 8;
893 *p = *check;
894 } while (n != 0);
895
896 p = lr->shtab + sl->shifts;
897 offset /= 2;
898 *p++ = *check >> 8;
899 *p++ = *check;
900 *p++ = offset >> 16;
901 *p++ = offset >> 8;
902 *p = offset;
903 return offset;
904 }
905
906 /*
907 * NAME: cmp()
908 * DESCRIPTION: compare two unsigned shorts
909 */
cmp(cvoid * sh1,cvoid * sh2)910 static int cmp(cvoid *sh1, cvoid *sh2)
911 {
912 return (*(unsigned short *) sh1 < *(unsigned short *) sh2) ?
913 -1 : (*(unsigned short *) sh1 == *(unsigned short *) sh2) ? 0 : 1;
914 }
915
916 /*
917 * NAME: srp->expand()
918 * DESCRIPTION: expand a state
919 */
srp_expand(srp * lr,srpstate * state)920 static srpstate *srp_expand(srp *lr, srpstate *state)
921 {
922 unsigned short i, n;
923 char *p;
924 item *it;
925 item **itemtab, *next;
926 unsigned short *tokens, *symbols, *targets;
927 srpstate *newstate;
928 unsigned short nred, nshift, ngoto;
929
930 lr->modified = TRUE;
931 if (state - lr->states == 1) {
932 /* final state */
933 state->nred = 0;
934 lr->nexpanded++;
935 return state;
936 }
937
938 if (lr->sthtab == (unsigned short *) NULL) {
939 srp_loadtmp(lr); /* load tmp info */
940 }
941
942 n = lr->ntoken + lr->nprod;
943 itemtab = ALLOCA(item*, n);
944 memset(itemtab, '\0', n * sizeof(item*));
945 symbols = ALLOCA(unsigned short, n);
946 targets = ALLOCA(unsigned short, n);
947 tokens = ALLOCA(unsigned short, lr->ntoken);
948 nred = nshift = ngoto = 0;
949
950 /*
951 * compute closure of kernel item set
952 */
953 for (it = state->items; it != (item *) NULL; it = it->next) {
954 i = it->offset;
955 p = it->ref + 1;
956 if (i == UCHAR(*p++)) {
957 /* end of production */
958 nred++;
959 } else {
960 p += i;
961 n = (UCHAR(p[0]) << 8) + UCHAR(p[1]);
962 if (n >= lr->ntoken) {
963 p = lr->grammar + 17 + ((n + lr->nsstring) << 1);
964 p = lr->grammar + (UCHAR(p[0]) << 8) + UCHAR(p[1]);
965 for (i = (UCHAR(p[0]) << 8) + UCHAR(p[1]), p += 2; i > 0; --i) {
966 it_add(&lr->itc, &state->items, p, n, 0, FALSE);
967 p += UCHAR(p[1]) + 2;
968 }
969 }
970 }
971 }
972
973 state->nred = nred;
974 if (nred != 0) {
975 if (nred > 1) {
976 state->reds.a = ALLOC(char, (Uint) nred << 2);
977 state->alloc = TRUE;
978 }
979 lr->nred += nred;
980 lr->srpsize += (Uint) nred << 2;
981 nred = 0;
982 }
983
984 /*
985 * compute reductions and shifts
986 */
987 if (state == lr->states) {
988 symbols[ngoto++] = lr->ntoken;
989 }
990 for (it = state->items; it != (item *) NULL; it = it->next) {
991 p = it->ref;
992 if (it->offset == UCHAR(p[1])) {
993 /* reduction */
994 n = p - lr->grammar;
995 p = &REDA(state)[(Uint) nred++ << 2];
996 *p++ = n >> 8;
997 *p++ = n;
998 *p++ = it->ruleno >> 8;
999 *p = it->ruleno;
1000 } else {
1001 /* shift/goto */
1002 p += 2 + it->offset;
1003 n = (UCHAR(p[0]) << 8) + UCHAR(p[1]);
1004 if (itemtab[n] == (item *) NULL) {
1005 if (n < lr->ntoken) {
1006 tokens[nshift++] = n;
1007 } else {
1008 symbols[ngoto++] = n;
1009 }
1010 }
1011 it_add(&lr->itc, &itemtab[n], it->ref, it->ruleno, it->offset + 2,
1012 TRUE);
1013 }
1014 }
1015
1016 /*
1017 * delete non-kernel items
1018 */
1019 for (it = state->items, i = state->nitem; --i > 0; it = it->next) ;
1020 next = it->next;
1021 it->next = (item *) NULL;
1022 for (it = next; it != (item *) NULL; it = it_del(lr->itc, it)) ;
1023
1024 /*
1025 * sort and merge token and goto tables
1026 */
1027 qsort(symbols, ngoto, sizeof(unsigned short), cmp);
1028 memcpy(symbols + ngoto, tokens, nshift * sizeof(unsigned short));
1029 AFREE(tokens);
1030 tokens = symbols + ngoto;
1031 qsort(tokens, nshift, sizeof(unsigned short), cmp);
1032
1033 /*
1034 * create target table
1035 */
1036 for (i = 0; i < nshift + ngoto; i++) {
1037 newstate = &lr->states[lr->nstates];
1038 newstate->items = itemtab[symbols[i]];
1039
1040 n = ss_hash(lr->sthtab, lr->sthsize, lr->states,
1041 (unsigned short) lr->nstates);
1042 targets[i] = n;
1043 if (n == lr->nstates) {
1044 /*
1045 * new state
1046 */
1047 n = 0;
1048 for (it = newstate->items; it != (item *) NULL; it = it->next) {
1049 n++;
1050 }
1051 lr->srpsize += 12;
1052 lr->nitem += n;
1053 lr->tmpsize += (Uint) 5 * n;
1054 newstate->nitem = n;
1055 newstate->nred = UNEXPANDED;
1056 newstate->shoffset = NOSHIFT;
1057 newstate->gtoffset = 0;
1058 newstate->shcheck = 0;
1059 newstate->alloc = FALSE;
1060
1061 if (++lr->nstates == lr->sttsize) {
1062 unsigned short save;
1063
1064 /* grow table */
1065 save = state - lr->states;
1066 lr->states = REALLOC(lr->states, srpstate, lr->nstates,
1067 lr->sttsize <<= 1);
1068 state = lr->states + save;
1069 }
1070 }
1071 }
1072
1073 /*
1074 * add shifts and gotos to packed mapping
1075 */
1076 if (nshift != 0) {
1077 state->shcheck = state - lr->states;
1078 state->shoffset = srp_pack(lr, &state->shcheck, tokens, targets + ngoto,
1079 nshift);
1080 }
1081 if (ngoto != 0) {
1082 unsigned short dummy;
1083
1084 dummy = -258;
1085 state->gtoffset = srp_pack(lr, &dummy, symbols, targets, ngoto);
1086 }
1087 AFREE(targets);
1088 AFREE(symbols);
1089 AFREE(itemtab);
1090
1091 lr->nexpanded++;
1092 return state;
1093 }
1094
1095 /*
1096 * NAME: srp->check()
1097 * DESCRIPTION: fetch reductions for a given state, possibly first expanding it
1098 */
srp_check(srp * lr,unsigned int num,unsigned short * nredp,char ** redp)1099 short srp_check(srp *lr, unsigned int num, unsigned short *nredp, char **redp)
1100 {
1101 srpstate *state;
1102
1103 state = &lr->states[num];
1104 if (state->nred < 0) {
1105 state = srp_expand(lr, state);
1106 if (lr->srpsize + lr->tmpsize > (Uint) MAX_AUTOMSZ * USHRT_MAX) {
1107 unsigned short save;
1108
1109 /*
1110 * too much temporary data: attempt to expand all states
1111 */
1112 save = state - lr->states;
1113 for (state = lr->states; lr->nstates != lr->nexpanded; state++) {
1114 if (lr->nstates > SHRT_MAX ||
1115 lr->srpsize > (Uint) MAX_AUTOMSZ * USHRT_MAX) {
1116 return -1; /* too big */
1117 }
1118 if (state->nred < 0) {
1119 state = srp_expand(lr, state);
1120 }
1121 }
1122 state = &lr->states[save];
1123 }
1124 if (lr->nstates > SHRT_MAX ||
1125 lr->srpsize > (Uint) MAX_AUTOMSZ * USHRT_MAX) {
1126 return -1; /* too big */
1127 }
1128 }
1129
1130 *nredp = state->nred;
1131 *redp = REDA(state);
1132 return lr->nstates;
1133 }
1134
1135 /*
1136 * NAME: srp->shift()
1137 * DESCRIPTION: shift to a new state, if possible
1138 */
srp_shift(srp * lr,unsigned int num,unsigned int token)1139 short srp_shift(srp *lr, unsigned int num, unsigned int token)
1140 {
1141 Int n;
1142 char *p;
1143 srpstate *state;
1144
1145 state = &lr->states[num];
1146 n = state->shoffset;
1147 if (n != NOSHIFT) {
1148 n = (n + (Int) token) * 2;
1149 if (n >= 0 && n < lr->mapsize) {
1150 p = &lr->check[n];
1151 if ((UCHAR(p[0]) << 8) + UCHAR(p[1]) == state->shcheck) {
1152 /* shift works: return new state */
1153 p = &lr->data[n];
1154 return (UCHAR(p[0]) << 8) + UCHAR(p[1]);
1155 }
1156 }
1157 }
1158
1159 /* shift failed */
1160 return -1;
1161 }
1162
1163 /*
1164 * NAME: srp->goto()
1165 * DESCRIPTION: goto a new state
1166 */
srp_goto(srp * lr,unsigned int num,unsigned int symb)1167 short srp_goto(srp *lr, unsigned int num, unsigned int symb)
1168 {
1169 char *p;
1170
1171 p = &lr->data[(lr->states[num].gtoffset + symb) * 2];
1172 return (UCHAR(p[0]) << 8) + UCHAR(p[1]);
1173 }
1174