1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License, Version 1.0 only
6 * (the "License"). You may not use this file except in compliance
7 * with the License.
8 *
9 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10 * or http://www.opensolaris.org/os/licensing.
11 * See the License for the specific language governing permissions
12 * and limitations under the License.
13 *
14 * When distributing Covered Code, include this CDDL HEADER in each
15 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16 * If applicable, add the following below this CDDL HEADER, with the
17 * fields enclosed by brackets "[]" replaced with your own identifying
18 * information: Portions Copyright [yyyy] [name of copyright owner]
19 *
20 * CDDL HEADER END
21 */
22
23 /*
24 * Copyright 2004 Sun Microsystems, Inc. All rights reserved.
25 * Use is subject to license terms.
26 */
27
28 /* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */
29 /* All Rights Reserved */
30
31 #include "awk.def"
32 #include "stdio.h"
33 #include "awk.h"
34 #include <stdlib.h>
35
36
37 extern NODE *op2();
38 extern struct fa *cgotofn();
39 #define MAXLIN 256
40 #define NCHARS 128
41 #define NSTATES 256
42
43
44 #define type(v) v->nobj
45 #define left(v) v->narg[0]
46 #define right(v) v->narg[1]
47 #define parent(v) v->nnext
48
49
50 #define LEAF case CCL: case NCCL: case CHAR: case DOT:
51 #define UNARY case FINAL: case STAR: case PLUS: case QUEST:
52
53
54 /*
55 * encoding in tree NODEs:
56 * leaf (CCL, NCCL, CHAR, DOT): left is index,
57 * right contains value or pointer to value
58 * unary (FINAL, STAR, PLUS, QUEST): left is child, right is null
59 * binary (CAT, OR): left and right are children
60 * parent contains pointer to parent
61 */
62
63
64 struct fa {
65 union {
66 ccl_chars_t s;
67 int h;
68 } cc;
69 #define MLCMPLT(m1, l1, m2, l2) ((m1 != m2 &&\
70 (int)m1 < (int)m2) ||\
71 (m1 == m2 && (int)l1 < (int)l2))
72 #define MLCMPLE(m1, l1, m2, l2) ((m1 != m2 &&\
73 (int)m1 <= (int)m2) ||\
74 (m1 == m2 && (int)l1 <= (int)l2))
75 #define MLCMPGT(m1, l1, m2, l2) ((m1 != m2 &&\
76 (int)m1 > (int)m2) ||\
77 (m1 == m2 && (int)l1 > (int)l2))
78 #define MAX_CODESET 3
79 struct fa *st;
80 };
81
82
83 int *state[NSTATES];
84 int *foll[MAXLIN];
85 int setvec[MAXLIN];
86 NODE *point[MAXLIN];
87
88
89 int setcnt;
90 int line;
91
92
93 static int ccln_member();
94 static int insert_table();
95 static int delete_table();
96 static void penter(NODE *p);
97 static void follow(NODE *v);
98 static void overflo(void);
99 static void cfoll(NODE *v);
100 static void freetr(NODE *p);
101 #ifdef DEBUG
102 #define ddump_table(t, s) dump_table(t, s)
103 #else
104 #define ddump_table(t, s)
105 #endif
106
107 struct fa *
makedfa(p)108 makedfa(p) /* returns dfa for tree pointed to by p */
109 NODE *p;
110 {
111 NODE *p1;
112 struct fa *fap;
113 p1 = op2(CAT, op2(STAR, op2(DOT, (NODE *) 0,
114 (NODE *) 0), (NODE *) 0), p);
115 /* put DOT STAR in front of reg. exp. */
116 p1 = op2(FINAL, p1, (NODE *) 0); /* install FINAL NODE */
117
118
119 line = 0;
120 penter(p1); /* enter parent pointers and leaf indices */
121 point[line] = p1; /* FINAL NODE */
122 setvec[0] = 1; /* for initial DOT STAR */
123 cfoll(p1); /* set up follow sets */
124 fap = cgotofn();
125 freetr(p1); /* add this when alloc works */
126 return (fap);
127 }
128
129 static void
penter(NODE * p)130 penter(NODE *p) /* set up parent pointers and leaf indices */
131 {
132 switch (type(p)) {
133 LEAF
134 left(p) = (NODE *)line;
135 point[line++] = p;
136 break;
137 UNARY
138 penter(left(p));
139 parent(left(p)) = p;
140 break;
141 case CAT:
142 case OR:
143 penter(left(p));
144 penter(right(p));
145 parent(left(p)) = p;
146 parent(right(p)) = p;
147 break;
148 default:
149 error(FATAL, "unknown type %d in penter\n", type(p));
150 break;
151 }
152 }
153
154 static void
freetr(NODE * p)155 freetr(NODE *p) /* free parse tree and follow sets */
156 {
157 switch (type(p)) {
158 LEAF
159 xfree(foll[(int)left(p)]);
160 xfree(p);
161 break;
162 UNARY
163 freetr(left(p));
164 xfree(p);
165 break;
166 case CAT:
167 case OR:
168 freetr(left(p));
169 freetr(right(p));
170 xfree(p);
171 break;
172 default:
173 error(FATAL, "unknown type %d in freetr", type(p));
174 break;
175 }
176 }
177 ccl_chars_t *
cclenter(wchar_t * p)178 cclenter(wchar_t *p)
179 {
180 int i, cn;
181 wchar_t c, pc;
182 wchar_t *op;
183 ccl_chars_t *new;
184 ccl_chars_t chars[MAXLIN];
185
186 op = p;
187 i = 0;
188 while ((c = *p++) != 0) {
189 if (c == '-' && i > 0) {
190 if (*p != 0) {
191 /*
192 * If there are not in same code set, the
193 * class should be ignore (make two independent
194 * characters)!
195 */
196 c = *p++;
197 cn = wcsetno(pc);
198 if (cn != wcsetno(c) || pc > c)
199 goto char_array;
200 i = insert_table(chars, i, cn, pc, cn, c);
201 continue;
202 }
203 }
204 char_array:
205 if (i >= MAXLIN)
206 overflo();
207 cn = wcsetno(c);
208 i = insert_table(chars, i, cn, c, cn, c);
209 pc = c;
210 }
211 dprintf("cclenter: in = |%ws|, ", op, NULL, NULL);
212 xfree(op);
213 i = (i + 1) * sizeof (ccl_chars_t);
214 if ((new = (ccl_chars_t *)malloc(i)) == NULL)
215 error(FATAL, "out of space in cclenter on %s", op);
216 (void) memcpy((char *)new, (char *)chars, i);
217 ddump_table(chars, i / 4);
218
219
220 return (new);
221 }
222
223 static void
overflo(void)224 overflo(void)
225 {
226 error(FATAL, "regular expression too long\n");
227 }
228
229 static void
cfoll(NODE * v)230 cfoll(NODE *v) /* enter follow set of each leaf of vertex v into foll[leaf] */
231 {
232 int i;
233 int prev;
234 int *add();
235
236
237 switch (type(v)) {
238 LEAF
239 setcnt = 0;
240 for (i = 1; i <= line; i++)
241 setvec[i] = 0;
242 follow(v);
243 foll[(int)left(v)] = add(setcnt);
244 break;
245 UNARY
246 cfoll(left(v));
247 break;
248 case CAT:
249 case OR:
250 cfoll(left(v));
251 cfoll(right(v));
252 break;
253 default:
254 error(FATAL, "unknown type %d in cfoll", type(v));
255 }
256 }
257
258 int
first(NODE * p)259 first(NODE *p) /* collects initially active leaves of p into setvec */
260 /* returns 0 or 1 depending on whether p matches empty string */
261 {
262 int b;
263
264
265 switch (type(p)) {
266 LEAF
267 if (setvec[(int)left(p)] != 1) {
268 setvec[(int)left(p)] = 1;
269 setcnt++;
270 }
271 if (type(p) == CCL &&
272 (*(ccl_chars_t *)right(p)).cc_cs == (wchar_t)0x0)
273 return (0); /* empty CCL */
274 else return (1);
275 case FINAL:
276 case PLUS:
277 if (first(left(p)) == 0)
278 return (0);
279 return (1);
280 case STAR:
281 case QUEST:
282 first(left(p));
283 return (0);
284 case CAT:
285 if (first(left(p)) == 0 && first(right(p)) == 0)
286 return (0);
287 return (1);
288 case OR:
289 b = first(right(p));
290 if (first(left(p)) == 0 || b == 0)
291 return (0);
292 return (1);
293 }
294 error(FATAL, "unknown type %d in first\n", type(p));
295 return (-1);
296 }
297
298 static void
follow(NODE * v)299 follow(NODE *v)
300 /* collects leaves that can follow v into setvec */
301 {
302 NODE *p;
303
304
305 if (type(v) == FINAL)
306 return;
307 p = parent(v);
308 switch (type(p)) {
309 case STAR:
310 case PLUS: first(v);
311 follow(p);
312 return;
313
314
315 case OR:
316 case QUEST: follow(p);
317 return;
318
319
320 case CAT: if (v == left(p)) { /* v is left child of p */
321 if (first(right(p)) == 0) {
322 follow(p);
323 return;
324 }
325 } else /* v is right child */
326 follow(p);
327 return;
328 case FINAL: if (setvec[line] != 1) {
329 setvec[line] = 1;
330 setcnt++;
331 }
332 return;
333 }
334 }
335
336
337 /*
338 * There are three type of functions for checking member ship. Because I have
339 * been changed structure of CCL tables. And some CCL tables end up with NULLs
340 * but someone has length and will includes NULLs in table as one of data.
341 * Please note, CCL table which has a length data and data will include NULLs,
342 * it only used within a this source file("b.c").
343 */
344
345 int /* is cs thru ce in s? */
ccl_member(int ns,wchar_t cs,int ne,wchar_t ce,ccl_chars_t * s)346 ccl_member(int ns, wchar_t cs, int ne, wchar_t ce, ccl_chars_t *s)
347 {
348 /*
349 * The specified range(cs, ce) must be beside the range between
350 * s->cc_start and s->cc_end to determine member.
351 */
352 while (s->cc_cs || s->cc_ce) {
353 if (MLCMPLE(s->cc_ns, s->cc_cs, ns, cs) &&
354 MLCMPLE(ne, ce, s->cc_ne, s->cc_ce))
355 return (1);
356 s++;
357 }
358 return (0);
359 }
360
361
362 static int /* is cs thru ce in s? */
ccln_member(int ns,wchar_t cs,int ne,wchar_t ce,ccl_chars_t * s,int n)363 ccln_member(int ns, wchar_t cs, int ne, wchar_t ce, ccl_chars_t *s, int n)
364 {
365 /*
366 * The specified range(cs, ce) must be beside the range between
367 * s->cc_start and s->cc_end to determine member.
368 */
369 while (n-- > 0) {
370 if (MLCMPLE(s->cc_ns, s->cc_cs, ns, cs) &&
371 MLCMPLE(ne, ce, s->cc_ne, s->cc_ce))
372 return (1);
373 s++;
374 }
375 return (0);
376 }
377
378
379 int
member(wchar_t c,wchar_t * s)380 member(wchar_t c, wchar_t *s) /* is c in s? */
381 {
382 while (*s)
383 if (c == *s++)
384 return (1);
385 return (0);
386 }
387
388 int
notin(int ** array,int n,int * prev)389 notin(int **array, int n, int *prev) /* is setvec in array[0] thru array[n]? */
390 {
391 int i, j;
392 int *ptr;
393 for (i = 0; i <= n; i++) {
394 ptr = array[i];
395 if (*ptr == setcnt) {
396 for (j = 0; j < setcnt; j++)
397 if (setvec[*(++ptr)] != 1) goto nxt;
398 *prev = i;
399 return (0);
400 }
401 nxt: /* dummy */;
402 }
403 return (1);
404 }
405
406
407 int *
add(int n)408 add(int n)
409 { /* remember setvec */
410 int *ptr, *p;
411 int i;
412 if ((p = ptr = (int *)malloc((n+1)*sizeof (int))) == NULL)
413 overflo();
414 *ptr = n;
415 dprintf("add(%d)\n", n, NULL, NULL);
416 for (i = 1; i <= line; i++)
417 if (setvec[i] == 1) {
418 *(++ptr) = i;
419 dprintf(" ptr = %o, *ptr = %d, i = %d\n", ptr, *ptr, i);
420 }
421 dprintf("\n", NULL, NULL, NULL);
422 return (p);
423 }
424
425
426 struct fa *
cgotofn()427 cgotofn()
428 {
429 int i, k;
430 int *ptr;
431 int ns, ne;
432 wchar_t cs, ce;
433 ccl_chars_t *p;
434 NODE *cp;
435 int j, n, s, ind, numtrans;
436 int finflg;
437 int curpos, num, prev;
438 struct fa *where[NSTATES];
439
440
441 struct {
442 ccl_chars_t cc;
443 int n;
444 } fatab[257];
445 struct fa *pfa;
446
447
448 char index[MAXLIN];
449 char iposns[MAXLIN];
450 int sposns[MAXLIN];
451 int spmax, spinit;
452 ccl_chars_t symbol[NCHARS];
453 ccl_chars_t isyms[NCHARS];
454 ccl_chars_t ssyms[NCHARS];
455 int ssmax, symax, ismax, ssinit;
456
457
458 wchar_t hat;
459 int hatcn;
460
461
462 for (i = 0; i <= line; i++) index[i] = iposns[i] = setvec[i] = 0;
463 isyms[0].cc_cs = isyms[0].cc_ce = (wchar_t)0x0;
464 for (i = 0; i < NCHARS; i++)
465 isyms[i] = symbol[i] = ssyms[i] = isyms[0];
466 symax = 0;
467 setcnt = 0;
468 /* compute initial positions and symbols of state 0 */
469 ismax = 0;
470 ssmax = 0;
471 ptr = state[0] = foll[0];
472 spinit = *ptr;
473 hat = HAT;
474 hatcn = wcsetno(hat);
475 for (i = 0; i < spinit; i++) {
476 curpos = *(++ptr);
477 sposns[i] = curpos;
478 iposns[curpos] = 1;
479 cp = point[curpos];
480 dprintf("i= %d, spinit = %d, curpos = %d\n", i, spinit, curpos);
481 switch (type(cp)) {
482 case CHAR:
483 k = (int)right(cp);
484 ns = wcsetno(k);
485 if (! ccln_member(ns, k, ns, k,
486 isyms, ismax)) {
487 ismax = insert_table(isyms, ismax,
488 ns, k, ns, k);
489 }
490 ssyms[ssmax].cc_ns = ns;
491 ssyms[ssmax].cc_cs = k;
492 ssyms[ssmax].cc_ne = ns;
493 ssyms[ssmax++].cc_ce = k;
494 break;
495 case DOT:
496 cs = WC_VERY_SMALL;
497 ns = 0;
498 ce = HAT - 1;
499 ne = hatcn;
500 if (! ccln_member(ns, cs, ne, ce,
501 isyms, ismax)) {
502 ismax = insert_table(isyms, ismax,
503 ns, cs, ne, ce);
504 }
505 ssyms[ssmax].cc_cs = cs;
506 ssyms[ssmax].cc_ns = ns;
507 ssyms[ssmax].cc_ce = ce;
508 ssyms[ssmax++].cc_ne = ne;
509 cs = HAT + 1;
510 ns = hatcn;
511 ce = WC_VERY_LARGE;
512 ne = MAX_CODESET;
513 if (! ccln_member(ns, cs, ne, ce,
514 isyms, ismax)) {
515 ismax = insert_table(isyms, ismax,
516 ns, cs, ne, ce);
517 }
518 ssyms[ssmax].cc_cs = cs;
519 ssyms[ssmax].cc_ns = ns;
520 ssyms[ssmax].cc_ce = ce;
521 ssyms[ssmax++].cc_ne = ne;
522 break;
523 case CCL:
524 cs = HAT;
525 ns = hatcn;
526 for (p = (ccl_chars_t *)right(cp);
527 p->cc_cs; p++) {
528 if ((p->cc_ns != ns ||\
529 p->cc_cs != cs) &&\
530 !ccln_member(p->cc_ns, p->cc_cs,
531 p->cc_ne, p->cc_ce, isyms, ismax)) {
532 ismax = insert_table(isyms,
533 ismax, p->cc_ns, p->cc_cs, p->cc_ne, p->cc_ce);
534 }
535 ssyms[ssmax++] = *p;
536 }
537 break;
538 case NCCL:
539 ns = 0;
540 cs = WC_VERY_SMALL;
541 for (p = (ccl_chars_t *)right(cp);
542 p->cc_cs; p++) {
543 if ((ns != hatcn || p->cc_cs != HAT) &&
544 ! ccln_member(ns, cs,
545 p->cc_ns, p->cc_cs-1,
546 isyms, ismax)) {
547 ismax = insert_table(isyms,
548 ismax,
549 ns, cs,
550 p->cc_ns,
551 p->cc_cs-1);
552 }
553 ssyms[ssmax].cc_ns = ns;
554 ssyms[ssmax].cc_cs = cs;
555 ssyms[ssmax].cc_ne = p->cc_ns;
556 ssyms[ssmax++].cc_ce = p->cc_cs-1;
557 if (p->cc_ce == (wchar_t)0x0) {
558 ns = p->cc_ns;
559 cs = p->cc_cs + 1;
560
561 } else {
562 ns = p->cc_ne;
563 cs = p->cc_ce + 1;
564 }
565 }
566 if ((ns != hatcn || cs != HAT) &&
567 ! ccln_member(ns, cs,
568 MAX_CODESET, WC_VERY_LARGE,
569 isyms, ismax)) {
570 ismax = insert_table(isyms, ismax,
571 ns, cs, MAX_CODESET,
572 WC_VERY_LARGE);
573 }
574 ssyms[ssmax].cc_ns = ns;
575 ssyms[ssmax].cc_cs = cs;
576 ssyms[ssmax].cc_ne = MAX_CODESET;
577 ssyms[ssmax++].cc_ce = WC_VERY_LARGE;
578 break;
579 }
580 }
581 ssinit = ssmax;
582 symax = 0;
583 n = 0;
584 for (s = 0; s <= n; s++) {
585 dprintf("s = %d\n", s, NULL, NULL);
586 ind = 0;
587 numtrans = 0;
588 finflg = 0;
589 if (*(state[s] + *state[s]) == line) { /* s final? */
590 finflg = 1;
591 goto tenter;
592 }
593 spmax = spinit;
594 ssmax = ssinit;
595 ptr = state[s];
596 num = *ptr;
597 for (i = 0; i < num; i++) {
598 curpos = *(++ptr);
599 if (iposns[curpos] != 1 && index[curpos] != 1) {
600 index[curpos] = 1;
601 sposns[spmax++] = curpos;
602 }
603 cp = point[curpos];
604 switch (type(cp)) {
605 case CHAR:
606 k = (int)right(cp);
607 ns = wcsetno(k);
608 if (! ccln_member(ns, k, ns, k,
609 isyms, ismax) &&
610 ! ccln_member(ns, k, ns, k,
611 symbol, symax)) {
612 symax = insert_table(symbol,
613 symax,
614 ns, k,
615 ns, k);
616 }
617 ssyms[ssmax].cc_ns = ns;
618 ssyms[ssmax].cc_cs = k;
619 ssyms[ssmax].cc_ne = ns;
620 ssyms[ssmax++].cc_ce = k;
621 break;
622 case DOT:
623 cs = WC_VERY_SMALL;
624 ns = 0;
625 ce = HAT - 1;
626 ne = hatcn;
627 if (! ccln_member(ns, cs, ne, ce,
628 isyms, ismax) &&
629 ! ccln_member(ns, cs, ne, ce,
630 symbol, symax)) {
631 symax = insert_table(symbol,
632 symax,
633 ns, cs,
634 ne, ce);
635 }
636 ssyms[ssmax].cc_cs = cs;
637 ssyms[ssmax].cc_ns = ns;
638 ssyms[ssmax].cc_ce = ce;
639 ssyms[ssmax++].cc_ne = ne;
640 cs = HAT + 1;
641 ns = hatcn;
642 ce = WC_VERY_LARGE;
643 ne = MAX_CODESET;
644 if (! ccln_member(ns, cs, ne, ce,
645 isyms, ismax) &&
646 ! ccln_member(ns, cs, ne, ce,
647 symbol, symax)) {
648 symax = insert_table(symbol,
649 symax,
650 ns, cs,
651 ne, ce);
652 }
653 ssyms[ssmax].cc_cs = cs;
654 ssyms[ssmax].cc_ns = ns;
655 ssyms[ssmax].cc_ce = ce;
656 ssyms[ssmax++].cc_ne = ne;
657 break;
658 case CCL:
659 cs = HAT;
660 ns = hatcn;
661 for (p = (ccl_chars_t *)right(cp);
662 p->cc_cs; p++) {
663 if ((p->cc_ns != ns ||
664 p->cc_cs != cs) &&
665 ! ccln_member(p->cc_ns,
666 p->cc_cs, p->cc_ne,
667 p->cc_ce, isyms, ismax) &&
668 !ccln_member(p->cc_ns, p->cc_cs,
669 p->cc_ne, p->cc_ce, symbol,
670 symax)) {
671 symax = insert_table(
672 symbol, symax, p->cc_ns,
673 p->cc_cs, p->cc_ne, p->cc_ce);
674 }
675 ssyms[ssmax++] = *p;
676 }
677 break;
678 case NCCL:
679 ns = 0;
680 cs = WC_VERY_SMALL;
681 for (p = (ccl_chars_t *)right(cp); p->cc_cs; p++) {
682 if ((p->cc_ns != hatcn || p->cc_cs != HAT) &&
683 ! ccln_member(ns, cs, p->cc_ns,
684 p->cc_cs-1, isyms, ismax) &&
685 ! ccln_member(ns, cs, p->cc_ns,
686 p->cc_cs-1, symbol, symax)) {
687 symax = insert_table(symbol,
688 symax, ns, cs, p->cc_ns, p->cc_cs-1);
689 }
690 ssyms[ssmax].cc_ns = ns;
691 ssyms[ssmax].cc_cs = cs;
692 ssyms[ssmax].cc_ne = p->cc_ns;
693 ssyms[ssmax++].cc_ce
694 = p->cc_cs-1;
695 if (p->cc_ce == (wchar_t)0x0) {
696 ns = p->cc_ns;
697 cs = p->cc_cs + 1;
698
699 } else {
700 ns = p->cc_ne;
701 cs = p->cc_ce + 1;
702 }
703 }
704 if ((ns != hatcn || cs != HAT) && ! ccln_member(ns, cs,
705 MAX_CODESET, WC_VERY_LARGE, isyms, ismax) &&
706 ! ccln_member(ns, cs, MAX_CODESET,
707 WC_VERY_LARGE, symbol, symax)) {
708 symax = insert_table(symbol, symax, ns, cs,
709 MAX_CODESET,
710 WC_VERY_LARGE);
711 }
712 ssyms[ssmax].cc_ns = ns;
713 ssyms[ssmax].cc_cs = cs;
714 ssyms[ssmax].cc_ne = MAX_CODESET;
715 ssyms[ssmax++].cc_ce = WC_VERY_LARGE;
716 break;
717 }
718 }
719 for (j = 0; j < ssmax; j++) { /* nextstate(s, ssyms[j]) */
720 ns = ssyms[j].cc_ns;
721 cs = ssyms[j].cc_cs;
722 ne = ssyms[j].cc_ne;
723 ce = ssyms[j].cc_ce;
724 dprintf("j = %d, cs = %o, ce = %o\n", j, cs, ce);
725 symax = delete_table(symbol, symax, ns, cs, ne, ce);
726 setcnt = 0;
727 for (k = 0; k <= line; k++) setvec[k] = 0;
728 for (i = 0; i < spmax; i++) {
729 index[sposns[i]] = 0;
730 cp = point[sposns[i]];
731 if ((k = type(cp)) != FINAL) {
732 if (k == CHAR && ns == ne && cs == ce &&
733 cs == (int)right(cp) ||
734 k == DOT || k == CCL &&
735 ccl_member(ns, cs, ne, ce,
736 (ccl_chars_t *)right(cp)) ||
737 k == NCCL &&
738 !ccl_member(ns, cs, ne, ce,
739 (ccl_chars_t *)right(cp))) {
740 ptr = foll[sposns[i]];
741 num = *ptr;
742 for (k = 0; k < num; k++) {
743 if (setvec[*(++ptr)] != 1 &&
744 iposns[*ptr] != 1) {
745 setvec[*ptr] = 1;
746 setcnt++;
747 }
748 }
749 }
750 }
751 } /* end nextstate */
752 if (notin(state, n, &prev)) {
753 if (n >= NSTATES - 1) {
754 printf("cgotofn: notin; state = %d, n = %d\n", state, n, NULL);
755 overflo();
756 }
757 state[++n] = add(setcnt);
758 dprintf(" delta(%d,[%o,%o])",
759 s, cs, ce);
760 dprintf(" = %d, ind = %d\n", n, ind+1, NULL);
761 fatab[++ind].cc.cc_ns = ns;
762 fatab[ind].cc.cc_cs = cs;
763 fatab[ind].cc.cc_ne = ne;
764 fatab[ind].cc.cc_ce = ce;
765 fatab[ind].n = n;
766 numtrans++;
767 } else {
768 if (prev != 0) {
769 dprintf(" delta(%d,[%o,%o])",
770 s, cs, ce);
771 dprintf("= %d, ind = %d\n",
772 prev, ind+1, NULL);
773 fatab[++ind].cc.cc_ns = ns;
774 fatab[ind].cc.cc_cs = cs;
775 fatab[ind].cc.cc_ne = ne;
776 fatab[ind].cc.cc_ce = ce;
777 fatab[ind].n = prev;
778 numtrans++;
779 }
780 }
781 }
782 tenter:
783 if ((pfa = (struct fa *)malloc((numtrans + 1)
784 * sizeof (struct fa))) == NULL)
785 overflo();
786 where[s] = pfa;
787 if (finflg)
788 pfa->cc.h = -1; /* s is a final state */
789 else
790 pfa->cc.h = numtrans;
791 pfa->st = 0;
792 for (i = 1, pfa += 1; i <= numtrans; i++, pfa++) {
793 pfa->cc.s = fatab[i].cc;
794 pfa->st = (struct fa *)fatab[i].n;
795 }
796 }
797 for (i = 0; i <= n; i++) {
798 if (i != 0) /* state[0] is freed later in freetr() */
799 xfree(state[i]); /* free state[i] */
800 pfa = where[i];
801 pfa->st = where[0];
802 dprintf("state %d: (%o)\n", i, pfa, NULL);
803 dprintf(" numtrans = %d, default = %o\n",
804 pfa->cc.h, pfa->st, NULL);
805 for (k = 1; k <= pfa->cc.h; k++) {
806 (pfa+k)->st = where[(int)(pfa+k)->st];
807 dprintf(" char = [%o,%o], nextstate = %o\n",
808 (pfa+k)->cc.s.cc_cs, (pfa+k)->cc.s.cc_ce,
809 (pfa+k)->st);
810 }
811 }
812 pfa = where[0];
813 if ((num = pfa->cc.h) < 0)
814 return (where[0]);
815 for (pfa += num; num; num--, pfa--)
816 if (pfa->cc.s.cc_ns == hatcn && pfa->cc.s.cc_cs == HAT) {
817 return (pfa->st);
818 }
819 return (where[0]);
820 }
821
822
823 /*
824 * Insert CCL entry to CCL table with maintain optimized order.
825 */
826 static int
insert_table(ccl_chars_t * table_base,int table_size,int ns,wchar_t cs,int ne,wchar_t ce)827 insert_table(ccl_chars_t *table_base, int table_size, int ns, wchar_t cs,
828 int ne, wchar_t ce)
829 {
830 int i;
831 int tns, tne;
832 wchar_t tcs, tce;
833 ccl_chars_t *table;
834 ccl_chars_t *saved_table;
835 int saved_i;
836
837
838
839
840 dprintf("Inserting {%o, %o} to table %o\n", cs, ce, table_base);
841 /*
842 * Searching the table to find out where should put the new item.
843 */
844 for (i = 0, table = table_base; i < table_size; i++, table++) {
845 tns = table->cc_ns;
846 tcs = table->cc_cs;
847 tne = table->cc_ne;
848 tce = table->cc_ce;
849 if (MLCMPLT(ne, ce, tns, (tcs - 1))) {
850 /*
851 * Quick! insert to font of current table entries.
852 */
853 qinsert:
854 table_size++;
855 for (; i < table_size; i++, table++) {
856 tns = table->cc_ns;
857 tcs = table->cc_cs;
858 tne = table->cc_ne;
859 tce = table->cc_ce;
860 table->cc_ns = ns;
861 table->cc_cs = cs;
862 table->cc_ne = ne;
863 table->cc_ce = ce;
864 ns = tns;
865 cs = tcs;
866 ne = tne;
867 ce = tce;
868 }
869 goto add_null;
870 } else if (MLCMPLE(tns, (tcs - 1), ns, cs) &&
871 MLCMPLE(ns, cs, tne, (tce + 1))) {
872 /*
873 * Starting point is within the current entry.
874 */
875 if (MLCMPGT(tns, tcs, ns, cs)) {
876 table->cc_ns = ns;
877 table->cc_cs = cs;
878 }
879 if (MLCMPLE(ne, ce, tne, tce)) {
880 return (table_size);
881 }
882 goto combine;
883 }
884 }
885
886
887 /*
888 * Adding new one to end of table.
889 */
890 table->cc_ns = ns;
891 table->cc_cs = cs;
892 table->cc_ne = ne;
893 table->cc_ce = ce;
894
895
896 table_size++;
897 goto add_null;
898
899
900
901
902 combine:
903 /*
904 * Check and try to combine the new entry with rest of entries.
905 */
906 if ((i + 1) >= table_size) {
907 table->cc_ne = ne;
908 table->cc_ce = ce;
909 return (table_size);
910 }
911
912
913 saved_table = table++;
914 saved_i = i++;
915
916
917 /*
918 * Finding the spot where we should put the end point.
919 */
920 for (; i < table_size; i++, table++) {
921 if (MLCMPLT(ne, ce, table->cc_ns, (table->cc_cs - 1))) {
922 break;
923 } else
924 if (MLCMPLE(table->cc_ns, (table->cc_cs - 1), ne, ce) &&
925 MLCMPLE(ne, ce, table->cc_ne, (table->cc_ce + 1))) {
926 /*
927 * Tack with this table.
928 */
929 if (MLCMPLT(ne, ce, table->cc_ne, table->cc_ce)) {
930 ne = table->cc_ne;
931 ce = table->cc_ce;
932 }
933 table++;
934 i++;
935 break;
936 }
937 }
938
939
940 saved_table->cc_ne = ne;
941 saved_table->cc_ce = ce;
942 saved_i = table_size - (i - saved_i - 1);
943
944
945 /*
946 * Moving the rest of entries.
947 */
948 for (; i < table_size; i++, table++)
949 *(++saved_table) = *table;
950 table_size = saved_i;
951
952
953 add_null:
954 table_base[table_size].cc_cs = (wchar_t)0x0;
955 table_base[table_size].cc_ce = (wchar_t)0x0;
956
957
958 return (table_size);
959 }
960
961
962
963
964 static int
delete_table(ccl_chars_t * table_base,int table_size,int ns,wchar_t cs,int ne,wchar_t ce)965 delete_table(ccl_chars_t *table_base, int table_size, int ns, wchar_t cs,
966 int ne, wchar_t ce)
967 {
968 int i;
969 int saved_i;
970 ccl_chars_t *table;
971 ccl_chars_t *saved_table;
972 int tns;
973 wchar_t tcs;
974 int tne;
975 wchar_t tce;
976
977
978
979
980 for (i = 0, table = table_base; i < table_size; i++, table++) {
981 tns = table->cc_ns;
982 tcs = table->cc_cs;
983 tne = table->cc_ne;
984 tce = table->cc_ce;
985 if (MLCMPLT(ne, ce, tns, tcs))
986 return (table_size);
987 else if (MLCMPLT(ne, ce, tne, tce)) {
988 if (MLCMPLE(ns, cs, tns, tcs)) {
989 /*
990 * Shrink type 1.
991 */
992 table->cc_ns = ne;
993 table->cc_cs = ce + 1;
994 return (table_size);
995
996 } else {
997 /*
998 * Spliting !!
999 */
1000 table->cc_ns = ne;
1001 table->cc_cs = ce + 1;
1002 tne = ns;
1003 tce = cs - 1;
1004 table_size++;
1005 for (; i < table_size; i++, table++) {
1006 ns = table->cc_ns;
1007 cs = table->cc_cs;
1008 ne = table->cc_ne;
1009 ce = table->cc_ce;
1010 table->cc_ns = tns;
1011 table->cc_cs = tcs;
1012 table->cc_ne = tne;
1013 table->cc_ce = tce;
1014 tns = ns;
1015 tcs = cs;
1016 tne = ne;
1017 tce = ce;
1018 }
1019 return (table_size);
1020 }
1021
1022 } else if (MLCMPLE(ns, cs, tne, tce)) {
1023 if (MLCMPGT(ns, cs, tns, tcs)) {
1024 /*
1025 * Shrink current table(type 2).
1026 */
1027 table->cc_ne = ns;
1028 table->cc_ce = cs - 1;
1029 table++;
1030 i++;
1031 }
1032 /*
1033 * Search for the end point.
1034 */
1035 saved_i = i;
1036 saved_table = table;
1037 for (; i < table_size; i++, table++) {
1038 if (MLCMPLT(ne, ce,
1039 table->cc_ns, table->cc_cs)) {
1040 /*
1041 * Easy point, no shrinks!
1042 */
1043 break;
1044
1045 } else if (MLCMPGT(table->cc_ne, table->cc_ce,
1046 ne, ce)) {
1047 /*
1048 * Shrinking...
1049 */
1050 table->cc_ns = ne;
1051 table->cc_cs = ce + 1;
1052 break;
1053 }
1054
1055
1056 }
1057 /*
1058 * Moving(removing) backword.
1059 */
1060 saved_i = table_size - (i - saved_i);
1061 for (; i < table_size; i++)
1062 *saved_table++ = *table++;
1063 return (saved_i);
1064 }
1065 }
1066 return (table_size);
1067 }
1068
1069
1070 #ifdef DEBUG
dump_table(ccl_chars_t * table,int size)1071 dump_table(ccl_chars_t *table, int size)
1072 {
1073 int i;
1074
1075
1076
1077
1078 if (! dbg)
1079 return;
1080
1081
1082 printf("Duming table %o with size %d\n", table, size);
1083 size++; /* To watch out NULL */
1084 for (i = 0; i < size; i++, table++) {
1085 printf("{%3o, %3o}, ", table->cc_cs, table->cc_ce);
1086 }
1087 printf("\n");
1088 }
1089 #endif /* DEBUG */
1090
1091
1092
1093 int
match(struct fa * pfa,wchar_t * p)1094 match(struct fa *pfa, wchar_t *p)
1095 {
1096 int count;
1097 int n, ns, ne;
1098 wchar_t c, cs, ce;
1099
1100
1101 if (p == 0)
1102 return (0);
1103 if (pfa->cc.h == 1) { /* fast test for first character, if possible */
1104 ns = (++pfa)->cc.s.cc_ns;
1105 cs = (pfa)->cc.s.cc_cs;
1106 ne = (pfa)->cc.s.cc_ne;
1107 ce = (pfa)->cc.s.cc_ce;
1108 do {
1109 c = *p;
1110 n = wcsetno(c);
1111 if (MLCMPLE(ns, cs, n, c) &&
1112 MLCMPLE(n, c, ne, ce)) {
1113 p++;
1114 pfa = pfa->st;
1115 goto adv;
1116 }
1117 } while (*p++ != 0);
1118 return (0);
1119 }
1120 adv: if ((count = pfa->cc.h) < 0)
1121 return (1);
1122 do {
1123 c = *p;
1124 n = wcsetno(c);
1125 for (pfa += count; count; count--, pfa--) {
1126 ns = (pfa)->cc.s.cc_ns;
1127 cs = (pfa)->cc.s.cc_cs;
1128 ne = (pfa)->cc.s.cc_ne;
1129 ce = (pfa)->cc.s.cc_ce;
1130 if (MLCMPLE(ns, cs, n, c) && MLCMPLE(n, c, ne, ce))
1131 break;
1132 }
1133 pfa = pfa->st;
1134 if ((count = pfa->cc.h) < 0)
1135 return (1);
1136 } while (*p++ != 0);
1137 return (0);
1138 }
1139