1 /* Copyright (c) 1991 Sun Wu and Udi Manber. All Rights Reserved. */
2 #include "agrep.h"
3
4 extern unsigned D_endpos, endposition, Init1, wildmask;
5 extern int Mask[], Bit[], Init[], NO_ERR_MASK;
6 extern int AND, SIMPLEPATTERN, REGEX, NOUPPER, D_length;
7 extern unsigned char Progname[];
8
maskgen(Pattern,D)9 int maskgen(Pattern, D)
10 unsigned char *Pattern; int D;
11 {
12 struct term { int flag; unsigned char class[WORD];
13 } position[WORD+10];
14 unsigned char c;
15
16 int i, j, k, l, M, OR=0, EVEN = 0, base, No_error;
17
18
19 for(i=0; i<WORD; i++) position[i].class[0] = '\0';
20 for(i=0; i<WORD; i++) position[i].flag = 0;
21 wildmask = NO_ERR_MASK = endposition = 0;
22 No_error = 0;
23 M = strlen(Pattern);
24 if(NOUPPER) {
25 for(i=0; i<M; i++) if(isalpha(Pattern[i]))
26 if (isupper(Pattern[i])) Pattern[i] = tolower(Pattern[i]);
27 }
28 #ifdef DEBUG
29 for(i=0; i<M; i++) printf(" %d", Pattern[i]);
30 printf("\n");
31 #endif
32 for (i=0, j=1; i< M; i++)
33 {
34 switch (Pattern[i])
35 {
36 case WILDCD : if(REGEX) {
37 position[j].class[0] = '.';
38 position[j].class[1] = '.';
39 position[j++].class[2] = '\0';
40 break;
41 }
42 wildmask = wildmask | Bit[j-1]; break;
43 case STAR : break;
44 case ORSYM : break;
45 case LPARENT: break;
46 case RPARENT: break;
47 case LANGLE : No_error = ON; EVEN++;
48 break;
49 case RANGLE : No_error = OFF; EVEN--;
50 if(EVEN < 0) {
51 fprintf(stderr, "%s: illegal pattern, unmatched '<', '>'\n", Progname);
52 exit(2);
53 }
54 break;
55 case LRANGE : if(No_error == ON) NO_ERR_MASK = NO_ERR_MASK | Bit[j];
56 i=i+1;
57 if (Pattern[i] == NOTSYM) { position[j].flag = Compl; i++; }
58 k=0;
59 while (Pattern[i] != RRANGE && i < M)
60 {
61 if(Pattern[i] == HYPHEN)
62 { position[j].class[k-1] = Pattern[i+1]; i=i+2; }
63 else {
64 position[j].class[k] = position[j].class[k+1] = Pattern[i];
65 k = k+2; i++;
66 }
67 }
68 if(i == M) {
69 fprintf(stderr, "%s: illegal pattern, unmatched '[', ']'\n",Progname);
70 exit(2);
71 }
72 position[j].class[k] = '\0';
73 j++; break;
74 case RRANGE : fprintf(stderr, "%s: illegal pattern, unmatched '[', ']'\n", Progname);
75 exit(2);
76 break;
77 case ORPAT : if(REGEX == ON || AND == ON) {
78 fprintf(stderr, "illegal pattern \n");
79 exit(2);
80 }
81 OR = ON;
82 position[j].flag = 2; position[j].class[0] = '\0';
83 endposition = endposition | Bit[j++]; break;
84 case ANDPAT : position[j].flag = 2; position[j].class[0] = '\0';
85 if(j > D_length) AND = ON;
86 if(OR || (REGEX == ON && j>D_length)) {
87 fprintf(stderr, "illegal pattern \n");
88 exit(2);
89 }
90 endposition = endposition | Bit[j++]; break;
91 /*
92 case ' ' : if (Pattern[i-1] == ORPAT || Pattern[i-1] == ANDPAT) break;
93 if(No_error == ON) NO_ERR_MASK = NO_ERR_MASK | Bit[j];
94 position[j].flag = 0;
95 position[j].class[0] = position[j].class[1] = Pattern[i];
96 position[j++].class[2] = '\0'; break;
97 */
98 case '\n' : NO_ERR_MASK = NO_ERR_MASK | Bit[j];
99 position[j].class[0] = position[j].class[1] = '\n';
100 position[j++].class[2] = '\0';
101 break;
102 case WORDB : NO_ERR_MASK = NO_ERR_MASK | Bit[j];
103 position[j].class[0] = 1;
104 position[j].class[1] = 47;
105 position[j].class[2] = 58;
106 position[j].class[3] = 64;
107 position[j].class[4] = 91;
108 position[j].class[5] = 96;
109 position[j].class[6] = 123;
110 position[j].class[7] = 127;
111 position[j++].class[8] = '\0';
112 break;
113 case NNLINE : NO_ERR_MASK |= Bit[j];
114 position[j].class[0] = position[j].class[1] = '\n';
115 position[j].class[2] = position[j].class[3] = NNLINE;
116 position[j++].class[4] = '\0';
117 break;
118 default : if(No_error == ON) NO_ERR_MASK = NO_ERR_MASK | Bit[j];
119 position[j].flag = 0;
120 position[j].class[0] = position[j].class[1] = Pattern[i];
121 position[j++].class[2] = '\0';
122 }
123 if(j > WORD) {
124 fprintf(stderr, "%s: pattern too long\n", Progname);
125 exit(2);
126 }
127 }
128 if (EVEN != 0) {
129 fprintf(stderr, "%s: illegal pattern, unmatched '<', '>'\n", Progname);
130 exit(2);
131 }
132 M = j - 1;
133 base = WORD - M;
134 wildmask = (wildmask >> base);
135 endposition = (endposition >> base);
136 NO_ERR_MASK = (NO_ERR_MASK >> 1) & (~Bit[1]);
137 NO_ERR_MASK = ~NO_ERR_MASK >> (base-1);
138 for (i=1; i<= WORD - M ; i++) Init[0] = Init[0] | Bit[i];
139 Init[0] = Init[0] | endposition;
140 /* not necessary for INit[i], i>0, */
141 /* but at every begining of the matching process append one
142 no-match character to initialize the error vectors */
143 endposition = ( endposition << 1 ) + 1;
144 Init1 = (Init[0] | wildmask | endposition) ;
145 D_endpos = ( endposition >> ( M - D_length ) ) << ( M - D_length);
146 endposition = endposition ^ D_endpos;
147 #ifdef DEBUG
148 printf("endposition: %o\n", endposition);
149 printf("no_err_mask: %o\n", NO_ERR_MASK);
150 #endif
151 for(c=0, i=0; i < MAXSYM; c++, i++)
152 {
153 for (k=1, l=0; k<=M ; k++, l=0) {
154 while (position[k].class[l] != '\0') {
155 if (position[k].class[l] == NOCARE && (c != '\n' || REGEX) )
156 { Mask[c] = Mask[c] | Bit[base + k]; break; }
157 if (c >= position[k].class[l] && c <= position[k].class[l+1])
158 { Mask[c] = Mask[c] | Bit[base + k]; break; }
159 l = l + 2; }
160 if (position[k].flag == Compl) Mask[c] = Mask[c] ^ Bit[base+k];
161 }
162 }
163 if(NOUPPER) for(c='A'; c<='Z'; c=c+1) if (isupper(c))
164 Mask[c] = Mask[tolower(c)];
165 return(M);
166 }
167
168