1 /* Copyright (c) 1991 Sun Wu and Udi Manber.  All Rights Reserved. */
2 #include "agrep.h"
3 
/* Globals defined in other translation units. */

/* Written by maskgen(). */
extern unsigned wildmask;
extern unsigned endposition;
extern unsigned D_endpos;
extern unsigned Init1;
extern int Mask[];
extern int Init[];
extern int NO_ERR_MASK;
extern int AND;

/* Only read by maskgen(). */
extern int Bit[];
extern int REGEX;
extern int NOUPPER;
extern int D_length;
extern unsigned char Progname[];

/* Declared here but not referenced by maskgen(). */
extern int SIMPLEPATTERN;
8 
maskgen(Pattern,D)9 int maskgen(Pattern, D)
10 unsigned char *Pattern; int D;
11 {
12 struct term { int flag; unsigned char class[WORD];
13             } position[WORD+10];
14 unsigned char c;
15 
16 int i, j, k, l, M, OR=0, EVEN = 0, base, No_error;
17 
18 
19 for(i=0; i<WORD; i++) position[i].class[0] = '\0';
20 for(i=0; i<WORD; i++) position[i].flag = 0;
21 wildmask = NO_ERR_MASK = endposition = 0;
22 No_error = 0;
23 M = strlen(Pattern);
24 if(NOUPPER) {
25               for(i=0; i<M; i++) if(isalpha(Pattern[i]))
26                      if (isupper(Pattern[i])) Pattern[i] = tolower(Pattern[i]);
27             }
28 #ifdef DEBUG
29 	for(i=0; i<M; i++) printf(" %d", Pattern[i]);
30 	printf("\n");
31 #endif
32 for (i=0, j=1; i< M; i++)
33 {
34   switch (Pattern[i])
35   {
36     case WILDCD : if(REGEX) {
37                      position[j].class[0] = '.';
38                      position[j].class[1] = '.';
39                      position[j++].class[2] = '\0';
40                      break;
41                   }
42                   wildmask = wildmask | Bit[j-1]; break;
43     case STAR   : break;
44     case ORSYM  : break;
45     case LPARENT: break;
46     case RPARENT: break;
47     case LANGLE : No_error = ON; EVEN++;
48                   break;
49     case RANGLE : No_error = OFF; EVEN--;
50                   if(EVEN < 0) {
51                      fprintf(stderr, "%s: illegal pattern, unmatched '<', '>'\n", Progname);
52                      exit(2);
53                   }
54                   break;
55     case LRANGE : if(No_error == ON) NO_ERR_MASK = NO_ERR_MASK | Bit[j];
56                   i=i+1;
57                   if (Pattern[i] == NOTSYM) { position[j].flag = Compl; i++; }
58                   k=0;
59                   while (Pattern[i] != RRANGE && i < M)
60                   {
61                     if(Pattern[i] == HYPHEN)
62                        { position[j].class[k-1] = Pattern[i+1]; i=i+2; }
63                     else {
64                      position[j].class[k] = position[j].class[k+1] = Pattern[i];
65                      k = k+2; i++;
66                     }
67                   }
68                   if(i == M) {
69                      fprintf(stderr, "%s: illegal pattern, unmatched '[', ']'\n",Progname);
70                      exit(2);
71                   }
72                   position[j].class[k] = '\0';
73                   j++; break;
74     case RRANGE : fprintf(stderr, "%s: illegal pattern, unmatched '[', ']'\n", Progname);
75                   exit(2);
76                   break;
77     case ORPAT  : if(REGEX == ON || AND == ON) {
78                      fprintf(stderr, "illegal pattern \n");
79                      exit(2);
80                   }
81                   OR = ON;
82                   position[j].flag = 2; position[j].class[0] = '\0';
83                   endposition = endposition | Bit[j++]; break;
84     case ANDPAT : position[j].flag = 2; position[j].class[0] = '\0';
85                   if(j > D_length) AND = ON;
86                   if(OR || (REGEX == ON && j>D_length)) {
87                      fprintf(stderr, "illegal pattern \n");
88                      exit(2);
89                   }
90                   endposition = endposition | Bit[j++]; break;
91 /*
92     case ' '    : if (Pattern[i-1] == ORPAT || Pattern[i-1] == ANDPAT) break;
93                   if(No_error == ON) NO_ERR_MASK = NO_ERR_MASK | Bit[j];
94                   position[j].flag = 0;
95                   position[j].class[0] = position[j].class[1] = Pattern[i];
96                   position[j++].class[2] = '\0';  break;
97 */
98     case '\n'   : NO_ERR_MASK = NO_ERR_MASK | Bit[j];
99                   position[j].class[0] = position[j].class[1] = '\n';
100                   position[j++].class[2] = '\0';
101                   break;
102     case WORDB  : NO_ERR_MASK = NO_ERR_MASK | Bit[j];
103                   position[j].class[0] = 1;
104                   position[j].class[1] = 47;
105                   position[j].class[2] = 58;
106                   position[j].class[3] = 64;
107                   position[j].class[4] = 91;
108                   position[j].class[5] = 96;
109                   position[j].class[6] = 123;
110                   position[j].class[7] = 127;
111                   position[j++].class[8] = '\0';
112                   break;
113     case NNLINE : NO_ERR_MASK |= Bit[j];
114                   position[j].class[0] = position[j].class[1] = '\n';
115                   position[j].class[2] = position[j].class[3] = NNLINE;
116                   position[j++].class[4] = '\0';
117                   break;
118     default : if(No_error == ON) NO_ERR_MASK = NO_ERR_MASK | Bit[j];
119                   position[j].flag = 0;
120                   position[j].class[0] = position[j].class[1] = Pattern[i];
121                   position[j++].class[2] = '\0';
122   }
123   if(j > WORD) {
124      fprintf(stderr, "%s: pattern too long\n", Progname);
125      exit(2);
126   }
127 }
128   if (EVEN != 0) {
129      fprintf(stderr, "%s: illegal pattern, unmatched '<', '>'\n", Progname);
130      exit(2);
131   }
132 M = j - 1;
133 base = WORD - M;
134 wildmask = (wildmask >> base);
135 endposition = (endposition >> base);
136 NO_ERR_MASK = (NO_ERR_MASK >> 1) & (~Bit[1]);
137 NO_ERR_MASK = ~NO_ERR_MASK >> (base-1);
138   for (i=1; i<= WORD - M ; i++) Init[0] = Init[0] | Bit[i];
139   Init[0] = Init[0] | endposition;
140              /* not necessary for INit[i], i>0, */
141              /* but at every begining of the matching process append one
142                 no-match character to initialize the error vectors */
143   endposition = ( endposition << 1 ) + 1;
144   Init1 = (Init[0] | wildmask | endposition) ;
145   D_endpos = ( endposition >> ( M - D_length ) ) << ( M - D_length);
146   endposition = endposition ^ D_endpos;
147 #ifdef DEBUG
148 	printf("endposition: %o\n", endposition);
149 	printf("no_err_mask: %o\n", NO_ERR_MASK);
150 #endif
151   for(c=0, i=0; i < MAXSYM; c++, i++)
152   {
153      for (k=1, l=0; k<=M ; k++, l=0)  {
154          while (position[k].class[l] != '\0') {
155                if (position[k].class[l] == NOCARE && (c != '\n' || REGEX) )
156                   {  Mask[c] = Mask[c] | Bit[base + k]; break; }
157                if (c >= position[k].class[l] && c <= position[k].class[l+1])
158                   {  Mask[c] = Mask[c] | Bit[base + k]; break; }
159                l = l + 2;  }
160          if (position[k].flag == Compl) Mask[c] = Mask[c] ^ Bit[base+k];
161      }
162   }
163   if(NOUPPER) for(c='A'; c<='Z'; c=c+1) if (isupper(c))
164                   Mask[c] = Mask[tolower(c)];
165   return(M);
166 }
167 
168