1 /* Copyright (c) 1991 Sun Wu and Udi Manber.  All Rights Reserved. */
2 /* if the pattern is not simple fixed pattern, then after preprocessing */
3 /* and generating the masks, the program goes here. four cases:  1.     */
4 /* the pattern is simple regular expression and no error, then do the   */
5 /* matching here.  2. the pattern is simple regular expression and      */
6 /* unit cost errors are allowed: then go to asearch().                  */
7 /* 3. the pattern is simple regular expression, and the edit cost is    */
8 /* not uniform, then go to asearch1().                                  */
9 /* if the pattern is regular expression then go to re() if M < 14,      */
10 /* else go to re1()                                                     */
11 /* input parameters: old_D_pat: delimiter pattern.                      */
12 /* fd, input file descriptor, M: size of pattern, D: # of errors.       */
13 
#include "agrep.h"

#include <string.h>
#include <unistd.h>
17 
18 extern unsigned Init1, D_endpos, endposition, Init[], Mask[], Bit[];
19 extern int DELIMITER, FILENAMEONLY, D_length, I, AND, REGEX, JUMP, INVERSE;
20 extern char D_pattern[];
21 extern int TRUNCATE, DD, S;
22 extern char Progname[], CurrentFileName[];
23 extern int num_of_matched;
24 
25 /* bitap dispatches job */
26 
bitap(old_D_pat,Pattern,fd,M,D)27 void bitap(old_D_pat, Pattern, fd, M, D)
28 char old_D_pat[], *Pattern;  int fd, M, D;
29 {
30 char c;
31 register unsigned r1, r2, r3, CMask, i;
32 register unsigned end, endpos, r_Init1;
33 register unsigned D_Mask;
34 int  ResidueSize , FIRSTROUND, lasti, print_end, j, num_read;
35 int  k;
36 char buffer[Max_record+Max_record+BlockSize];
37   D_length = strlen(old_D_pat);
38   for(i=0; i<D_length; i++) if(old_D_pat[i] == '^' || old_D_pat[i] == '$')
39                                old_D_pat[i] = '\n';
40   if (REGEX) {
41       if (D > 4) {
42           fprintf(stderr, "%s: the maximum number of erorrs allowed for full regular expression is 4\n", Progname);
43           exit(2);
44       }
45       if (M <= SHORTREG) { re(fd, M, D);   /* SUN: need to find a even point */
46                      return; }
47       else { re1(fd, M, D);
48              return; }
49   }
50   if (D > 0 && JUMP == ON)
51      { asearch1(old_D_pat, fd, D); return; }
52   if (D > 0)
53      { asearch(old_D_pat, fd, D); return; }
54   if(I == 0) Init1 = 037777777777;
55 
56   j=0;
57   lasti = Max_record;
58   buffer[Max_record-1] = '\n';
59   r_Init1 = Init1;
60   r1 = r2 = r3 = Init[0];
61   endpos = D_endpos;
62 
63   buffer[Max_record-1] = '\n';
64   D_Mask = D_endpos;
65   for(i=1 ; i<D_length; i++) D_Mask = (D_Mask << 1) | D_Mask;
66   D_Mask = ~D_Mask;
67   FIRSTROUND = ON;
68 
69   while ((num_read = fill_buf(fd, buffer + Max_record, Max_record)) > 0)
70   {
71     i=Max_record; end = Max_record + num_read;
72     if(FIRSTROUND) {  i = Max_record - 1 ;
73 
74 			if(DELIMITER) {
75 				for(k=0; k<D_length; k++) {
76 					if(old_D_pat[k] != buffer[Max_record+k]) 						break;
77 				}
78 				if(k>=D_length) j--;
79 			}
80 
81                       FIRSTROUND = OFF;  }
82     if(num_read < BlockSize) {
83                       strncpy(buffer+Max_record+num_read, old_D_pat, D_length);
84                       end = end + D_length;
85                       buffer[end] = '\0';
86     }
87     while (i < end)
88     {
89         c = buffer[i++];
90         CMask = Mask[c];
91               r1 = r_Init1 & r3;
92               r2 = (( r3 >> 1 ) & CMask) | r1;
93         if ( r2 & endpos ) {
94            j++;
95            if(((AND == 1) && ((r2 & endposition) == endposition)) ||                           ((AND == 0) && (r2 & endposition)) ^ INVERSE )
96                {
97                  if(FILENAMEONLY) {
98                     num_of_matched++;
99                     printf("%s\n", CurrentFileName);
100                     return; }
101                  print_end = i - D_length - 1;
102                  if(!(lasti >= Max_record+num_read - 1))
103                     output(buffer, lasti, print_end, j);
104                }
105            lasti = i - D_length;
106            TRUNCATE = OFF;
107            r2 = r3 = r1 = Init[0];
108            r1 = r_Init1 & r3;
109            r2 = ((( r2 >> 1) & CMask) | r1 ) & D_Mask;
110         }
111         c = buffer[i++];
112         CMask = Mask[c];
113               r1 = r_Init1 & r2;
114               r3 = (( r2 >> 1 ) & CMask) | r1;
115         if ( r3 & endpos ) {
116            j++;
117            if(((AND == 1) && ((r3 & endposition) == endposition)) ||                           ((AND == 0) && (r3 & endposition)) ^ INVERSE )
118                {
119                  if(FILENAMEONLY) {
120                     num_of_matched++;
121                     printf("%s\n", CurrentFileName);
122                     return; }
123                  print_end = i - D_length - 1;
124                  if(!(lasti >= Max_record+num_read - 1))
125                     output(buffer, lasti, print_end, j);
126                }
127            lasti = i - D_length ;
128            TRUNCATE = OFF;
129            r2 = r3 = r1 = Init[0];
130            r1 = r_Init1 & r2;
131            r3 = ((( r2 >> 1) & CMask) | r1 ) & D_Mask;
132        }
133     }
134     ResidueSize = num_read + Max_record - lasti;
135     if(ResidueSize > Max_record) {
136             ResidueSize = Max_record;
137             TRUNCATE = ON;
138     }
139     strncpy(buffer+Max_record-ResidueSize, buffer+lasti, ResidueSize);
140     lasti = Max_record - ResidueSize;
141     if(lasti < 0) {
142        lasti = 1;
143     }
144   }
145   return;
146 }
147 
/*
 * fill_buf -- read from `fd` into `buf` until `record_size` bytes have
 * been stored, end of file is reached, or read() fails.
 *
 * Data is requested in pieces of at most 4096 bytes, and never more than
 * the space still left, so nothing is stored past buf[record_size - 1].
 *
 * Returns the number of bytes stored in buf (0 at end of file or when
 * record_size <= 0).  When read() fails, the bytes gathered so far are
 * returned; -1 is returned only when nothing had been read yet.
 */
int fill_buf(fd, buf, record_size)
int fd, record_size; unsigned char *buf;
{
	int num_read;
	int total_read = 0;
	int want;

	while (total_read < record_size) {
		want = record_size - total_read;
		if (want > 4096)
			want = 4096;
		num_read = read(fd, buf + total_read, want);
		if (num_read < 0) {
			/* Do not fold the -1 into the byte count. */
			return (total_read > 0) ? total_read : -1;
		}
		if (num_read == 0)
			break;		/* end of file */
		total_read += num_read;
	}
	return total_read;
}
159 
160