1 /* Copyright (c) 1991 Sun Wu and Udi Manber. All Rights Reserved. */
/* If the pattern is not a simple fixed pattern, then after preprocessing */
/* and generating the masks, the program comes here. Four cases: */
/* 1. the pattern is a simple regular expression and no errors are */
/* allowed: do the matching here. */
/* 2. the pattern is a simple regular expression and unit-cost errors */
/* are allowed: go to asearch(). */
/* 3. the pattern is a simple regular expression and the edit cost is */
/* not uniform: go to asearch1(). */
/* 4. the pattern is a full regular expression: go to re() if M < 14, */
/* else go to re1(). */
/* Input parameters: old_D_pat: delimiter pattern; fd: input file */
/* descriptor; M: size of pattern; D: number of errors. */
13
14 #include "agrep.h"
15
16 #include <unistd.h>
17
18 extern unsigned Init1, D_endpos, endposition, Init[], Mask[], Bit[];
19 extern int DELIMITER, FILENAMEONLY, D_length, I, AND, REGEX, JUMP, INVERSE;
20 extern char D_pattern[];
21 extern int TRUNCATE, DD, S;
22 extern char Progname[], CurrentFileName[];
23 extern int num_of_matched;
24
25 /* bitap dispatches job */
26
bitap(old_D_pat,Pattern,fd,M,D)27 void bitap(old_D_pat, Pattern, fd, M, D)
28 char old_D_pat[], *Pattern; int fd, M, D;
29 {
30 char c;
31 register unsigned r1, r2, r3, CMask, i;
32 register unsigned end, endpos, r_Init1;
33 register unsigned D_Mask;
34 int ResidueSize , FIRSTROUND, lasti, print_end, j, num_read;
35 int k;
36 char buffer[Max_record+Max_record+BlockSize];
37 D_length = strlen(old_D_pat);
38 for(i=0; i<D_length; i++) if(old_D_pat[i] == '^' || old_D_pat[i] == '$')
39 old_D_pat[i] = '\n';
40 if (REGEX) {
41 if (D > 4) {
42 fprintf(stderr, "%s: the maximum number of erorrs allowed for full regular expression is 4\n", Progname);
43 exit(2);
44 }
45 if (M <= SHORTREG) { re(fd, M, D); /* SUN: need to find a even point */
46 return; }
47 else { re1(fd, M, D);
48 return; }
49 }
50 if (D > 0 && JUMP == ON)
51 { asearch1(old_D_pat, fd, D); return; }
52 if (D > 0)
53 { asearch(old_D_pat, fd, D); return; }
54 if(I == 0) Init1 = 037777777777;
55
56 j=0;
57 lasti = Max_record;
58 buffer[Max_record-1] = '\n';
59 r_Init1 = Init1;
60 r1 = r2 = r3 = Init[0];
61 endpos = D_endpos;
62
63 buffer[Max_record-1] = '\n';
64 D_Mask = D_endpos;
65 for(i=1 ; i<D_length; i++) D_Mask = (D_Mask << 1) | D_Mask;
66 D_Mask = ~D_Mask;
67 FIRSTROUND = ON;
68
69 while ((num_read = fill_buf(fd, buffer + Max_record, Max_record)) > 0)
70 {
71 i=Max_record; end = Max_record + num_read;
72 if(FIRSTROUND) { i = Max_record - 1 ;
73
74 if(DELIMITER) {
75 for(k=0; k<D_length; k++) {
76 if(old_D_pat[k] != buffer[Max_record+k]) break;
77 }
78 if(k>=D_length) j--;
79 }
80
81 FIRSTROUND = OFF; }
82 if(num_read < BlockSize) {
83 strncpy(buffer+Max_record+num_read, old_D_pat, D_length);
84 end = end + D_length;
85 buffer[end] = '\0';
86 }
87 while (i < end)
88 {
89 c = buffer[i++];
90 CMask = Mask[c];
91 r1 = r_Init1 & r3;
92 r2 = (( r3 >> 1 ) & CMask) | r1;
93 if ( r2 & endpos ) {
94 j++;
95 if(((AND == 1) && ((r2 & endposition) == endposition)) || ((AND == 0) && (r2 & endposition)) ^ INVERSE )
96 {
97 if(FILENAMEONLY) {
98 num_of_matched++;
99 printf("%s\n", CurrentFileName);
100 return; }
101 print_end = i - D_length - 1;
102 if(!(lasti >= Max_record+num_read - 1))
103 output(buffer, lasti, print_end, j);
104 }
105 lasti = i - D_length;
106 TRUNCATE = OFF;
107 r2 = r3 = r1 = Init[0];
108 r1 = r_Init1 & r3;
109 r2 = ((( r2 >> 1) & CMask) | r1 ) & D_Mask;
110 }
111 c = buffer[i++];
112 CMask = Mask[c];
113 r1 = r_Init1 & r2;
114 r3 = (( r2 >> 1 ) & CMask) | r1;
115 if ( r3 & endpos ) {
116 j++;
117 if(((AND == 1) && ((r3 & endposition) == endposition)) || ((AND == 0) && (r3 & endposition)) ^ INVERSE )
118 {
119 if(FILENAMEONLY) {
120 num_of_matched++;
121 printf("%s\n", CurrentFileName);
122 return; }
123 print_end = i - D_length - 1;
124 if(!(lasti >= Max_record+num_read - 1))
125 output(buffer, lasti, print_end, j);
126 }
127 lasti = i - D_length ;
128 TRUNCATE = OFF;
129 r2 = r3 = r1 = Init[0];
130 r1 = r_Init1 & r2;
131 r3 = ((( r2 >> 1) & CMask) | r1 ) & D_Mask;
132 }
133 }
134 ResidueSize = num_read + Max_record - lasti;
135 if(ResidueSize > Max_record) {
136 ResidueSize = Max_record;
137 TRUNCATE = ON;
138 }
139 strncpy(buffer+Max_record-ResidueSize, buffer+lasti, ResidueSize);
140 lasti = Max_record - ResidueSize;
141 if(lasti < 0) {
142 lasti = 1;
143 }
144 }
145 return;
146 }
147
/*
 * fill_buf -- read from fd into buf until at least record_size bytes
 * have been stored, read() reports end of file, or read() fails.
 *
 * Every read() asks for 4096 bytes regardless of how much of
 * record_size is left, so up to 4095 bytes beyond record_size may be
 * stored and counted; the caller's buffer must leave room for that.
 * This is kept as is because callers may depend on the resulting
 * counts.
 *
 * Returns the number of bytes stored (0 at end of file), or -1 if
 * read() failed before anything was stored.  A failure after some data
 * was stored returns the bytes stored so far instead of folding the -1
 * into the count.
 */
int fill_buf(fd, buf, record_size)
int fd, record_size; unsigned char *buf;
{
    int num_read;
    int total_read = 0;

    while (total_read < record_size) {
        num_read = read(fd, buf + total_read, 4096);
        if (num_read < 0) {
            /* report what is already in buf; -1 only if there is nothing */
            return (total_read > 0) ? total_read : -1;
        }
        if (num_read == 0) {
            break;      /* end of file */
        }
        total_read = total_read + num_read;
    }
    return(total_read);
}
159
160