1 /*
2 * Copyright (c) 2005, 2008 Sun Microsystems, Inc. All Rights Reserved.
3 * Use is subject to license terms.
4 *
5 * Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T
6 * All Rights Reserved
7 *
8 * University Copyright- Copyright (c) 1982, 1986, 1988
9 * The Regents of the University of California
10 * All Rights Reserved
11 *
12 * University Acknowledgment- Portions of this document are derived from
13 * software developed by the University of California, Berkeley, and its
14 * contributors.
15 *
16 * Licensed under the Apache License, Version 2.0 (the "License");
17 * you may not use this file except in compliance with the License.
18 * You may obtain a copy of the License at
19 * http://www.apache.org/licenses/LICENSE-2.0.
20 *
21 * Unless required by applicable law or agreed to in writing, software
22 * distributed under the License is distributed on an "AS IS" BASIS,
23 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
24 * or implied.
25 * See the License for the specific language governing permissions and
26 * limitations under the License.
27 */
28
29 /* Code moved from regexp.h */
30
31 #include "apr.h"
32 #include "apr_lib.h"
33 #if APR_HAVE_LIMITS_H
34 #include <limits.h>
35 #endif
36 #if APR_HAVE_STDLIB_H
37 #include <stdlib.h>
38 #endif
39 #include "libsed.h"
40 #include "regexp.h"
41 #include "sed.h"
42
/*
 * Helper macros for the regular-expression compiler and matcher.
 *
 * GETC / PEEKC / UNGETC operate on a pointer named `sp` that must be in
 * scope at the point of use; bytes are returned as unsigned values so
 * that characters with the high bit set never compare as negative.
 * UNGETC ignores its argument and simply steps `sp` back by one byte.
 */
#define GETC() ((unsigned char)*sp++)
#define PEEKC() ((unsigned char)*sp)
#define UNGETC(c) (--sp)

/*
 * Record error code `c` in the local `regerrno` and jump to the local
 * label `out`.  Wrapped in do { } while (0) so that the macro behaves as
 * a single statement, including in front of an `else`.
 */
#define SEDCOMPILE_ERROR(c) do { \
    regerrno = (c); \
    goto out; \
} while (0)

/* True when the first n bytes of s1 and s2 compare equal (strncmp). */
#define ecmp(s1, s2, n) (strncmp((s1), (s2), (n)) == 0)

/*
 * True for an alphabetic character or underscore.  The argument is
 * converted to unsigned char before reaching isalpha(), as <ctype.h>
 * requires for plain char values.
 */
#define uletter(c) (isalpha((unsigned char)(c)) || (c) == '_')
52
53
/*
 * Single-bit masks indexed by bit position: bittab[i] == (1 << i).
 * NOTE(review): not referenced directly in this file; presumably used by
 * the PLACE()/ISTHERE() bitmap macros from regexp.h -- confirm there.
 */
static unsigned char bittab[] = { 1, 2, 4, 8, 16, 32, 64, 128 };

/* Forward declarations for the functions defined further down. */
static int regerr(sed_commands_t *commands, int err);
static void comperr(sed_commands_t *commands, char *msg);
static void getrnge(char *str, step_vars_storage *vars);
static int _advance(char *, char *, step_vars_storage *);
extern int sed_step(char *p1, char *p2, int circf, step_vars_storage *vars);
61
62
/*
 * comperr: report a compile error through command_errf().
 *
 * msg is used as the format string and commands->linebuf is supplied as
 * its single argument, so msg is expected to contain one "%s".
 */
static void comperr(sed_commands_t *commands, char *msg)
{
    command_errf(commands,
                 msg,
                 commands->linebuf);
}
67
68 /*
69 */
regerr(sed_commands_t * commands,int err)70 static int regerr(sed_commands_t *commands, int err)
71 {
72 switch(err) {
73 case 0:
74 /* No error */
75 break;
76 case 11:
77 comperr(commands, "Range endpoint too large: %s");
78 break;
79
80 case 16:
81 comperr(commands, "Bad number: %s");
82 break;
83
84 case 25:
85 comperr(commands, "``\\digit'' out of range: %s");
86 break;
87
88 case 36:
89 comperr(commands, "Illegal or missing delimiter: %s");
90 break;
91
92 case 41:
93 comperr(commands, "No remembered search string: %s");
94 break;
95
96 case 42:
97 comperr(commands, "\\( \\) imbalance: %s");
98 break;
99
100 case 43:
101 comperr(commands, "Too many \\(: %s");
102 break;
103
104 case 44:
105 comperr(commands, "More than 2 numbers given in \\{ \\}: %s");
106 break;
107
108 case 45:
109 comperr(commands, "} expected after \\: %s");
110 break;
111
112 case 46:
113 comperr(commands, "First number exceeds second in \\{ \\}: %s");
114 break;
115
116 case 49:
117 comperr(commands, "[ ] imbalance: %s");
118 break;
119
120 case 50:
121 comperr(commands, SEDERR_TMMES);
122 break;
123
124 default:
125 comperr(commands, "Unknown regexp error code %s\n");
126 break;
127 }
128 return (0);
129 }
130
131
/*
 * sed_compile: compile the regular expression found at commands->cp into
 * the byte-code buffer starting at ep.
 *
 * commands  source of the pattern text (commands->cp); on success
 *           commands->cp is advanced past the closing delimiter.  On
 *           error commands->cp is left untouched and the error is
 *           reported through regerr().
 * compargs  receives circf (non-zero when the pattern starts with '^')
 *           and nbra (number of \( groups opened).
 * ep        where the compiled program is written.
 * endbuf    limit of the output buffer; compilation fails with error 50
 *           once ep reaches it.
 * seof      delimiter character that terminates the pattern.
 *
 * Returns the position just past the last byte written, or NULL when
 * compilation failed.  An empty pattern (the delimiter or a newline comes
 * first) writes nothing and returns ep unchanged; a newline is pushed
 * back in that case so the caller sees it again.
 */
char *sed_compile(sed_commands_t *commands, sed_comp_args *compargs,
                  char *ep, char *endbuf, int seof)
{
    int c;
    int eof = seof;
    char *lastep;
    int cclcnt;
    char bracket[NBRA], *bracketp;
    int closed;
    int neg;
    int lc;
    int i, cflg;
    int iflag; /* used for non-ascii characters in brackets */
    char *sp = commands->cp;
    int regerrno = 0;

    lastep = NULL;
    if ((c = GETC()) == eof || c == '\n') {
        if (c == '\n') {
            UNGETC(c);
        }
        commands->cp = sp;
        goto out;
    }
    bracketp = bracket;
    compargs->circf = closed = compargs->nbra = 0;
    if (c == '^')
        compargs->circf++;
    else
        UNGETC(c);
    while (1) {
        if (ep >= endbuf)
            SEDCOMPILE_ERROR(50);
        c = GETC();
        /*
         * lastep tracks the start of the most recent item so that a
         * following '*' or \{ can modify its opcode in place; it must
         * not move when the current token is such a modifier.
         */
        if (c != '*' && ((c != '\\') || (PEEKC() != '{')))
            lastep = ep;
        if (c == eof) {
            *ep++ = CCEOF;
            if (bracketp != bracket)
                SEDCOMPILE_ERROR(42);
            commands->cp = sp;
            goto out;
        }
        switch (c) {

        case '.':
            *ep++ = CDOT;
            continue;

        case '\n':
            /* newline before the closing delimiter */
            SEDCOMPILE_ERROR(36);

        case '*':
            /* '*' with nothing to repeat is an ordinary character */
            if (lastep == 0 || *lastep == CBRA || *lastep == CKET)
                goto defchar;
            *lastep |= STAR;
            continue;

        case '$':
            /* '$' is an anchor only as the last pattern character */
            if (PEEKC() != eof && PEEKC() != '\n')
                goto defchar;
            *ep++ = CDOL;
            continue;

        case '[':
            /* opcode byte plus a 16-byte bitmap must fit */
            if (&ep[17] >= endbuf)
                SEDCOMPILE_ERROR(50);

            *ep++ = CCL;
            lc = 0;
            for (i = 0; i < 16; i++)
                ep[i] = 0;

            neg = 0;
            if ((c = GETC()) == '^') {
                neg = 1;
                c = GETC();
            }
            iflag = 1;
            do {
                c &= 0377;
                if (c == '\0' || c == '\n')
                    SEDCOMPILE_ERROR(49);
                /*
                 * First character with the high bit set: switch to the
                 * extended class opcode and clear the upper half of a
                 * 32-byte bitmap.
                 */
                if ((c & 0200) && iflag) {
                    iflag = 0;
                    if (&ep[32] >= endbuf)
                        SEDCOMPILE_ERROR(50);
                    ep[-1] = CXCL;
                    for (i = 16; i < 32; i++)
                        ep[i] = 0;
                }
                if (c == '-' && lc != 0) {
                    /* "x-]": the '-' is a literal member */
                    if ((c = GETC()) == ']') {
                        PLACE('-');
                        break;
                    }
                    /*
                     * The range end must be a real character too;
                     * without this check a NUL here would be stored and
                     * the scan would continue past the end of the text.
                     */
                    if (c == '\0' || c == '\n')
                        SEDCOMPILE_ERROR(49);
                    if ((c & 0200) && iflag) {
                        iflag = 0;
                        if (&ep[32] >= endbuf)
                            SEDCOMPILE_ERROR(50);
                        ep[-1] = CXCL;
                        for (i = 16; i < 32; i++)
                            ep[i] = 0;
                    }
                    /* add every character from the range start up to c */
                    while (lc < c) {
                        PLACE(lc);
                        lc++;
                    }
                }
                lc = c;
                PLACE(c);
            } while ((c = GETC()) != ']');

            /* from here on iflag holds the bitmap size in bytes */
            if (iflag)
                iflag = 16;
            else
                iflag = 32;

            if (neg) {
                if (iflag == 32) {
                    /* extended class: invert the bitmap, never match NUL */
                    for (cclcnt = 0; cclcnt < iflag;
                        cclcnt++)
                        ep[cclcnt] ^= 0377;
                    ep[0] &= 0376;
                } else {
                    ep[-1] = NCCL;
                    /* make nulls match so test fails */
                    ep[0] |= 01;
                }
            }

            ep += iflag;

            continue;

        case '\\':
            switch (c = GETC()) {

            case '(':
                if (compargs->nbra >= NBRA)
                    SEDCOMPILE_ERROR(43);
                *bracketp++ = compargs->nbra;
                *ep++ = CBRA;
                *ep++ = compargs->nbra++;
                continue;

            case ')':
                if (bracketp <= bracket)
                    SEDCOMPILE_ERROR(42);
                *ep++ = CKET;
                *ep++ = *--bracketp;
                closed++;
                continue;

            case '{':
                if (lastep == (char *) 0)
                    goto defchar;
                *lastep |= RNGE;
                cflg = 0;
            nlim:
                c = GETC();
                i = 0;
                do {
                    if (c < '0' || c > '9')
                        SEDCOMPILE_ERROR(16);
                    i = 10 * i + c - '0';
                    /*
                     * Saturate instead of letting a long digit string
                     * overflow the int; any value >= 255 is rejected
                     * below anyway.
                     */
                    if (i > 255)
                        i = 255;
                } while (((c = GETC()) != '\\') && (c != ','));
                if (i >= 255)
                    SEDCOMPILE_ERROR(11);
                *ep++ = i;
                if (c == ',') {
                    if (cflg++)
                        SEDCOMPILE_ERROR(44);
                    if ((c = GETC()) == '\\')
                        /* "\{m,\}": 255 marks an open upper bound */
                        *ep++ = (char) 255;
                    else {
                        UNGETC(c);
                        goto nlim;
                        /* get 2'nd number */
                    }
                }
                if (GETC() != '}')
                    SEDCOMPILE_ERROR(45);
                if (!cflg)   /* one number */
                    *ep++ = i;
                else if ((ep[-1] & 0377) < (ep[-2] & 0377))
                    SEDCOMPILE_ERROR(46);
                continue;

            case '\n':
                SEDCOMPILE_ERROR(36);

            case 'n':
                c = '\n';
                goto defchar;

            default:
                if (c >= '1' && c <= '9') {
                    /* back-reference: only to a group count already closed */
                    if ((c -= '1') >= closed)
                        SEDCOMPILE_ERROR(25);
                    *ep++ = CBACK;
                    *ep++ = c;
                    continue;
                }
            }
    /* Drop through to default to use \ to turn off special chars */

        defchar:
        default:
            lastep = ep;
            *ep++ = CCHR;
            *ep++ = c;
        }
    }
out:
    if (regerrno) {
        regerr(commands, regerrno);
        return (char*) NULL;
    }
    /* XXX : Basant : what extra */
    /* int reglength = (int)(ep - expbuf); */
    return ep;
}
357
/*
 * sed_step: search the NUL-terminated text p1 for a match of the
 * compiled expression p2.
 *
 * When circf is non-zero only a match starting at p1 itself is tried.
 * Otherwise every start position, including the terminating NUL, is
 * tried from left to right.  On success vars->loc1 is set to the start
 * position that matched.
 *
 * Returns 1 when a match was found and 0 otherwise (in the anchored case
 * the result of _advance() is returned as is).
 */
int sed_step(char *p1, char *p2, int circf, step_vars_storage *vars)
{
    char *start;

    /* Anchored pattern: a single attempt at the beginning. */
    if (circf) {
        vars->loc1 = p1;
        return _advance(p1, p2, vars);
    }

    /*
     * Pattern begins with a literal character: skip start positions
     * whose first character cannot match before calling _advance().
     */
    if (*p2 == CCHR) {
        int first = p2[1];

        for (start = p1; ; start++) {
            if (*start == first && _advance(start, p2, vars)) {
                vars->loc1 = start;
                return 1;
            }
            if (*start == '\0')
                return 0;
        }
    }

    /* General case: try every start position in turn. */
    for (start = p1; ; start++) {
        if (_advance(start, p2, vars)) {
            vars->loc1 = start;
            return 1;
        }
        if (*start == '\0')
            return 0;
    }
}
389
/*
 * _advance: try to match the compiled expression ep against the text
 * starting at lp.
 *
 * Returns 1 when the whole expression matched, with vars->loc2 set to
 * the first text position after the match, and 0 otherwise.  Group
 * boundaries are recorded in vars->braslist[] / vars->braelist[] as the
 * \( and \) opcodes are executed.
 *
 * Repetition opcodes (STAR and RNGE variants) first consume as much text
 * as they can and then jump to `star`, which backs off one character at
 * a time down to curlp, recursing on the rest of the expression at each
 * position.
 */
static int _advance(char *lp, char *ep, step_vars_storage *vars)
{
    char *curlp;
    int c;
    char *bbeg;
    char neg;
    int ct;
    int epint; /* int value of *ep */

    while (1) {
        neg = 0;
        switch (*ep++) {

        case CCHR:
            if (*ep++ == *lp++)
                continue;
            return (0);

        case CDOT:
            /* any character except the terminating NUL */
            if (*lp++)
                continue;
            return (0);

        case CDOL:
            if (*lp == 0)
                continue;
            return (0);

        case CCEOF:
            vars->loc2 = lp;
            return (1);

        case CXCL:
            c = (unsigned char)*lp++;
            if (ISTHERE(c)) {
                ep += 32;
                continue;
            }
            return (0);

        case NCCL:
            neg = 1;
            /* fallthrough */

        case CCL:
            c = *lp++;
            if (((c & 0200) == 0 && ISTHERE(c)) ^ neg) {
                ep += 16;
                continue;
            }
            return (0);

        case CBRA:
            epint = (int) *ep;
            vars->braslist[epint] = lp;
            ep++;
            continue;

        case CKET:
            epint = (int) *ep;
            vars->braelist[epint] = lp;
            ep++;
            continue;

        case CCHR | RNGE:
            c = *ep++;
            getrnge(ep, vars);
            /* mandatory repetitions */
            while (vars->low--)
                if (*lp++ != c)
                    return (0);
            curlp = lp;
            /* optional repetitions; a mismatch consumes one extra char */
            while (vars->size--)
                if (*lp++ != c)
                    break;
            /*
             * Loop ran to completion without a mismatch: step forward
             * once so the initial --lp at `star` lands on the same spot
             * as in the mismatch case.
             */
            if (vars->size < 0)
                lp++;
            ep += 2;
            goto star;

        case CDOT | RNGE:
            getrnge(ep, vars);
            while (vars->low--)
                if (*lp++ == '\0')
                    return (0);
            curlp = lp;
            while (vars->size--)
                if (*lp++ == '\0')
                    break;
            if (vars->size < 0)
                lp++;
            ep += 2;
            goto star;

        case CXCL | RNGE:
            /* the two range bytes follow the 32-byte bitmap */
            getrnge(ep + 32, vars);
            while (vars->low--) {
                c = (unsigned char)*lp++;
                if (!ISTHERE(c))
                    return (0);
            }
            curlp = lp;
            while (vars->size--) {
                c = (unsigned char)*lp++;
                if (!ISTHERE(c))
                    break;
            }
            if (vars->size < 0)
                lp++;
            ep += 34;        /* 32 + 2 */
            goto star;

        case NCCL | RNGE:
            neg = 1;
            /* fallthrough */

        case CCL | RNGE:
            /* the two range bytes follow the 16-byte bitmap */
            getrnge(ep + 16, vars);
            while (vars->low--) {
                c = *lp++;
                if (((c & 0200) || !ISTHERE(c)) ^ neg)
                    return (0);
            }
            curlp = lp;
            while (vars->size--) {
                c = *lp++;
                if (((c & 0200) || !ISTHERE(c)) ^ neg)
                    break;
            }
            if (vars->size < 0)
                lp++;
            ep += 18;         /* 16 + 2 */
            goto star;

        case CBACK:
            epint = (int) *ep;
            bbeg = vars->braslist[epint];
            ct = vars->braelist[epint] - bbeg;
            ep++;

            if (ecmp(bbeg, lp, ct)) {
                lp += ct;
                continue;
            }
            return (0);

        case CBACK | STAR:
            epint = (int) *ep;
            bbeg = vars->braslist[epint];
            ct = vars->braelist[epint] - bbeg;
            ep++;
            /*
             * A group of non-positive length cannot drive the loops
             * below: with ct == 0 the comparison always succeeds and lp
             * never moves, and with ct < 0 the retry loop would walk
             * forward indefinitely.  Repeating such a group consumes no
             * text, so carry on with the rest of the expression.
             */
            if (ct <= 0)
                continue;
            curlp = lp;
            /* consume as many copies of the group text as are present */
            while (ecmp(bbeg, lp, ct))
                lp += ct;

            /* then give them back one copy at a time */
            while (lp >= curlp) {
                if (_advance(lp, ep, vars))
                    return (1);
                lp -= ct;
            }
            return (0);


        case CDOT | STAR:
            curlp = lp;
            while (*lp++);
            goto star;

        case CCHR | STAR:
            curlp = lp;
            while (*lp++ == *ep);
            ep++;
            goto star;

        case CXCL | STAR:
            curlp = lp;
            do {
                c = (unsigned char)*lp++;
            } while (ISTHERE(c));
            ep += 32;
            goto star;

        case NCCL | STAR:
            neg = 1;
            /* fallthrough */

        case CCL | STAR:
            curlp = lp;
            do {
                c = *lp++;
            } while (((c & 0200) == 0 && ISTHERE(c)) ^ neg);
            ep += 16;
            goto star;

        star:
            /*
             * Back off from the longest candidate towards curlp, trying
             * the remainder of the expression at each position.  The
             * search also stops when lp reaches vars->locs.
             * NOTE(review): locs is set by the caller; presumably it
             * marks a position that must not be matched again -- confirm.
             */
            do {
                if (--lp == vars->locs)
                    break;
                if (_advance(lp, ep, vars))
                    return (1);
            } while (lp > curlp);
            return (0);

        }
    }
}
592
/*
 * getrnge: decode the two range bytes at str into vars.
 *
 * str[0] is the lower bound and is stored in vars->low.  str[1] is the
 * upper bound; the value 255 stands for "no upper bound" and yields a
 * size of 20000, otherwise vars->size is the upper bound minus the
 * stored lower bound.  Both bytes are read as unsigned values.
 */
static void getrnge(char *str, step_vars_storage *vars)
{
    const int upper = (unsigned char)str[1];

    vars->low = (unsigned char)str[0];

    if (upper == 255)
        vars->size = 20000;
    else
        vars->size = upper - vars->low;
}
598
599
600