1 /*
2  * Copyright (c) 2005, 2008 Sun Microsystems, Inc. All Rights Reserved.
3  * Use is subject to license terms.
4  *
5  *      Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T
6  *        All Rights Reserved
7  *
8  * University Copyright- Copyright (c) 1982, 1986, 1988
9  * The Regents of the University of California
10  * All Rights Reserved
11  *
12  * University Acknowledgment- Portions of this document are derived from
13  * software developed by the University of California, Berkeley, and its
14  * contributors.
15  *
16  * Licensed under the Apache License, Version 2.0 (the "License");
17  * you may not use this file except in compliance with the License.
18  * You may obtain a copy of the License at
19  *  http://www.apache.org/licenses/LICENSE-2.0.
20  *
21  * Unless required by applicable law or agreed to in writing, software
22  * distributed under the License is distributed on an "AS IS" BASIS,
23  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
24  * or implied.
25  * See the License for the specific language governing permissions and
26  * limitations under the License.
27  */
28 
29 /* Code moved from regexp.h */
30 
31 #include "apr.h"
32 #include "apr_lib.h"
33 #if APR_HAVE_LIMITS_H
34 #include <limits.h>
35 #endif
36 #if APR_HAVE_STDLIB_H
37 #include <stdlib.h>
38 #endif
39 #include "libsed.h"
40 #include "regexp.h"
41 #include "sed.h"
42 
/*
 * Input helpers used while compiling an expression.  They all operate on a
 * local cursor named `sp`, so they can only be expanded inside a function
 * that declares one.
 */
/* Fetch the byte under the cursor as an unsigned value, then advance. */
#define GETC() ((unsigned char)*sp++)
/* Look at the byte under the cursor without consuming it. */
#define PEEKC() ((unsigned char)*sp)
/* Step the cursor back by one byte; the argument itself is ignored. */
#define UNGETC(c) (--sp)
/*
 * Store error code c in the local `regerrno` and jump to the local label
 * `out`.  Expands to a brace block rather than a single expression.
 */
#define SEDCOMPILE_ERROR(c) { \
            regerrno = c; \
            goto out; \
            }
/* True when strncmp() reports the first n bytes of s1 and s2 as equal. */
#define ecmp(s1, s2, n)    (strncmp(s1, s2, n) == 0)
/* True for an alphabetic character or '_'; note that c is evaluated twice. */
#define uletter(c) (isalpha(c) || c == '_')
52 
53 
/*
 * Single-bit masks indexed by bit position 0..7.  Nothing in this part of
 * the file names the table directly; presumably the PLACE()/ISTHERE()
 * macros defined elsewhere expand to uses of it -- TODO confirm.
 */
static unsigned char bittab[] = { 1, 2, 4, 8, 16, 32, 64, 128 };

/* Forward declarations for the functions defined later in this file. */
static int regerr(sed_commands_t *commands, int err);
static void comperr(sed_commands_t *commands, char *msg);
static void getrnge(char *str, step_vars_storage *vars);
static int _advance(char *, char *, step_vars_storage *);
extern int sed_step(char *p1, char *p2, int circf, step_vars_storage *vars);
61 
62 
/*
 * Report a compile error through command_errf().
 *
 * msg is handed over as the format argument and commands->linebuf as the
 * single value that follows it, so every message passed in is expected to
 * contain exactly one string conversion.
 */
static void comperr(sed_commands_t *commands, char *msg)
{
    command_errf(commands, msg, commands->linebuf);
}
67 
68 /*
69 */
regerr(sed_commands_t * commands,int err)70 static int regerr(sed_commands_t *commands, int err)
71 {
72     switch(err) {
73     case 0:
74         /* No error */
75         break;
76     case 11:
77         comperr(commands, "Range endpoint too large: %s");
78         break;
79 
80     case 16:
81         comperr(commands, "Bad number: %s");
82         break;
83 
84     case 25:
85         comperr(commands, "``\\digit'' out of range: %s");
86         break;
87 
88     case 36:
89         comperr(commands, "Illegal or missing delimiter: %s");
90         break;
91 
92     case 41:
93         comperr(commands, "No remembered search string: %s");
94         break;
95 
96     case 42:
97         comperr(commands, "\\( \\) imbalance: %s");
98         break;
99 
100     case 43:
101         comperr(commands, "Too many \\(: %s");
102         break;
103 
104     case 44:
105         comperr(commands, "More than 2 numbers given in \\{ \\}: %s");
106         break;
107 
108     case 45:
109         comperr(commands, "} expected after \\: %s");
110         break;
111 
112     case 46:
113         comperr(commands, "First number exceeds second in \\{ \\}: %s");
114         break;
115 
116     case 49:
117         comperr(commands, "[ ] imbalance: %s");
118         break;
119 
120     case 50:
121         comperr(commands, SEDERR_TMMES);
122         break;
123 
124     default:
125         comperr(commands, "Unknown regexp error code %s\n");
126         break;
127     }
128     return (0);
129 }
130 
131 
/*
 * Compile the regular expression that starts at commands->cp into the
 * buffer beginning at ep.
 *
 * commands  supplies the source text (commands->cp) and receives error
 *           reports; on success commands->cp is moved past the closing
 *           delimiter (or left on the newline when the expression is empty
 *           and ends at a newline).
 * compargs  receives circf (set when the expression starts with '^') and
 *           nbra (number of \( groups opened).  Left untouched when the
 *           expression is empty.
 * ep        where the compiled form is written.
 * endbuf    limit of the output buffer; reaching it yields error 50.
 * seof      the delimiter character that terminates the expression.
 *
 * Returns the position just past the last byte written (ep itself for an
 * empty expression), or NULL after reporting an error through regerr().
 */
char *sed_compile(sed_commands_t *commands, sed_comp_args *compargs,
                  char *ep, char *endbuf, int seof)
{
    int c;
    int eof = seof;
    char *lastep;
    int cclcnt;
    char bracket[NBRA], *bracketp;
    int closed;
    int neg;
    int lc;
    int i, cflg;
    int iflag; /* used for non-ascii characters in brackets */
    char *sp = commands->cp;
    int regerrno = 0;

    lastep = 0;
    /* An immediately closed expression compiles to nothing at all. */
    if ((c = GETC()) == eof || c == '\n') {
        if (c == '\n') {
            UNGETC(c);
        }
        commands->cp = sp;
        goto out;
    }
    bracketp = bracket;
    compargs->circf = closed = compargs->nbra = 0;
    if (c == '^')
        compargs->circf++;
    else
        UNGETC(c);
    while (1) {
        if (ep >= endbuf)
            SEDCOMPILE_ERROR(50);
        c = GETC();
        /*
         * lastep tracks the element a following '*' or \{ applies to, so it
         * must not move when the current token is itself such a suffix.
         */
        if (c != '*' && ((c != '\\') || (PEEKC() != '{')))
            lastep = ep;
        if (c == eof) {
            *ep++ = CCEOF;
            /* Any \( still open at the delimiter is an imbalance. */
            if (bracketp != bracket)
                SEDCOMPILE_ERROR(42);
            commands->cp = sp;
            goto out;
        }
        switch (c) {

        case '.':
            *ep++ = CDOT;
            continue;

        case '\n':
            SEDCOMPILE_ERROR(36);

        case '*':
            /* '*' is literal at the start or right after \( or \). */
            if (lastep == 0 || *lastep == CBRA || *lastep == CKET)
                goto defchar;
            *lastep |= STAR;
            continue;

        case '$':
            /* '$' anchors only when it is the last character. */
            if (PEEKC() != eof && PEEKC() != '\n')
                goto defchar;
            *ep++ = CDOL;
            continue;

        case '[':
            if (&ep[17] >= endbuf)
                SEDCOMPILE_ERROR(50);

            *ep++ = CCL;
            lc = 0;
            for (i = 0; i < 16; i++)
                ep[i] = 0;

            neg = 0;
            if ((c = GETC()) == '^') {
                neg = 1;
                c = GETC();
            }
            iflag = 1;
            do {
                c &= 0377;
                if (c == '\0' || c == '\n')
                    SEDCOMPILE_ERROR(49);
                /*
                 * First byte with the high bit set: switch to the 32-byte
                 * class form and clear its upper half.
                 */
                if ((c & 0200) && iflag) {
                    iflag = 0;
                    if (&ep[32] >= endbuf)
                        SEDCOMPILE_ERROR(50);
                    ep[-1] = CXCL;
                    for (i = 16; i < 32; i++)
                        ep[i] = 0;
                }
                if (c == '-' && lc != 0) {
                    if ((c = GETC()) == ']') {
                        PLACE('-');
                        break;
                    }
                    /*
                     * The range endpoint gets the same end-of-input check
                     * as every other class member; without it the scan
                     * would run past the terminating NUL.
                     */
                    if (c == '\0' || c == '\n')
                        SEDCOMPILE_ERROR(49);
                    if ((c & 0200) && iflag) {
                        iflag = 0;
                        if (&ep[32] >= endbuf)
                            SEDCOMPILE_ERROR(50);
                        ep[-1] = CXCL;
                        for (i = 16; i < 32; i++)
                            ep[i] = 0;
                    }
                    while (lc < c) {
                        PLACE(lc);
                        lc++;
                    }
                }
                lc = c;
                PLACE(c);
            } while ((c = GETC()) != ']');

            /* From here on iflag is the size of the class bitmap. */
            if (iflag)
                iflag = 16;
            else
                iflag = 32;

            if (neg) {
                if (iflag == 32) {
                    for (cclcnt = 0; cclcnt < iflag;
                        cclcnt++)
                        ep[cclcnt] ^= 0377;
                    ep[0] &= 0376;
                } else {
                    ep[-1] = NCCL;
                    /* make nulls match so test fails */
                    ep[0] |= 01;
                }
            }

            ep += iflag;

            continue;

        case '\\':
            switch (c = GETC()) {

            case '(':
                if (compargs->nbra >= NBRA)
                    SEDCOMPILE_ERROR(43);
                *bracketp++ = compargs->nbra;
                *ep++ = CBRA;
                *ep++ = compargs->nbra++;
                continue;

            case ')':
                if (bracketp <= bracket)
                    SEDCOMPILE_ERROR(42);
                *ep++ = CKET;
                *ep++ = *--bracketp;
                closed++;
                continue;

            case '{':
                if (lastep == (char *) 0)
                    goto defchar;
                *lastep |= RNGE;
                cflg = 0;
            nlim:
                c = GETC();
                i = 0;
                do {
                    if (c < '0' || c > '9')
                        SEDCOMPILE_ERROR(16);
                    /*
                     * Stop accumulating once the value is already out of
                     * range: it is rejected below anyway, and growing it
                     * further could overflow a signed int.
                     */
                    if (i < 255)
                        i = 10 * i + (c - '0');
                } while (((c = GETC()) != '\\') && (c != ','));
                if (i >= 255)
                    SEDCOMPILE_ERROR(11);
                *ep++ = i;
                if (c == ',') {
                    if (cflg++)
                        SEDCOMPILE_ERROR(44);
                    if ((c = GETC()) == '\\')
                        /* open-ended upper bound */
                        *ep++ = (char) 255;
                    else {
                        UNGETC(c);
                        goto nlim;
                        /* get 2'nd number */
                    }
                }
                if (GETC() != '}')
                    SEDCOMPILE_ERROR(45);
                if (!cflg)    /* one number */
                    *ep++ = i;
                else if ((ep[-1] & 0377) < (ep[-2] & 0377))
                    SEDCOMPILE_ERROR(46);
                continue;

            case '\n':
                SEDCOMPILE_ERROR(36);

            case 'n':
                c = '\n';
                goto defchar;

            default:
                if (c >= '1' && c <= '9') {
                    /* Only groups that are already closed may be used. */
                    if ((c -= '1') >= closed)
                        SEDCOMPILE_ERROR(25);
                    *ep++ = CBACK;
                    *ep++ = c;
                    continue;
                }
            }
    /* Drop through to default to use \ to turn off special chars */

        defchar:
        default:
            lastep = ep;
            *ep++ = CCHR;
            *ep++ = c;
        }
    }
out:
    if (regerrno) {
        regerr(commands, regerrno);
        return (char*) NULL;
    }
    return ep;
}
357 
/*
 * Search the string p1 for a match of the compiled expression p2.
 *
 * With circf set the expression is tried at p1 only.  Otherwise every
 * position up to and including the terminating NUL is tried in turn.  On a
 * match vars->loc1 holds the position where it starts (vars->loc2 is filled
 * in by _advance()).
 *
 * Returns 1 when a match was found and 0 otherwise.
 */
int sed_step(char *p1, char *p2, int circf, step_vars_storage *vars)
{
    if (circf) {
        /* Anchored: a single attempt at the start of the string. */
        vars->loc1 = p1;
        return (_advance(p1, p2, vars));
    }

    if (*p2 == CCHR) {
        /*
         * The expression opens with a literal, so positions whose first
         * byte differs can be skipped without calling the matcher.
         */
        int first = p2[1];

        for (;;) {
            if (*p1 == first && _advance(p1, p2, vars)) {
                vars->loc1 = p1;
                return (1);
            }
            if (*p1++ == '\0') {
                return (0);
            }
        }
    }

    /* General case: try each position, the terminating NUL included. */
    for (;; p1++) {
        if (_advance(p1, p2, vars)) {
            vars->loc1 = p1;
            return (1);
        }
        if (*p1 == '\0') {
            return (0);
        }
    }
}
389 
/*
 * Try to match the compiled expression ep against the text starting at lp.
 *
 * The compiled form is a sequence of opcodes, each followed by its operands.
 * Group boundaries are recorded in vars->braslist / vars->braelist as they
 * are crossed, and vars->loc2 is set to the end of the match when the
 * end-of-expression opcode is reached.
 *
 * Repeated elements (STAR and RNGE variants) first consume as much as they
 * can and then hand over to the shared `star` code, which retries the rest
 * of the expression at each shorter length down to curlp.
 *
 * Returns 1 on a match and 0 otherwise.
 */
static int _advance(char *lp, char *ep, step_vars_storage *vars)
{
    char *curlp;
    int c;
    char *bbeg;
    char neg;
    int ct;
    int epint; /* int value of *ep */

    while (1) {
        neg = 0;
        switch (*ep++) {

        case CCHR:
            if (*ep++ == *lp++)
                continue;
            return (0);

        case CDOT:
            if (*lp++)
                continue;
            return (0);

        case CDOL:
            if (*lp == 0)
                continue;
            return (0);

        case CCEOF:
            vars->loc2 = lp;
            return (1);

        case CXCL:
            c = (unsigned char)*lp++;
            if (ISTHERE(c)) {
                ep += 32;
                continue;
            }
            return (0);

        case NCCL:
            neg = 1;
            /* fallthrough */

        case CCL:
            c = *lp++;
            if (((c & 0200) == 0 && ISTHERE(c)) ^ neg) {
                ep += 16;
                continue;
            }
            return (0);

        case CBRA:
            epint = (int) *ep;
            vars->braslist[epint] = lp;
            ep++;
            continue;

        case CKET:
            epint = (int) *ep;
            vars->braelist[epint] = lp;
            ep++;
            continue;

        case CCHR | RNGE:
            c = *ep++;
            getrnge(ep, vars);
            /* the mandatory repetitions */
            while (vars->low--)
                if (*lp++ != c)
                    return (0);
            curlp = lp;
            /* the optional ones */
            while (vars->size--)
                if (*lp++ != c)
                    break;
            /*
             * When the count ran out instead of a mismatch, lp is not one
             * past a failing byte; bump it so `star` can step back
             * uniformly.
             */
            if (vars->size < 0)
                lp++;
            ep += 2;
            goto star;

        case CDOT | RNGE:
            getrnge(ep, vars);
            while (vars->low--)
                if (*lp++ == '\0')
                    return (0);
            curlp = lp;
            while (vars->size--)
                if (*lp++ == '\0')
                    break;
            if (vars->size < 0)
                lp++;
            ep += 2;
            goto star;

        case CXCL | RNGE:
            getrnge(ep + 32, vars);
            while (vars->low--) {
                c = (unsigned char)*lp++;
                if (!ISTHERE(c))
                    return (0);
            }
            curlp = lp;
            while (vars->size--) {
                c = (unsigned char)*lp++;
                if (!ISTHERE(c))
                    break;
            }
            if (vars->size < 0)
                lp++;
            ep += 34;        /* 32 + 2 */
            goto star;

        case NCCL | RNGE:
            neg = 1;
            /* fallthrough */

        case CCL | RNGE:
            getrnge(ep + 16, vars);
            while (vars->low--) {
                c = *lp++;
                if (((c & 0200) || !ISTHERE(c)) ^ neg)
                    return (0);
            }
            curlp = lp;
            while (vars->size--) {
                c = *lp++;
                if (((c & 0200) || !ISTHERE(c)) ^ neg)
                    break;
            }
            if (vars->size < 0)
                lp++;
            ep += 18;         /* 16 + 2 */
            goto star;

        case CBACK:
            epint = (int) *ep;
            bbeg = vars->braslist[epint];
            ct = vars->braelist[epint] - bbeg;
            ep++;

            if (ecmp(bbeg, lp, ct)) {
                lp += ct;
                continue;
            }
            return (0);

        case CBACK | STAR:
            epint = (int) *ep;
            bbeg = vars->braslist[epint];
            ct = vars->braelist[epint] - bbeg;
            ep++;
            /*
             * A group that matched the empty string cannot be repeated in
             * any meaningful way: comparing zero bytes always succeeds and
             * advancing by zero never makes progress, so both loops below
             * would spin forever.  Zero repetitions is the only distinct
             * outcome, so simply carry on with the rest of the expression.
             */
            if (ct <= 0)
                continue;
            curlp = lp;
            while (ecmp(bbeg, lp, ct))
                lp += ct;

            while (lp >= curlp) {
                if (_advance(lp, ep, vars))
                    return (1);
                lp -= ct;
            }
            return (0);


        case CDOT | STAR:
            curlp = lp;
            while (*lp++);
            goto star;

        case CCHR | STAR:
            curlp = lp;
            while (*lp++ == *ep);
            ep++;
            goto star;

        case CXCL | STAR:
            curlp = lp;
            do {
                c = (unsigned char)*lp++;
            } while (ISTHERE(c));
            ep += 32;
            goto star;

        case NCCL | STAR:
            neg = 1;
            /* fallthrough */

        case CCL | STAR:
            curlp = lp;
            do {
                c = *lp++;
            } while (((c & 0200) == 0 && ISTHERE(c)) ^ neg);
            ep += 16;
            goto star;

        star:
            /*
             * lp is one past the longest candidate.  Retry the remainder of
             * the expression at each shorter length, giving up early if lp
             * reaches vars->locs.
             */
            do {
                if (--lp == vars->locs)
                    break;
                if (_advance(lp, ep, vars))
                    return (1);
            } while (lp > curlp);
            return (0);

        }
    }
}
592 
/*
 * Decode the two-byte repetition bounds stored at str.
 *
 * The first byte becomes vars->low.  vars->size becomes the number of
 * additional repetitions allowed: the second byte minus vars->low, or 20000
 * when the second byte is 255.
 */
static void getrnge(char *str, step_vars_storage *vars)
{
    int lower = str[0] & 0377;
    int upper = str[1] & 0377;

    vars->low = lower;
    if (upper == 255) {
        vars->size = 20000;
    }
    else {
        vars->size = upper - vars->low;
    }
}
598 
599 
600