1 /* Licensed to the Apache Software Foundation (ASF) under one or more
2  * contributor license agreements.  See the NOTICE file distributed with
3  * this work for additional information regarding copyright ownership.
4  * The ASF licenses this file to You under the Apache License, Version 2.0
5  * (the "License"); you may not use this file except in compliance with
6  * the License.  You may obtain a copy of the License at
7  *
8  *     http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #include "apr.h"
18 #include "apr_lib.h"
19 #include "apr_pools.h"
20 #include "apr_strings.h"
21 #include "ap_config.h"
22 #include "ap_regex.h"
23 #include "httpd.h"
24 
rxplus_cleanup(void * preg)25 static apr_status_t rxplus_cleanup(void *preg)
26 {
27     ap_regfree((ap_regex_t *) preg);
28     return APR_SUCCESS;
29 }
30 
ap_rxplus_compile(apr_pool_t * pool,const char * pattern)31 AP_DECLARE(ap_rxplus_t*) ap_rxplus_compile(apr_pool_t *pool,
32                                            const char *pattern)
33 {
34     /* perl style patterns
35      * add support for more as and when wanted
36      * substitute: s/rx/subs/
37      * match: m/rx/ or just /rx/
38      */
39 
40     /* allow any nonalnum delimiter as first or second char.
41      * If we ever use this with non-string pattern we'll need an extra check
42      */
43     const char *endp = 0;
44     const char *str = pattern;
45     const char *rxstr;
46     ap_rxplus_t *ret = apr_pcalloc(pool, sizeof(ap_rxplus_t));
47     char delim = 0;
48     enum { SUBSTITUTE = 's', MATCH = 'm'} action = MATCH;
49 
50     if (!apr_isalnum(pattern[0])) {
51         delim = *str++;
52     }
53     else if (pattern[0] == 's' && !apr_isalnum(pattern[1])) {
54         action = SUBSTITUTE;
55         delim = pattern[1];
56         str += 2;
57     }
58     else if (pattern[0] == 'm' && !apr_isalnum(pattern[1])) {
59         delim = pattern[1];
60         str += 2;
61     }
62     /* TODO: support perl's after/before */
63     /* FIXME: fix these simplminded delims */
64 
65     /* we think there's a delimiter.  Allow for it not to be if unmatched */
66     if (delim) {
67         endp = ap_strchr_c(str, delim);
68     }
69     if (!endp) { /* there's no delim or flags */
70         if (ap_regcomp(&ret->rx, pattern, 0) == 0) {
71             apr_pool_cleanup_register(pool, &ret->rx, rxplus_cleanup,
72                                       apr_pool_cleanup_null);
73             return ret;
74         }
75         else {
76             return NULL;
77         }
78     }
79 
80     /* We have a delimiter.  Use it to extract the regexp */
81     rxstr = apr_pstrmemdup(pool, str, endp-str);
82 
83     /* If it's a substitution, we need the replacement string
84      * TODO: possible future enhancement - support other parsing
85      * in the replacement string.
86      */
87     if (action == SUBSTITUTE) {
88         str = endp+1;
89         if (!*str || (endp = ap_strchr_c(str, delim), !endp)) {
90             /* missing replacement string is an error */
91             return NULL;
92         }
93         ret->subs = apr_pstrmemdup(pool, str, endp-str);
94     }
95 
96     /* anything after the current delimiter is flags */
97     ret->flags = ap_regcomp_get_default_cflags() & AP_REG_DOLLAR_ENDONLY;
98     while (*++endp) {
99         switch (*endp) {
100         case 'i': ret->flags |= AP_REG_ICASE; break;
101         case 'm': ret->flags |= AP_REG_NEWLINE; break;
102         case 'n': ret->flags |= AP_REG_NOMEM; break;
103         case 'g': ret->flags |= AP_REG_MULTI; break;
104         case 's': ret->flags |= AP_REG_DOTALL; break;
105         case '^': ret->flags |= AP_REG_NOTBOL; break;
106         case '$': ret->flags |= AP_REG_NOTEOL; break;
107         default: break; /* we should probably be stricter here */
108         }
109     }
110     if (ap_regcomp(&ret->rx, rxstr, AP_REG_NO_DEFAULT | ret->flags) == 0) {
111         apr_pool_cleanup_register(pool, &ret->rx, rxplus_cleanup,
112                                   apr_pool_cleanup_null);
113     }
114     else {
115         return NULL;
116     }
117     if (!(ret->flags & AP_REG_NOMEM)) {
118         /* count size of memory required, starting at 1 for the whole-match
119          * Simpleminded should be fine 'cos regcomp already checked syntax
120          */
121         ret->nmatch = 1;
122         while (*rxstr) {
123             switch (*rxstr++) {
124             case '\\':  /* next char is escaped - skip it */
125                 if (*rxstr != 0) {
126                     ++rxstr;
127                 }
128                 break;
129             case '(':   /* unescaped bracket implies memory */
130                 ++ret->nmatch;
131                 break;
132             default:
133                 break;
134             }
135         }
136         ret->pmatch = apr_palloc(pool, ret->nmatch*sizeof(ap_regmatch_t));
137     }
138     return ret;
139 }
140 
ap_rxplus_exec(apr_pool_t * pool,ap_rxplus_t * rx,const char * pattern,char ** newpattern)141 AP_DECLARE(int) ap_rxplus_exec(apr_pool_t *pool, ap_rxplus_t *rx,
142                                const char *pattern, char **newpattern)
143 {
144     int ret = 1;
145     int startl, oldl, newl, diffsz;
146     const char *remainder;
147     char *subs;
148 /* snrf process_regexp from mod_headers */
149     if (ap_regexec(&rx->rx, pattern, rx->nmatch, rx->pmatch, rx->flags) != 0) {
150         rx->match = NULL;
151         return 0; /* no match, nothing to do */
152     }
153     rx->match = pattern;
154     if (rx->subs) {
155         *newpattern = ap_pregsub(pool, rx->subs, pattern,
156                                  rx->nmatch, rx->pmatch);
157         if (!*newpattern) {
158             return 0; /* FIXME - should we do more to handle error? */
159         }
160         startl = rx->pmatch[0].rm_so;
161         oldl = rx->pmatch[0].rm_eo - startl;
162         newl = strlen(*newpattern);
163         diffsz = newl - oldl;
164         remainder = pattern + startl + oldl;
165         if (rx->flags & AP_REG_MULTI) {
166             /* recurse to do any further matches */
167             ret += ap_rxplus_exec(pool, rx, remainder, &subs);
168             if (ret > 1) {
169                 /* a further substitution happened */
170                 diffsz += strlen(subs) - strlen(remainder);
171                 remainder = subs;
172             }
173         }
174         subs  = apr_palloc(pool, strlen(pattern) + 1 + diffsz);
175         memcpy(subs, pattern, startl);
176         memcpy(subs+startl, *newpattern, newl);
177         strcpy(subs+startl+newl, remainder);
178         *newpattern = subs;
179     }
180     return ret;
181 }
182 #ifdef DOXYGEN
ap_rxplus_nmatch(ap_rxplus_t * rx)183 AP_DECLARE(int) ap_rxplus_nmatch(ap_rxplus_t *rx)
184 {
185     return (rx->match != NULL) ? rx->nmatch : 0;
186 }
187 #endif
188 
189 /* If this blows up on you, see the notes in the header/apidoc
190  * rx->match is a pointer and it's your responsibility to ensure
191  * it hasn't gone out-of-scope since the last ap_rxplus_exec
192  */
ap_rxplus_match(ap_rxplus_t * rx,int n,int * len,const char ** match)193 AP_DECLARE(void) ap_rxplus_match(ap_rxplus_t *rx, int n, int *len,
194                                  const char **match)
195 {
196     if (n >= 0 && n < ap_rxplus_nmatch(rx)) {
197         *match = rx->match + rx->pmatch[n].rm_so;
198         *len = rx->pmatch[n].rm_eo - rx->pmatch[n].rm_so;
199     }
200     else {
201         *len = -1;
202         *match = NULL;
203     }
204 }
ap_rxplus_pmatch(apr_pool_t * pool,ap_rxplus_t * rx,int n)205 AP_DECLARE(char*) ap_rxplus_pmatch(apr_pool_t *pool, ap_rxplus_t *rx, int n)
206 {
207     int len;
208     const char *match;
209     ap_rxplus_match(rx, n, &len, &match);
210     return apr_pstrndup(pool, match, len);
211 }
212