1 /*
2  * Copyright (c) 2005, 2008 Sun Microsystems, Inc. All Rights Reserved.
3  * Use is subject to license terms.
4  *
5  * Licensed under the Apache License, Version 2.0 (the "License");
6  * you may not use this file except in compliance with the License.
7  * You may obtain a copy of the License at
8  *  http://www.apache.org/licenses/LICENSE-2.0.
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
13  * or implied.
14  * See the License for the specific language governing permissions and
15  * limitations under the License.
16  */
17 
18 #include "httpd.h"
19 #include "http_config.h"
20 #include "http_log.h"
21 #include "apr_strings.h"
22 #include "apr_general.h"
23 #include "util_filter.h"
24 #include "apr_buckets.h"
25 #include "http_request.h"
26 #include "libsed.h"
27 
/* Name under which both the input and the output filter are registered. */
static const char *sed_filter_name = "Sed";
/* Usable size, in bytes, of the per-request output staging buffer. */
#define MODSED_OUTBUF_SIZE 8000
/* Number of transient buckets queued before the output brigade is passed
 * downstream with a flush bucket and the temporary pool is cleared.
 */
#define MAX_TRANSIENT_BUCKETS 50
31 
/* Compiled sed program for one direction (request or response). */
typedef struct sed_expr_config
{
    /* compiled commands; NULL while no expression has been compiled */
    sed_commands_t *sed_cmds;
    /* most recent message delivered through the compile error callback */
    const char *last_error;
} sed_expr_config;
37 
/* Per-directory configuration: one sed program per direction. */
typedef struct sed_config
{
    /* filled by the OutputSed directive, used by the output filter */
    sed_expr_config output;
    /* filled by the InputSed directive, used by the input filter */
    sed_expr_config input;
} sed_config;
43 
/* Context for filter invocation for single HTTP request */
typedef struct sed_filter_ctxt
{
    /* libsed evaluation state for this request */
    sed_eval_t eval;
    /* filter instance that owns this context */
    ap_filter_t *f;
    /* request being filtered */
    request_rec *r;
    /* brigade that collects the output produced by sed */
    apr_bucket_brigade *bb;
    /* input filter only: brigade filled from the next input filter */
    apr_bucket_brigade *bbinp;
    /* start of the output staging buffer, NULL when none is allocated */
    char *outbuf;
    /* next write position inside outbuf */
    char *curoutbuf;
    /* usable size of outbuf in bytes */
    apr_size_t bufsize;
    /* pool for output data: a sub-pool of r->pool or r->pool itself */
    apr_pool_t *tpool;
    /* transient buckets appended since the temporary pool was last cleared */
    int numbuckets;
} sed_filter_ctxt;
58 
/* Forward declaration: the filters pass its address to
 * ap_get_module_config to find the per-directory configuration.
 */
module AP_MODULE_DECLARE_DATA sed_module;
60 
61 /* This function will be call back from libsed functions if there is any error
62  * happend during execution of sed scripts
63  */
log_sed_errf(void * data,const char * error)64 static apr_status_t log_sed_errf(void *data, const char *error)
65 {
66     request_rec *r = (request_rec *) data;
67     ap_log_rerror(APLOG_MARK, APLOG_ERR, 0, r, APLOGNO(02998) "%s", error);
68     return APR_SUCCESS;
69 }
70 
71 /* This function will be call back from libsed functions if there is any
72  * compilation error.
73  */
sed_compile_errf(void * data,const char * error)74 static apr_status_t sed_compile_errf(void *data, const char *error)
75 {
76     sed_expr_config *sed_cfg = (sed_expr_config *) data;
77     sed_cfg->last_error = error;
78     return APR_SUCCESS;
79 }
80 
81 /* clear the temporary pool (used for transient buckets)
82  */
clear_ctxpool(sed_filter_ctxt * ctx)83 static void clear_ctxpool(sed_filter_ctxt* ctx)
84 {
85     apr_pool_clear(ctx->tpool);
86     ctx->outbuf = NULL;
87     ctx->curoutbuf = NULL;
88     ctx->numbuckets = 0;
89 }
90 
91 /* alloc_outbuf
92  * allocate output buffer
93  */
alloc_outbuf(sed_filter_ctxt * ctx)94 static void alloc_outbuf(sed_filter_ctxt* ctx)
95 {
96     ctx->outbuf = apr_palloc(ctx->tpool, ctx->bufsize + 1);
97     ctx->curoutbuf = ctx->outbuf;
98 }
99 
100 /* append_bucket
101  * Allocate a new bucket from buf and sz and append to ctx->bb
102  */
append_bucket(sed_filter_ctxt * ctx,char * buf,apr_size_t sz)103 static apr_status_t append_bucket(sed_filter_ctxt* ctx, char* buf, apr_size_t sz)
104 {
105     apr_status_t status = APR_SUCCESS;
106     apr_bucket *b;
107     if (ctx->tpool == ctx->r->pool) {
108         /* We are not using transient bucket */
109         b = apr_bucket_pool_create(buf, sz, ctx->r->pool,
110                                    ctx->r->connection->bucket_alloc);
111         APR_BRIGADE_INSERT_TAIL(ctx->bb, b);
112     }
113     else {
114         /* We are using transient bucket */
115         b = apr_bucket_transient_create(buf, sz,
116                                         ctx->r->connection->bucket_alloc);
117         APR_BRIGADE_INSERT_TAIL(ctx->bb, b);
118         ctx->numbuckets++;
119         if (ctx->numbuckets >= MAX_TRANSIENT_BUCKETS) {
120             b = apr_bucket_flush_create(ctx->r->connection->bucket_alloc);
121             APR_BRIGADE_INSERT_TAIL(ctx->bb, b);
122             status = ap_pass_brigade(ctx->f->next, ctx->bb);
123             apr_brigade_cleanup(ctx->bb);
124             clear_ctxpool(ctx);
125         }
126     }
127     return status;
128 }
129 
130 /*
131  * flush_output_buffer
132  * Flush the  output data (stored in ctx->outbuf)
133  */
flush_output_buffer(sed_filter_ctxt * ctx)134 static apr_status_t flush_output_buffer(sed_filter_ctxt *ctx)
135 {
136     apr_size_t size = ctx->curoutbuf - ctx->outbuf;
137     char *out;
138     apr_status_t status = APR_SUCCESS;
139     if ((ctx->outbuf == NULL) || (size <=0))
140         return status;
141     out = apr_pmemdup(ctx->tpool, ctx->outbuf, size);
142     status = append_bucket(ctx, out, size);
143     ctx->curoutbuf = ctx->outbuf;
144     return status;
145 }
146 
147 /* This is a call back function. When libsed wants to generate the output,
148  * this function will be invoked.
149  */
sed_write_output(void * dummy,char * buf,apr_size_t sz)150 static apr_status_t sed_write_output(void *dummy, char *buf, apr_size_t sz)
151 {
152     /* dummy is basically filter context. Context is passed during invocation
153      * of sed_eval_buffer
154      */
155     apr_size_t remainbytes = 0;
156     apr_status_t status = APR_SUCCESS;
157     sed_filter_ctxt *ctx = (sed_filter_ctxt *) dummy;
158     if (ctx->outbuf == NULL) {
159         alloc_outbuf(ctx);
160     }
161     remainbytes = ctx->bufsize - (ctx->curoutbuf - ctx->outbuf);
162     if (sz >= remainbytes) {
163         if (remainbytes > 0) {
164             memcpy(ctx->curoutbuf, buf, remainbytes);
165             buf += remainbytes;
166             sz -= remainbytes;
167             ctx->curoutbuf += remainbytes;
168         }
169         /* buffer is now full */
170         status = append_bucket(ctx, ctx->outbuf, ctx->bufsize);
171         if (status == APR_SUCCESS) {
172             /* if size is bigger than the allocated buffer directly add to output
173              * brigade */
174             if (sz >= ctx->bufsize) {
175                 char* newbuf = apr_pmemdup(ctx->tpool, buf, sz);
176                 status = append_bucket(ctx, newbuf, sz);
177                 if (status == APR_SUCCESS) {
178                     /* old buffer is now used so allocate new buffer */
179                     alloc_outbuf(ctx);
180                 }
181                 else {
182                     clear_ctxpool(ctx);
183                 }
184             }
185             else {
186                 /* old buffer is now used so allocate new buffer */
187                 alloc_outbuf(ctx);
188                 memcpy(ctx->curoutbuf, buf, sz);
189                 ctx->curoutbuf += sz;
190             }
191         }
192         else {
193             clear_ctxpool(ctx);
194         }
195     }
196     else {
197         memcpy(ctx->curoutbuf, buf, sz);
198         ctx->curoutbuf += sz;
199     }
200     return status;
201 }
202 
203 /* Compile a sed expression. Compiled context is saved in sed_cfg->sed_cmds.
204  * Memory required for compilation context is allocated from cmd->pool.
205  */
compile_sed_expr(sed_expr_config * sed_cfg,cmd_parms * cmd,const char * expr)206 static apr_status_t compile_sed_expr(sed_expr_config *sed_cfg,
207                                      cmd_parms *cmd,
208                                      const char *expr)
209 {
210     apr_status_t status = APR_SUCCESS;
211 
212     if (!sed_cfg->sed_cmds) {
213         sed_commands_t *sed_cmds;
214         sed_cmds = apr_pcalloc(cmd->pool, sizeof(sed_commands_t));
215         status = sed_init_commands(sed_cmds, sed_compile_errf, sed_cfg,
216                                    cmd->pool);
217         if (status != APR_SUCCESS) {
218             sed_destroy_commands(sed_cmds);
219             return status;
220         }
221         sed_cfg->sed_cmds = sed_cmds;
222     }
223     status = sed_compile_string(sed_cfg->sed_cmds, expr);
224     if (status != APR_SUCCESS) {
225         sed_destroy_commands(sed_cfg->sed_cmds);
226         sed_cfg->sed_cmds = NULL;
227     }
228     return status;
229 }
230 
231 /* sed eval cleanup function */
sed_eval_cleanup(void * data)232 static apr_status_t sed_eval_cleanup(void *data)
233 {
234     sed_eval_t *eval = (sed_eval_t *) data;
235     sed_destroy_eval(eval);
236     return APR_SUCCESS;
237 }
238 
239 /* Initialize sed filter context. If successful then context is set in f->ctx
240  */
init_context(ap_filter_t * f,sed_expr_config * sed_cfg,int usetpool)241 static apr_status_t init_context(ap_filter_t *f, sed_expr_config *sed_cfg, int usetpool)
242 {
243     apr_status_t status;
244     sed_filter_ctxt* ctx;
245     request_rec *r = f->r;
246     /* Create the context. Call sed_init_eval. libsed will generated
247      * output by calling sed_write_output and generates any error by
248      * invoking log_sed_errf.
249      */
250     ctx = apr_pcalloc(r->pool, sizeof(sed_filter_ctxt));
251     ctx->r = r;
252     ctx->bb = NULL;
253     ctx->numbuckets = 0;
254     ctx->f = f;
255     status = sed_init_eval(&ctx->eval, sed_cfg->sed_cmds, log_sed_errf,
256                            r, &sed_write_output, r->pool);
257     if (status != APR_SUCCESS) {
258         return status;
259     }
260     apr_pool_cleanup_register(r->pool, &ctx->eval, sed_eval_cleanup,
261                               apr_pool_cleanup_null);
262     ctx->bufsize = MODSED_OUTBUF_SIZE;
263     if (usetpool) {
264         apr_pool_create(&(ctx->tpool), r->pool);
265         apr_pool_tag(ctx->tpool, "sed_tpool");
266     }
267     else {
268         ctx->tpool = r->pool;
269     }
270     alloc_outbuf(ctx);
271     f->ctx = ctx;
272     return APR_SUCCESS;
273 }
274 
275 /* Entry function for Sed output filter */
sed_response_filter(ap_filter_t * f,apr_bucket_brigade * bb)276 static apr_status_t sed_response_filter(ap_filter_t *f,
277                                         apr_bucket_brigade *bb)
278 {
279     apr_bucket *b;
280     apr_status_t status;
281     sed_config *cfg = ap_get_module_config(f->r->per_dir_config,
282                                            &sed_module);
283     sed_filter_ctxt *ctx = f->ctx;
284     sed_expr_config *sed_cfg = &cfg->output;
285 
286     if ((sed_cfg == NULL) || (sed_cfg->sed_cmds == NULL)) {
287         /* No sed expressions */
288         ap_remove_output_filter(f);
289         return ap_pass_brigade(f->next, bb);
290     }
291 
292     if (ctx == NULL) {
293 
294         if (APR_BUCKET_IS_EOS(APR_BRIGADE_FIRST(bb))) {
295             /* no need to run sed filter for Head requests */
296             ap_remove_output_filter(f);
297             return ap_pass_brigade(f->next, bb);
298         }
299 
300         status = init_context(f, sed_cfg, 1);
301         if (status != APR_SUCCESS)
302              return status;
303         ctx = f->ctx;
304         apr_table_unset(f->r->headers_out, "Content-Length");
305     }
306 
307     ctx->bb = apr_brigade_create(f->r->pool, f->c->bucket_alloc);
308 
309     /* Here is the main logic. Iterate through all the buckets, read the
310      * content of the bucket, call sed_eval_buffer on the data.
311      * sed_eval_buffer will read the data line by line, run filters on each
312      * line. sed_eval_buffer will generates the output by calling
313      * sed_write_output which will add the output to ctx->bb. At the end of
314      * the loop, ctx->bb is passed to the next filter in chain. At the end of
315      * the data, if new line is not found then sed_eval_buffer will store the
316      * data in its own buffer.
317      *
318      * Once eos bucket is found then sed_finalize_eval will flush the rest of
319      * the data. If there is no new line in last line of data, new line is
320      * appended (that is a solaris sed behavior). libsed's internal memory for
321      * evaluation is allocated on request's pool so it will be cleared once
322      * request is over.
323      *
324      * If flush bucket is found then append the flush bucket to ctx->bb
325      * and pass it to next filter. There may be some data which will still be
326      * in sed's internal buffer which can't be flushed until new line
327      * character is arrived.
328      */
329     for (b = APR_BRIGADE_FIRST(bb); b != APR_BRIGADE_SENTINEL(bb);) {
330         const char *buf = NULL;
331         apr_size_t bytes = 0;
332         if (APR_BUCKET_IS_EOS(b)) {
333             apr_bucket *b1 = APR_BUCKET_NEXT(b);
334             /* Now clean up the internal sed buffer */
335             sed_finalize_eval(&ctx->eval, ctx);
336             status = flush_output_buffer(ctx);
337             if (status != APR_SUCCESS) {
338                 clear_ctxpool(ctx);
339                 return status;
340             }
341             APR_BUCKET_REMOVE(b);
342             /* Insert the eos bucket to ctx->bb brigade */
343             APR_BRIGADE_INSERT_TAIL(ctx->bb, b);
344             b = b1;
345         }
346         else if (APR_BUCKET_IS_FLUSH(b)) {
347             apr_bucket *b1 = APR_BUCKET_NEXT(b);
348             APR_BUCKET_REMOVE(b);
349             status = flush_output_buffer(ctx);
350             if (status != APR_SUCCESS) {
351                 clear_ctxpool(ctx);
352                 return status;
353             }
354             APR_BRIGADE_INSERT_TAIL(ctx->bb, b);
355             b = b1;
356         }
357         else if (APR_BUCKET_IS_METADATA(b)) {
358             b = APR_BUCKET_NEXT(b);
359         }
360         else if (apr_bucket_read(b, &buf, &bytes, APR_BLOCK_READ)
361                  == APR_SUCCESS) {
362             apr_bucket *b1 = APR_BUCKET_NEXT(b);
363             status = sed_eval_buffer(&ctx->eval, buf, bytes, ctx);
364             if (status != APR_SUCCESS) {
365                 clear_ctxpool(ctx);
366                 return status;
367             }
368             APR_BUCKET_REMOVE(b);
369             apr_bucket_delete(b);
370             b = b1;
371         }
372         else {
373             apr_bucket *b1 = APR_BUCKET_NEXT(b);
374             APR_BUCKET_REMOVE(b);
375             b = b1;
376         }
377     }
378     apr_brigade_cleanup(bb);
379     status = flush_output_buffer(ctx);
380     if (status != APR_SUCCESS) {
381         clear_ctxpool(ctx);
382         return status;
383     }
384     if (!APR_BRIGADE_EMPTY(ctx->bb)) {
385         status = ap_pass_brigade(f->next, ctx->bb);
386         apr_brigade_cleanup(ctx->bb);
387     }
388     clear_ctxpool(ctx);
389     return status;
390 }
391 
392 /* Entry function for Sed input filter */
sed_request_filter(ap_filter_t * f,apr_bucket_brigade * bb,ap_input_mode_t mode,apr_read_type_e block,apr_off_t readbytes)393 static apr_status_t sed_request_filter(ap_filter_t *f,
394                                        apr_bucket_brigade *bb,
395                                        ap_input_mode_t mode,
396                                        apr_read_type_e block,
397                                        apr_off_t readbytes)
398 {
399     sed_config *cfg = ap_get_module_config(f->r->per_dir_config,
400                                            &sed_module);
401     sed_filter_ctxt *ctx = f->ctx;
402     apr_status_t status;
403     apr_bucket_brigade *bbinp;
404     sed_expr_config *sed_cfg = &cfg->input;
405 
406     if (mode != AP_MODE_READBYTES) {
407         return ap_get_brigade(f->next, bb, mode, block, readbytes);
408     }
409 
410     if ((sed_cfg == NULL) || (sed_cfg->sed_cmds == NULL)) {
411         /* No sed expression */
412         return ap_get_brigade(f->next, bb, mode, block, readbytes);
413     }
414 
415     if (!ctx) {
416         if (!ap_is_initial_req(f->r)) {
417             ap_remove_input_filter(f);
418             /* XXX : Should we filter the sub requests too */
419             return ap_get_brigade(f->next, bb, mode, block, readbytes);
420         }
421         status = init_context(f, sed_cfg, 0);
422         if (status != APR_SUCCESS)
423              return status;
424         ctx = f->ctx;
425         ctx->bb    = apr_brigade_create(f->r->pool, f->c->bucket_alloc);
426         ctx->bbinp = apr_brigade_create(f->r->pool, f->c->bucket_alloc);
427     }
428 
429     bbinp = ctx->bbinp;
430 
431     /* Here is the logic :
432      * Read the readbytes data from next level fiter into bbinp. Loop through
433      * the buckets in bbinp and read the data from buckets and invoke
434      * sed_eval_buffer on the data. libsed will generate its output using
435      * sed_write_output which will add data in ctx->bb. Do it until it have
436      * atleast one bucket in ctx->bb. At the end of data eos bucket
437      * should be there.
438      *
439      * Once eos bucket is seen, then invoke sed_finalize_eval to clear the
440      * output. If the last byte of data is not a new line character then sed
441      * will add a new line to the data that is default sed behaviour. Note
442      * that using this filter with POST data, caller may not expect this
443      * behaviour.
444      *
445      * If next level fiter generate the flush bucket, we can't do much about
446      * it. If we want to return the flush bucket in brigade bb (to the caller)
447      * the question is where to add it?
448      */
449     while (APR_BRIGADE_EMPTY(ctx->bb)) {
450         apr_bucket *b;
451 
452         /* read the bytes from next level filter */
453         apr_brigade_cleanup(bbinp);
454         status = ap_get_brigade(f->next, bbinp, mode, block, readbytes);
455         if (status != APR_SUCCESS) {
456             return status;
457         }
458         for (b = APR_BRIGADE_FIRST(bbinp); b != APR_BRIGADE_SENTINEL(bbinp);
459              b = APR_BUCKET_NEXT(b)) {
460             const char *buf = NULL;
461             apr_size_t bytes;
462 
463             if (APR_BUCKET_IS_EOS(b)) {
464                 /* eos bucket. Clear the internal sed buffers */
465                 sed_finalize_eval(&ctx->eval, ctx);
466                 flush_output_buffer(ctx);
467                 APR_BUCKET_REMOVE(b);
468                 APR_BRIGADE_INSERT_TAIL(ctx->bb, b);
469                 break;
470             }
471             else if (APR_BUCKET_IS_FLUSH(b)) {
472                 /* What should we do with flush bucket */
473                 continue;
474             }
475             if (apr_bucket_read(b, &buf, &bytes, APR_BLOCK_READ)
476                      == APR_SUCCESS) {
477                 status = sed_eval_buffer(&ctx->eval, buf, bytes, ctx);
478                 if (status != APR_SUCCESS)
479                     return status;
480                 flush_output_buffer(ctx);
481             }
482         }
483     }
484 
485     if (!APR_BRIGADE_EMPTY(ctx->bb)) {
486         apr_bucket *b = NULL;
487 
488         if (apr_brigade_partition(ctx->bb, readbytes, &b) == APR_INCOMPLETE) {
489             APR_BRIGADE_CONCAT(bb, ctx->bb);
490         }
491         else {
492             APR_BRIGADE_CONCAT(bb, ctx->bb);
493             apr_brigade_split_ex(bb, b, ctx->bb);
494         }
495     }
496     return APR_SUCCESS;
497 }
498 
/* Handler for the OutputSed and InputSed directives.
 *
 * cmd - directive being processed; cmd->info carries the byte offset of
 *       the target sed_expr_config inside the per-directory config
 * cfg - per-directory config (sed_config)
 * arg - sed expression to compile
 *
 * Returns NULL on success, otherwise an error message allocated from
 * cmd->temp_pool.
 */
static const char *sed_add_expr(cmd_parms *cmd, void *cfg, const char *arg)
{
    int offset = (int) (long) cmd->info;
    sed_expr_config *target = (sed_expr_config *) ((char *) cfg + offset);

    if (compile_sed_expr(target, cmd, arg) == APR_SUCCESS) {
        return NULL;
    }
    return apr_psprintf(cmd->temp_pool,
                        "Failed to compile sed expression. %s",
                        target->last_error);
}
511 
/* Per-directory config creator: returns a zero-filled sed_config
 * allocated from p. The second argument is not used.
 */
static void *create_sed_dir_config(apr_pool_t *p, char *s)
{
    return apr_pcalloc(p, sizeof(sed_config));
}
517 
/* Configuration directives. Both use sed_add_expr as handler; the offset
 * stored as the directive's info selects which member of sed_config
 * receives the compiled expression.
 */
static const command_rec sed_filter_cmds[] = {
    AP_INIT_TAKE1("OutputSed", sed_add_expr,
                  (void *) APR_OFFSETOF(sed_config, output),
                  ACCESS_CONF,
                  "Sed regular expression for Response"),
    AP_INIT_TAKE1("InputSed", sed_add_expr,
                  (void *) APR_OFFSETOF(sed_config, input),
                  ACCESS_CONF,
                  "Sed regular expression for Request"),
    {NULL}
};
529 
/* Register the request (input) and response (output) filters, both under
 * the name in sed_filter_name and both as resource-level filters.
 */
static void register_hooks(apr_pool_t *p)
{
    ap_register_input_filter(sed_filter_name, sed_request_filter, NULL,
                             AP_FTYPE_RESOURCE);
    ap_register_output_filter(sed_filter_name, sed_response_filter, NULL,
                              AP_FTYPE_RESOURCE);
}
537 
/* Module record: per-directory config creator, directive table and hook
 * registration; no server-level configuration and no config merging.
 */
AP_DECLARE_MODULE(sed) = {
    STANDARD20_MODULE_STUFF,
    create_sed_dir_config,      /* dir config creater */
    NULL,                       /* dir merger --- default is to override */
    NULL,                       /* server config */
    NULL,                       /* merge server config */
    sed_filter_cmds,            /* command table */
    register_hooks              /* register hooks */
};
547