1 /* Licensed to the Apache Software Foundation (ASF) under one or more
2  * contributor license agreements.  See the NOTICE file distributed with
3  * this work for additional information regarding copyright ownership.
4  * The ASF licenses this file to You under the Apache License, Version 2.0
5  * (the "License"); you may not use this file except in compliance with
6  * the License.  You may obtain a copy of the License at
7  *
8  *     http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #include "httpd.h"
18 #include "http_core.h"
19 #include "http_log.h"
20 #include "apr_strings.h"
21 
22 #include <brotli/encode.h>
23 
/* Forward declaration of this module's record.  The directive handlers and
 * the output filter below pass its address to ap_get_module_config() to look
 * up the per-server configuration; the record itself is defined at the end
 * of the file.
 */
module AP_MODULE_DECLARE_DATA brotli_module;
25 
/* Policy applied to an existing ETag response header when the response is
 * compressed (chosen with the BrotliAlterETag directive).  The enumerators
 * rely on implicit numbering, starting at zero.
 */
typedef enum {
    ETAG_MODE_ADDSUFFIX,   /* 0: rewrite a quoted ETag so it ends in -br" */
    ETAG_MODE_NOCHANGE,    /* 1: leave the ETag header as it is */
    ETAG_MODE_REMOVE       /* 2: drop the ETag header */
} etag_mode_e;
31 
/* Per-server configuration of mod_brotli.  Instances are allocated zeroed
 * and given defaults by create_server_config(); the directive handlers
 * overwrite individual fields.
 */
typedef struct brotli_server_config_t {
    /* Encoder quality handed to BROTLI_PARAM_QUALITY (directive: 0..11). */
    int quality;
    /* Log2 of the sliding window, BROTLI_PARAM_LGWIN (directive: 10..24). */
    int lgwin;
    /* Log2 of the max input block, BROTLI_PARAM_LGBLOCK (directive: 16..24);
     * the default of 0 lets the encoder choose.
     */
    int lgblock;
    /* What to do with the ETag header of a compressed response. */
    etag_mode_e etag_mode;
    /* Names of the request notes that receive the compression ratio, the
     * input byte count and the output byte count.  A NULL name means the
     * corresponding note is not written.
     */
    const char *note_ratio_name;
    const char *note_input_name;
    const char *note_output_name;
} brotli_server_config_t;
41 
/* Per-server configuration constructor.
 *
 * Allocates a zero-filled brotli_server_config_t from pool `p` and applies
 * the module defaults.  The note names stay NULL, i.e. no notes are written
 * unless BrotliFilterNote is configured.  `s` is unused.
 */
static void *create_server_config(apr_pool_t *p, server_rec *s)
{
    brotli_server_config_t *cfg;

    cfg = apr_pcalloc(p, sizeof(brotli_server_config_t));

    /* Suffix ETags by default so compressed and uncompressed
     * representations carry distinct validators.
     */
    cfg->etag_mode = ETAG_MODE_ADDSUFFIX;

    /* A block size of zero is special for BROTLI_PARAM_LGBLOCK: the encoder
     * then derives the input block size from its other parameters
     * (see enc/quality.h: ComputeLgBlock()).
     */
    cfg->lgblock = 0;

    /* Quality 5 with an 18-bit window keeps CPU and memory cost close to
     * what mod_deflate needs.  As Brotli generally compresses better than
     * Deflate, just switching modules reduces the transferred bytes while
     * everything else stays the same.
     * See https://quixdb.github.io/squash-benchmark/
     */
    cfg->lgwin = 18;
    cfg->quality = 5;

    return cfg;
}
65 
/* Handler for the BrotliFilterNote directive (one or two arguments).
 *
 *   BrotliFilterNote <name>                      - name of the ratio note
 *   BrotliFilterNote Ratio|Input|Output <name>   - name of the given note
 *
 * The type keyword is matched case-insensitively.  Returns NULL on success,
 * or an error message allocated from cmd->pool for an unknown type.
 */
static const char *set_filter_note(cmd_parms *cmd, void *dummy,
                                   const char *arg1, const char *arg2)
{
    brotli_server_config_t *cfg;
    const char **slot;

    cfg = ap_get_module_config(cmd->server->module_config, &brotli_module);

    if (arg2 == NULL) {
        /* Single-argument form: the argument itself is the ratio note. */
        cfg->note_ratio_name = arg1;
        return NULL;
    }

    /* Two-argument form: pick the field selected by the type keyword. */
    if (!ap_cstr_casecmp(arg1, "Ratio")) {
        slot = &cfg->note_ratio_name;
    }
    else if (!ap_cstr_casecmp(arg1, "Input")) {
        slot = &cfg->note_input_name;
    }
    else if (!ap_cstr_casecmp(arg1, "Output")) {
        slot = &cfg->note_output_name;
    }
    else {
        return apr_psprintf(cmd->pool, "Unknown BrotliFilterNote type '%s'",
                            arg1);
    }

    *slot = arg2;
    return NULL;
}
93 
/* Handler for the BrotliCompressionQuality directive.
 *
 * `arg` must be a decimal integer in the range 0..11.  Returns NULL on
 * success and stores the value in the per-server configuration; otherwise
 * returns a static error message and leaves the configuration untouched.
 */
static const char *set_compression_quality(cmd_parms *cmd, void *dummy,
                                           const char *arg)
{
    brotli_server_config_t *conf =
        ap_get_module_config(cmd->server->module_config, &brotli_module);
    char *end;
    long val = strtol(arg, &end, 10);

    /* strtol() instead of atoi(): atoi() maps non-numeric input such as
     * "abc" to 0 (a valid quality), ignores trailing garbage ("5x") and is
     * undefined on overflow.  Here `end == arg` catches input without any
     * digits and `*end` catches trailing characters; an out-of-range
     * conversion yields LONG_MIN/LONG_MAX, which the range test rejects.
     */
    if (end == arg || *end != '\0' || val < 0 || val > 11) {
        return "BrotliCompressionQuality must be between 0 and 11";
    }

    conf->quality = (int)val;
    return NULL;
}
108 
/* Handler for the BrotliCompressionWindow directive.
 *
 * `arg` must be a decimal integer in the range 10..24 (log2 of the sliding
 * window size).  Returns NULL on success and stores the value in the
 * per-server configuration; otherwise returns a static error message and
 * leaves the configuration untouched.
 */
static const char *set_compression_lgwin(cmd_parms *cmd, void *dummy,
                                         const char *arg)
{
    brotli_server_config_t *conf =
        ap_get_module_config(cmd->server->module_config, &brotli_module);
    char *end;
    long val = strtol(arg, &end, 10);

    /* strtol() instead of atoi(): atoi() ignores trailing garbage ("18x")
     * and is undefined on overflow.  Here `end == arg` catches input
     * without any digits and `*end` catches trailing characters; an
     * out-of-range conversion yields LONG_MIN/LONG_MAX, which the range
     * test rejects.
     */
    if (end == arg || *end != '\0' || val < 10 || val > 24) {
        return "BrotliCompressionWindow must be between 10 and 24";
    }

    conf->lgwin = (int)val;
    return NULL;
}
123 
/* Handler for the BrotliCompressionMaxInputBlock directive.
 *
 * `arg` must be a decimal integer in the range 16..24 (log2 of the maximum
 * input block size).  Returns NULL on success and stores the value in the
 * per-server configuration; otherwise returns a static error message and
 * leaves the configuration untouched.
 */
static const char *set_compression_lgblock(cmd_parms *cmd, void *dummy,
                                           const char *arg)
{
    brotli_server_config_t *conf =
        ap_get_module_config(cmd->server->module_config, &brotli_module);
    char *end;
    long val = strtol(arg, &end, 10);

    /* strtol() instead of atoi(): atoi() ignores trailing garbage ("20x")
     * and is undefined on overflow.  Here `end == arg` catches input
     * without any digits and `*end` catches trailing characters; an
     * out-of-range conversion yields LONG_MIN/LONG_MAX, which the range
     * test rejects.
     */
    if (end == arg || *end != '\0' || val < 16 || val > 24) {
        return "BrotliCompressionMaxInputBlock must be between 16 and 24";
    }

    conf->lgblock = (int)val;
    return NULL;
}
138 
/* Handler for the BrotliAlterETag directive.
 *
 * Accepts 'AddSuffix', 'NoChange' or 'Remove' (case-insensitive) and stores
 * the matching etag_mode_e value in the per-server configuration.  Returns
 * NULL on success or a static error message for any other keyword.
 */
static const char *set_etag_mode(cmd_parms *cmd, void *dummy,
                                 const char *arg)
{
    brotli_server_config_t *cfg;
    etag_mode_e mode;

    cfg = ap_get_module_config(cmd->server->module_config, &brotli_module);

    if (!ap_cstr_casecmp(arg, "AddSuffix")) {
        mode = ETAG_MODE_ADDSUFFIX;
    }
    else if (!ap_cstr_casecmp(arg, "NoChange")) {
        mode = ETAG_MODE_NOCHANGE;
    }
    else if (!ap_cstr_casecmp(arg, "Remove")) {
        mode = ETAG_MODE_REMOVE;
    }
    else {
        return "BrotliAlterETag accepts only 'AddSuffix', 'NoChange' and 'Remove'";
    }

    cfg->etag_mode = mode;
    return NULL;
}
160 
/* Per-request state of the compression filter, stored in f->ctx. */
typedef struct brotli_ctx_t {
    /* Brotli encoder instance; created in create_ctx() and destroyed (then
     * reset to NULL) by cleanup_ctx().
     */
    BrotliEncoderState *state;
    /* Brigade in which output buckets are collected before being passed to
     * the next filter.
     */
    apr_bucket_brigade *bb;
    /* Number of input bytes fed to the encoder so far. */
    apr_off_t total_in;
    /* Number of compressed bytes taken from the encoder so far. */
    apr_off_t total_out;
} brotli_ctx_t;
167 
alloc_func(void * opaque,size_t size)168 static void *alloc_func(void *opaque, size_t size)
169 {
170     return apr_bucket_alloc(size, opaque);
171 }
172 
/* Deallocation callback handed to the Brotli encoder; the counterpart of
 * alloc_func().  A NULL block is skipped rather than passed on to
 * apr_bucket_free().  `opaque` is unused.
 */
static void free_func(void *opaque, void *block)
{
    if (block == NULL) {
        return;
    }

    apr_bucket_free(block);
}
179 
cleanup_ctx(void * data)180 static apr_status_t cleanup_ctx(void *data)
181 {
182     brotli_ctx_t *ctx = data;
183 
184     BrotliEncoderDestroyInstance(ctx->state);
185     ctx->state = NULL;
186     return APR_SUCCESS;
187 }
188 
create_ctx(int quality,int lgwin,int lgblock,apr_bucket_alloc_t * alloc,apr_pool_t * pool)189 static brotli_ctx_t *create_ctx(int quality,
190                                 int lgwin,
191                                 int lgblock,
192                                 apr_bucket_alloc_t *alloc,
193                                 apr_pool_t *pool)
194 {
195     brotli_ctx_t *ctx = apr_pcalloc(pool, sizeof(*ctx));
196 
197     ctx->state = BrotliEncoderCreateInstance(alloc_func, free_func, alloc);
198     BrotliEncoderSetParameter(ctx->state, BROTLI_PARAM_QUALITY, quality);
199     BrotliEncoderSetParameter(ctx->state, BROTLI_PARAM_LGWIN, lgwin);
200     BrotliEncoderSetParameter(ctx->state, BROTLI_PARAM_LGBLOCK, lgblock);
201     apr_pool_cleanup_register(pool, ctx, cleanup_ctx, apr_pool_cleanup_null);
202 
203     ctx->bb = apr_brigade_create(pool, alloc);
204     ctx->total_in = 0;
205     ctx->total_out = 0;
206 
207     return ctx;
208 }
209 
process_chunk(brotli_ctx_t * ctx,const void * data,apr_size_t len,ap_filter_t * f)210 static apr_status_t process_chunk(brotli_ctx_t *ctx,
211                                   const void *data,
212                                   apr_size_t len,
213                                   ap_filter_t *f)
214 {
215     const apr_byte_t *next_in = data;
216     apr_size_t avail_in = len;
217 
218     while (avail_in > 0) {
219         apr_byte_t *next_out = NULL;
220         apr_size_t avail_out = 0;
221 
222         if (!BrotliEncoderCompressStream(ctx->state,
223                                          BROTLI_OPERATION_PROCESS,
224                                          &avail_in, &next_in,
225                                          &avail_out, &next_out, NULL)) {
226             ap_log_rerror(APLOG_MARK, APLOG_ERR, 0, f->r, APLOGNO(03459)
227                           "Error while compressing data");
228             return APR_EGENERAL;
229         }
230 
231         if (BrotliEncoderHasMoreOutput(ctx->state)) {
232             apr_size_t output_len = 0;
233             const apr_byte_t *output;
234             apr_status_t rv;
235             apr_bucket *b;
236 
237             /* Drain the accumulated output.  Avoid copying the data by
238              * wrapping a pointer to the internal output buffer and passing
239              * it down to the next filter.  The pointer is only valid until
240              * the next call to BrotliEncoderCompressStream(), but we're okay
241              * with that, since the brigade is cleaned up right after the
242              * ap_pass_brigade() call.
243              */
244             output = BrotliEncoderTakeOutput(ctx->state, &output_len);
245             ctx->total_out += output_len;
246 
247             b = apr_bucket_transient_create((const char *)output, output_len,
248                                             ctx->bb->bucket_alloc);
249             APR_BRIGADE_INSERT_TAIL(ctx->bb, b);
250 
251             rv = ap_pass_brigade(f->next, ctx->bb);
252             apr_brigade_cleanup(ctx->bb);
253             if (rv != APR_SUCCESS) {
254                 return rv;
255             }
256         }
257     }
258 
259     ctx->total_in += len;
260     return APR_SUCCESS;
261 }
262 
flush(brotli_ctx_t * ctx,BrotliEncoderOperation op,ap_filter_t * f)263 static apr_status_t flush(brotli_ctx_t *ctx,
264                           BrotliEncoderOperation op,
265                           ap_filter_t *f)
266 {
267     while (1) {
268         const apr_byte_t *next_in = NULL;
269         apr_size_t avail_in = 0;
270         apr_byte_t *next_out = NULL;
271         apr_size_t avail_out = 0;
272         apr_size_t output_len;
273         const apr_byte_t *output;
274         apr_bucket *b;
275 
276         if (!BrotliEncoderCompressStream(ctx->state, op,
277                                          &avail_in, &next_in,
278                                          &avail_out, &next_out, NULL)) {
279             ap_log_rerror(APLOG_MARK, APLOG_ERR, 0, f->r, APLOGNO(03460)
280                           "Error while compressing data");
281             return APR_EGENERAL;
282         }
283 
284         if (!BrotliEncoderHasMoreOutput(ctx->state)) {
285             break;
286         }
287 
288         /* A flush can require several calls to BrotliEncoderCompressStream(),
289          * so place the data on the heap (otherwise, the pointer will become
290          * invalid after the next call to BrotliEncoderCompressStream()).
291          */
292         output_len = 0;
293         output = BrotliEncoderTakeOutput(ctx->state, &output_len);
294         ctx->total_out += output_len;
295 
296         b = apr_bucket_heap_create((const char *)output, output_len, NULL,
297                                    ctx->bb->bucket_alloc);
298         APR_BRIGADE_INSERT_TAIL(ctx->bb, b);
299     }
300 
301     return APR_SUCCESS;
302 }
303 
/* Collect the response's current content encoding(s).
 *
 * Joins, in this order and separated by ",", the Content-Encoding value of
 * r->headers_out, the one of r->err_headers_out, and r->content_encoding,
 * skipping whichever are absent.  Returns NULL when none is set.  A joined
 * result is allocated from r->pool; a single source is returned as is.
 */
static const char *get_content_encoding(request_rec *r)
{
    const char *from_out = apr_table_get(r->headers_out, "Content-Encoding");
    const char *from_err = apr_table_get(r->err_headers_out,
                                         "Content-Encoding");
    const char *result;

    if (from_out && from_err) {
        result = apr_pstrcat(r->pool, from_out, ",", from_err, NULL);
    }
    else if (from_out) {
        result = from_out;
    }
    else {
        result = from_err;
    }

    if (!r->content_encoding) {
        return result;
    }
    if (!result) {
        return r->content_encoding;
    }

    return apr_pstrcat(r->pool, result, ",", r->content_encoding, NULL);
}
329 
/* The BROTLI_COMPRESS output filter.
 *
 * On the first non-empty brigade (f->ctx is still NULL) the filter decides
 * whether the response may be compressed.  If not, it removes itself from
 * the filter chain and passes the brigade on untouched.  Otherwise it
 * adjusts the response headers (Content-Encoding, Content-Length,
 * Content-MD5, ETag, Vary) and creates the encoder context.
 *
 * After that, every brigade is consumed bucket by bucket: data buckets are
 * compressed, FLUSH and EOS buckets flush or finish the encoder and are
 * forwarded together with the pending output, and other metadata buckets
 * are queued in ctx->bb for the next pass downstream.
 *
 * Returns APR_SUCCESS, or the first error reported by the encoder, by a
 * bucket read, or by a downstream filter.
 */
static apr_status_t compress_filter(ap_filter_t *f, apr_bucket_brigade *bb)
{
    request_rec *r = f->r;
    brotli_ctx_t *ctx = f->ctx;
    apr_status_t rv;
    brotli_server_config_t *conf;

    /* Nothing to do for an empty brigade. */
    if (APR_BRIGADE_EMPTY(bb)) {
        return APR_SUCCESS;
    }

    conf = ap_get_module_config(r->server->module_config, &brotli_module);

    /* First invocation for this request: negotiate and set up. */
    if (!ctx) {
        const char *encoding;
        const char *token;
        const char *accepts;
        const char *q = NULL;

        /* Only work on main request, not subrequests, that are not
         * a 204 response with no content, and are not tagged with the
         * no-brotli env variable, and are not a partial response to
         * a Range request.
         *
         * Note that responding to 304 is handled separately to set
         * the required headers (such as ETag) per RFC7232, 4.1.
         */
        if (r->main || r->status == HTTP_NO_CONTENT
            || apr_table_get(r->subprocess_env, "no-brotli")
            || apr_table_get(r->headers_out, "Content-Range")) {
            ap_remove_output_filter(f);
            return ap_pass_brigade(f->next, bb);
        }

        /* Let's see what our current Content-Encoding is. */
        encoding = get_content_encoding(r);

        if (encoding) {
            const char *tmp = encoding;

            /* Walk the encoding tokens; any token other than the
             * (case-sensitively matched) pass-through values below means
             * the body is already encoded.
             */
            token = ap_get_token(r->pool, &tmp, 0);
            while (token && *token) {
                if (strcmp(token, "identity") != 0 &&
                    strcmp(token, "7bit") != 0 &&
                    strcmp(token, "8bit") != 0 &&
                    strcmp(token, "binary") != 0) {
                    /* The data is already encoded, do nothing. */
                    ap_remove_output_filter(f);
                    return ap_pass_brigade(f->next, bb);
                }

                /* Step over the separator that ended the token. */
                if (*tmp) {
                    ++tmp;
                }
                token = (*tmp) ? ap_get_token(r->pool, &tmp, 0) : NULL;
            }
        }

        /* Even if we don't accept this request based on it not having
         * the Accept-Encoding, we need to note that we were looking
         * for this header and downstream proxies should be aware of
         * that.
         */
        apr_table_mergen(r->headers_out, "Vary", "Accept-Encoding");

        accepts = apr_table_get(r->headers_in, "Accept-Encoding");
        if (!accepts) {
            ap_remove_output_filter(f);
            return ap_pass_brigade(f->next, bb);
        }

        /* Do we have Accept-Encoding: br? */
        token = ap_get_token(r->pool, &accepts, 0);
        while (token && token[0] && ap_cstr_casecmp(token, "br") != 0) {
            /* Skip the parameters (e.g. a qvalue) of a non-matching token. */
            while (*accepts == ';') {
                ++accepts;
                ap_get_token(r->pool, &accepts, 1);
            }

            if (*accepts == ',') {
                ++accepts;
            }
            token = (*accepts) ? ap_get_token(r->pool, &accepts, 0) : NULL;
        }

        /* Find the qvalue, if provided */
        if (*accepts) {
            while (*accepts == ';') {
                ++accepts;
            }
            q = ap_get_token(r->pool, &accepts, 1);
            ap_log_rerror(APLOG_MARK, APLOG_TRACE1, 0, r,
                          "token: '%s' - q: '%s'", token ? token : "NULL", q);
        }

        /* No acceptable token found or q=0 */
        /* The qvalue test treats q as zero when it is at least three
         * characters long and is a prefix of "q=0.000" ("q=0", "q=0.",
         * "q=0.0", ...).
         */
        if (!token || token[0] == '\0' ||
            (q && strlen(q) >= 3 && strncmp("q=0.000", q, strlen(q)) == 0)) {
            ap_remove_output_filter(f);
            return ap_pass_brigade(f->next, bb);
        }

        /* If the entire Content-Encoding is "identity", we can replace it. */
        if (!encoding || ap_cstr_casecmp(encoding, "identity") == 0) {
            apr_table_setn(r->headers_out, "Content-Encoding", "br");
        } else {
            apr_table_mergen(r->headers_out, "Content-Encoding", "br");
        }

        /* Keep r->content_encoding in sync with the header when it was set. */
        if (r->content_encoding) {
            r->content_encoding = apr_table_get(r->headers_out,
                                                "Content-Encoding");
        }

        /* These headers described the uncompressed body. */
        apr_table_unset(r->headers_out, "Content-Length");
        apr_table_unset(r->headers_out, "Content-MD5");

        /* https://bz.apache.org/bugzilla/show_bug.cgi?id=39727
         * https://bz.apache.org/bugzilla/show_bug.cgi?id=45023
         *
         * ETag must be unique among the possible representations, so a
         * change to content-encoding requires a corresponding change to the
         * ETag.  We make this behavior configurable, and mimic mod_deflate's
         * DeflateAlterETag with BrotliAlterETag to keep the transition from
         * mod_deflate seamless.
         */
        if (conf->etag_mode == ETAG_MODE_REMOVE) {
            apr_table_unset(r->headers_out, "ETag");
        }
        else if (conf->etag_mode == ETAG_MODE_ADDSUFFIX) {
            const char *etag = apr_table_get(r->headers_out, "ETag");

            if (etag) {
                apr_size_t len = strlen(etag);

                /* Only ETags ending in a double quote are rewritten: the
                 * closing quote is dropped and -br" is appended.
                 */
                if (len > 2 && etag[len - 1] == '"') {
                    etag = apr_pstrmemdup(r->pool, etag, len - 1);
                    etag = apr_pstrcat(r->pool, etag, "-br\"", NULL);
                    apr_table_setn(r->headers_out, "ETag", etag);
                }
            }
        }
        /* ETAG_MODE_NOCHANGE: the ETag header is left as it is. */

        /* For 304 responses, we only need to send out the headers. */
        if (r->status == HTTP_NOT_MODIFIED) {
            ap_remove_output_filter(f);
            return ap_pass_brigade(f->next, bb);
        }

        ctx = create_ctx(conf->quality, conf->lgwin, conf->lgblock,
                         f->c->bucket_alloc, r->pool);
        f->ctx = ctx;
    }

    /* Consume the brigade one bucket at a time. */
    while (!APR_BRIGADE_EMPTY(bb)) {
        apr_bucket *e = APR_BRIGADE_FIRST(bb);

        /* Optimization: If we are a HEAD request and bytes_sent is not zero
         * it means that we have passed the content-length filter once and
         * have more data to send.  This means that the content-length filter
         * could not determine our content-length for the response to the
         * HEAD request anyway (the associated GET request would deliver the
         * body in chunked encoding) and we can stop compressing.
         */
        if (r->header_only && r->bytes_sent) {
            ap_remove_output_filter(f);
            return ap_pass_brigade(f->next, bb);
        }

        if (APR_BUCKET_IS_EOS(e)) {
            /* End of stream: finish the encoder; its final output is
             * appended to ctx->bb.
             */
            rv = flush(ctx, BROTLI_OPERATION_FINISH, f);
            if (rv != APR_SUCCESS) {
                return rv;
            }

            /* Leave notes for logging. */
            if (conf->note_input_name) {
                apr_table_setn(r->notes, conf->note_input_name,
                               apr_off_t_toa(r->pool, ctx->total_in));
            }
            if (conf->note_output_name) {
                apr_table_setn(r->notes, conf->note_output_name,
                               apr_off_t_toa(r->pool, ctx->total_out));
            }
            if (conf->note_ratio_name) {
                /* Ratio is output size as an integer percentage of input
                 * size; "-" when there was no input.
                 */
                if (ctx->total_in > 0) {
                    int ratio = (int) (ctx->total_out * 100 / ctx->total_in);

                    apr_table_setn(r->notes, conf->note_ratio_name,
                                   apr_itoa(r->pool, ratio));
                }
                else {
                    apr_table_setn(r->notes, conf->note_ratio_name, "-");
                }
            }

            /* Move the EOS bucket behind the compressed output. */
            APR_BUCKET_REMOVE(e);
            APR_BRIGADE_INSERT_TAIL(ctx->bb, e);

            rv = ap_pass_brigade(f->next, ctx->bb);
            apr_brigade_cleanup(ctx->bb);
            /* Run the encoder cleanup now instead of waiting for the pool. */
            apr_pool_cleanup_run(r->pool, ctx, cleanup_ctx);
            return rv;
        }
        else if (APR_BUCKET_IS_FLUSH(e)) {
            /* Flush request: make the encoder emit what it has buffered,
             * then forward it together with the FLUSH bucket.
             */
            rv = flush(ctx, BROTLI_OPERATION_FLUSH, f);
            if (rv != APR_SUCCESS) {
                return rv;
            }

            APR_BUCKET_REMOVE(e);
            APR_BRIGADE_INSERT_TAIL(ctx->bb, e);

            rv = ap_pass_brigade(f->next, ctx->bb);
            apr_brigade_cleanup(ctx->bb);
            if (rv != APR_SUCCESS) {
                return rv;
            }
        }
        else if (APR_BUCKET_IS_METADATA(e)) {
            /* Other metadata is queued in ctx->bb and goes downstream with
             * the next brigade that is passed.
             */
            APR_BUCKET_REMOVE(e);
            APR_BRIGADE_INSERT_TAIL(ctx->bb, e);
        }
        else {
            const char *data;
            apr_size_t len;

            /* Data bucket: read it (blocking), compress it, then drop it. */
            rv = apr_bucket_read(e, &data, &len, APR_BLOCK_READ);
            if (rv != APR_SUCCESS) {
                return rv;
            }
            rv = process_chunk(ctx, data, len, f);
            if (rv != APR_SUCCESS) {
                return rv;
            }
            apr_bucket_delete(e);
        }
    }
    return APR_SUCCESS;
}
570 
/* Hook-registration callback of the module: makes compress_filter available
 * as the output filter "BROTLI_COMPRESS" of type AP_FTYPE_CONTENT_SET.
 */
static void register_hooks(apr_pool_t *p)
{
    (void)p;    /* the pool is not needed for filter registration */

    ap_register_output_filter("BROTLI_COMPRESS", compress_filter,
                              NULL, AP_FTYPE_CONTENT_SET);
}
576 
/* Configuration directives provided by this module.  All of them are
 * declared RSRC_CONF and store their value in the per-server configuration
 * through the handlers defined above.  The table ends with a {NULL} entry.
 */
static const command_rec cmds[] = {
    /* One or two arguments: "<name>" or "Ratio|Input|Output <name>". */
    AP_INIT_TAKE12("BrotliFilterNote", set_filter_note,
                   NULL, RSRC_CONF,
                   "Set a note to report on compression ratio"),
    AP_INIT_TAKE1("BrotliCompressionQuality", set_compression_quality,
                  NULL, RSRC_CONF,
                  "Compression quality between 0 and 11 (higher quality means "
                  "slower compression)"),
    AP_INIT_TAKE1("BrotliCompressionWindow", set_compression_lgwin,
                  NULL, RSRC_CONF,
                  "Sliding window size between 10 and 24 (larger windows can "
                  "improve compression, but require more memory)"),
    AP_INIT_TAKE1("BrotliCompressionMaxInputBlock", set_compression_lgblock,
                  NULL, RSRC_CONF,
                  "Maximum input block size between 16 and 24 (larger block "
                  "sizes require more memory)"),
    AP_INIT_TAKE1("BrotliAlterETag", set_etag_mode,
                  NULL, RSRC_CONF,
                  "Set how mod_brotli should modify ETag response headers: "
                  "'AddSuffix' (default), 'NoChange', 'Remove'"),
    {NULL}
};
599 
/* Module record.  Only a per-server config constructor, the directive table
 * and the hook registration callback are supplied; there is no
 * per-directory configuration and no merge function.
 * NOTE(review): without a per-server merge function, how a virtual host
 * picks up settings made in the global scope depends on the core's default
 * behaviour -- confirm this is intended.
 */
AP_DECLARE_MODULE(brotli) = {
    STANDARD20_MODULE_STUFF,
    NULL,                      /* create per-directory config structure */
    NULL,                      /* merge per-directory config structures */
    create_server_config,      /* create per-server config structure */
    NULL,                      /* merge per-server config structures */
    cmds,                      /* command apr_table_t */
    register_hooks             /* register hooks */
};
609