/* Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include <assert.h>
#include <apr_strings.h>
#include <ap_mpm.h>
#include <mpm_common.h>

#include <httpd.h>
#include <http_core.h>
#include <http_log.h>
#include <http_connection.h>
#include <http_protocol.h>
#include <http_request.h>
#include <http_ssl.h>

#include "h2_private.h"
#include "h2_bucket_eos.h"
#include "h2_config.h"
#include "h2_c1.h"
#include "h2_c1_io.h"
#include "h2_protocol.h"
#include "h2_session.h"
#include "h2_util.h"

#define TLS_DATA_MAX          (16*1024)

/* Calculated like this: assuming MTU 1500 bytes
 * 1500 - 40 (IP) - 20 (TCP) - 40 (TCP options)
 *      - TLS overhead (60-100)
 * ~= 1300 bytes */
#define WRITE_SIZE_INITIAL    1300

/* The maximum we'd like to write in one chunk is
 * the maximum size of a TLS record. When pushing
 * many frames down the h2 connection, the data may
 * align differently because of headers and other
 * frames, or simply because a response body does not
 * hold enough data.
 * However, keeping frames at or below this limit
 * should make optimizations easier at the layer that
 * writes to TLS.
 */
#define WRITE_SIZE_MAX        (TLS_DATA_MAX)

#define BUF_REMAIN            ((apr_size_t)(bmax-off))

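/* Render the composition of a bucket brigade into a bounded local buffer
 * and log it at the given level: one token per bucket, either the metadata
 * kind (eos, flush, eor, h2eos) or the data bucket type and its length. */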
static void h2_c1_io_bb_log(conn_rec *c, int stream_id, int level,
                            const char *tag, apr_bucket_brigade *bb)
{
    char buffer[16 * 1024];
    const char *line = "(null)";
    int bmax = sizeof(buffer)/sizeof(buffer[0]);
    int off = 0;
    apr_bucket *b;

    (void)stream_id;
    if (bb) {
        memset(buffer, 0, bmax--);
        for (b = APR_BRIGADE_FIRST(bb);
             bmax && (b != APR_BRIGADE_SENTINEL(bb));
             b = APR_BUCKET_NEXT(b)) {

            if (APR_BUCKET_IS_METADATA(b)) {
                if (APR_BUCKET_IS_EOS(b)) {
                    off += apr_snprintf(buffer+off, BUF_REMAIN, "eos ");
                }
                else if (APR_BUCKET_IS_FLUSH(b)) {
                    off += apr_snprintf(buffer+off, BUF_REMAIN, "flush ");
                }
                else if (AP_BUCKET_IS_EOR(b)) {
                    off += apr_snprintf(buffer+off, BUF_REMAIN, "eor ");
                }
                else if (H2_BUCKET_IS_H2EOS(b)) {
                    off += apr_snprintf(buffer+off, BUF_REMAIN, "h2eos ");
                }
                else {
                    off += apr_snprintf(buffer+off, BUF_REMAIN, "meta(unknown) ");
                }
            }
            else {
                const char *btype = "data";
                if (APR_BUCKET_IS_FILE(b)) {
                    btype = "file";
                }
                else if (APR_BUCKET_IS_PIPE(b)) {
                    btype = "pipe";
                }
                else if (APR_BUCKET_IS_SOCKET(b)) {
                    btype = "socket";
                }
                else if (APR_BUCKET_IS_HEAP(b)) {
                    btype = "heap";
                }
                else if (APR_BUCKET_IS_TRANSIENT(b)) {
                    btype = "transient";
                }
                else if (APR_BUCKET_IS_IMMORTAL(b)) {
                    btype = "immortal";
                }
#if APR_HAS_MMAP
                else if (APR_BUCKET_IS_MMAP(b)) {
                    btype = "mmap";
                }
#endif
                else if (APR_BUCKET_IS_POOL(b)) {
                    btype = "pool";
                }

                off += apr_snprintf(buffer+off, BUF_REMAIN, "%s[%ld] ",
                                    btype,
                                    (long)(b->length == ((apr_size_t)-1)? -1UL : b->length));
            }
        }
        line = *buffer? buffer : "(empty)";
    }
    /* Intentional no APLOGNO */
    ap_log_cerror(APLOG_MARK, level, 0, c, "h2_session(%ld)-%s: %s",
                  c->id, tag, line);
}

#define C1_IO_BB_LOG(c, stream_id, level, tag, bb) \
    if (APLOG_C_IS_LEVEL(c, level)) { \
        h2_c1_io_bb_log((c), (stream_id), (level), (tag), (bb)); \
    }


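/* Initialize the c1 output handling for a session: create the output
 * brigade, enable output buffering on TLS connections and derive the
 * flush threshold and TLS warmup/write sizes from the configuration. */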
apr_status_t h2_c1_io_init(h2_c1_io *io, h2_session *session)
{
    conn_rec *c = session->c1;

    io->session = session;
    io->output = apr_brigade_create(c->pool, c->bucket_alloc);
    io->is_tls = ap_ssl_conn_is_ssl(session->c1);
    io->buffer_output = io->is_tls;
    io->flush_threshold = 4 * (apr_size_t)h2_config_sgeti64(session->s, H2_CONF_STREAM_MAX_MEM);

    if (io->buffer_output) {
        /* This is what we start with,
         * see https://issues.apache.org/jira/browse/TS-2503
         */
        io->warmup_size = h2_config_sgeti64(session->s, H2_CONF_TLS_WARMUP_SIZE);
        io->cooldown_usecs = (h2_config_sgeti(session->s, H2_CONF_TLS_COOLDOWN_SECS)
                              * APR_USEC_PER_SEC);
        /* note: cooldown is currently forced off, overriding the
         * configured H2_CONF_TLS_COOLDOWN_SECS value. */
        io->cooldown_usecs = 0;
        io->write_size = (io->cooldown_usecs > 0?
                          WRITE_SIZE_INITIAL : WRITE_SIZE_MAX);
    }
    else {
        io->warmup_size = 0;
        io->cooldown_usecs = 0;
        io->write_size = 0;
    }

    if (APLOGctrace1(c)) {
        ap_log_cerror(APLOG_MARK, APLOG_TRACE4, 0, c,
                      "h2_c1_io(%ld): init, buffering=%d, warmup_size=%ld, "
                      "cd_secs=%f", c->id, io->buffer_output,
                      (long)io->warmup_size,
                      ((double)io->cooldown_usecs/APR_USEC_PER_SEC));
    }

    return APR_SUCCESS;
}

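/* If the scratch buffer holds any data, wrap it into a heap bucket that
 * takes ownership of the memory, append it to the output brigade and
 * reset the scratch state. */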
static void append_scratch(h2_c1_io *io)
{
    if (io->scratch && io->slen > 0) {
        apr_bucket *b = apr_bucket_heap_create(io->scratch, io->slen,
                                               apr_bucket_free,
                                               io->session->c1->bucket_alloc);
        APR_BRIGADE_INSERT_TAIL(io->output, b);
        io->buffered_len += io->slen;
        io->scratch = NULL;
        io->slen = io->ssize = 0;
    }
}

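/* Ensure that a scratch buffer with free space is available: append a
 * filled-up scratch buffer to the output and allocate a fresh one of
 * io->write_size bytes when necessary. Returns the remaining free space. */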
static apr_size_t assure_scratch_space(h2_c1_io *io)
{
    apr_size_t remain = io->ssize - io->slen;
    if (io->scratch && remain == 0) {
        append_scratch(io);
    }
    if (!io->scratch) {
        /* we control the size and it is larger than what buckets usually
         * allocate. */
        io->scratch = apr_bucket_alloc(io->write_size, io->session->c1->bucket_alloc);
        io->ssize = io->write_size;
        io->slen = 0;
        remain = io->ssize;
    }
    return remain;
}

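/* Copy the content of a single bucket into the scratch buffer, which the
 * caller must have sized to fit b->length. File buckets are read directly
 * from the file at the bucket's offset; other buckets go through
 * apr_bucket_read(). */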
static apr_status_t read_to_scratch(h2_c1_io *io, apr_bucket *b)
{
    apr_status_t status;
    const char *data;
    apr_size_t len;

    if (!b->length) {
        return APR_SUCCESS;
    }

    ap_assert(b->length <= (io->ssize - io->slen));
    if (APR_BUCKET_IS_FILE(b)) {
        apr_bucket_file *f = (apr_bucket_file *)b->data;
        apr_file_t *fd = f->fd;
        apr_off_t offset = b->start;

        len = b->length;
        /* file buckets will read 8000 byte chunks and split
         * themselves. However, we do know *exactly* how many
         * bytes we need where. So we read the file directly to
         * where we need it.
         */
        status = apr_file_seek(fd, APR_SET, &offset);
        if (status != APR_SUCCESS) {
            return status;
        }
        status = apr_file_read(fd, io->scratch + io->slen, &len);
        if (status != APR_SUCCESS && status != APR_EOF) {
            return status;
        }
        io->slen += len;
    }
#if APR_HAS_MMAP
    else if (APR_BUCKET_IS_MMAP(b)) {
        ap_log_cerror(APLOG_MARK, APLOG_INFO, 0, io->session->c1,
                      "h2_c1_io(%ld): seeing mmap bucket of size %ld, scratch remain=%ld",
                      io->session->c1->id, (long)b->length, (long)(io->ssize - io->slen));
        status = apr_bucket_read(b, &data, &len, APR_BLOCK_READ);
        if (status == APR_SUCCESS) {
            memcpy(io->scratch+io->slen, data, len);
            io->slen += len;
        }
    }
#endif
    else {
        status = apr_bucket_read(b, &data, &len, APR_BLOCK_READ);
        if (status == APR_SUCCESS) {
            memcpy(io->scratch+io->slen, data, len);
            io->slen += len;
        }
    }
    return status;
}

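/* Pass the accumulated output (scratch buffer plus output brigade) down
 * the main connection's filter chain, appending a FLUSH bucket when
 * requested, and adjust the TLS write size according to the warmup and
 * cooldown settings. */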
static apr_status_t pass_output(h2_c1_io *io, int flush)
{
    conn_rec *c = io->session->c1;
    apr_off_t bblen;
    apr_status_t rv;

    append_scratch(io);
    if (flush) {
        if (!APR_BUCKET_IS_FLUSH(APR_BRIGADE_LAST(io->output))) {
            apr_bucket *b = apr_bucket_flush_create(c->bucket_alloc);
            APR_BRIGADE_INSERT_TAIL(io->output, b);
        }
    }
    if (APR_BRIGADE_EMPTY(io->output)) {
        return APR_SUCCESS;
    }

    io->unflushed = !APR_BUCKET_IS_FLUSH(APR_BRIGADE_LAST(io->output));
    apr_brigade_length(io->output, 0, &bblen);
    C1_IO_BB_LOG(c, 0, APLOG_TRACE2, "out", io->output);

    rv = ap_pass_brigade(c->output_filters, io->output);
    if (APR_SUCCESS != rv) goto cleanup;

    io->buffered_len = 0;
    io->bytes_written += (apr_size_t)bblen;

    if (io->write_size < WRITE_SIZE_MAX
         && io->bytes_written >= io->warmup_size) {
        /* connection is hot, use max size */
        io->write_size = WRITE_SIZE_MAX;
    }
    else if (io->cooldown_usecs > 0
             && io->write_size > WRITE_SIZE_INITIAL) {
        apr_time_t now = apr_time_now();
        if ((now - io->last_write) >= io->cooldown_usecs) {
            /* long time not written, reset write size */
            io->write_size = WRITE_SIZE_INITIAL;
            io->bytes_written = 0;
        }
        else {
            io->last_write = now;
        }
    }

cleanup:
    if (APR_SUCCESS != rv) {
        ap_log_cerror(APLOG_MARK, APLOG_DEBUG, rv, c, APLOGNO(03044)
                      "h2_c1_io(%ld): pass_out brigade %ld bytes",
                      c->id, (long)bblen);
    }
    apr_brigade_cleanup(io->output);
    return rv;
}

int h2_c1_io_needs_flush(h2_c1_io *io)
{
    return io->buffered_len >= io->flush_threshold;
}

int h2_c1_io_pending(h2_c1_io *io)
{
    return !APR_BRIGADE_EMPTY(io->output) || (io->scratch && io->slen > 0);
}

apr_status_t h2_c1_io_pass(h2_c1_io *io)
{
    apr_status_t rv = APR_SUCCESS;

    if (h2_c1_io_pending(io)) {
        rv = pass_output(io, 0);
    }
    return rv;
}

apr_status_t h2_c1_io_assure_flushed(h2_c1_io *io)
{
    apr_status_t rv = APR_SUCCESS;

    if (h2_c1_io_pending(io) || io->unflushed) {
        rv = pass_output(io, 1);
        if (APR_SUCCESS != rv) goto cleanup;
    }
cleanup:
    return rv;
}

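/* Add raw data bytes to the output: copied into the scratch buffer when
 * output buffering is enabled, otherwise written directly into the
 * output brigade. */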
apr_status_t h2_c1_io_add_data(h2_c1_io *io, const char *data, size_t length)
{
    apr_status_t status = APR_SUCCESS;
    apr_size_t remain;

    ap_log_cerror(APLOG_MARK, APLOG_TRACE2, 0, io->session->c1,
                  "h2_c1_io(%ld): adding %ld data bytes",
                  io->session->c1->id, (long)length);
    if (io->buffer_output) {
        while (length > 0) {
            remain = assure_scratch_space(io);
            if (remain >= length) {
                memcpy(io->scratch + io->slen, data, length);
                io->slen += length;
                length = 0;
            }
            else {
                memcpy(io->scratch + io->slen, data, remain);
                io->slen += remain;
                data += remain;
                length -= remain;
            }
        }
    }
    else {
        status = apr_brigade_write(io->output, NULL, NULL, data, length);
        io->buffered_len += length;
    }
    return status;
}

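/* Move a brigade into the output: metadata (and mmap) buckets are
 * forwarded unchanged after closing any open scratch buffer. With output
 * buffering enabled, data is copied into the scratch buffer, or appended
 * directly when a bucket forms a complete write_size chunk; without
 * buffering, buckets are set aside and forwarded as they are. */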
apr_status_t h2_c1_io_append(h2_c1_io *io, apr_bucket_brigade *bb)
{
    apr_bucket *b;
    apr_status_t rv = APR_SUCCESS;

    while (!APR_BRIGADE_EMPTY(bb)) {
        b = APR_BRIGADE_FIRST(bb);
        if (APR_BUCKET_IS_METADATA(b)
#if APR_HAS_MMAP
            || APR_BUCKET_IS_MMAP(b)
#endif
            ) {
            /* need to finish any open scratch bucket, as meta data
             * needs to be forwarded "in order". */
            append_scratch(io);
            APR_BUCKET_REMOVE(b);
            APR_BRIGADE_INSERT_TAIL(io->output, b);
        }
        else if (io->buffer_output) {
            apr_size_t remain = assure_scratch_space(io);
            if (b->length > remain) {
                apr_bucket_split(b, remain);
                if (io->slen == 0) {
                    /* complete write_size bucket, append unchanged */
                    APR_BUCKET_REMOVE(b);
                    APR_BRIGADE_INSERT_TAIL(io->output, b);
                    io->buffered_len += b->length;
                    continue;
                }
            }
            else {
                /* bucket fits into the remaining scratch space, copy it there */
                rv = read_to_scratch(io, b);
                apr_bucket_delete(b);
                if (APR_SUCCESS != rv) goto cleanup;
                continue;
            }
        }
        else {
            /* no buffering: set the bucket aside and forward it unchanged */
            apr_bucket_setaside(b, io->session->c1->pool);
            APR_BUCKET_REMOVE(b);
            APR_BRIGADE_INSERT_TAIL(io->output, b);
            io->buffered_len += b->length;
        }
    }
cleanup:
    return rv;
}

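/* Feed the content of one data bucket to nghttp2, looping until all of
 * it has been consumed or a fatal nghttp2 error occurs. The number of
 * bytes accepted is added to *inout_len. */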
static apr_status_t c1_in_feed_bucket(h2_session *session,
                                      apr_bucket *b, apr_ssize_t *inout_len)
{
    apr_status_t rv = APR_SUCCESS;
    apr_size_t len;
    const char *data;
    ssize_t n;

    rv = apr_bucket_read(b, &data, &len, APR_BLOCK_READ);
    while (APR_SUCCESS == rv && len > 0) {
        n = nghttp2_session_mem_recv(session->ngh2, (const uint8_t *)data, len);

        ap_log_cerror(APLOG_MARK, APLOG_TRACE4, 0, session->c1,
                      H2_SSSN_MSG(session, "fed %ld bytes to nghttp2, %ld read"),
                      (long)len, (long)n);
        if (n < 0) {
            if (nghttp2_is_fatal((int)n)) {
                h2_session_event(session, H2_SESSION_EV_PROTO_ERROR,
                                 (int)n, nghttp2_strerror((int)n));
                rv = APR_EGENERAL;
            }
        }
        else {
            *inout_len += n;
            if ((apr_ssize_t)len <= n) {
                break;
            }
            len -= (apr_size_t)n;
            data += n;
        }
    }

    return rv;
}

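/* Feed all data buckets of a brigade to nghttp2 and empty the brigade.
 * *inout_len is set to the total number of bytes consumed. */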
static apr_status_t c1_in_feed_brigade(h2_session *session,
                                       apr_bucket_brigade *bb,
                                       apr_ssize_t *inout_len)
{
    apr_status_t rv = APR_SUCCESS;
    apr_bucket* b;

    *inout_len = 0;
    while (!APR_BRIGADE_EMPTY(bb)) {
        b = APR_BRIGADE_FIRST(bb);
        if (!APR_BUCKET_IS_METADATA(b)) {
            rv = c1_in_feed_bucket(session, b, inout_len);
            if (APR_SUCCESS != rv) goto cleanup;
        }
        apr_bucket_delete(b);
    }
cleanup:
    apr_brigade_cleanup(bb);
    return rv;
}

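/* Perform a non-blocking read from the main connection's input filters
 * into session->bbtmp and feed everything read to nghttp2. */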
static apr_status_t read_and_feed(h2_session *session)
{
    apr_ssize_t bytes_fed, bytes_requested;
    apr_status_t rv;

    bytes_requested = H2MAX(APR_BUCKET_BUFF_SIZE, session->max_stream_mem * 4);
    rv = ap_get_brigade(session->c1->input_filters,
                        session->bbtmp, AP_MODE_READBYTES,
                        APR_NONBLOCK_READ, bytes_requested);

    if (APR_SUCCESS == rv) {
        h2_util_bb_log(session->c1, session->id, APLOG_TRACE2, "c1 in", session->bbtmp);
        rv = c1_in_feed_brigade(session, session->bbtmp, &bytes_fed);
        session->io.bytes_read += bytes_fed;
    }
    return rv;
}

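/* Read and process pending input on the main connection, dispatching the
 * matching session event: INPUT_PENDING on success, INPUT_EXHAUSTED when
 * the read would block, CONN_ERROR on real failures. */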
apr_status_t h2_c1_read(h2_session *session)
{
    apr_status_t rv;

    /* The H2_IN filter handles all incoming data for the session.
     * We just pull at the filter chain to make it happen. */
    ap_log_cerror(APLOG_MARK, APLOG_TRACE2, 0, session->c1,
                  H2_SSSN_MSG(session, "session_read start"));
    rv = read_and_feed(session);

    if (APR_SUCCESS == rv) {
        h2_session_dispatch_event(session, H2_SESSION_EV_INPUT_PENDING, 0, NULL);
    }
    else if (APR_STATUS_IS_EAGAIN(rv)) {
        /* Signal that we have exhausted the input momentarily.
         * This might switch to polling the socket. */
        h2_session_dispatch_event(session, H2_SESSION_EV_INPUT_EXHAUSTED, 0, NULL);
    }
    else if (APR_SUCCESS != rv) {
        if (APR_STATUS_IS_ETIMEDOUT(rv)
            || APR_STATUS_IS_ECONNABORTED(rv)
            || APR_STATUS_IS_ECONNRESET(rv)
            || APR_STATUS_IS_EOF(rv)
            || APR_STATUS_IS_EBADF(rv)) {
            /* common status for a client that has left */
            ap_log_cerror(APLOG_MARK, APLOG_TRACE2, rv, session->c1,
                          H2_SSSN_MSG(session, "input gone"));
        }
        else {
            /* uncommon status, log at DEBUG so that we see this */
            ap_log_cerror(APLOG_MARK, APLOG_DEBUG, rv, session->c1,
                          H2_SSSN_LOG(APLOGNO(02950), session,
                          "error reading, terminating"));
        }
        h2_session_dispatch_event(session, H2_SESSION_EV_CONN_ERROR, 0, NULL);
    }

    apr_brigade_cleanup(session->bbtmp);
    ap_log_cerror(APLOG_MARK, APLOG_TRACE2, rv, session->c1,
                  H2_SSSN_MSG(session, "session_read done"));
    return rv;
}