1 /* Licensed to the Apache Software Foundation (ASF) under one or more
2 * contributor license agreements. See the NOTICE file distributed with
3 * this work for additional information regarding copyright ownership.
4 * The ASF licenses this file to You under the Apache License, Version 2.0
5 * (the "License"); you may not use this file except in compliance with
6 * the License. You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17 #include <assert.h>
18 #include <apr_strings.h>
19 #include <ap_mpm.h>
20 #include <mpm_common.h>
21
22 #include <httpd.h>
23 #include <http_core.h>
24 #include <http_log.h>
25 #include <http_connection.h>
26 #include <http_protocol.h>
27 #include <http_request.h>
28 #include <http_ssl.h>
29
30 #include "h2_private.h"
31 #include "h2_bucket_eos.h"
32 #include "h2_config.h"
33 #include "h2_c1.h"
34 #include "h2_c1_io.h"
35 #include "h2_protocol.h"
36 #include "h2_session.h"
37 #include "h2_util.h"
38
39 #define TLS_DATA_MAX (16*1024)
40
41 /* Calculated like this: assuming MTU 1500 bytes
42 * 1500 - 40 (IP) - 20 (TCP) - 40 (TCP options)
43 * - TLS overhead (60-100)
44 * ~= 1300 bytes */
45 #define WRITE_SIZE_INITIAL 1300
46
47 /* The maximum we'd like to write in one chunk is
48 * the max size of a TLS record. When pushing
49 * many frames down the h2 connection, this might
50 * align differently because of headers and other
51 * frames or simply as not sufficient data is
52 * in a response body.
53 * However keeping frames at or below this limit
54 * should make optimizations at the layer that writes
55 * to TLS easier.
56 */
57 #define WRITE_SIZE_MAX (TLS_DATA_MAX)
58
59 #define BUF_REMAIN ((apr_size_t)(bmax-off))
60
h2_c1_io_bb_log(conn_rec * c,int stream_id,int level,const char * tag,apr_bucket_brigade * bb)61 static void h2_c1_io_bb_log(conn_rec *c, int stream_id, int level,
62 const char *tag, apr_bucket_brigade *bb)
63 {
64 char buffer[16 * 1024];
65 const char *line = "(null)";
66 int bmax = sizeof(buffer)/sizeof(buffer[0]);
67 int off = 0;
68 apr_bucket *b;
69
70 (void)stream_id;
71 if (bb) {
72 memset(buffer, 0, bmax--);
73 for (b = APR_BRIGADE_FIRST(bb);
74 bmax && (b != APR_BRIGADE_SENTINEL(bb));
75 b = APR_BUCKET_NEXT(b)) {
76
77 if (APR_BUCKET_IS_METADATA(b)) {
78 if (APR_BUCKET_IS_EOS(b)) {
79 off += apr_snprintf(buffer+off, BUF_REMAIN, "eos ");
80 }
81 else if (APR_BUCKET_IS_FLUSH(b)) {
82 off += apr_snprintf(buffer+off, BUF_REMAIN, "flush ");
83 }
84 else if (AP_BUCKET_IS_EOR(b)) {
85 off += apr_snprintf(buffer+off, BUF_REMAIN, "eor ");
86 }
87 else if (H2_BUCKET_IS_H2EOS(b)) {
88 off += apr_snprintf(buffer+off, BUF_REMAIN, "h2eos ");
89 }
90 else {
91 off += apr_snprintf(buffer+off, BUF_REMAIN, "meta(unknown) ");
92 }
93 }
94 else {
95 const char *btype = "data";
96 if (APR_BUCKET_IS_FILE(b)) {
97 btype = "file";
98 }
99 else if (APR_BUCKET_IS_PIPE(b)) {
100 btype = "pipe";
101 }
102 else if (APR_BUCKET_IS_SOCKET(b)) {
103 btype = "socket";
104 }
105 else if (APR_BUCKET_IS_HEAP(b)) {
106 btype = "heap";
107 }
108 else if (APR_BUCKET_IS_TRANSIENT(b)) {
109 btype = "transient";
110 }
111 else if (APR_BUCKET_IS_IMMORTAL(b)) {
112 btype = "immortal";
113 }
114 #if APR_HAS_MMAP
115 else if (APR_BUCKET_IS_MMAP(b)) {
116 btype = "mmap";
117 }
118 #endif
119 else if (APR_BUCKET_IS_POOL(b)) {
120 btype = "pool";
121 }
122
123 off += apr_snprintf(buffer+off, BUF_REMAIN, "%s[%ld] ",
124 btype,
125 (long)(b->length == ((apr_size_t)-1)? -1UL : b->length));
126 }
127 }
128 line = *buffer? buffer : "(empty)";
129 }
130 /* Intentional no APLOGNO */
131 ap_log_cerror(APLOG_MARK, level, 0, c, "h2_session(%ld)-%s: %s",
132 c->id, tag, line);
133
134 }
/* Log the brigade at `level` only if that level is active on the
 * connection, avoiding the formatting cost otherwise.
 * Wrapped in do..while(0) so the macro is safe inside unbraced
 * if/else statements (the bare `if` form created a dangling-else
 * hazard at call sites). */
#define C1_IO_BB_LOG(c, stream_id, level, tag, bb) \
    do { \
        if (APLOG_C_IS_LEVEL(c, level)) { \
            h2_c1_io_bb_log((c), (stream_id), (level), (tag), (bb)); \
        } \
    } while (0)
139
140
h2_c1_io_init(h2_c1_io * io,h2_session * session)141 apr_status_t h2_c1_io_init(h2_c1_io *io, h2_session *session)
142 {
143 conn_rec *c = session->c1;
144
145 io->session = session;
146 io->output = apr_brigade_create(c->pool, c->bucket_alloc);
147 io->is_tls = ap_ssl_conn_is_ssl(session->c1);
148 io->buffer_output = io->is_tls;
149 io->flush_threshold = 4 * (apr_size_t)h2_config_sgeti64(session->s, H2_CONF_STREAM_MAX_MEM);
150
151 if (io->buffer_output) {
152 /* This is what we start with,
153 * see https://issues.apache.org/jira/browse/TS-2503
154 */
155 io->warmup_size = h2_config_sgeti64(session->s, H2_CONF_TLS_WARMUP_SIZE);
156 io->cooldown_usecs = (h2_config_sgeti(session->s, H2_CONF_TLS_COOLDOWN_SECS)
157 * APR_USEC_PER_SEC);
158 io->cooldown_usecs = 0;
159 io->write_size = (io->cooldown_usecs > 0?
160 WRITE_SIZE_INITIAL : WRITE_SIZE_MAX);
161 }
162 else {
163 io->warmup_size = 0;
164 io->cooldown_usecs = 0;
165 io->write_size = 0;
166 }
167
168 if (APLOGctrace1(c)) {
169 ap_log_cerror(APLOG_MARK, APLOG_TRACE4, 0, c,
170 "h2_c1_io(%ld): init, buffering=%d, warmup_size=%ld, "
171 "cd_secs=%f", c->id, io->buffer_output,
172 (long)io->warmup_size,
173 ((double)io->cooldown_usecs/APR_USEC_PER_SEC));
174 }
175
176 return APR_SUCCESS;
177 }
178
/* Move the bytes collected in the scratch buffer into the output
 * brigade as a heap bucket. No-op when scratch is empty or absent. */
static void append_scratch(h2_c1_io *io)
{
    apr_bucket *b;

    if (!io->scratch || io->slen == 0) {
        return;
    }
    /* ownership of the scratch memory transfers to the bucket;
     * apr_bucket_free releases it when the bucket is destroyed */
    b = apr_bucket_heap_create(io->scratch, io->slen,
                               apr_bucket_free,
                               io->session->c1->bucket_alloc);
    APR_BRIGADE_INSERT_TAIL(io->output, b);
    io->buffered_len += io->slen;
    io->scratch = NULL;
    io->slen = io->ssize = 0;
}
191
assure_scratch_space(h2_c1_io * io)192 static apr_size_t assure_scratch_space(h2_c1_io *io) {
193 apr_size_t remain = io->ssize - io->slen;
194 if (io->scratch && remain == 0) {
195 append_scratch(io);
196 }
197 if (!io->scratch) {
198 /* we control the size and it is larger than what buckets usually
199 * allocate. */
200 io->scratch = apr_bucket_alloc(io->write_size, io->session->c1->bucket_alloc);
201 io->ssize = io->write_size;
202 io->slen = 0;
203 remain = io->ssize;
204 }
205 return remain;
206 }
207
read_to_scratch(h2_c1_io * io,apr_bucket * b)208 static apr_status_t read_to_scratch(h2_c1_io *io, apr_bucket *b)
209 {
210 apr_status_t status;
211 const char *data;
212 apr_size_t len;
213
214 if (!b->length) {
215 return APR_SUCCESS;
216 }
217
218 ap_assert(b->length <= (io->ssize - io->slen));
219 if (APR_BUCKET_IS_FILE(b)) {
220 apr_bucket_file *f = (apr_bucket_file *)b->data;
221 apr_file_t *fd = f->fd;
222 apr_off_t offset = b->start;
223
224 len = b->length;
225 /* file buckets will read 8000 byte chunks and split
226 * themselves. However, we do know *exactly* how many
227 * bytes we need where. So we read the file directly to
228 * where we need it.
229 */
230 status = apr_file_seek(fd, APR_SET, &offset);
231 if (status != APR_SUCCESS) {
232 return status;
233 }
234 status = apr_file_read(fd, io->scratch + io->slen, &len);
235 if (status != APR_SUCCESS && status != APR_EOF) {
236 return status;
237 }
238 io->slen += len;
239 }
240 else if (APR_BUCKET_IS_MMAP(b)) {
241 ap_log_cerror(APLOG_MARK, APLOG_INFO, 0, io->session->c1,
242 "h2_c1_io(%ld): seeing mmap bucket of size %ld, scratch remain=%ld",
243 io->session->c1->id, (long)b->length, (long)(io->ssize - io->slen));
244 status = apr_bucket_read(b, &data, &len, APR_BLOCK_READ);
245 if (status == APR_SUCCESS) {
246 memcpy(io->scratch+io->slen, data, len);
247 io->slen += len;
248 }
249 }
250 else {
251 status = apr_bucket_read(b, &data, &len, APR_BLOCK_READ);
252 if (status == APR_SUCCESS) {
253 memcpy(io->scratch+io->slen, data, len);
254 io->slen += len;
255 }
256 }
257 return status;
258 }
259
/* Pass everything accumulated in io->output down the connection's
 * output filter chain. With `flush` != 0, a FLUSH bucket is appended
 * first (unless one already ends the brigade). Also adapts the TLS
 * write size: grows to WRITE_SIZE_MAX once warmup_size bytes have been
 * written, shrinks back after a cooldown period of inactivity.
 * The output brigade is always cleaned up, success or failure. */
static apr_status_t pass_output(h2_c1_io *io, int flush)
{
    conn_rec *c = io->session->c1;
    apr_off_t bblen;
    apr_status_t rv;

    /* move any pending scratch bytes into the brigade first */
    append_scratch(io);
    if (flush) {
        if (!APR_BUCKET_IS_FLUSH(APR_BRIGADE_LAST(io->output))) {
            apr_bucket *b = apr_bucket_flush_create(c->bucket_alloc);
            APR_BRIGADE_INSERT_TAIL(io->output, b);
        }
    }
    if (APR_BRIGADE_EMPTY(io->output)) {
        return APR_SUCCESS;
    }

    /* remember whether data leaves without a trailing FLUSH, so
     * h2_c1_io_assure_flushed() knows a flush is still owed */
    io->unflushed = !APR_BUCKET_IS_FLUSH(APR_BRIGADE_LAST(io->output));
    apr_brigade_length(io->output, 0, &bblen);
    C1_IO_BB_LOG(c, 0, APLOG_TRACE2, "out", io->output);

    rv = ap_pass_brigade(c->output_filters, io->output);
    if (APR_SUCCESS != rv) goto cleanup;

    io->buffered_len = 0;
    io->bytes_written += (apr_size_t)bblen;

    if (io->write_size < WRITE_SIZE_MAX
        && io->bytes_written >= io->warmup_size) {
        /* connection is hot, use max size */
        io->write_size = WRITE_SIZE_MAX;
    }
    else if (io->cooldown_usecs > 0
             && io->write_size > WRITE_SIZE_INITIAL) {
        apr_time_t now = apr_time_now();
        if ((now - io->last_write) >= io->cooldown_usecs) {
            /* long time not written, reset write size */
            io->write_size = WRITE_SIZE_INITIAL;
            io->bytes_written = 0;
        }
        else {
            /* NOTE(review): last_write is only advanced when writing
             * inside the cooldown window, never on the reset path —
             * confirm that is the intended bookkeeping. */
            io->last_write = now;
        }
    }

cleanup:
    if (APR_SUCCESS != rv) {
        ap_log_cerror(APLOG_MARK, APLOG_DEBUG, rv, c, APLOGNO(03044)
                      "h2_c1_io(%ld): pass_out brigade %ld bytes",
                      c->id, (long)bblen);
    }
    apr_brigade_cleanup(io->output);
    return rv;
}
314
/* Has buffered output grown past the flush threshold? */
int h2_c1_io_needs_flush(h2_c1_io *io)
{
    return !(io->buffered_len < io->flush_threshold);
}
319
/* Is any output waiting, either in the brigade or in scratch? */
int h2_c1_io_pending(h2_c1_io *io)
{
    if (!APR_BRIGADE_EMPTY(io->output)) {
        return 1;
    }
    return (io->scratch != NULL) && (io->slen > 0);
}
324
h2_c1_io_pass(h2_c1_io * io)325 apr_status_t h2_c1_io_pass(h2_c1_io *io)
326 {
327 apr_status_t rv = APR_SUCCESS;
328
329 if (h2_c1_io_pending(io)) {
330 rv = pass_output(io, 0);
331 }
332 return rv;
333 }
334
h2_c1_io_assure_flushed(h2_c1_io * io)335 apr_status_t h2_c1_io_assure_flushed(h2_c1_io *io)
336 {
337 apr_status_t rv = APR_SUCCESS;
338
339 if (h2_c1_io_pending(io) || io->unflushed) {
340 rv = pass_output(io, 1);
341 if (APR_SUCCESS != rv) goto cleanup;
342 }
343 cleanup:
344 return rv;
345 }
346
h2_c1_io_add_data(h2_c1_io * io,const char * data,size_t length)347 apr_status_t h2_c1_io_add_data(h2_c1_io *io, const char *data, size_t length)
348 {
349 apr_status_t status = APR_SUCCESS;
350 apr_size_t remain;
351
352 ap_log_cerror(APLOG_MARK, APLOG_TRACE2, 0, io->session->c1,
353 "h2_c1_io(%ld): adding %ld data bytes",
354 io->session->c1->id, (long)length);
355 if (io->buffer_output) {
356 while (length > 0) {
357 remain = assure_scratch_space(io);
358 if (remain >= length) {
359 memcpy(io->scratch + io->slen, data, length);
360 io->slen += length;
361 length = 0;
362 }
363 else {
364 memcpy(io->scratch + io->slen, data, remain);
365 io->slen += remain;
366 data += remain;
367 length -= remain;
368 }
369 }
370 }
371 else {
372 status = apr_brigade_write(io->output, NULL, NULL, data, length);
373 io->buffered_len += length;
374 }
375 return status;
376 }
377
h2_c1_io_append(h2_c1_io * io,apr_bucket_brigade * bb)378 apr_status_t h2_c1_io_append(h2_c1_io *io, apr_bucket_brigade *bb)
379 {
380 apr_bucket *b;
381 apr_status_t rv = APR_SUCCESS;
382
383 while (!APR_BRIGADE_EMPTY(bb)) {
384 b = APR_BRIGADE_FIRST(bb);
385 if (APR_BUCKET_IS_METADATA(b) || APR_BUCKET_IS_MMAP(b)) {
386 /* need to finish any open scratch bucket, as meta data
387 * needs to be forward "in order". */
388 append_scratch(io);
389 APR_BUCKET_REMOVE(b);
390 APR_BRIGADE_INSERT_TAIL(io->output, b);
391 }
392 else if (io->buffer_output) {
393 apr_size_t remain = assure_scratch_space(io);
394 if (b->length > remain) {
395 apr_bucket_split(b, remain);
396 if (io->slen == 0) {
397 /* complete write_size bucket, append unchanged */
398 APR_BUCKET_REMOVE(b);
399 APR_BRIGADE_INSERT_TAIL(io->output, b);
400 io->buffered_len += b->length;
401 continue;
402 }
403 }
404 else {
405 /* bucket fits in remain, copy to scratch */
406 rv = read_to_scratch(io, b);
407 apr_bucket_delete(b);
408 if (APR_SUCCESS != rv) goto cleanup;
409 continue;
410 }
411 }
412 else {
413 /* no buffering, forward buckets setaside on flush */
414 apr_bucket_setaside(b, io->session->c1->pool);
415 APR_BUCKET_REMOVE(b);
416 APR_BRIGADE_INSERT_TAIL(io->output, b);
417 io->buffered_len += b->length;
418 }
419 }
420 cleanup:
421 return rv;
422 }
423
c1_in_feed_bucket(h2_session * session,apr_bucket * b,apr_ssize_t * inout_len)424 static apr_status_t c1_in_feed_bucket(h2_session *session,
425 apr_bucket *b, apr_ssize_t *inout_len)
426 {
427 apr_status_t rv = APR_SUCCESS;
428 apr_size_t len;
429 const char *data;
430 ssize_t n;
431
432 rv = apr_bucket_read(b, &data, &len, APR_BLOCK_READ);
433 while (APR_SUCCESS == rv && len > 0) {
434 n = nghttp2_session_mem_recv(session->ngh2, (const uint8_t *)data, len);
435
436 ap_log_cerror(APLOG_MARK, APLOG_TRACE4, 0, session->c1,
437 H2_SSSN_MSG(session, "fed %ld bytes to nghttp2, %ld read"),
438 (long)len, (long)n);
439 if (n < 0) {
440 if (nghttp2_is_fatal((int)n)) {
441 h2_session_event(session, H2_SESSION_EV_PROTO_ERROR,
442 (int)n, nghttp2_strerror((int)n));
443 rv = APR_EGENERAL;
444 }
445 }
446 else {
447 *inout_len += n;
448 if ((apr_ssize_t)len <= n) {
449 break;
450 }
451 len -= (apr_size_t)n;
452 data += n;
453 }
454 }
455
456 return rv;
457 }
458
c1_in_feed_brigade(h2_session * session,apr_bucket_brigade * bb,apr_ssize_t * inout_len)459 static apr_status_t c1_in_feed_brigade(h2_session *session,
460 apr_bucket_brigade *bb,
461 apr_ssize_t *inout_len)
462 {
463 apr_status_t rv = APR_SUCCESS;
464 apr_bucket* b;
465
466 *inout_len = 0;
467 while (!APR_BRIGADE_EMPTY(bb)) {
468 b = APR_BRIGADE_FIRST(bb);
469 if (!APR_BUCKET_IS_METADATA(b)) {
470 rv = c1_in_feed_bucket(session, b, inout_len);
471 if (APR_SUCCESS != rv) goto cleanup;
472 }
473 apr_bucket_delete(b);
474 }
475 cleanup:
476 apr_brigade_cleanup(bb);
477 return rv;
478 }
479
read_and_feed(h2_session * session)480 static apr_status_t read_and_feed(h2_session *session)
481 {
482 apr_ssize_t bytes_fed, bytes_requested;
483 apr_status_t rv;
484
485 bytes_requested = H2MAX(APR_BUCKET_BUFF_SIZE, session->max_stream_mem * 4);
486 rv = ap_get_brigade(session->c1->input_filters,
487 session->bbtmp, AP_MODE_READBYTES,
488 APR_NONBLOCK_READ, bytes_requested);
489
490 if (APR_SUCCESS == rv) {
491 h2_util_bb_log(session->c1, session->id, APLOG_TRACE2, "c1 in", session->bbtmp);
492 rv = c1_in_feed_brigade(session, session->bbtmp, &bytes_fed);
493 session->io.bytes_read += bytes_fed;
494 }
495 return rv;
496 }
497
h2_c1_read(h2_session * session)498 apr_status_t h2_c1_read(h2_session *session)
499 {
500 apr_status_t rv;
501
502 /* H2_IN filter handles all incoming data against the session.
503 * We just pull at the filter chain to make it happen */
504 ap_log_cerror(APLOG_MARK, APLOG_TRACE2, 0, session->c1,
505 H2_SSSN_MSG(session, "session_read start"));
506 rv = read_and_feed(session);
507
508 if (APR_SUCCESS == rv) {
509 h2_session_dispatch_event(session, H2_SESSION_EV_INPUT_PENDING, 0, NULL);
510 }
511 else if (APR_STATUS_IS_EAGAIN(rv)) {
512 /* Signal that we have exhausted the input momentarily.
513 * This might switch to polling the socket */
514 h2_session_dispatch_event(session, H2_SESSION_EV_INPUT_EXHAUSTED, 0, NULL);
515 }
516 else if (APR_SUCCESS != rv) {
517 if (APR_STATUS_IS_ETIMEDOUT(rv)
518 || APR_STATUS_IS_ECONNABORTED(rv)
519 || APR_STATUS_IS_ECONNRESET(rv)
520 || APR_STATUS_IS_EOF(rv)
521 || APR_STATUS_IS_EBADF(rv)) {
522 /* common status for a client that has left */
523 ap_log_cerror(APLOG_MARK, APLOG_TRACE2, rv, session->c1,
524 H2_SSSN_MSG(session, "input gone"));
525 }
526 else {
527 /* uncommon status, log on INFO so that we see this */
528 ap_log_cerror( APLOG_MARK, APLOG_DEBUG, rv, session->c1,
529 H2_SSSN_LOG(APLOGNO(02950), session,
530 "error reading, terminating"));
531 }
532 h2_session_dispatch_event(session, H2_SESSION_EV_CONN_ERROR, 0, NULL);
533 }
534
535 apr_brigade_cleanup(session->bbtmp);
536 ap_log_cerror(APLOG_MARK, APLOG_TRACE2, rv, session->c1,
537 H2_SSSN_MSG(session, "session_read done"));
538 return rv;
539 }
540