#include "first.h"

/**
 * the network chunk-API
 *
 */

#include "chunk.h"
#include "fdevent.h"
#include "log.h"

#include <sys/types.h>
#include <sys/stat.h>
#include "sys-mmap.h"

#include <stdlib.h>
#include <fcntl.h>
#include <unistd.h>

#include <errno.h>
#include <string.h>

/* default 1 MB */
#define DEFAULT_TEMPFILE_SIZE (1 * 1024 * 1024)

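/* pools of reusable chunk structures, kept to reduce allocator churn:
 * "chunks" holds standard-size MEM_CHUNKs, "chunks_oversized" holds larger
 * MEM_CHUNKs (sorted descending by size; count in chunks_oversized_n),
 * "chunks_filechunk" holds FILE_CHUNKs, and "chunk_buffers" holds chunk
 * shells whose buffer is currently lent out via chunk_buffer_acquire() */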
static size_t chunk_buf_sz = 8192;
static chunk *chunks, *chunks_oversized, *chunks_filechunk;
static chunk *chunk_buffers;
static int chunks_oversized_n;
static const array *chunkqueue_default_tempdirs = NULL;
static off_t chunkqueue_default_tempfile_size = DEFAULT_TEMPFILE_SIZE;

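/* set preferred chunk buffer size; sz is rounded up to a power of 2 between
 * 1 KB and 1 GB (sz == 0 restores the 8 KB default) */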
void chunkqueue_set_chunk_size (size_t sz)
{
    size_t x = 1024;
    while (x < sz && x < (1u << 30)) x <<= 1;
    chunk_buf_sz = sz > 0 ? x : 8192;
}

void chunkqueue_set_tempdirs_default_reset (void)
{
    chunk_buf_sz = 8192;
    chunkqueue_default_tempdirs = NULL;
    chunkqueue_default_tempfile_size = DEFAULT_TEMPFILE_SIZE;
}

chunkqueue *chunkqueue_init(chunkqueue *cq) {
    /* (if caller passes non-NULL cq, it must be 0-init) */
    if (NULL == cq) {
        cq = calloc(1, sizeof(*cq));
        force_assert(NULL != cq);
    }

    cq->first = NULL;
    cq->last = NULL;

    cq->tempdirs = chunkqueue_default_tempdirs;
    cq->upload_temp_file_size = chunkqueue_default_tempfile_size;

    return cq;
}

__attribute_returns_nonnull__
static chunk *chunk_init(void) {
    chunk * const restrict c = calloc(1, sizeof(*c));
    force_assert(NULL != c);

  #if 0 /*(zeroed by calloc())*/
    c->type = MEM_CHUNK;
    c->next = NULL;
    c->offset = 0;
    c->file.length = 0;
    c->file.mmap.length = c->file.mmap.offset = 0;
    c->file.is_temp = 0;
  #endif
    c->file.fd = -1;
    c->file.mmap.start = MAP_FAILED;

    c->mem = buffer_init();
    return c;
}

__attribute_returns_nonnull__
static chunk *chunk_init_sz(size_t sz) {
    chunk * const restrict c = chunk_init();
    buffer_string_prepare_copy(c->mem, sz-1);
    return c;
}

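/* release file-related state of a chunk: unlink a lighttpd-created tempfile,
 * drop a refcounted fd (or close a directly-owned fd), and munmap any mapped
 * region; the chunk reverts to an (empty) MEM_CHUNK */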
static void chunk_reset_file_chunk(chunk *c) {
    if (c->file.is_temp) {
        c->file.is_temp = 0;
        if (!buffer_is_blank(c->mem))
            unlink(c->mem->ptr);
    }
    if (c->file.refchg) {
        c->file.refchg(c->file.ref, -1);
        c->file.refchg = 0; /* NULL fn ptr */
        c->file.ref = NULL;
    }
    else if (c->file.fd != -1) {
        close(c->file.fd);
    }
    if (MAP_FAILED != c->file.mmap.start) {
        munmap(c->file.mmap.start, c->file.mmap.length);
        c->file.mmap.start = MAP_FAILED;
        c->file.mmap.length = c->file.mmap.offset = 0;
    }
    c->file.fd = -1;
    c->file.length = 0;
    c->type = MEM_CHUNK;
}

static void chunk_reset(chunk *c) {
    if (c->type == FILE_CHUNK) chunk_reset_file_chunk(c);

    buffer_clear(c->mem);
    c->offset = 0;
}

static void chunk_free(chunk *c) {
    if (c->type == FILE_CHUNK) chunk_reset_file_chunk(c);
    buffer_free(c->mem);
    free(c);
}

static chunk * chunk_pop_oversized(size_t sz) {
    /* future: might have buckets of certain sizes, up to socket buf sizes */
    if (chunks_oversized && chunks_oversized->mem->size >= sz) {
        --chunks_oversized_n;
        chunk *c = chunks_oversized;
        chunks_oversized = c->next;
        return c;
    }
    return NULL;
}

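/* return an oversized chunk to its freelist, inserting in descending size
 * order so the largest buffer stays at the list head; the list is capped at
 * 64 entries (and used only when chunk_buf_sz >= 4096), beyond which the
 * chunk is freed, possibly after swapping its larger buffer onto the head */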
static void chunk_push_oversized(chunk * const c, const size_t sz) {
    /* XXX: chunk_buffer_yield() may have removed need for list size limit */
    if (chunks_oversized_n < 64 && chunk_buf_sz >= 4096) {
        ++chunks_oversized_n;
        chunk **co = &chunks_oversized;
        while (*co && sz < (*co)->mem->size) co = &(*co)->next;
        c->next = *co;
        *co = c;
    }
    else {
        buffer * const tb = chunks_oversized ? chunks_oversized->mem : NULL;
        if (tb && tb->size < sz) {
            /* swap larger mem block onto head of list; free smaller mem */
            chunks_oversized->mem = c->mem;
            c->mem = tb;
        }
        chunk_free(c);
    }
}

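/* note: pooled buffer sizes are (power-2 + 1); buffer allocation adds +1 for
 * the terminating '\0', so a standard chunk buffer compares equal to
 * (chunk_buf_sz|1), i.e. chunk_buf_sz + 1 since chunk_buf_sz is power-2 */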
__attribute_returns_nonnull__
static buffer * chunk_buffer_acquire_sz(const size_t sz) {
    chunk *c;
    buffer *b;
    if (sz <= (chunk_buf_sz|1)) {
        if (chunks) {
            c = chunks;
            chunks = c->next;
        }
        else
            c = chunk_init_sz(chunk_buf_sz);
    }
    else {
        c = chunk_pop_oversized(sz);
        if (NULL == c) {
            /*(round up to nearest chunk_buf_sz)*/
            /* NB: round down power-2 + 1 to avoid excess allocation
             * (sz & ~1uL) relies on buffer_realloc() adding +1 *and* on callers
             * of this func never passing power-2 + 1 sz unless direct caller
             * adds +1 for '\0', as is done in chunk_buffer_prepare_append() */
            c = chunk_init_sz(((sz&~1uL)+(chunk_buf_sz-1)) & ~(chunk_buf_sz-1));
        }
    }
    c->next = chunk_buffers;
    chunk_buffers = c;
    b = c->mem;
    c->mem = NULL;
    return b;
}

buffer * chunk_buffer_acquire(void) {
    return chunk_buffer_acquire_sz(chunk_buf_sz);
}

void chunk_buffer_release(buffer *b) {
    if (NULL == b) return;
    if (chunk_buffers) {
        chunk *c = chunk_buffers;
        chunk_buffers = c->next;
        c->mem = b;
        buffer_clear(b);
        if (b->size == (chunk_buf_sz|1)) {
            c->next = chunks;
            chunks = c;
        }
        else if (b->size > chunk_buf_sz)
            chunk_push_oversized(c, b->size);
        else
            chunk_free(c);
    }
    else {
        buffer_free(b);
    }
}
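
/* illustrative sketch of the scratch buffer pool API (hypothetical caller,
 * not part of this file):
 *   buffer * const b = chunk_buffer_acquire();
 *   buffer_copy_string_len(b, CONST_STR_LEN("scratch data"));
 *   ...use b...
 *   chunk_buffer_release(b);  (buffer returns to the pool for reuse)
 */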

void chunk_buffer_yield(buffer *b) {
    if (b->size == (chunk_buf_sz|1)) return;

    buffer * const cb = chunk_buffer_acquire_sz(chunk_buf_sz);
    buffer tb = *b;
    *b = *cb;
    *cb = tb;
    chunk_buffer_release(cb);
}

size_t chunk_buffer_prepare_append(buffer * const b, size_t sz) {
    if (sz > buffer_string_space(b)) {
        sz += b->used ? b->used : 1;
        buffer * const cb = chunk_buffer_acquire_sz(sz);
        /* swap buffer contents and copy original b->ptr into larger b->ptr */
        /*(this does more than buffer_move())*/
        buffer tb = *b;
        *b = *cb;
        *cb = tb;
        if ((b->used = tb.used))
            memcpy(b->ptr, tb.ptr, tb.used);
        chunk_buffer_release(cb);
    }
    return buffer_string_space(b);
}

__attribute_returns_nonnull__
static chunk * chunk_acquire(size_t sz) {
    if (sz <= (chunk_buf_sz|1)) {
        if (chunks) {
            chunk *c = chunks;
            chunks = c->next;
            return c;
        }
        sz = chunk_buf_sz;
    }
    else {
        /*(round up to nearest chunk_buf_sz)*/
        sz = (sz + (chunk_buf_sz-1)) & ~(chunk_buf_sz-1);
        chunk *c = chunk_pop_oversized(sz);
        if (c) return c;
    }

    return chunk_init_sz(sz);
}

static void chunk_release(chunk *c) {
    const size_t sz = c->mem->size;
    if (sz == (chunk_buf_sz|1)) {
        chunk_reset(c);
        c->next = chunks;
        chunks = c;
    }
    else if (sz > chunk_buf_sz) {
        chunk_reset(c);
        chunk_push_oversized(c, sz);
    }
    else if (c->type == FILE_CHUNK) {
        chunk_reset(c);
        c->next = chunks_filechunk;
        chunks_filechunk = c;
    }
    else {
        chunk_free(c);
    }
}

__attribute_returns_nonnull__
static chunk * chunk_acquire_filechunk(void) {
    if (chunks_filechunk) {
        chunk *c = chunks_filechunk;
        chunks_filechunk = c->next;
        return c;
    }
    return chunk_init();
}

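/* chunkqueue_chunk_pool_clear() frees chunks sitting in the freelists;
 * chunkqueue_chunk_pool_free() below additionally frees the chunk shells
 * whose buffers are currently lent out via chunk_buffer_acquire() */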
void chunkqueue_chunk_pool_clear(void)
{
    for (chunk *next, *c = chunks; c; c = next) {
        next = c->next;
        chunk_free(c);
    }
    chunks = NULL;
    for (chunk *next, *c = chunks_oversized; c; c = next) {
        next = c->next;
        chunk_free(c);
    }
    chunks_oversized = NULL;
    chunks_oversized_n = 0;
    for (chunk *next, *c = chunks_filechunk; c; c = next) {
        next = c->next;
        chunk_free(c);
    }
    chunks_filechunk = NULL;
}

void chunkqueue_chunk_pool_free(void)
{
    chunkqueue_chunk_pool_clear();
    for (chunk *next, *c = chunk_buffers; c; c = next) {
        next = c->next;
      #if 1 /*(chunk_buffers contains MEM_CHUNK with (c->mem == NULL))*/
        free(c);
      #else /*(c->mem = buffer_init() is no longer necessary below)*/
        c->mem = buffer_init(); /*(chunk_reset() expects c->mem != NULL)*/
        chunk_free(c);
      #endif
    }
    chunk_buffers = NULL;
}

__attribute_pure__
static off_t chunk_remaining_length(const chunk *c) {
    /* MEM_CHUNK or FILE_CHUNK */
    return (c->type == MEM_CHUNK
              ? (off_t)buffer_clen(c->mem)
              : c->file.length)
           - c->offset;
}

static void chunkqueue_release_chunks(chunkqueue *cq) {
    cq->last = NULL;
    for (chunk *c; (c = cq->first); ) {
        cq->first = c->next;
        chunk_release(c);
    }
}

void chunkqueue_free(chunkqueue *cq) {
    if (NULL == cq) return;
    chunkqueue_release_chunks(cq);
    free(cq);
}

static void chunkqueue_prepend_chunk(chunkqueue * const restrict cq, chunk * const restrict c) {
    if (NULL == (c->next = cq->first)) cq->last = c;
    cq->first = c;
}

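/* append chunk at queue tail; assigning through a pointer-to-pointer covers
 * both the empty queue (cq->first) and the non-empty queue (cq->last->next)
 * without a separate branch body */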
static void chunkqueue_append_chunk(chunkqueue * const restrict cq, chunk * const restrict c) {
    c->next = NULL;
    *(cq->last ? &cq->last->next : &cq->first) = c;
    cq->last = c;
}

__attribute_returns_nonnull__
static chunk * chunkqueue_prepend_mem_chunk(chunkqueue *cq, size_t sz) {
    chunk *c = chunk_acquire(sz);
    chunkqueue_prepend_chunk(cq, c);
    return c;
}

__attribute_returns_nonnull__
static chunk * chunkqueue_append_mem_chunk(chunkqueue *cq, size_t sz) {
    chunk *c = chunk_acquire(sz);
    chunkqueue_append_chunk(cq, c);
    return c;
}

__attribute_nonnull__()
__attribute_returns_nonnull__
static chunk * chunkqueue_append_file_chunk(chunkqueue * const restrict cq, const buffer * const restrict fn, off_t offset, off_t len) {
    chunk * const c = chunk_acquire_filechunk();
    chunkqueue_append_chunk(cq, c);
    c->type = FILE_CHUNK;
    c->offset = offset;
    c->file.length = offset + len;
    cq->bytes_in += len;
    buffer_copy_buffer(c->mem, fn);
    return c;
}

void chunkqueue_reset(chunkqueue *cq) {
    chunkqueue_release_chunks(cq);
    cq->bytes_in = 0;
    cq->bytes_out = 0;
    cq->tempdir_idx = 0;
}

void chunkqueue_append_file_fd(chunkqueue * const restrict cq, const buffer * const restrict fn, int fd, off_t offset, off_t len) {
    if (len > 0) {
        (chunkqueue_append_file_chunk(cq, fn, offset, len))->file.fd = fd;
    }
    else {
        close(fd);
    }
}

void chunkqueue_append_file(chunkqueue * const restrict cq, const buffer * const restrict fn, off_t offset, off_t len) {
    if (len > 0) {
        chunkqueue_append_file_chunk(cq, fn, offset, len);
    }
}

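/* try to append mem in place into the last chunk; succeeds (returns 1) if
 * the last chunk is a MEM_CHUNK with enough free space, or if len is 0;
 * otherwise returns 0 and the caller must append a new chunk */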
static int chunkqueue_append_mem_extend_chunk(chunkqueue * const restrict cq, const char * const restrict mem, size_t len) {
    chunk *c = cq->last;
    if (0 == len) return 1;
    if (c != NULL && c->type == MEM_CHUNK
        && buffer_string_space(c->mem) >= len) {
        buffer_append_string_len(c->mem, mem, len);
        cq->bytes_in += len;
        return 1;
    }
    return 0;
}


void chunkqueue_append_buffer(chunkqueue * const restrict cq, buffer * const restrict mem) {
    chunk *c;
    const size_t len = buffer_clen(mem);
    if (len < 1024 && chunkqueue_append_mem_extend_chunk(cq, mem->ptr, len)) {
        buffer_clear(mem);
        return;
    }

    c = chunkqueue_append_mem_chunk(cq, chunk_buf_sz);
    cq->bytes_in += len;
    buffer_move(c->mem, mem);
}


void chunkqueue_append_mem(chunkqueue * const restrict cq, const char * const restrict mem, size_t len) {
    chunk *c;
    if (len < chunk_buf_sz && chunkqueue_append_mem_extend_chunk(cq, mem, len))
        return;

    c = chunkqueue_append_mem_chunk(cq, len+1);
    cq->bytes_in += len;
    buffer_copy_string_len(c->mem, mem, len);
}


void chunkqueue_append_mem_min(chunkqueue * const restrict cq, const char * const restrict mem, size_t len) {
    chunk *c;
    if (len < chunk_buf_sz && chunkqueue_append_mem_extend_chunk(cq, mem, len))
        return;

    c = chunk_init_sz(len+1);
    chunkqueue_append_chunk(cq, c);
    cq->bytes_in += len;
    buffer_copy_string_len(c->mem, mem, len);
}


void chunkqueue_append_chunkqueue(chunkqueue * const restrict cq, chunkqueue * const restrict src) {
    if (NULL == src->first) return;

    if (NULL == cq->first) {
        cq->first = src->first;
    } else {
        cq->last->next = src->first;
    }
    cq->last = src->last;
    cq->bytes_in += chunkqueue_length(src);

    src->first = NULL;
    src->last = NULL;
    src->bytes_out = src->bytes_in;
}

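#if 0
/* illustrative sketch (not compiled): moving data between chunkqueues;
 * example_chunkqueue_usage() is hypothetical and not part of this file */
static void example_chunkqueue_usage (void)
{
    chunkqueue * const in  = chunkqueue_init(NULL);
    chunkqueue * const out = chunkqueue_init(NULL);
    chunkqueue_append_mem(in, CONST_STR_LEN("hello "));
    chunkqueue_append_mem(in, CONST_STR_LEN("world")); /*(extends last chunk)*/
    chunkqueue_steal(out, in, 6);            /*(moves "hello " into out)*/
    /* chunkqueue_length(out) == 6; chunkqueue_length(in) == 5 */
    chunkqueue_free(in);
    chunkqueue_free(out);
}
#endif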

buffer * chunkqueue_prepend_buffer_open_sz(chunkqueue *cq, size_t sz) {
    chunk * const c = chunkqueue_prepend_mem_chunk(cq, sz);
    return c->mem;
}


buffer * chunkqueue_prepend_buffer_open(chunkqueue *cq) {
    return chunkqueue_prepend_buffer_open_sz(cq, chunk_buf_sz);
}


void chunkqueue_prepend_buffer_commit(chunkqueue *cq) {
    cq->bytes_in += buffer_clen(cq->first->mem);
}


buffer * chunkqueue_append_buffer_open_sz(chunkqueue *cq, size_t sz) {
    chunk * const c = chunkqueue_append_mem_chunk(cq, sz);
    return c->mem;
}


buffer * chunkqueue_append_buffer_open(chunkqueue *cq) {
    return chunkqueue_append_buffer_open_sz(cq, chunk_buf_sz);
}


void chunkqueue_append_buffer_commit(chunkqueue *cq) {
    cq->bytes_in += buffer_clen(cq->last->mem);
}


char * chunkqueue_get_memory(chunkqueue * const restrict cq, size_t * const restrict len) {
    size_t sz = *len ? *len : (chunk_buf_sz >> 1);
    buffer *b;
    chunk *c = cq->last;
    if (NULL != c && MEM_CHUNK == c->type) {
        /* return pointer into existing buffer if large enough */
        size_t avail = buffer_string_space(c->mem);
        if (avail >= sz) {
            *len = avail;
            b = c->mem;
            return b->ptr + buffer_clen(b);
        }
    }

    /* allocate new chunk */
    b = chunkqueue_append_buffer_open_sz(cq, sz);
    *len = buffer_string_space(b);
    return b->ptr;
}

void chunkqueue_use_memory(chunkqueue * const restrict cq, chunk *ckpt, size_t len) {
    buffer *b = cq->last->mem;

    if (len > 0) {
        buffer_commit(b, len);
        cq->bytes_in += len;
        if (cq->last == ckpt || NULL == ckpt || MEM_CHUNK != ckpt->type
            || len > buffer_string_space(ckpt->mem)) return;

        buffer_append_string_buffer(ckpt->mem, b);
    }
    else if (!buffer_is_blank(b)) { /*(cq->last == ckpt)*/
        return; /* last chunk is not empty */
    }

    /* remove empty last chunk */
    chunk_release(cq->last);
    cq->last = ckpt;
    *(ckpt ? &ckpt->next : &cq->first) = NULL;
}

void chunkqueue_update_file(chunkqueue * const restrict cq, chunk *c, off_t len) {
    /*assert(c->type == FILE_CHUNK);*/
    c->file.length += len;
    cq->bytes_in += len;
    if (0 == chunk_remaining_length(c))
        chunkqueue_remove_empty_chunks(cq);
}

void chunkqueue_set_tempdirs_default (const array *tempdirs, off_t upload_temp_file_size) {
    if (upload_temp_file_size == 0)
        upload_temp_file_size = DEFAULT_TEMPFILE_SIZE;
    chunkqueue_default_tempdirs = tempdirs;
    chunkqueue_default_tempfile_size = upload_temp_file_size;
}

void chunkqueue_set_tempdirs(chunkqueue * const restrict cq, const array * const restrict tempdirs, off_t upload_temp_file_size) {
    if (upload_temp_file_size == 0)
        upload_temp_file_size = chunkqueue_default_tempfile_size;
    cq->tempdirs = tempdirs;
    cq->upload_temp_file_size = upload_temp_file_size;
    cq->tempdir_idx = 0;
}

__attribute_noinline__
static void chunkqueue_dup_file_chunk_fd (chunk * const restrict d, const chunk * const restrict c) {
    /*assert(d != c);*/
    /*assert(d->type == FILE_CHUNK);*/
    /*assert(c->type == FILE_CHUNK);*/
    if (c->file.fd >= 0) {
        if (c->file.refchg) {
            d->file.fd = c->file.fd;
            d->file.ref = c->file.ref;
            d->file.refchg = c->file.refchg;
            d->file.refchg(d->file.ref, 1);
        }
        else
            d->file.fd = fdevent_dup_cloexec(c->file.fd);
    }
}

__attribute_noinline__
static void chunkqueue_steal_partial_file_chunk(chunkqueue * const restrict dest, const chunk * const restrict c, const off_t len) {
    chunkqueue_append_file(dest, c->mem, c->offset, len);
    chunkqueue_dup_file_chunk_fd(dest->last, c);
}

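/* move up to len bytes from the head of src to the tail of dest: complete
 * chunks are relinked without copying; a final partially-consumed chunk is
 * copied (MEM_CHUNK) or split with a dup'd/ref-shared fd (FILE_CHUNK) and
 * src's chunk offset advanced */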
void chunkqueue_steal(chunkqueue * const restrict dest, chunkqueue * const restrict src, off_t len) {
    for (off_t clen; len > 0; len -= clen) {
        chunk * const c = src->first;
        if (__builtin_expect( (NULL == c), 0)) break;

        clen = chunk_remaining_length(c);

        if (len >= clen) {
            /* move complete chunk */
            src->first = c->next;
            if (c == src->last) src->last = NULL;

            if (__builtin_expect( (0 != clen), 1)) {
                chunkqueue_append_chunk(dest, c);
                dest->bytes_in += clen;
            }
            else /* drop empty chunk */
                chunk_release(c);
        } else {
            /* copy partial chunk */

            switch (c->type) {
              case MEM_CHUNK:
                chunkqueue_append_mem(dest, c->mem->ptr + c->offset, len);
                break;
              case FILE_CHUNK:
                /* tempfile flag is in "last" chunk after the split */
                chunkqueue_steal_partial_file_chunk(dest, c, len);
                break;
            }

            c->offset += len;
            clen = len;
        }

        src->bytes_out += clen;
    }
}

static int chunkqueue_get_append_mkstemp(buffer * const b, const char *path, const uint32_t len) {
    buffer_copy_path_len2(b,path,len,CONST_STR_LEN("lighttpd-upload-XXXXXX"));
  #if defined(HAVE_SPLICE) && defined(HAVE_PWRITE)
    /*(splice() rejects O_APPEND target; omit flag if also using pwrite())*/
    return fdevent_mkostemp(b->ptr, 0);
  #else
    return fdevent_mkostemp(b->ptr, O_APPEND);
  #endif
}

static chunk *chunkqueue_get_append_newtempfile(chunkqueue * const restrict cq, log_error_st * const restrict errh) {
    static const buffer emptyb = { "", 0, 0 };
    chunk * const restrict last = cq->last;
    chunk * const restrict c = chunkqueue_append_file_chunk(cq, &emptyb, 0, 0);
    buffer * const restrict template = c->mem;
    c->file.is_temp = 1;

    if (cq->tempdirs && cq->tempdirs->used) {
        /* we have several tempdirs; only if all of them fail do we give up */
        for (errno = EIO; cq->tempdir_idx < cq->tempdirs->used; ++cq->tempdir_idx) {
            data_string *ds = (data_string *)cq->tempdirs->data[cq->tempdir_idx];
            c->file.fd =
              chunkqueue_get_append_mkstemp(template, BUF_PTR_LEN(&ds->value));
            if (-1 != c->file.fd) return c;
        }
    }
    else {
        c->file.fd =
          chunkqueue_get_append_mkstemp(template, CONST_STR_LEN("/var/tmp"));
        if (-1 != c->file.fd) return c;
    }

    /* (report only last error from mkstemp() even if multiple temp dirs tried) */
    log_perror(errh, __FILE__, __LINE__,
      "opening temp-file failed: %s", template->ptr);
    /* remove (failed) final chunk */
    c->file.is_temp = 0;
    if ((cq->last = last))
        last->next = NULL;
    else
        cq->first = NULL;
    chunk_release(c);
    return NULL;
}

static chunk *chunkqueue_get_append_tempfile(chunkqueue * const restrict cq, log_error_st * const restrict errh) {
    /*
     * if the last chunk is
     * - smaller than cq->upload_temp_file_size
     *   -> append to it (and it then might exceed cq->upload_temp_file_size)
     * otherwise
     * -> create a new chunk
     */

    chunk * const c = cq->last;
    if (NULL != c && c->file.is_temp && c->file.fd >= 0) {

        if (c->file.length < (off_t)cq->upload_temp_file_size)
            return c; /* ok, take the last chunk for our job */

        /* the chunk is too large now, close it */
        force_assert(0 == c->file.refchg); /*(else should not happen)*/
        int rc = close(c->file.fd);
        c->file.fd = -1;
        if (0 != rc) {
            log_perror(errh, __FILE__, __LINE__,
              "close() temp-file %s failed", c->mem->ptr);
            return NULL;
        }
    }
    return chunkqueue_get_append_newtempfile(cq, errh);
}


__attribute_cold__
static int chunkqueue_append_tempfile_err(chunkqueue * const cq, log_error_st * const restrict errh, chunk * const c) {
    const int errnum = errno;
    if (errnum == EINTR) return 1; /* retry */

    int retry = (errnum == ENOSPC && cq->tempdirs
                 && ++cq->tempdir_idx < cq->tempdirs->used);
    if (!retry)
        log_perror(errh, __FILE__, __LINE__,
          "write() temp-file %s failed", c->mem->ptr);

    if (0 == chunk_remaining_length(c)) {
        /*(remove empty chunk and unlink tempfile)*/
        chunkqueue_remove_empty_chunks(cq);
    }
    else {/*(close tempfile; avoid later attempts to append)*/
        force_assert(0 == c->file.refchg); /*(else should not happen)*/
        int rc = close(c->file.fd);
        c->file.fd = -1;
        if (0 != rc) {
            log_perror(errh, __FILE__, __LINE__,
              "close() temp-file %s failed", c->mem->ptr);
            retry = 0;
        }
    }
    return retry;
}

__attribute_cold__
__attribute_noinline__
static int chunkqueue_to_tempfiles(chunkqueue * const restrict dest, log_error_st * const restrict errh) {
    /* transfer chunks from dest to src, adjust dest->bytes_in, and then call
     * chunkqueue_steal_with_tempfiles() to write chunks from src back into
     * dest, but into tempfiles.  chunkqueue_steal_with_tempfiles() calls back
     * into chunkqueue_append_mem_to_tempfile(), but will not re-enter this func
     * since chunks moved to src, and dest made empty before recursive call */
    const off_t cqlen = chunkqueue_length(dest);
    chunkqueue src = *dest; /*(copy struct)*/
    dest->first = dest->last = NULL;
    dest->bytes_in -= cqlen;
    if (0 == chunkqueue_steal_with_tempfiles(dest, &src, cqlen, errh))
        return 0;
    else {
        const int errnum = errno;
        chunkqueue_release_chunks(&src);
        return -errnum;
    }
}


int chunkqueue_append_mem_to_tempfile(chunkqueue * const restrict dest, const char * restrict mem, size_t len, log_error_st * const restrict errh) {
    chunk *dst_c = dest->first;

    /* check if prior MEM_CHUNK(s) exist and write to tempfile
     * (check first chunk only, since if we are using tempfiles, then
     * we expect further chunks to be tempfiles after starting tempfiles)*/
    if (dst_c && dst_c->type == MEM_CHUNK
        && 0 != chunkqueue_to_tempfiles(dest, errh)) {
        return -1;
    }

    do {
        /*(aside: arg len is permitted to be 0 and creates tempfile as a
         * side effect.  This is used by mod_ssi for ssi exec, as the func
         * chunkqueue_get_append_tempfile() is not public.  The result is
         * an empty chunk at the end of the chunkqueue, which typically
         * should be avoided)*/
        dst_c = chunkqueue_get_append_tempfile(dest, errh);
        if (NULL == dst_c)
            return -1;
      #ifdef __COVERITY__
        if (dst_c->file.fd < 0) return -1;
      #endif
      #ifdef HAVE_PWRITE
        /* coverity[negative_returns : FALSE] */
        const ssize_t written =
          pwrite(dst_c->file.fd, mem, len, dst_c->file.length);
      #else
        /* coverity[negative_returns : FALSE] */
        const ssize_t written = write(dst_c->file.fd, mem, len);
      #endif

        if ((size_t)written == len) {
            dst_c->file.length += len;
            dest->bytes_in += len;
            return 0;
        } else if (written >= 0) {
            /*(assume EINTR if partial write and retry write();
             * retry write() might fail with ENOSPC if no more space on volume)*/
            dest->bytes_in += written;
            mem += written;
            len -= (size_t)written;
            dst_c->file.length += (size_t)written;
            /* continue; retry */
        } else if (!chunkqueue_append_tempfile_err(dest, errh, dst_c)) {
            break; /* return -1; */
        } /* else continue; retry */
    } while (len);

    return -1;
}


#ifdef HAVE_PWRITEV

#ifdef HAVE_SYS_UIO_H
#include <sys/uio.h>
#endif

__attribute_cold__
__attribute_noinline__
static ssize_t chunkqueue_append_cqmem_to_tempfile_partial(chunkqueue * const dest, chunk * const c, ssize_t wr, log_error_st * const restrict errh) {
    /* recover from partial write of existing dest MEM_CHUNK to tempfile */
    chunk *ckpt = dest->first;
    while (ckpt->next != c) ckpt = ckpt->next;
    ckpt->next = NULL;
    dest->last = ckpt;
    dest->bytes_in  -= wr; /*(avoid double count in dest cq)*/
    dest->bytes_out -= wr;
    chunkqueue_mark_written(dest, wr);/*(remove MEM_CHUNK written to tempfile)*/

    c->next = dest->first; /*(place tempfile at beginning of dest cq)*/
    dest->first = c;
    return (0 == chunkqueue_to_tempfiles(dest, errh)) ? 0 : -1;
}

static ssize_t chunkqueue_append_cqmem_to_tempfile(chunkqueue * const restrict dest, chunkqueue * const restrict src, off_t len, log_error_st * const restrict errh) {
    /* write multiple MEM_CHUNKs to tempfile in single pwritev() syscall */
    /*(could lseek() and writev() if pwritev() is not available,
     * but if writev() is available, pwritev() is likely available,
     * e.g. any modern Linux or *BSD, and possibly anything not Windows)*/
    unsigned int iovcnt = 0;
    struct iovec iov[16];

    off_t dlen = 0;
    chunk *c;
    for (c = dest->first; c && c->type == MEM_CHUNK; c = c->next) {
        const off_t clen = chunk_remaining_length(c);
        iov[iovcnt].iov_base = c->mem->ptr + c->offset;
        iov[iovcnt].iov_len  = (size_t)clen;
        dlen += clen;
        ++iovcnt;
        if (__builtin_expect( (iovcnt == sizeof(iov)/sizeof(*iov)), 0))
            break; /*(not expecting large number of MEM_CHUNK)*/
    }
    if (__builtin_expect( (c != NULL), 0) && dest->first->type == MEM_CHUNK) {
        /*(expecting only MEM_CHUNK if dest cq starts w/ MEM_CHUNK)*/
        /*(use less efficient fallback if that assumption does not hold true)*/
        if (0 != chunkqueue_to_tempfiles(dest, errh))
            return -1;
        dlen = 0;
        iovcnt = 0;
    }

    if (__builtin_expect( (iovcnt < sizeof(iov)/sizeof(*iov)), 1)) {
        for (c = src->first; c && c->type == MEM_CHUNK; c = c->next) {
            off_t clen = chunk_remaining_length(c);
            if (clen > len) clen = len;
            iov[iovcnt].iov_base = c->mem->ptr + c->offset;
            iov[iovcnt].iov_len  = (size_t)clen;
            len -= clen;
            ++iovcnt;
            if (0 == len) break;
            if (__builtin_expect( (iovcnt == sizeof(iov)/sizeof(*iov)), 0))
                break; /*(not expecting large number of MEM_CHUNK)*/
        }
    }

    if (__builtin_expect( (0 == iovcnt), 0)) return 0; /*(should not happen)*/

    c = chunkqueue_get_append_tempfile(dest, errh);
    if (NULL == c)
        return -1;
  #ifdef __COVERITY__
    if (c->file.fd < 0) return -1;
  #endif
    /* coverity[negative_returns : FALSE] */
    ssize_t wr = pwritev(c->file.fd, iov, (int)iovcnt, c->file.length);

    /*(memory use in chunkqueues is expected to be limited before spilling
     * to tempfiles, so common case will write entire iovec to tempfile,
     * and we return amount written *from src cq*, even if partial write;
     * (not looping here to retry writing more, but caller might loop))*/

    if (wr >= 0) {
        c->file.length += wr;
        dest->bytes_in += wr;
        if (dlen) {
            if (__builtin_expect( (wr < dlen), 0))
                return
                  chunkqueue_append_cqmem_to_tempfile_partial(dest,c,wr,errh);
            wr -= (ssize_t)dlen;
            dest->bytes_in  -= dlen; /*(avoid double count in dest cq)*/
            dest->bytes_out -= dlen;
            chunkqueue_mark_written(dest, dlen);
        }
    }

    return wr;
}

#endif /* HAVE_PWRITEV */

#ifdef HAVE_SPLICE

__attribute_cold__
__attribute_noinline__
static ssize_t chunkqueue_append_drain_pipe_tempfile(chunkqueue * const restrict cq, const int fd, unsigned int len, log_error_st * const restrict errh) {
    /* attempt to drain full 'len' from pipe
     * (even if len not reduced to opts->max_per_read limit)
     * since data may have already been moved from socket to pipe
     *(returns 0 on success, or -errno (negative errno) if error,
     * even if partial write occurred)*/
    char buf[16384];
    ssize_t rd;
    do {
        do {
            rd = read(fd, buf, sizeof(buf));
        } while (rd < 0 && errno == EINTR);
        if (rd < 0) break;
        if (0 != chunkqueue_append_mem_to_tempfile(cq, buf, (size_t)rd, errh))
            break;
    } while ((len -= (unsigned int)rd));

    if (0 == len)
        return 0;
    else {
        const int errnum = errno;
        if (cq->last && 0 == chunk_remaining_length(cq->last)) {
            /*(remove empty chunk and unlink tempfile)*/
            chunkqueue_remove_empty_chunks(cq);
        }
        return -errnum;
    }
}

ssize_t chunkqueue_append_splice_pipe_tempfile(chunkqueue * const restrict cq, const int fd, unsigned int len, log_error_st * const restrict errh) {
    /* check if prior MEM_CHUNK(s) exist and write to tempfile
     * (check first chunk only, since if we are using tempfiles, then
     * we expect further chunks to be tempfiles after starting tempfiles)*/
    if (cq->first && cq->first->type == MEM_CHUNK) {
        int rc = chunkqueue_to_tempfiles(cq, errh);
        if (__builtin_expect( (0 != rc), 0)) return rc;
    }

    /*(returns num bytes written, or -errno (negative errno) if error)*/
    ssize_t total = 0;
    do {
        chunk * const c = chunkqueue_get_append_tempfile(cq, errh);
        if (__builtin_expect( (NULL == c), 0)) return -errno;

        loff_t off = c->file.length;
        ssize_t wr = splice(fd, NULL, c->file.fd, &off, len,
                            SPLICE_F_MOVE | SPLICE_F_NONBLOCK);

        if (__builtin_expect( ((size_t)wr == len), 1)) {
            c->file.length += len;
            cq->bytes_in += len;
            return total + len;
        }
        else if (wr >= 0) {
            /*(assume EINTR if partial write and retry;
             * retry might fail with ENOSPC if no more space on volume)*/
            cq->bytes_in += wr;
            total += wr;
            len -= (size_t)wr;
            c->file.length += (size_t)wr;
            /* continue; retry */
        }
        else {
            const int errnum = errno;
            switch (errnum) {
              case EAGAIN:
             #ifdef EWOULDBLOCK
             #if EWOULDBLOCK != EAGAIN
              case EWOULDBLOCK:
             #endif
             #endif
                if (0 == chunk_remaining_length(c)) {
                    /*(remove empty chunk and unlink tempfile)*/
                    chunkqueue_remove_empty_chunks(cq);
                }
                return total;
              case EINVAL: /*(assume total == 0 if EINVAL)*/
                wr = chunkqueue_append_drain_pipe_tempfile(cq, fd, len, errh);
                return (0 == wr) ? total + (ssize_t)len : wr;
              default:
                if (!chunkqueue_append_tempfile_err(cq, errh, c))
                    return -errnum;
                break; /* else continue; retry */
            }
        }
    } while (len);
    return -EIO; /*(not reached)*/
}

static int cqpipes[2] = { -1, -1 };

__attribute_cold__
__attribute_noinline__
void chunkqueue_internal_pipes(int init) {
    /*(intended for internal use within a single lighttpd process;
     * must be initialized after fork() and graceful-restart to avoid
     * sharing pipes between processes)*/
    if (-1 != cqpipes[0]) { close(cqpipes[0]); cqpipes[0] = -1; }
    if (-1 != cqpipes[1]) { close(cqpipes[1]); cqpipes[1] = -1; }
    if (init)
        if (0 != fdevent_pipe_cloexec(cqpipes, 262144)) { } /*(ignore error)*/
}

__attribute_cold__
__attribute_noinline__
static void chunkqueue_pipe_read_discard (void) {
    char buf[16384];
    ssize_t rd;
    do {
        rd = read(cqpipes[0], buf, sizeof(buf));
    } while (rd > 0 || (rd < 0 && errno == EINTR));
    if (rd < 0
     #ifdef EWOULDBLOCK
     #if EWOULDBLOCK != EAGAIN
        && errno != EWOULDBLOCK
     #endif
     #endif
        && errno != EAGAIN) {
        chunkqueue_internal_pipes(1); /*(close() and re-initialize)*/
    }
}

ssize_t chunkqueue_append_splice_sock_tempfile(chunkqueue * const restrict cq, const int fd, unsigned int len, log_error_st * const restrict errh) {
    /*(returns num bytes written, or -errno (negative errno) if error)*/
    int * const pipes = cqpipes;
    if (-1 == pipes[1])
        return -EINVAL; /*(not configured; not handled here)*/

    /* splice() socket data to intermediate pipe */
    ssize_t wr = splice(fd, NULL, pipes[1], NULL, len,
                        SPLICE_F_MOVE | SPLICE_F_NONBLOCK);
    if (__builtin_expect( (wr <= 0), 0))
        return -EINVAL; /*(reuse to indicate not handled here)*/
    len = (unsigned int)wr;

    /* splice() data from intermediate pipe to tempfile */
    wr = chunkqueue_append_splice_pipe_tempfile(cq, pipes[0], len, errh);
    if (wr < 0) /* expect (wr == (ssize_t)len) or (wr == -1) */
        chunkqueue_pipe_read_discard();/* discard data from intermediate pipe */
    return wr;
}

#endif /* HAVE_SPLICE */

int chunkqueue_steal_with_tempfiles(chunkqueue * const restrict dest, chunkqueue * const restrict src, off_t len, log_error_st * const restrict errh) {
    for (off_t clen; len > 0; len -= clen) {
        chunk * const c = src->first;
        if (__builtin_expect( (NULL == c), 0)) break;

      #ifdef HAVE_PWRITEV

        if (c->type == MEM_CHUNK) {
            clen = chunkqueue_append_cqmem_to_tempfile(dest, src, len, errh);
            if (__builtin_expect( (clen < 0), 0)) return -1;
            chunkqueue_mark_written(src, clen); /*(updates src->bytes_out)*/
        }
        else { /* (c->type == FILE_CHUNK) */
            clen = chunk_remaining_length(c);
            if (len < clen) clen = len;
            chunkqueue_steal(dest, src, clen);/*(same as below for FILE_CHUNK)*/
        }

      #else

        clen = chunk_remaining_length(c);
        if (__builtin_expect( (0 == clen), 0)) {
            /* drop empty chunk */
            src->first = c->next;
            if (c == src->last) src->last = NULL;
            chunk_release(c);
            continue;
        }

        switch (c->type) {
          case FILE_CHUNK:
            if (len >= clen) {
                /* move complete chunk */
                src->first = c->next;
                if (c == src->last) src->last = NULL;
                chunkqueue_append_chunk(dest, c);
                dest->bytes_in += clen;
            } else {
                /* copy partial chunk */
                /* tempfile flag is in "last" chunk after the split */
                chunkqueue_steal_partial_file_chunk(dest, c, len);
                c->offset += len;
                clen = len;
            }
            break;

          case MEM_CHUNK:
            /* store bytes from memory chunk in tempfile */
            if (0 != chunkqueue_append_mem_to_tempfile(dest, c->mem->ptr + c->offset,
                                                       (len >= clen) ? clen : len, errh)) {
                return -1;
            }

            if (len >= clen) {
                /* finished chunk */
                src->first = c->next;
                if (c == src->last) src->last = NULL;
                chunk_release(c);
            } else {
                /* partial chunk */
                c->offset += len;
                clen = len;
            }
            break;
        }

        src->bytes_out += clen;

      #endif
    }

    return 0;
}

void chunkqueue_append_cq_range (chunkqueue * const dst, const chunkqueue * const src, off_t offset, off_t len) {
    /* similar to chunkqueue_steal() but copy and append src range to dst cq */
    /* (dst cq and src cq can be the same cq, so neither is marked restrict) */

    /* copy and append range len from src to dst */
    for (const chunk *c = src->first; len > 0 && c != NULL; c = c->next) {
        /* scan into src to range offset (also skips empty chunks) */
        off_t clen = chunk_remaining_length(c);
        if (offset >= clen) {
            offset -= clen;
            continue;
        }
        clen -= offset;
        if (len < clen) clen = len;
        len -= clen;

        if (c->type == FILE_CHUNK) {
            chunkqueue_append_file(dst, c->mem, c->offset + offset, clen);
            chunkqueue_dup_file_chunk_fd(dst->last, c);
        }
        else { /*(c->type == MEM_CHUNK)*/
            /*(string refs would reduce copying,
             * but this path is not expected to be hot)*/
            chunkqueue_append_mem(dst, c->mem->ptr + c->offset + offset, clen);
        }
        offset = 0;
    }
}

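/* account len bytes as written: advance cq->bytes_out, release chunks that
 * are now fully consumed, and advance the offset into a partially consumed
 * chunk; caller is expected not to pass len > chunkqueue_length(cq) */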
void chunkqueue_mark_written(chunkqueue *cq, off_t len) {
    cq->bytes_out += len;

    for (chunk *c = cq->first; c; ) {
        off_t c_len = chunk_remaining_length(c);
        if (len >= c_len) { /* chunk got finished */
            chunk * const x = c;
            c = c->next;
            len -= c_len;
            chunk_release(x);
        }
        else { /* partial chunk */
            c->offset += len;
            cq->first = c;
            return; /* chunk not finished */
        }
    }
    cq->first = cq->last = NULL;
}

void chunkqueue_remove_finished_chunks(chunkqueue *cq) {
    for (chunk *c; (c = cq->first) && 0 == chunk_remaining_length(c); ) {
        if (NULL == (cq->first = c->next)) cq->last = NULL;
        chunk_release(c);
    }
}

void chunkqueue_remove_empty_chunks(chunkqueue *cq) {
    chunk *c;
    chunkqueue_remove_finished_chunks(cq);

    for (c = cq->first; c && c->next; c = c->next) {
        if (0 == chunk_remaining_length(c->next)) {
            chunk *empty = c->next;
            c->next = empty->next;
            if (empty == cq->last) cq->last = c;
            chunk_release(empty);
        }
    }
}

void chunkqueue_compact_mem_offset(chunkqueue * const cq) {
    chunk * const restrict c = cq->first;
    if (0 == c->offset) return;
    if (c->type != MEM_CHUNK) return; /*(should not happen)*/

    buffer * const restrict b = c->mem;
    size_t len = buffer_clen(b) - c->offset;
    memmove(b->ptr, b->ptr+c->offset, len);
    c->offset = 0;
    buffer_truncate(b, len);
}

void chunkqueue_compact_mem(chunkqueue *cq, size_t clen) {
    /* caller must guarantee that chunks in chunkqueue are MEM_CHUNK,
     * which is currently always true when reading input from client */
    chunk *c = cq->first;
    buffer *b = c->mem;
    size_t len = buffer_clen(b) - c->offset;
    if (len >= clen) return;
    if (b->size > clen) {
        if (buffer_string_space(b) < clen - len)
            chunkqueue_compact_mem_offset(cq);
    }
    else {
        b = chunkqueue_prepend_buffer_open_sz(cq, clen+1);
        buffer_append_string_len(b, c->mem->ptr + c->offset, len);
        cq->first->next = c->next;
        if (NULL == c->next) cq->last = cq->first;
        chunk_release(c);
        c = cq->first;
    }

    for (chunk *fc = c; ((clen -= len) && (c = fc->next)); ) {
        len = buffer_clen(c->mem) - c->offset;
        if (len > clen) {
            buffer_append_string_len(b, c->mem->ptr + c->offset, clen);
            c->offset += clen;
            break;
        }
        buffer_append_string_len(b, c->mem->ptr + c->offset, len);
        fc->next = c->next;
        if (NULL == c->next) cq->last = fc;
        chunk_release(c);
    }
    /* chunkqueue_prepend_buffer_commit() is not called here;
     * no data added/removed from chunkqueue; consolidated only */
}

static int chunk_open_file_chunk(chunk * const restrict c, log_error_st * const restrict errh) {
    if (-1 == c->file.fd) {
        /* (permit symlinks; should already have been checked.
         *  However, TOC-TOU remains) */
        if (-1 == (c->file.fd = fdevent_open_cloexec(c->mem->ptr, 1, O_RDONLY, 0))) {
            log_perror(errh, __FILE__, __LINE__, "open failed: %s",c->mem->ptr);
            return -1;
        }
    }

    /*(skip file size checks if file is temp file created by lighttpd)*/
    if (c->file.is_temp) return 0;

    struct stat st;
    if (-1 == fstat(c->file.fd, &st)) {
        log_perror(errh, __FILE__, __LINE__, "fstat failed");
        return -1;
    }

    const off_t offset = c->offset;
    const off_t len = c->file.length - c->offset;
    force_assert(offset >= 0 && len >= 0);
    if (offset > st.st_size - len) {
        log_error(errh, __FILE__, __LINE__, "file shrunk: %s", c->mem->ptr);
        return -1;
    }

    return 0;
}

int chunkqueue_open_file_chunk(chunkqueue * const restrict cq, log_error_st * const restrict errh) {
    return chunk_open_file_chunk(cq->first, errh);
}


static ssize_t
chunkqueue_write_data (const int fd, const void *buf, size_t count)
{
    ssize_t wr;
    do { wr = write(fd, buf, count); } while (-1 == wr && errno == EINTR);
    return wr;
}


#if defined(HAVE_MMAP) || defined(_WIN32) /*(see local sys-mmap.h)*/
__attribute_cold__
#endif
__attribute_noinline__
static ssize_t
chunkqueue_write_chunk_file_intermed (const int fd, chunk * const restrict c, log_error_st * const errh)
{
    char buf[16384];
    char *data = buf;
    const off_t count = c->file.length - c->offset;
    uint32_t dlen = count < (off_t)sizeof(buf) ? (uint32_t)count : sizeof(buf);
    chunkqueue cq = {c,c,0,0,0,0,0}; /*(fake cq for chunkqueue_peek_data())*/
    if (0 != chunkqueue_peek_data(&cq, &data, &dlen, errh) && 0 == dlen)
        return -1;
    return chunkqueue_write_data(fd, data, dlen);
}


#if defined(HAVE_MMAP) || defined(_WIN32) /*(see local sys-mmap.h)*/

/*(improved from network_write_mmap.c)*/
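/* round start down to a page boundary,
 * e.g. with 4k pages: mmap_align_offset(10000) == (10000 & ~4095) == 8192 */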
static off_t
mmap_align_offset (off_t start)
{
    static off_t pagemask = 0;
    if (0 == pagemask) {
      #ifndef _WIN32
        long pagesize = sysconf(_SC_PAGESIZE);
      #else
        long pagesize = -1; /*(not implemented (yet))*/
      #endif
        if (-1 == pagesize) pagesize = 4096;
        pagemask = ~((off_t)pagesize - 1); /* pagesize always power-of-2 */
    }
    return (start & pagemask);
}


__attribute_noinline__
static char *
chunkqueue_mmap_chunk_len (chunk * const c, off_t len)
{
    /* (re)mmap the buffer to file length if range is not covered completely */
    /*(caller is responsible for handling SIGBUS if chunkqueue might contain
     * untrusted file, i.e. any file other than lighttpd-created tempfile)*/
    /*(tempfiles are expected for input, MAP_PRIVATE used for portability)*/
    /*(mmaps and writes complete chunk instead of only small parts; files
     * are expected to be temp files with reasonable chunk sizes)*/
    if (MAP_FAILED == c->file.mmap.start
        || c->offset < c->file.mmap.offset
        || c->offset+len > (off_t)(c->file.mmap.offset + c->file.mmap.length)) {

        if (MAP_FAILED != c->file.mmap.start) {
            munmap(c->file.mmap.start, c->file.mmap.length);
            /*c->file.mmap.start = MAP_FAILED;*//*(assigned below)*/
        }

        c->file.mmap.offset = mmap_align_offset(c->offset);
        c->file.mmap.length = c->file.length - c->file.mmap.offset;
        c->file.mmap.start =
          mmap(NULL, c->file.mmap.length, PROT_READ, MAP_PRIVATE,
               c->file.fd, c->file.mmap.offset);
        if (MAP_FAILED == c->file.mmap.start) return NULL;

      #if 0 /*(review callers before changing; some expect open file)*/
        /* close() fd as soon as fully mmap() rather than when done w/ chunk
         * (possibly worthwhile to keep active fd count lower) */
        if (c->file.is_temp && !c->file.refchg) {
            close(c->file.fd);
            c->file.fd = -1;
        }
      #endif
    }
    return c->file.mmap.start + c->offset - c->file.mmap.offset;
}

#endif


#if defined HAVE_SYS_SENDFILE_H && defined HAVE_SENDFILE \
 && (!defined _LARGEFILE_SOURCE || defined HAVE_SENDFILE64) \
 && defined(__linux__) && !defined HAVE_SENDFILE_BROKEN
#include <sys/sendfile.h>
#include <stdint.h>
#endif
static ssize_t
chunkqueue_write_chunk_file (const int fd, chunk * const restrict c, log_error_st * const errh)
{
    /*(similar to network_write_file_chunk_mmap(), but does not use send() on
     * Windows because fd is expected to be file or pipe here, not socket)*/

    if (0 != chunk_open_file_chunk(c, errh))
        return -1;

    const off_t count = c->file.length - c->offset;
    if (0 == count) return 0; /*(sanity check)*/

  #if defined HAVE_SYS_SENDFILE_H && defined HAVE_SENDFILE \
   && (!defined _LARGEFILE_SOURCE || defined HAVE_SENDFILE64) \
   && defined(__linux__) && !defined HAVE_SENDFILE_BROKEN
    /* Linux kernel >= 2.6.33 supports sendfile() between most fd types */
    off_t offset = c->offset;
    const ssize_t wr =
      sendfile(fd, c->file.fd, &offset, count<INT32_MAX ? count : INT32_MAX);
    if (__builtin_expect( (wr >= 0), 1) || (errno != EINVAL && errno != ENOSYS))
        return wr;
  #endif

  #if defined(HAVE_MMAP) || defined(_WIN32) /*(see local sys-mmap.h)*/
    const char * const data = chunkqueue_mmap_chunk_len(c, count);
    if (NULL != data)
        return chunkqueue_write_data(fd, data, count);
  #endif

    return chunkqueue_write_chunk_file_intermed(fd, c, errh);
}


static ssize_t
chunkqueue_write_chunk_mem (const int fd, const chunk * const restrict c)
{
    const void * const buf = c->mem->ptr + c->offset;
    const size_t count = buffer_clen(c->mem) - (size_t)c->offset;
    ssize_t wr;
    do { wr = write(fd, buf, count); } while (-1 == wr && errno == EINTR);
    return wr;
}


ssize_t
chunkqueue_write_chunk (const int fd, chunkqueue * const restrict cq, log_error_st * const restrict errh)
{
    /*(note: expects non-empty cq->first)*/
    chunk * const c = cq->first;
    switch (c->type) {
      case MEM_CHUNK:
        return chunkqueue_write_chunk_mem(fd, c);
      case FILE_CHUNK:
        return chunkqueue_write_chunk_file(fd, c, errh);
      default:
        errno = EINVAL;
        return -1;
    }
}


ssize_t
chunkqueue_write_chunk_to_pipe (const int fd, chunkqueue * const restrict cq, log_error_st * const restrict errh)
{
    /*(note: expects non-empty cq->first)*/
  #ifdef HAVE_SPLICE /* splice() temp files to pipe on Linux */
    chunk * const c = cq->first;
    if (c->type == FILE_CHUNK) {
        loff_t abs_offset = c->offset;
        return (0 == chunk_open_file_chunk(c, errh))
          ? splice(c->file.fd, &abs_offset, fd, NULL,
                   (size_t)(c->file.length - c->offset), SPLICE_F_NONBLOCK)
          : -1;
    }
  #endif
    return chunkqueue_write_chunk(fd, cq, errh);
}


void
chunkqueue_small_resp_optim (chunkqueue * const restrict cq)
{
    /*(caller must verify response is small (and non-empty) before calling)*/
    /*(caller must verify first chunk is MEM_CHUNK, i.e. response headers)*/
    /*(caller must verify response is non-zero length)*/

    /*(optimization to use fewer syscalls to send a small response by reading
     * small files into memory, thereby avoiding use of sendfile() and multiple
     * calls to writev()  (benefit for cleartext (non-TLS) and <= HTTP/1.1))
     *(If TLS, then will shortly need to be in memory for encryption anyway)*/

    /*assert(cq->first);*/
    /*assert(cq->first->type == MEM_CHUNK);*/
    /*assert(cq->first->next);*/
    chunk * restrict c = cq->first;
    chunk * const restrict filec = c->next; /*(require file already be open)*/
    if (filec != cq->last || filec->type != FILE_CHUNK || filec->file.fd < 0)
        return;

  #ifndef HAVE_PREAD
    if (-1 == lseek(filec->file.fd, filec->offset, SEEK_SET)) return;
  #endif

    /* Note: there should be no size change in chunkqueue,
     * so cq->bytes_in and cq->bytes_out should not be modified */

    off_t len = filec->file.length - filec->offset;
    if ((size_t)len > buffer_string_space(c->mem)) {
        c->next = chunk_acquire((size_t)len+1);
        c = c->next;
        /*c->next = filec;*/
    }
    /* detach filec from chunkqueue; file expected to be read fully */
    c->next = NULL;
    cq->last = c;

    ssize_t rd;
    off_t offset = 0;
    char * const ptr = buffer_extend(c->mem, len);
    do {
      #ifdef HAVE_PREAD
        rd = pread(filec->file.fd, ptr+offset, (size_t)len, filec->offset+offset);
      #else
        rd = read(filec->file.fd, ptr+offset, (size_t)len);
      #endif
    } while (rd > 0 ? (offset += rd, len -= rd) : errno == EINTR);
    /*(contents of chunkqueue kept valid even if error reading from file)*/
    if (__builtin_expect( (0 == len), 1))
        chunk_release(filec);
    else { /*(unexpected; error recovery)*/
        buffer_truncate(c->mem, (uint32_t)(ptr + offset - c->mem->ptr));
        cq->last = c->next = filec;
        if (offset)
            filec->offset += offset;
        else if (__builtin_expect( (cq->first != c), 0)) {
            cq->first->next = filec;
            chunk_release(c);
        }
    }
}

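/* peek at (do not consume) up to *dlen bytes from the head of cq:
 * on input, *data points to a caller buffer of *dlen bytes; on return,
 * *dlen is the number of bytes available and *data points either into the
 * caller buffer or, if a single MEM_CHUNK sufficed, directly into that
 * chunk (reference; copy deferred); returns 0 on success, -1 on error */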
int
chunkqueue_peek_data (chunkqueue * const cq,
                      char ** const data, uint32_t * const dlen,
                      log_error_st * const errh)
{
    char * const data_in = *data;
    const uint32_t data_insz = *dlen;
    *dlen = 0;

    for (chunk *c = cq->first; c; ) {
        uint32_t space = data_insz - *dlen;
        switch (c->type) {
          case MEM_CHUNK:
          {
            uint32_t have = buffer_clen(c->mem) - (uint32_t)c->offset;
            if (have > space)
                have = space;
            if (*dlen)
                memcpy(data_in + *dlen, c->mem->ptr + c->offset, have);
            else
                *data = c->mem->ptr + c->offset; /*(reference; defer copy)*/
            *dlen += have;
            break;
          }

          case FILE_CHUNK:
            if (c->file.fd >= 0 || 0 == chunk_open_file_chunk(c, errh)) {
                off_t offset = c->offset;
                off_t len = c->file.length - c->offset;
                if (len > (off_t)space)
                    len = (off_t)space;
                if (0 == len)
                    break;

              #if 0 /* XXX: might improve performance on some system workloads */
              #if defined(_LP64) || defined(__LP64__) || defined(_WIN64)
              #if defined(HAVE_MMAP) || defined(_WIN32) /*see local sys-mmap.h*/
                /* mmap file to access data
                 * (Only consider temp files here since not catching SIGBUS)
                 * (For now, also limit to 64-bit to avoid address space issues)
                 * If temp file is used, data should be large enough that mmap
                 * is worthwhile.  fd need not be kept open for the mmap once
                 * the mmap has been created, but is currently kept open for
                 * other pre-existing logic which checks fd and opens file,
                 * such as the condition for entering this code block above. */
                /* Note: under heavy load (or microbenchmark), system-reported
                 * memory use for RSS can be very, very large, due to presence
                 * of lots and lots of temp file read-only memory maps.
                 * pmap -X and exclude lighttpd temporary files to get a better
                 * view of memory use */
                char *mdata;
                if (c->file.is_temp
                    && (mdata = chunkqueue_mmap_chunk_len(c, len))) {
                    if (*dlen) {
                        if (*data != data_in) {
                            memcpy(data_in, *data, *dlen);
                            *data = data_in;
                        }
                        memcpy(data_in+*dlen, mdata, (size_t)len);
                    }
                    else {
                        *data = mdata;
                    }
                    *dlen += (uint32_t)len;
                    break;
                }
              #endif
              #endif
              #endif

              #ifndef HAVE_PREAD
                if (-1 == lseek(c->file.fd, offset, SEEK_SET)) {
                    log_perror(errh, __FILE__, __LINE__, "lseek(\"%s\")",
                               c->mem->ptr);
                    return -1;
                }
              #endif
                ssize_t rd;
                do {
                  #ifdef HAVE_PREAD
                    rd = pread(c->file.fd, data_in + *dlen, (size_t)len, offset);
                  #else
                    rd = read(c->file.fd, data_in + *dlen, (size_t)len);
                  #endif
                } while (-1 == rd && errno == EINTR);
                if (rd <= 0) { /* -1 error; 0 EOF (unexpected) */
                    log_perror(errh, __FILE__, __LINE__, "read(\"%s\")",
                               c->mem->ptr);
                    return -1;
                }

                *dlen += (uint32_t)rd;
                break;
            }
            return -1;

          default:
            return -1;
        }

        if (*dlen == data_insz)
            break;

        c = c->next;
        if (NULL == c)
            break;

        if (*dlen && *data != data_in) {
            memcpy(data_in, *data, *dlen);
            *data = data_in;
        }
    }

    return 0;
}

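/* read and consume exactly dlen bytes from the head of cq into data;
 * returns 0 on success, or -1 (with nothing consumed) if fewer than dlen
 * bytes are available or a read error occurs */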
int
chunkqueue_read_data (chunkqueue * const cq,
                      char * const data, const uint32_t dlen,
                      log_error_st * const errh)
{
    char *ptr = data;
    uint32_t len = dlen;
    if (chunkqueue_peek_data(cq, &ptr, &len, errh) < 0 || len != dlen)
        return -1;
    if (data != ptr) memcpy(data, ptr, len);
    chunkqueue_mark_written(cq, len);
    return 0;
}


buffer *
chunkqueue_read_squash (chunkqueue * const restrict cq, log_error_st * const restrict errh)
{
    /* read and replace chunkqueue contents with single MEM_CHUNK.
     * cq->bytes_out is not modified */

    off_t cqlen = chunkqueue_length(cq);
    if (cqlen >= UINT32_MAX) return NULL;

    if (cq->first && NULL == cq->first->next && cq->first->type == MEM_CHUNK)
        return cq->first->mem;

    chunk * const c = chunk_acquire((uint32_t)cqlen+1);
    char *data = c->mem->ptr;
    uint32_t dlen = (uint32_t)cqlen;
    int rc = chunkqueue_peek_data(cq, &data, &dlen, errh);
    if (rc < 0) {
        chunk_release(c);
        return NULL;
    }
    buffer_truncate(c->mem, dlen);

    chunkqueue_release_chunks(cq);
    chunkqueue_append_chunk(cq, c);
    return c->mem;
}