1 #include "first.h"
2 
3 /**
4  * the network chunk-API
5  *
6  *
7  */
8 
9 #include "chunk.h"
10 #include "fdevent.h"
11 #include "log.h"
12 
13 #include <sys/types.h>
14 #include <sys/stat.h>
15 #include "sys-mmap.h"
16 
17 #include <stdlib.h>
18 #include <fcntl.h>
19 #include <unistd.h>
20 
21 #include <errno.h>
22 #include <string.h>
23 
24 /* default 1 MB */
25 #define DEFAULT_TEMPFILE_SIZE (1 * 1024 * 1024)
26 
27 static size_t chunk_buf_sz = 8192;
28 static chunk *chunks, *chunks_oversized, *chunks_filechunk;
29 static chunk *chunk_buffers;
30 static int chunks_oversized_n;
31 static const array *chunkqueue_default_tempdirs = NULL;
32 static off_t chunkqueue_default_tempfile_size = DEFAULT_TEMPFILE_SIZE;
33 
void chunkqueue_set_chunk_size (size_t sz)
{
    /* set the global chunk buffer size to the next power-of-2 >= sz
     * (floor 1k, clamped at 1 GB); sz == 0 restores the 8k default */
    if (0 == sz) {
        chunk_buf_sz = 8192;
        return;
    }
    size_t pow2 = 1024;
    while (pow2 < sz && pow2 < (1u << 30)) pow2 <<= 1;
    chunk_buf_sz = pow2;
}
40 
chunkqueue_set_tempdirs_default_reset(void)41 void chunkqueue_set_tempdirs_default_reset (void)
42 {
43     chunk_buf_sz = 8192;
44     chunkqueue_default_tempdirs = NULL;
45     chunkqueue_default_tempfile_size = DEFAULT_TEMPFILE_SIZE;
46 }
47 
chunkqueue *chunkqueue_init(chunkqueue *cq) {
	/* initialize a chunkqueue; allocates one if caller passes NULL
	 * (if caller passes non-NULL cq, it must be 0-init) */
	if (NULL == cq) {
		cq = calloc(1, sizeof(*cq));
		force_assert(NULL != cq);
	}

	cq->first = cq->last = NULL;

	/* inherit process-wide tempfile defaults */
	cq->tempdirs              = chunkqueue_default_tempdirs;
	cq->upload_temp_file_size = chunkqueue_default_tempfile_size;

	return cq;
}
63 
__attribute_returns_nonnull__
static chunk *chunk_init(void) {
	/* allocate and zero-initialize a new chunk (type MEM_CHUNK with an
	 * empty mem buffer); aborts via force_assert() on allocation failure */
	chunk * const restrict c = calloc(1, sizeof(*c));
	force_assert(NULL != c);

      #if 0 /*(zeroed by calloc())*/
	c->type = MEM_CHUNK;
	c->next = NULL;
	c->offset = 0;
	c->file.length = 0;
	c->file.mmap.length = c->file.mmap.offset = 0;
	c->file.is_temp = 0;
      #endif
	c->file.fd = -1;                 /* -1 marks "no file descriptor open" */
	c->file.mmap.start = MAP_FAILED; /* sentinel for "not memory-mapped" */

	c->mem = buffer_init();
	return c;
}
83 
84 __attribute_returns_nonnull__
chunk_init_sz(size_t sz)85 static chunk *chunk_init_sz(size_t sz) {
86 	chunk * const restrict c = chunk_init();
87 	buffer_string_prepare_copy(c->mem, sz-1);
88 	return c;
89 }
90 
/* release file-related resources held by chunk c and revert it to an
 * (empty) MEM_CHUNK: unlink tempfile, drop fd reference or close fd,
 * and unmap any mmap region */
static void chunk_reset_file_chunk(chunk *c) {
	if (c->file.is_temp) {
		c->file.is_temp = 0;
		/* c->mem holds the tempfile path; remove the file from disk */
		if (!buffer_is_blank(c->mem))
			unlink(c->mem->ptr);
	}
	if (c->file.refchg) {
		/* shared fd: decrement refcount instead of closing directly */
		c->file.refchg(c->file.ref, -1);
		c->file.refchg = 0; /* NULL fn ptr */
		c->file.ref = NULL;
	}
	else if (c->file.fd != -1) {
		close(c->file.fd);
	}
	if (MAP_FAILED != c->file.mmap.start) {
		munmap(c->file.mmap.start, c->file.mmap.length);
		c->file.mmap.start = MAP_FAILED;
		c->file.mmap.length = c->file.mmap.offset = 0;
	}
	c->file.fd = -1;
	c->file.length = 0;
	c->type = MEM_CHUNK;
}
114 
static void chunk_reset(chunk *c) {
	/* reset chunk to empty MEM_CHUNK state, keeping c->mem allocation */
	if (FILE_CHUNK == c->type)
		chunk_reset_file_chunk(c);
	c->offset = 0;
	buffer_clear(c->mem);
}
121 
static void chunk_free(chunk *c) {
	/* release every resource held by the chunk and free the chunk itself */
	if (FILE_CHUNK == c->type)
		chunk_reset_file_chunk(c);
	buffer_free(c->mem);
	free(c);
}
127 
chunk_pop_oversized(size_t sz)128 static chunk * chunk_pop_oversized(size_t sz) {
129     /* future: might have buckets of certain sizes, up to socket buf sizes */
130     if (chunks_oversized && chunks_oversized->mem->size >= sz) {
131         --chunks_oversized_n;
132         chunk *c = chunks_oversized;
133         chunks_oversized = c->next;
134         return c;
135     }
136     return NULL;
137 }
138 
/* return chunk c (whose mem buffer has size sz > chunk_buf_sz) to the
 * oversized pool, kept sorted by descending buffer size; if the pool is
 * full, keep only the largest buffer and free the chunk */
static void chunk_push_oversized(chunk * const c, const size_t sz) {
    /* XXX: chunk_buffer_yield() may have removed need for list size limit */
    if (chunks_oversized_n < 64 && chunk_buf_sz >= 4096) {
        ++chunks_oversized_n;
        /* insertion point scan: keep list ordered largest-first */
        chunk **co = &chunks_oversized;
        while (*co && sz < (*co)->mem->size) co = &(*co)->next;
        c->next = *co;
        *co = c;
    }
    else {
        buffer * const tb = chunks_oversized ? chunks_oversized->mem : NULL;
        if (tb && tb->size < sz) {
            /* swap larger mem block onto head of list; free smaller mem */
            chunks_oversized->mem = c->mem;
            c->mem = tb;
        }
        chunk_free(c);
    }
}
158 
__attribute_returns_nonnull__
/* obtain a buffer with capacity >= sz from the chunk pools; the chunk
 * struct (with mem detached) is parked on the chunk_buffers list until
 * the buffer is returned via chunk_buffer_release() */
static buffer * chunk_buffer_acquire_sz(const size_t sz) {
    chunk *c;
    buffer *b;
    if (sz <= (chunk_buf_sz|1)) {
        /* default-size request: reuse pooled chunk when available */
        if (chunks) {
            c = chunks;
            chunks = c->next;
        }
        else
            c = chunk_init_sz(chunk_buf_sz);
    }
    else {
        c = chunk_pop_oversized(sz);
        if (NULL == c) {
            /*(round up to nearest chunk_buf_sz)*/
            /* NB: round down power-2 + 1 to avoid excess allocation
             * (sz & ~1uL) relies on buffer_realloc() adding +1 *and* on callers
             * of this func never passing power-2 + 1 sz unless direct caller
             * adds +1 for '\0', as is done in chunk_buffer_prepare_append() */
            c = chunk_init_sz(((sz&~1uL)+(chunk_buf_sz-1)) & ~(chunk_buf_sz-1));
        }
    }
    /* detach buffer from chunk; park chunk struct for later re-pairing */
    c->next = chunk_buffers;
    chunk_buffers = c;
    b = c->mem;
    c->mem = NULL;
    return b;
}
188 
chunk_buffer_acquire(void)189 buffer * chunk_buffer_acquire(void) {
190     return chunk_buffer_acquire_sz(chunk_buf_sz);
191 }
192 
/* return buffer b (obtained from chunk_buffer_acquire*()) to the pools;
 * b may be NULL (no-op) */
void chunk_buffer_release(buffer *b) {
    if (NULL == b) return;
    if (chunk_buffers) {
        /* re-pair b with a spare chunk struct parked on chunk_buffers */
        chunk *c = chunk_buffers;
        chunk_buffers = c->next;
        c->mem = b;
        buffer_clear(b);
        if (b->size == (chunk_buf_sz|1)) {
            /* default-sized buffer: back onto the main chunk pool */
            c->next = chunks;
            chunks = c;
        }
        else if (b->size > chunk_buf_sz)
            chunk_push_oversized(c, b->size);
        else /* undersized buffer; discard chunk entirely */
            chunk_free(c);
    }
    else {
        buffer_free(b);
    }
}
213 
void chunk_buffer_yield(buffer *b) {
    /* exchange an oversized buffer held in *b for a default-sized one,
     * returning the large buffer to the buffer pools */
    if (b->size == (chunk_buf_sz|1)) return; /* already default-sized */

    buffer * const nb = chunk_buffer_acquire_sz(chunk_buf_sz);
    const buffer swap = *nb;
    *nb = *b;
    *b = swap;
    chunk_buffer_release(nb);
}
223 
/* ensure b has at least sz bytes of free space, swapping in a larger
 * pooled buffer (and copying b's existing contents) if necessary;
 * returns the resulting free space in b */
size_t chunk_buffer_prepare_append(buffer * const b, size_t sz) {
    if (sz > buffer_string_space(b)) {
        sz += b->used ? b->used : 1; /* account current contents (+1 '\0') */
        buffer * const cb = chunk_buffer_acquire_sz(sz);
        /* swap buffer contents and copy original b->ptr into larger b->ptr */
        /*(this does more than buffer_move())*/
        buffer tb = *b;
        *b = *cb;
        *cb = tb;
        if ((b->used = tb.used))
            memcpy(b->ptr, tb.ptr, tb.used);
        chunk_buffer_release(cb);
    }
    return buffer_string_space(b);
}
239 
240 __attribute_returns_nonnull__
chunk_acquire(size_t sz)241 static chunk * chunk_acquire(size_t sz) {
242     if (sz <= (chunk_buf_sz|1)) {
243         if (chunks) {
244             chunk *c = chunks;
245             chunks = c->next;
246             return c;
247         }
248         sz = chunk_buf_sz;
249     }
250     else {
251         /*(round up to nearest chunk_buf_sz)*/
252         sz = (sz + (chunk_buf_sz-1)) & ~(chunk_buf_sz-1);
253         chunk *c = chunk_pop_oversized(sz);
254         if (c) return c;
255     }
256 
257     return chunk_init_sz(sz);
258 }
259 
static void chunk_release(chunk *c) {
    /* return chunk to the matching pool, or free it outright */
    const size_t bsz = c->mem->size;
    if (bsz == (chunk_buf_sz|1)) {
        /* default-sized buffer: main chunk pool */
        chunk_reset(c);
        c->next = chunks;
        chunks = c;
    }
    else if (bsz > chunk_buf_sz) {
        chunk_reset(c);
        chunk_push_oversized(c, bsz);
    }
    else if (FILE_CHUNK == c->type) {
        /* small buffer, but struct is reusable for future file chunks */
        chunk_reset(c);
        c->next = chunks_filechunk;
        chunks_filechunk = c;
    }
    else
        chunk_free(c);
}
280 
281 __attribute_returns_nonnull__
chunk_acquire_filechunk(void)282 static chunk * chunk_acquire_filechunk(void) {
283     if (chunks_filechunk) {
284         chunk *c = chunks_filechunk;
285         chunks_filechunk = c->next;
286         return c;
287     }
288     return chunk_init();
289 }
290 
chunkqueue_chunk_pool_clear(void)291 void chunkqueue_chunk_pool_clear(void)
292 {
293     for (chunk *next, *c = chunks; c; c = next) {
294         next = c->next;
295         chunk_free(c);
296     }
297     chunks = NULL;
298     for (chunk *next, *c = chunks_oversized; c; c = next) {
299         next = c->next;
300         chunk_free(c);
301     }
302     chunks_oversized = NULL;
303     chunks_oversized_n = 0;
304     for (chunk *next, *c = chunks_filechunk; c; c = next) {
305         next = c->next;
306         chunk_free(c);
307     }
308     chunks_filechunk = NULL;
309 }
310 
/* free all pooled chunks plus the spare chunk structs on chunk_buffers
 * (structs whose buffers are currently lent out via chunk_buffer_acquire) */
void chunkqueue_chunk_pool_free(void)
{
    chunkqueue_chunk_pool_clear();
    for (chunk *next, *c = chunk_buffers; c; c = next) {
        next = c->next;
      #if 1 /*(chunk_buffers contains MEM_CHUNK with (c->mem == NULL))*/
        free(c);
      #else /*(c->mem = buffer_init() is no longer necessary below)*/
        c->mem = buffer_init(); /*(chunk_reset() expects c->mem != NULL)*/
        chunk_free(c);
      #endif
    }
    chunk_buffers = NULL;
}
325 
326 __attribute_pure__
chunk_remaining_length(const chunk * c)327 static off_t chunk_remaining_length(const chunk *c) {
328     /* MEM_CHUNK or FILE_CHUNK */
329     return (c->type == MEM_CHUNK
330               ? (off_t)buffer_clen(c->mem)
331               : c->file.length)
332            - c->offset;
333 }
334 
static void chunkqueue_release_chunks(chunkqueue *cq) {
    /* detach and release every chunk in cq (chunks return to pools) */
    cq->last = NULL;
    chunk *c = cq->first;
    cq->first = NULL;
    while (c) {
        chunk * const next = c->next;
        chunk_release(c);
        c = next;
    }
}
342 
void chunkqueue_free(chunkqueue *cq) {
	/* release all chunks and free the chunkqueue struct; NULL is a no-op */
	if (NULL == cq) return;
	chunkqueue_release_chunks(cq);
	free(cq);
}
348 
static void chunkqueue_prepend_chunk(chunkqueue * const restrict cq, chunk * const restrict c) {
    /* insert c at the head of the queue; update tail if queue was empty */
    c->next = cq->first;
    cq->first = c;
    if (NULL == c->next) cq->last = c;
}
353 
static void chunkqueue_append_chunk(chunkqueue * const restrict cq, chunk * const restrict c) {
    /* link c at the tail of the queue */
    c->next = NULL;
    if (cq->last)
        cq->last->next = c;
    else
        cq->first = c;
    cq->last = c;
}
359 
360 __attribute_returns_nonnull__
chunkqueue_prepend_mem_chunk(chunkqueue * cq,size_t sz)361 static chunk * chunkqueue_prepend_mem_chunk(chunkqueue *cq, size_t sz) {
362     chunk *c = chunk_acquire(sz);
363     chunkqueue_prepend_chunk(cq, c);
364     return c;
365 }
366 
367 __attribute_returns_nonnull__
chunkqueue_append_mem_chunk(chunkqueue * cq,size_t sz)368 static chunk * chunkqueue_append_mem_chunk(chunkqueue *cq, size_t sz) {
369     chunk *c = chunk_acquire(sz);
370     chunkqueue_append_chunk(cq, c);
371     return c;
372 }
373 
__attribute_nonnull__()374 __attribute_nonnull__()
375 __attribute_returns_nonnull__
376 static chunk * chunkqueue_append_file_chunk(chunkqueue * const restrict cq, const buffer * const restrict fn, off_t offset, off_t len) {
377     chunk * const c = chunk_acquire_filechunk();
378     chunkqueue_append_chunk(cq, c);
379     c->type = FILE_CHUNK;
380     c->offset = offset;
381     c->file.length = offset + len;
382     cq->bytes_in += len;
383     buffer_copy_buffer(c->mem, fn);
384     return c;
385 }
386 
void chunkqueue_reset(chunkqueue *cq) {
    /* empty the queue and clear the byte accounting counters */
    chunkqueue_release_chunks(cq);
    cq->bytes_out = 0;
    cq->bytes_in = 0;
    cq->tempdir_idx = 0;
}
393 
void chunkqueue_append_file_fd(chunkqueue * const restrict cq, const buffer * const restrict fn, int fd, off_t offset, off_t len) {
    /* append already-open file fd as FILE_CHUNK; the queue takes ownership
     * of fd (closed immediately when len is not positive) */
    if (len <= 0) {
        close(fd);
        return;
    }
    chunk * const c = chunkqueue_append_file_chunk(cq, fn, offset, len);
    c->file.fd = fd;
}
402 
void chunkqueue_append_file(chunkqueue * const restrict cq, const buffer * const restrict fn, off_t offset, off_t len) {
    /* append a file range by name; non-positive lengths are ignored */
    if (len <= 0) return;
    chunkqueue_append_file_chunk(cq, fn, offset, len);
}
408 
409 
static int chunkqueue_append_mem_extend_chunk(chunkqueue * const restrict cq, const char * const restrict mem, size_t len) {
	/* try to append mem into the existing last MEM_CHUNK without growing
	 * its buffer; returns 1 if handled (including len == 0), else 0 */
	if (0 == len) return 1;
	chunk * const tail = cq->last;
	if (NULL == tail || MEM_CHUNK != tail->type) return 0;
	if (buffer_string_space(tail->mem) < len) return 0;
	buffer_append_string_len(tail->mem, mem, len);
	cq->bytes_in += len;
	return 1;
}
421 
422 
void chunkqueue_append_buffer(chunkqueue * const restrict cq, buffer * const restrict mem) {
	/* append contents of mem to cq, stealing mem's allocation when large;
	 * mem is left cleared (blank) in either case */
	const size_t len = buffer_clen(mem);
	if (len < 1024 && chunkqueue_append_mem_extend_chunk(cq, mem->ptr, len)) {
		buffer_clear(mem);
		return;
	}

	chunk * const c = chunkqueue_append_mem_chunk(cq, chunk_buf_sz);
	cq->bytes_in += len;
	buffer_move(c->mem, mem); /* steal mem's storage into the new chunk */
}
435 
436 
void chunkqueue_append_mem(chunkqueue * const restrict cq, const char * const restrict mem, size_t len) {
	/* copy len bytes from mem into the queue */
	if (len < chunk_buf_sz && chunkqueue_append_mem_extend_chunk(cq, mem, len))
		return;

	chunk * const c = chunkqueue_append_mem_chunk(cq, len+1);
	buffer_copy_string_len(c->mem, mem, len);
	cq->bytes_in += len;
}
446 
447 
void chunkqueue_append_mem_min(chunkqueue * const restrict cq, const char * const restrict mem, size_t len) {
	/* like chunkqueue_append_mem(), but allocates a minimally-sized
	 * buffer (len+1) instead of drawing from the chunk pools */
	if (len < chunk_buf_sz && chunkqueue_append_mem_extend_chunk(cq, mem, len))
		return;

	chunk * const c = chunk_init_sz(len+1);
	chunkqueue_append_chunk(cq, c);
	buffer_copy_string_len(c->mem, mem, len);
	cq->bytes_in += len;
}
458 
459 
void chunkqueue_append_chunkqueue(chunkqueue * const restrict cq, chunkqueue * const restrict src) {
	/* move all chunks from src onto the tail of cq; src is left empty
	 * with src->bytes_out advanced as if fully consumed */
	if (NULL == src->first) return;

	*(cq->last ? &cq->last->next : &cq->first) = src->first;
	cq->last = src->last;
	cq->bytes_in += chunkqueue_length(src); /*(before adjusting src below)*/

	src->bytes_out = src->bytes_in;
	src->first = src->last = NULL;
}
475 
476 
chunkqueue_prepend_buffer_open_sz(chunkqueue * cq,size_t sz)477 buffer * chunkqueue_prepend_buffer_open_sz(chunkqueue *cq, size_t sz) {
478 	chunk * const c = chunkqueue_prepend_mem_chunk(cq, sz);
479 	return c->mem;
480 }
481 
482 
chunkqueue_prepend_buffer_open(chunkqueue * cq)483 buffer * chunkqueue_prepend_buffer_open(chunkqueue *cq) {
484 	return chunkqueue_prepend_buffer_open_sz(cq, chunk_buf_sz);
485 }
486 
487 
void chunkqueue_prepend_buffer_commit(chunkqueue *cq) {
	/* account bytes written into buffer from *_prepend_buffer_open*() */
	cq->bytes_in += buffer_clen(cq->first->mem);
}
491 
492 
chunkqueue_append_buffer_open_sz(chunkqueue * cq,size_t sz)493 buffer * chunkqueue_append_buffer_open_sz(chunkqueue *cq, size_t sz) {
494 	chunk * const c = chunkqueue_append_mem_chunk(cq, sz);
495 	return c->mem;
496 }
497 
498 
chunkqueue_append_buffer_open(chunkqueue * cq)499 buffer * chunkqueue_append_buffer_open(chunkqueue *cq) {
500 	return chunkqueue_append_buffer_open_sz(cq, chunk_buf_sz);
501 }
502 
503 
void chunkqueue_append_buffer_commit(chunkqueue *cq) {
	/* account bytes written into buffer from *_append_buffer_open*() */
	cq->bytes_in += buffer_clen(cq->last->mem);
}
507 
508 
char * chunkqueue_get_memory(chunkqueue * const restrict cq, size_t * const restrict len) {
	/* return a writable region of at least *len bytes at the queue tail;
	 * *len is updated with the space actually available.  Caller must
	 * commit written bytes via chunkqueue_use_memory() */
	const size_t want = *len ? *len : (chunk_buf_sz >> 1);
	chunk * const tail = cq->last;
	if (NULL != tail && MEM_CHUNK == tail->type) {
		/* reuse tail buffer when it already has enough free space */
		const size_t avail = buffer_string_space(tail->mem);
		if (avail >= want) {
			*len = avail;
			return tail->mem->ptr + buffer_clen(tail->mem);
		}
	}

	/* open a fresh chunk large enough for the request */
	buffer * const b = chunkqueue_append_buffer_open_sz(cq, want);
	*len = buffer_string_space(b);
	return b->ptr;
}
528 
/* commit len bytes written into the region returned by
 * chunkqueue_get_memory(); ckpt is the queue tail from before that call
 * and is used to merge small writes back and/or drop an empty tail chunk */
void chunkqueue_use_memory(chunkqueue * const restrict cq, chunk *ckpt, size_t len) {
    buffer *b = cq->last->mem;

    if (len > 0) {
        buffer_commit(b, len);
        cq->bytes_in += len;
        /* keep the new chunk unless small enough to merge into ckpt */
        if (cq->last == ckpt || NULL == ckpt || MEM_CHUNK != ckpt->type
            || len > buffer_string_space(ckpt->mem)) return;

        buffer_append_string_buffer(ckpt->mem, b);
    }
    else if (!buffer_is_blank(b)) { /*(cq->last == ckpt)*/
        return; /* last chunk is not empty */
    }

    /* remove empty last chunk */
    chunk_release(cq->last);
    cq->last = ckpt;
    *(ckpt ? &ckpt->next : &cq->first) = NULL;
}
549 
void chunkqueue_update_file(chunkqueue * const restrict cq, chunk *c, off_t len) {
    /* grow (or shrink) FILE_CHUNK c by len bytes and account them in cq;
     * prune the chunk if it becomes empty */
    /*assert(c->type == FILE_CHUNK);*/
    cq->bytes_in += len;
    c->file.length += len;
    if (0 == chunk_remaining_length(c))
        chunkqueue_remove_empty_chunks(cq);
}
557 
void chunkqueue_set_tempdirs_default (const array *tempdirs, off_t upload_temp_file_size) {
    /* set process-wide defaults inherited by chunkqueue_init();
     * size 0 selects the compiled-in default (1 MB) */
    chunkqueue_default_tempdirs = tempdirs;
    chunkqueue_default_tempfile_size = upload_temp_file_size
      ? upload_temp_file_size
      : DEFAULT_TEMPFILE_SIZE;
}
564 
void chunkqueue_set_tempdirs(chunkqueue * const restrict cq, const array * const restrict tempdirs, off_t upload_temp_file_size) {
    /* configure tempdir list and per-tempfile size limit for this queue;
     * size 0 falls back to the process-wide default */
    cq->tempdirs = tempdirs;
    cq->upload_temp_file_size = upload_temp_file_size
      ? upload_temp_file_size
      : chunkqueue_default_tempfile_size;
    cq->tempdir_idx = 0;
}
572 
573 __attribute_noinline__
chunkqueue_dup_file_chunk_fd(chunk * const restrict d,const chunk * const restrict c)574 static void chunkqueue_dup_file_chunk_fd (chunk * const restrict d, const chunk * const restrict c) {
575     /*assert(d != c);*/
576     /*assert(d->type == FILE_CHUNK);*/
577     /*assert(c->type == FILE_CHUNK);*/
578     if (c->file.fd >= 0) {
579         if (c->file.refchg) {
580             d->file.fd = c->file.fd;
581             d->file.ref = c->file.ref;
582             d->file.refchg = c->file.refchg;
583             d->file.refchg(d->file.ref, 1);
584         }
585         else
586             d->file.fd = fdevent_dup_cloexec(c->file.fd);
587     }
588 }
589 
__attribute_noinline__
/* append the first len bytes of file chunk c to dest, sharing c's fd */
static void chunkqueue_steal_partial_file_chunk(chunkqueue * const restrict dest, const chunk * const restrict c, const off_t len) {
    chunkqueue_append_file(dest, c->mem, c->offset, len);
    chunkqueue_dup_file_chunk_fd(dest->last, c);
}
595 
/* move up to len bytes from the head of src to the tail of dest;
 * whole chunks are relinked, a final partial chunk is copied (MEM_CHUNK)
 * or fd-shared (FILE_CHUNK); byte accounting updated on both queues */
void chunkqueue_steal(chunkqueue * const restrict dest, chunkqueue * const restrict src, off_t len) {
	for (off_t clen; len > 0; len -= clen) {
		chunk * const c = src->first;
		if (__builtin_expect( (NULL == c), 0)) break;

		clen = chunk_remaining_length(c);

		if (len >= clen) {
			/* move complete chunk */
			src->first = c->next;
			if (c == src->last) src->last = NULL;

			if (__builtin_expect( (0 != clen), 1)) {
				chunkqueue_append_chunk(dest, c);
				dest->bytes_in += clen;
			}
			else /* drop empty chunk */
				chunk_release(c);
		} else {
			/* copy partial chunk */

			switch (c->type) {
			case MEM_CHUNK:
				chunkqueue_append_mem(dest, c->mem->ptr + c->offset, len);
				break;
			case FILE_CHUNK:
				/* tempfile flag is in "last" chunk after the split */
				chunkqueue_steal_partial_file_chunk(dest, c, len);
				break;
			}

			c->offset += len; /* consume the stolen prefix from src chunk */
			clen = len;
		}

		src->bytes_out += clen;
	}
}
634 
/* create "lighttpd-upload-XXXXXX" tempfile under path (len bytes);
 * fills b with the resulting filename and returns open fd, or -1 */
static int chunkqueue_get_append_mkstemp(buffer * const b, const char *path, const uint32_t len) {
    buffer_copy_path_len2(b,path,len,CONST_STR_LEN("lighttpd-upload-XXXXXX"));
  #if defined(HAVE_SPLICE) && defined(HAVE_PWRITE)
    /*(splice() rejects O_APPEND target; omit flag if also using pwrite())*/
    return fdevent_mkostemp(b->ptr, 0);
  #else
    return fdevent_mkostemp(b->ptr, O_APPEND);
  #endif
}
644 
/* append a new empty tempfile chunk to cq, trying each configured tempdir
 * in turn (resuming at cq->tempdir_idx); on total failure the appended
 * chunk is removed again, the error logged, and NULL returned */
static chunk *chunkqueue_get_append_newtempfile(chunkqueue * const restrict cq, log_error_st * const restrict errh) {
    static const buffer emptyb = { "", 0, 0 };
    chunk * const restrict last = cq->last; /* saved to undo append on error */
    chunk * const restrict c = chunkqueue_append_file_chunk(cq, &emptyb, 0, 0);
    buffer * const restrict template = c->mem; /* receives tempfile path */
    c->file.is_temp = 1;

    if (cq->tempdirs && cq->tempdirs->used) {
        /* we have several tempdirs, only if all of them fail we jump out */
        for (errno = EIO; cq->tempdir_idx < cq->tempdirs->used; ++cq->tempdir_idx) {
            data_string *ds = (data_string *)cq->tempdirs->data[cq->tempdir_idx];
            c->file.fd =
              chunkqueue_get_append_mkstemp(template, BUF_PTR_LEN(&ds->value));
            if (-1 != c->file.fd) return c;
        }
    }
    else {
        c->file.fd =
          chunkqueue_get_append_mkstemp(template, CONST_STR_LEN("/var/tmp"));
        if (-1 != c->file.fd) return c;
    }

    /* (report only last error to mkstemp() even if multiple temp dirs tried) */
    log_perror(errh, __FILE__, __LINE__,
      "opening temp-file failed: %s", template->ptr);
    /* remove (failed) final chunk */
    c->file.is_temp = 0; /*(no file was created; avoid unlink() on release)*/
    if ((cq->last = last))
        last->next = NULL;
    else
        cq->first = NULL;
    chunk_release(c);
    return NULL;
}
679 
/* return tempfile chunk at the queue tail to append into, opening a new
 * tempfile when none exists or the current one reached the
 * cq->upload_temp_file_size limit; returns NULL on error */
static chunk *chunkqueue_get_append_tempfile(chunkqueue * const restrict cq, log_error_st * const restrict errh) {
    /*
     * if the last chunk is
     * - smaller than cq->upload_temp_file_size
     * -> append to it (and it then might exceed cq->upload_temp_file_size)
     * otherwise
     * -> create a new chunk
     */

    chunk * const c = cq->last;
    if (NULL != c && c->file.is_temp && c->file.fd >= 0) {

        if (c->file.length < (off_t)cq->upload_temp_file_size)
            return c; /* ok, take the last chunk for our job */

        /* the chunk is too large now, close it */
        force_assert(0 == c->file.refchg); /*(else should not happen)*/
        int rc = close(c->file.fd);
        c->file.fd = -1; /*(mark closed even if close() reports an error)*/
        if (0 != rc) {
            log_perror(errh, __FILE__, __LINE__,
              "close() temp-file %s failed", c->mem->ptr);
            return NULL;
        }
    }
    return chunkqueue_get_append_newtempfile(cq, errh);
}
707 
__attribute_cold__
/* handle a failed write() to tempfile chunk c; returns 1 if the caller
 * should retry the write (EINTR, or ENOSPC with another tempdir left) */
static int chunkqueue_append_tempfile_err(chunkqueue * const cq, log_error_st * const restrict errh, chunk * const c) {
    const int errnum = errno;
    if (errnum == EINTR) return 1; /* retry */

    /* on ENOSPC, advance to the next configured tempdir (if any) */
    int retry = (errnum == ENOSPC && cq->tempdirs
                 && ++cq->tempdir_idx < cq->tempdirs->used);
    if (!retry)
        log_perror(errh, __FILE__, __LINE__,
          "write() temp-file %s failed", c->mem->ptr);

    if (0 == chunk_remaining_length(c)) {
        /*(remove empty chunk and unlink tempfile)*/
        chunkqueue_remove_empty_chunks(cq);
    }
    else {/*(close tempfile; avoid later attempts to append)*/
        force_assert(0 == c->file.refchg); /*(else should not happen)*/
        int rc = close(c->file.fd);
        c->file.fd = -1;
        if (0 != rc) {
            log_perror(errh, __FILE__, __LINE__,
              "close() temp-file %s failed", c->mem->ptr);
            retry = 0; /* do not retry after a failed close() */
        }
    }
    return retry;
}
735 
__attribute_cold__
__attribute_noinline__
/* rewrite all chunks currently in dest into tempfile-backed chunks;
 * returns 0 on success, negative errno on failure */
static int chunkqueue_to_tempfiles(chunkqueue * const restrict dest, log_error_st * const restrict errh) {
    /* transfer chunks from dest to src, adjust dest->bytes_in, and then call
     * chunkqueue_steal_with_tempfiles() to write chunks from src back into
     * dest, but into tempfiles.   chunkqueue_steal_with_tempfiles() calls back
     * into chunkqueue_append_mem_to_tempfile(), but will not re-enter this func
     * since chunks moved to src, and dest made empty before recursive call */
    const off_t cqlen = chunkqueue_length(dest);
    chunkqueue src = *dest; /*(copy struct)*/
    dest->first = dest->last = NULL;
    dest->bytes_in -= cqlen; /*(steal below re-adds as bytes are written)*/
    if (0 == chunkqueue_steal_with_tempfiles(dest, &src, cqlen, errh))
        return 0;
    else {
        const int errnum = errno;
        chunkqueue_release_chunks(&src); /* free chunks left unwritten */
        return -errnum;
    }
}
756 
/* append len bytes of mem to dest, spilled into tempfile chunk(s);
 * any leading MEM_CHUNKs already in dest are first converted to
 * tempfiles; returns 0 on success, -1 on error */
int chunkqueue_append_mem_to_tempfile(chunkqueue * const restrict dest, const char * restrict mem, size_t len, log_error_st * const restrict errh) {
	chunk *dst_c = dest->first;

	/* check if prior MEM_CHUNK(s) exist and write to tempfile
	 * (check first chunk only, since if we are using tempfiles, then
	 *  we expect further chunks to be tempfiles after starting tempfiles)*/
	if (dst_c && dst_c->type == MEM_CHUNK
	    && 0 != chunkqueue_to_tempfiles(dest, errh)) {
		return -1;
	}

	do {
		/*(aside: arg len is permitted to be 0 and creates tempfile as a
		 * side effect.  This is used by mod_ssi for ssi exec, as the func
		 * chunkqueue_get_append_tempfile() is not public.  The result is
		 * an empty chunk at the end of the chunkqueue, which typically
		 * should be avoided)*/
		dst_c = chunkqueue_get_append_tempfile(dest, errh);
		if (NULL == dst_c)
			return -1;
	      #ifdef __COVERITY__
		if (dst_c->file.fd < 0) return -1;
	      #endif
	      #ifdef HAVE_PWRITE
		/* coverity[negative_returns : FALSE] */
		const ssize_t written =pwrite(dst_c->file.fd, mem, len, dst_c->file.length);
	      #else
		/* coverity[negative_returns : FALSE] */
		const ssize_t written = write(dst_c->file.fd, mem, len);
	      #endif

		if ((size_t) written == len) {
			/* full write: account bytes and done */
			dst_c->file.length += len;
			dest->bytes_in += len;
			return 0;
		} else if (written >= 0) {
			/*(assume EINTR if partial write and retry write();
			 * retry write() might fail with ENOSPC if no more space on volume)*/
			dest->bytes_in += written;
			mem += written;
			len -= (size_t)written;
			dst_c->file.length += (size_t)written;
			/* continue; retry */
		} else if (!chunkqueue_append_tempfile_err(dest, errh, dst_c)) {
			break; /* return -1; */
		} /* else continue; retry */
	} while (len);

	return -1;
}
807 
808 #ifdef HAVE_PWRITEV
809 
810 #ifdef HAVE_SYS_UIO_H
811 #include <sys/uio.h>
812 #endif
813 
__attribute_cold__
__attribute_noinline__
/* recover from a partial pwritev() of dest's leading MEM_CHUNKs into
 * tempfile chunk c: trim the wr bytes already written, then convert the
 * remaining MEM_CHUNKs to tempfiles; returns 0 on success, -1 on error */
static ssize_t chunkqueue_append_cqmem_to_tempfile_partial(chunkqueue * const dest, chunk * const c, ssize_t wr, log_error_st * const restrict errh) {
    /* recover from partial write of existing dest MEM_CHUNK to tempfile */
    chunk *ckpt = dest->first;
    while (ckpt->next != c) ckpt = ckpt->next; /* find chunk preceding c */
    ckpt->next = NULL;  /* temporarily detach tempfile chunk c from queue */
    dest->last = ckpt;
    dest->bytes_in  -= wr; /*(avoid double count in dest cq)*/
    dest->bytes_out -= wr;
    chunkqueue_mark_written(dest, wr);/*(remove MEM_CHUNK written to tempfile)*/

    c->next = dest->first; /*(place tempfile at beginning of dest cq)*/
    dest->first = c;
    return (0 == chunkqueue_to_tempfiles(dest, errh)) ? 0 : -1;
}
830 
/* Write up to len bytes of leading MEM_CHUNKs from src cq -- preceded by any
 * leading MEM_CHUNKs already in dest cq, to preserve ordering -- to a
 * tempfile in dest cq using a single pwritev() syscall.
 * Returns number of bytes written *from src cq* (may be partial),
 * or -1 on error. */
static ssize_t chunkqueue_append_cqmem_to_tempfile(chunkqueue * const restrict dest, chunkqueue * const restrict src, off_t len, log_error_st * const restrict errh) {
    /* write multiple MEM_CHUNKs to tempfile in single pwritev() syscall */
    /*(could lseek() and writev() if pwritev() is not available,
     * but if writev() is available, pwritev() is likely available,
     * e.g. any modern Linux or *BSD, and possibly anything not Windows)*/
    unsigned int iovcnt = 0;
    struct iovec iov[16];

    /* gather leading MEM_CHUNKs already in dest cq */
    off_t dlen = 0; /* total bytes gathered from dest cq */
    chunk *c;
    for (c = dest->first; c && c->type == MEM_CHUNK; c = c->next) {
        const off_t clen = chunk_remaining_length(c);
        iov[iovcnt].iov_base = c->mem->ptr + c->offset;
        iov[iovcnt].iov_len  = (size_t)clen;
        dlen += clen;
        ++iovcnt;
        if (__builtin_expect( (iovcnt == sizeof(iov)/sizeof(*iov)), 0))
            break; /*(not expecting large number of MEM_CHUNK)*/
    }
    if (__builtin_expect( (c != NULL), 0) && dest->first->type == MEM_CHUNK) {
        /*(expecting only MEM_CHUNK if dest cq starts w/ MEM_CHUNK)*/
        /*(use less efficient fallback if that assumption does not hold true)*/
        if (0 != chunkqueue_to_tempfiles(dest, errh))
            return -1;
        dlen = 0;   /* dest MEM_CHUNKs flushed above; reset gathered iovec */
        iovcnt = 0;
    }

    /* gather up to len bytes from leading MEM_CHUNKs in src cq */
    if (__builtin_expect( (iovcnt < sizeof(iov)/sizeof(*iov)), 1)) {
        for (c = src->first; c && c->type == MEM_CHUNK; c = c->next) {
            off_t clen = chunk_remaining_length(c);
            if (clen > len) clen = len;
            iov[iovcnt].iov_base = c->mem->ptr + c->offset;
            iov[iovcnt].iov_len  = (size_t)clen;
            len -= clen;
            ++iovcnt;
            if (0 == len) break;
            if (__builtin_expect( (iovcnt == sizeof(iov)/sizeof(*iov)), 0))
                break; /*(not expecting large number of MEM_CHUNK)*/
        }
    }

    if (__builtin_expect( (0 == iovcnt), 0)) return 0; /*(should not happen)*/

    c = chunkqueue_get_append_tempfile(dest, errh);
    if (NULL == c)
        return -1;
  #ifdef __COVERITY__
    if (c->file.fd < 0) return -1;
  #endif
    /* coverity[negative_returns : FALSE] */
    ssize_t wr = pwritev(c->file.fd, iov, (int)iovcnt, c->file.length);

    /*(memory use in chunkqueues is expected to be limited before spilling
     * to tempfiles, so common case will write entire iovec to tempfile,
     * and we return amount written *from src cq*, even if partial write;
     * (not looping here to retry writing more, but caller might loop))*/

    if (wr >= 0) {
        c->file.length += wr;
        dest->bytes_in += wr;
        if (dlen) {
            /* first dlen bytes of the write came from dest's own MEM_CHUNKs */
            if (__builtin_expect( (wr < dlen), 0))
                return
                  chunkqueue_append_cqmem_to_tempfile_partial(dest,c,wr,errh);
            wr -= (ssize_t)dlen;
            dest->bytes_in  -= dlen; /*(avoid double count in dest cq)*/
            dest->bytes_out -= dlen;
            chunkqueue_mark_written(dest, dlen);
        }
    }

    return wr;
}
905 
906 #endif /* HAVE_PWRITEV */
907 
908 #ifdef HAVE_SPLICE
909 
__attribute_cold__
__attribute_noinline__
static ssize_t chunkqueue_append_drain_pipe_tempfile(chunkqueue * const restrict cq, const int fd, unsigned int len, log_error_st * const restrict errh) {
    /* attempt to drain full 'len' from pipe
     * (even if len not reduced to opts->max_per_read limit)
     * since data may have already been moved from socket to pipe
     *(returns 0 on success, or -errno (negative errno) if error,
     * even if partial write occurred)*/
    char buf[16384];
    ssize_t rd;
    do {
        do {
            rd = read(fd, buf, sizeof(buf));
        } while (rd < 0 && errno == EINTR); /* retry read on EINTR */
        if (rd < 0) break; /* read error; fall through to error path below */
        if (0 != chunkqueue_append_mem_to_tempfile(cq, buf, (size_t)rd, errh))
            break; /* error writing to tempfile */
    } while ((len -= (unsigned int)rd));

    if (0 == len)
        return 0;
    else {
        const int errnum = errno; /*(save errno before cleanup below)*/
        if (cq->last && 0 == chunk_remaining_length(cq->last)) {
            /*(remove empty chunk and unlink tempfile)*/
            chunkqueue_remove_empty_chunks(cq);
        }
        return -errnum;
    }
}
940 
/* Splice up to len bytes from pipe fd into tempfile chunk(s) in cq.
 * Leading MEM_CHUNK data in cq is first flushed to tempfiles to preserve
 * ordering.  Returns num bytes written, or -errno (negative errno) on
 * error. */
ssize_t chunkqueue_append_splice_pipe_tempfile(chunkqueue * const restrict cq, const int fd, unsigned int len, log_error_st * const restrict errh) {
    /* check if prior MEM_CHUNK(s) exist and write to tempfile
     * (check first chunk only, since if we are using tempfiles, then
     *  we expect further chunks to be tempfiles after starting tempfiles)*/
    if (cq->first && cq->first->type == MEM_CHUNK) {
        int rc = chunkqueue_to_tempfiles(cq, errh);
        if (__builtin_expect( (0 != rc), 0)) return rc;
    }

    /*(returns num bytes written, or -errno (negative errno) if error)*/
    ssize_t total = 0;
    do {
        chunk * const c = chunkqueue_get_append_tempfile(cq, errh);
        if (__builtin_expect( (NULL == c), 0)) return -errno;

        loff_t off = c->file.length; /* append at current end of tempfile */
        ssize_t wr = splice(fd, NULL, c->file.fd, &off, len,
                            SPLICE_F_MOVE | SPLICE_F_NONBLOCK);

        if (__builtin_expect(((size_t)wr == len), 1)) {
            /* common case: entire remaining len spliced in one call */
            c->file.length += len;
            cq->bytes_in += len;
            return total + len;
        }
        else if (wr >= 0) {
            /*(assume EINTR if partial write and retry;
             * retry might fail with ENOSPC if no more space on volume)*/
            cq->bytes_in += wr;
            total += wr;
            len -= (size_t)wr;
            c->file.length += (size_t)wr;
            /* continue; retry */
        }
        else {
            const int errnum = errno;
            switch (errnum) {
              case EAGAIN:
             #ifdef EWOULDBLOCK
             #if EWOULDBLOCK != EAGAIN
              case EWOULDBLOCK:
             #endif
             #endif
                if (0 == chunk_remaining_length(c)) {
                    /*(remove empty chunk and unlink tempfile)*/
                    chunkqueue_remove_empty_chunks(cq);
                }
                return total;
              case EINVAL: /*(assume total == 0 if EINVAL)*/
                /* splice() to this target not possible;
                 * fall back to draining pipe via intermediate buffer */
                wr = chunkqueue_append_drain_pipe_tempfile(cq, fd, len, errh);
                return (0 == wr) ? total + (ssize_t)len : wr;
              default:
                if (!chunkqueue_append_tempfile_err(cq, errh, c))
                    return -errnum;
                break; /* else continue; retry */
            }
        }
    } while (len);
    return -EIO; /*(not reached)*/
}
1000 
1001 static int cqpipes[2] = { -1, -1 };
1002 
1003 __attribute_cold__
1004 __attribute_noinline__
chunkqueue_internal_pipes(int init)1005 void chunkqueue_internal_pipes(int init) {
1006     /*(intended for internal use within a single lighttpd process;
1007      * must be initialized after fork() and graceful-restart to avoid
1008      * sharing pipes between processes)*/
1009     if (-1 != cqpipes[0]) { close(cqpipes[0]); cqpipes[0] = -1; }
1010     if (-1 != cqpipes[1]) { close(cqpipes[1]); cqpipes[1] = -1; }
1011     if (init)
1012         if (0 != fdevent_pipe_cloexec(cqpipes, 262144)) { } /*(ignore error)*/
1013 }
1014 
__attribute_cold__
__attribute_noinline__
static void chunkqueue_pipe_read_discard (void) {
    /* drain and discard any data remaining in the internal pipe pair;
     * if read() fails with anything other than EAGAIN/EWOULDBLOCK/EINTR,
     * close and re-create the pipes (pipe state unknown; not recoverable) */
    char buf[16384];
    ssize_t rd;
    do {
        rd = read(cqpipes[0], buf, sizeof(buf));
    } while (rd > 0 || (rd < 0 && errno == EINTR));
    if (rd < 0
      #ifdef EWOULDBLOCK
      #if EWOULDBLOCK != EAGAIN
        && errno != EWOULDBLOCK
      #endif
      #endif
        && errno != EAGAIN) {
        chunkqueue_internal_pipes(1); /*(close() and re-initialize)*/
    }
}
1033 
/* Splice up to len bytes from socket fd into tempfile chunk(s) in cq, using
 * the internal pipe pair as intermediary (socket -> pipe -> tempfile).
 * Returns num bytes written, or -errno (negative errno) on error;
 * -EINVAL indicates "not handled here" (pipes unconfigured or splice from
 * socket failed) and caller should fall back to another method. */
ssize_t chunkqueue_append_splice_sock_tempfile(chunkqueue * const restrict cq, const int fd, unsigned int len, log_error_st * const restrict errh) {
    /*(returns num bytes written, or -errno (negative errno) if error)*/
    int * const pipes = cqpipes;
    if (-1 == pipes[1])
        return -EINVAL; /*(not configured; not handled here)*/

    /* splice() socket data to intermediate pipe */
    ssize_t wr = splice(fd, NULL, pipes[1], NULL, len,
                        SPLICE_F_MOVE | SPLICE_F_NONBLOCK);
    if (__builtin_expect( (wr <= 0), 0))
        return -EINVAL; /*(reuse to indicate not handled here)*/
    len = (unsigned int)wr;

    /* splice() data from intermediate pipe to tempfile */
    wr = chunkqueue_append_splice_pipe_tempfile(cq, pipes[0], len, errh);
    if (wr < 0) /* expect (wr == (ssize_t)len) or (wr == -1) */
        chunkqueue_pipe_read_discard();/* discard data from intermediate pipe */
    return wr;
}
1053 
1054 #endif /* HAVE_SPLICE */
1055 
/* Move (steal) up to len bytes from src cq into dest cq, converting
 * MEM_CHUNK data into tempfile-backed chunks in dest; FILE_CHUNKs are
 * moved (or split) as-is.  Returns 0 on success, -1 on tempfile error. */
int chunkqueue_steal_with_tempfiles(chunkqueue * const restrict dest, chunkqueue * const restrict src, off_t len, log_error_st * const restrict errh) {
	for (off_t clen; len > 0; len -= clen) {
		chunk * const c = src->first;
		if (__builtin_expect( (NULL == c), 0)) break;

	  #ifdef HAVE_PWRITEV

		if (c->type == MEM_CHUNK) {
			/* batch leading MEM_CHUNKs from src into dest tempfile */
			clen = chunkqueue_append_cqmem_to_tempfile(dest, src, len, errh);
			if (__builtin_expect( (clen < 0), 0)) return -1;
			chunkqueue_mark_written(src, clen); /*(updates src->bytes_out)*/
		}
		else { /* (c->type == FILE_CHUNK) */
			clen = chunk_remaining_length(c);
			if (len < clen) clen = len;
			chunkqueue_steal(dest, src, clen);/*(same as below for FILE_CHUNK)*/
		}

	  #else

		clen = chunk_remaining_length(c);
		if (__builtin_expect( (0 == clen), 0)) {
			/* drop empty chunk */
			src->first = c->next;
			if (c == src->last) src->last = NULL;
			chunk_release(c);
			continue;
		}

		switch (c->type) {
		case FILE_CHUNK:
			if (len >= clen) {
				/* move complete chunk */
				src->first = c->next;
				if (c == src->last) src->last = NULL;
				chunkqueue_append_chunk(dest, c);
				dest->bytes_in += clen;
			} else {
				/* copy partial chunk */
				/* tempfile flag is in "last" chunk after the split */
				chunkqueue_steal_partial_file_chunk(dest, c, len);
				c->offset += len;
				clen = len;
			}
			break;

		case MEM_CHUNK:
			/* store bytes from memory chunk in tempfile */
			if (0 != chunkqueue_append_mem_to_tempfile(dest, c->mem->ptr + c->offset,
			                                           (len >= clen) ? clen : len, errh)) {
				return -1;
			}

			if (len >= clen) {
				/* finished chunk */
				src->first = c->next;
				if (c == src->last) src->last = NULL;
				chunk_release(c);
			} else {
				/* partial chunk */
				c->offset += len;
				clen = len;
			}
			break;
		}

		src->bytes_out += clen;

	  #endif
	}

	return 0;
}
1129 
chunkqueue_append_cq_range(chunkqueue * const dst,const chunkqueue * const src,off_t offset,off_t len)1130 void chunkqueue_append_cq_range (chunkqueue * const dst, const chunkqueue * const src, off_t offset, off_t len) {
1131     /* similar to chunkqueue_steal() but copy and append src range to dst cq */
1132     /* (dst cq and src cq can be the same cq, so neither is marked restrict) */
1133 
1134     /* copy and append range len from src to dst */
1135     for (const chunk *c = src->first; len > 0 && c != NULL; c = c->next) {
1136         /* scan into src to range offset (also skips empty chunks) */
1137         off_t clen = chunk_remaining_length(c);
1138         if (offset >= clen) {
1139             offset -= clen;
1140             continue;
1141         }
1142         clen -= offset;
1143         if (len < clen) clen = len;
1144         len -= clen;
1145 
1146         if (c->type == FILE_CHUNK) {
1147             chunkqueue_append_file(dst, c->mem, c->offset + offset, clen);
1148             chunkqueue_dup_file_chunk_fd(dst->last, c);
1149         }
1150         else { /*(c->type == MEM_CHUNK)*/
1151             /*(string refs would reduce copying,
1152              * but this path is not expected to be hot)*/
1153             chunkqueue_append_mem(dst, c->mem->ptr + c->offset + offset, clen);
1154         }
1155         offset = 0;
1156     }
1157 }
1158 
chunkqueue_mark_written(chunkqueue * cq,off_t len)1159 void chunkqueue_mark_written(chunkqueue *cq, off_t len) {
1160     cq->bytes_out += len;
1161 
1162     for (chunk *c = cq->first; c; ) {
1163         off_t c_len = chunk_remaining_length(c);
1164         if (len >= c_len) { /* chunk got finished */
1165             chunk * const x = c;
1166             c = c->next;
1167             len -= c_len;
1168             chunk_release(x);
1169         }
1170         else { /* partial chunk */
1171             c->offset += len;
1172             cq->first = c;
1173             return; /* chunk not finished */
1174         }
1175     }
1176     cq->first = cq->last = NULL;
1177 }
1178 
chunkqueue_remove_finished_chunks(chunkqueue * cq)1179 void chunkqueue_remove_finished_chunks(chunkqueue *cq) {
1180     for (chunk *c; (c = cq->first) && 0 == chunk_remaining_length(c); ){
1181         if (NULL == (cq->first = c->next)) cq->last = NULL;
1182         chunk_release(c);
1183     }
1184 }
1185 
chunkqueue_remove_empty_chunks(chunkqueue * cq)1186 void chunkqueue_remove_empty_chunks(chunkqueue *cq) {
1187 	chunk *c;
1188 	chunkqueue_remove_finished_chunks(cq);
1189 
1190 	for (c = cq->first; c && c->next; c = c->next) {
1191 		if (0 == chunk_remaining_length(c->next)) {
1192 			chunk *empty = c->next;
1193 			c->next = empty->next;
1194 			if (empty == cq->last) cq->last = c;
1195 			chunk_release(empty);
1196 		}
1197 	}
1198 }
1199 
void chunkqueue_compact_mem_offset(chunkqueue * const cq) {
    /* slide unconsumed bytes of first chunk to the front of its buffer,
     * discarding the already-consumed c->offset prefix */
    chunk * const restrict c = cq->first;
    if (0 == c->offset) return;          /* nothing consumed; no work */
    if (c->type != MEM_CHUNK) return;    /*(should not happen)*/

    buffer * const restrict b = c->mem;
    const size_t remain = buffer_clen(b) - c->offset;
    memmove(b->ptr, b->ptr + c->offset, remain);
    c->offset = 0;
    buffer_truncate(b, remain);
}
1211 
chunkqueue_compact_mem(chunkqueue * cq,size_t clen)1212 void chunkqueue_compact_mem(chunkqueue *cq, size_t clen) {
1213     /* caller must guarantee that chunks in chunkqueue are MEM_CHUNK,
1214      * which is currently always true when reading input from client */
1215     chunk *c = cq->first;
1216     buffer *b = c->mem;
1217     size_t len = buffer_clen(b) - c->offset;
1218     if (len >= clen) return;
1219     if (b->size > clen) {
1220         if (buffer_string_space(b) < clen - len)
1221             chunkqueue_compact_mem_offset(cq);
1222     }
1223     else {
1224         b = chunkqueue_prepend_buffer_open_sz(cq, clen+1);
1225         buffer_append_string_len(b, c->mem->ptr + c->offset, len);
1226         cq->first->next = c->next;
1227         if (NULL == c->next) cq->last = cq->first;
1228         chunk_release(c);
1229         c = cq->first;
1230     }
1231 
1232     for (chunk *fc = c; ((clen -= len) && (c = fc->next)); ) {
1233         len = buffer_clen(c->mem) - c->offset;
1234         if (len > clen) {
1235             buffer_append_string_len(b, c->mem->ptr + c->offset, clen);
1236             c->offset += clen;
1237             break;
1238         }
1239         buffer_append_string_len(b, c->mem->ptr + c->offset, len);
1240         fc->next = c->next;
1241         if (NULL == c->next) cq->last = fc;
1242         chunk_release(c);
1243     }
1244     /* chunkqueue_prepend_buffer_commit() is not called here;
1245      * no data added/removed from chunkqueue; consolidated only */
1246 }
1247 
chunk_open_file_chunk(chunk * const restrict c,log_error_st * const restrict errh)1248 static int chunk_open_file_chunk(chunk * const restrict c, log_error_st * const restrict errh) {
1249 	if (-1 == c->file.fd) {
1250 		/* (permit symlinks; should already have been checked.  However, TOC-TOU remains) */
1251 		if (-1 == (c->file.fd = fdevent_open_cloexec(c->mem->ptr, 1, O_RDONLY, 0))) {
1252 			log_perror(errh, __FILE__, __LINE__, "open failed: %s",c->mem->ptr);
1253 			return -1;
1254 		}
1255 	}
1256 
1257 	/*(skip file size checks if file is temp file created by lighttpd)*/
1258 	if (c->file.is_temp) return 0;
1259 
1260 	struct stat st;
1261 	if (-1 == fstat(c->file.fd, &st)) {
1262 		log_perror(errh, __FILE__, __LINE__, "fstat failed");
1263 		return -1;
1264 	}
1265 
1266 	const off_t offset = c->offset;
1267 	const off_t len = c->file.length - c->offset;
1268 	force_assert(offset >= 0 && len >= 0);
1269 	if (offset > st.st_size - len) {
1270 		log_error(errh, __FILE__, __LINE__, "file shrunk: %s", c->mem->ptr);
1271 		return -1;
1272 	}
1273 
1274 	return 0;
1275 }
1276 
int chunkqueue_open_file_chunk(chunkqueue * const restrict cq, log_error_st * const restrict errh) {
    /* open (and sanity-check) the file backing the first chunk in cq */
    chunk * const c = cq->first;
    return chunk_open_file_chunk(c, errh);
}
1280 
1281 
static ssize_t
chunkqueue_write_data (const int fd, const void *buf, size_t count)
{
    /* write() wrapper retrying when interrupted by a signal (EINTR);
     * returns bytes written (possibly partial) or -1 with errno set */
    for (;;) {
        const ssize_t wr = write(fd, buf, count);
        if (wr >= 0 || errno != EINTR)
            return wr;
    }
}
1289 
1290 
#if defined(HAVE_MMAP) || defined(_WIN32) /*(see local sys-mmap.h)*/
__attribute_cold__
#endif
__attribute_noinline__
static ssize_t
chunkqueue_write_chunk_file_intermed (const int fd, chunk * const restrict c, log_error_st * const errh)
{
    /* fallback: copy FILE_CHUNK data to fd through an intermediate stack
     * buffer (used when sendfile()/mmap() unavailable or failed) */
    char buf[16384];
    char *data = buf;
    const off_t count = c->file.length - c->offset;
    uint32_t dlen = count < (off_t)sizeof(buf) ? (uint32_t)count : sizeof(buf);
    /* NOTE(review): initializer order must match chunkqueue struct layout */
    chunkqueue cq = {c,c,0,0,0,0,0}; /*(fake cq for chunkqueue_peek_data())*/
    if (0 != chunkqueue_peek_data(&cq, &data, &dlen, errh) && 0 == dlen)
        return -1;
    return chunkqueue_write_data(fd, data, dlen);
}
1307 
1308 
1309 #if defined(HAVE_MMAP) || defined(_WIN32) /*(see local sys-mmap.h)*/
1310 
1311 /*(improved from network_write_mmap.c)*/
static off_t
mmap_align_offset (off_t start)
{
    /* round start down to a page boundary; page mask computed once */
    static off_t pagemask = 0;
    if (0 == pagemask) {
      #ifndef _WIN32
        long sz = sysconf(_SC_PAGESIZE);
      #else
        long sz = -1; /*(not implemented (yet))*/
      #endif
        if (-1 == sz) sz = 4096;
        /* pagesize always power-of-2, so mask clears low bits */
        pagemask = ~((off_t)sz - 1);
    }
    return start & pagemask;
}
1327 
1328 
__attribute_noinline__
static char *
chunkqueue_mmap_chunk_len (chunk * const c, off_t len)
{
    /* (re)mmap the buffer to file length if range is not covered completely */
    /*(caller is responsible for handling SIGBUS if chunkqueue might contain
     * untrusted file, i.e. any file other than lighttpd-created tempfile)*/
    /*(tempfiles are expected for input, MAP_PRIVATE used for portability)*/
    /*(mmaps and writes complete chunk instead of only small parts; files
     * are expected to be temp files with reasonable chunk sizes)*/
    /* Returns pointer to c->offset within the mapping, or NULL if mmap fails */
    if (MAP_FAILED == c->file.mmap.start
        || c->offset < c->file.mmap.offset
        || c->offset+len > (off_t)(c->file.mmap.offset + c->file.mmap.length)) {

        /* discard existing mapping that does not cover requested range */
        if (MAP_FAILED != c->file.mmap.start) {
            munmap(c->file.mmap.start, c->file.mmap.length);
            /*c->file.mmap.start = MAP_FAILED;*//*(assigned below)*/
        }

        /* map from page-aligned offset through end of file */
        c->file.mmap.offset = mmap_align_offset(c->offset);
        c->file.mmap.length = c->file.length - c->file.mmap.offset;
        c->file.mmap.start  =
          mmap(NULL, c->file.mmap.length, PROT_READ, MAP_PRIVATE,
               c->file.fd, c->file.mmap.offset);
        if (MAP_FAILED == c->file.mmap.start) return NULL;

      #if 0 /*(review callers before changing; some expect open file)*/
        /* close() fd as soon as fully mmap() rather than when done w/ chunk
         * (possibly worthwhile to keep active fd count lower) */
        if (c->file.is_temp && !c->file.refchg) {
            close(c->file.fd);
            c->file.fd = -1;
        }
      #endif
    }
    return c->file.mmap.start + c->offset - c->file.mmap.offset;
}
1366 
1367 #endif
1368 
1369 
1370 #if defined HAVE_SYS_SENDFILE_H && defined HAVE_SENDFILE \
1371  && (!defined _LARGEFILE_SOURCE || defined HAVE_SENDFILE64) \
1372  && defined(__linux__) && !defined HAVE_SENDFILE_BROKEN
1373 #include <sys/sendfile.h>
1374 #include <stdint.h>
1375 #endif
static ssize_t
chunkqueue_write_chunk_file (const int fd, chunk * const restrict c, log_error_st * const errh)
{
    /* Write remaining FILE_CHUNK data to fd, trying in order:
     * sendfile() (Linux), mmap()+write(), then an intermediate buffer.
     * Returns bytes written (possibly partial) or -1 on error. */
    /*(similar to network_write_file_chunk_mmap(), but does not use send() on
    *  Windows because fd is expected to be file or pipe here, not socket)*/

    if (0 != chunk_open_file_chunk(c, errh))
        return -1;

    const off_t count = c->file.length - c->offset;
    if (0 == count) return 0; /*(sanity check)*/

  #if defined HAVE_SYS_SENDFILE_H && defined HAVE_SENDFILE \
   && (!defined _LARGEFILE_SOURCE || defined HAVE_SENDFILE64) \
   && defined(__linux__) && !defined HAVE_SENDFILE_BROKEN
    /* Linux kernel >= 2.6.33 supports sendfile() between most fd types */
    off_t offset = c->offset;
    const ssize_t wr =
      sendfile(fd, c->file.fd, &offset, count<INT32_MAX ? count : INT32_MAX);
    if (__builtin_expect( (wr >= 0), 1) || (errno != EINVAL && errno != ENOSYS))
        return wr;
    /* else fall through to mmap()/intermediate buffer on EINVAL/ENOSYS */
  #endif

  #if defined(HAVE_MMAP) || defined(_WIN32) /*(see local sys-mmap.h)*/
    const char * const data = chunkqueue_mmap_chunk_len(c, count);
    if (NULL != data)
        return chunkqueue_write_data(fd, data, count);
  #endif

    return chunkqueue_write_chunk_file_intermed(fd, c, errh);
}
1407 
1408 
1409 static ssize_t
chunkqueue_write_chunk_mem(const int fd,const chunk * const restrict c)1410 chunkqueue_write_chunk_mem (const int fd, const chunk * const restrict c)
1411 {
1412     const void * const buf = c->mem->ptr + c->offset;
1413     const size_t count = buffer_clen(c->mem) - (size_t)c->offset;
1414     ssize_t wr;
1415     do { wr = write(fd, buf, count); } while (-1 == wr && errno == EINTR);
1416     return wr;
1417 }
1418 
1419 
1420 ssize_t
chunkqueue_write_chunk(const int fd,chunkqueue * const restrict cq,log_error_st * const restrict errh)1421 chunkqueue_write_chunk (const int fd, chunkqueue * const restrict cq, log_error_st * const restrict errh)
1422 {
1423     /*(note: expects non-empty cq->first)*/
1424     chunk * const c = cq->first;
1425     switch (c->type) {
1426       case MEM_CHUNK:
1427         return chunkqueue_write_chunk_mem(fd, c);
1428       case FILE_CHUNK:
1429         return chunkqueue_write_chunk_file(fd, c, errh);
1430       default:
1431         errno = EINVAL;
1432         return -1;
1433     }
1434 }
1435 
1436 
1437 ssize_t
chunkqueue_write_chunk_to_pipe(const int fd,chunkqueue * const restrict cq,log_error_st * const restrict errh)1438 chunkqueue_write_chunk_to_pipe (const int fd, chunkqueue * const restrict cq, log_error_st * const restrict errh)
1439 {
1440     /*(note: expects non-empty cq->first)*/
1441   #ifdef HAVE_SPLICE /* splice() temp files to pipe on Linux */
1442     chunk * const c = cq->first;
1443     if (c->type == FILE_CHUNK) {
1444         loff_t abs_offset = c->offset;
1445         return (0 == chunk_open_file_chunk(c, errh))
1446           ? splice(c->file.fd, &abs_offset, fd, NULL,
1447                    (size_t)(c->file.length - c->offset), SPLICE_F_NONBLOCK)
1448           : -1;
1449     }
1450   #endif
1451     return chunkqueue_write_chunk(fd, cq, errh);
1452 }
1453 
1454 
void
chunkqueue_small_resp_optim (chunkqueue * const restrict cq)
{
    /*(caller must verify response is small (and non-empty) before calling)*/
    /*(caller must verify first chunk is MEM_CHUNK, i.e. response headers)*/
    /*(caller must verify response is non-zero length)*/

    /*(optimization to use fewer syscalls to send a small response by reading
     * small files into memory, thereby avoiding use of sendfile() and multiple
     * calls to writev()  (benefit for cleartext (non-TLS) and <= HTTP/1.1))
     *(If TLS, then will shortly need to be in memory for encryption anyway)*/

    /*assert(cq->first);*/
    /*assert(cq->first->type == MEM_CHUNK);*/
    /*assert(cq->first->next);*/
    chunk * restrict c = cq->first;
    chunk * const restrict filec = c->next;  /*(require file already be open)*/
    if (filec != cq->last || filec->type != FILE_CHUNK || filec->file.fd < 0)
        return; /* not the expected (headers + single open file) shape */

  #ifndef HAVE_PREAD
    /* position fd for read() loop below when pread() unavailable */
    if (-1 == lseek(filec->file.fd, filec->offset, SEEK_SET)) return;
  #endif

    /* Note: there should be no size change in chunkqueue,
     * so cq->bytes_in and cq->bytes_out should not be modified */

    off_t len = filec->file.length - filec->offset;
    if ((size_t)len > buffer_string_space(c->mem)) {
        /* not enough room in headers chunk; append new MEM_CHUNK for file */
        c->next = chunk_acquire((size_t)len+1);
        c = c->next;
        /*c->next = filec;*/
    }
    /* detach filec from chunkqueue; file expected to be read fully */
    c->next = NULL;
    cq->last = c;

    ssize_t rd;
    off_t offset = 0;
    char * const ptr = buffer_extend(c->mem, len);
    do {
      #ifdef HAVE_PREAD
        rd =pread(filec->file.fd, ptr+offset, (size_t)len,filec->offset+offset);
      #else
        rd = read(filec->file.fd, ptr+offset, (size_t)len);
      #endif
    } while (rd > 0 ? (offset += rd, len -= rd) : errno == EINTR);
    /*(contents of chunkqueue kept valid even if error reading from file)*/
    if (__builtin_expect( (0 == len), 1))
        chunk_release(filec); /* file fully copied into memory */
    else { /*(unexpected; error recovery)*/
        /* undo: truncate to bytes actually read, re-attach file chunk at
         * its remaining offset, drop the extra MEM_CHUNK if it got no data */
        buffer_truncate(c->mem, (uint32_t)(ptr + offset - c->mem->ptr));
        cq->last = c->next = filec;
        if (offset)
            filec->offset += offset;
        else if (__builtin_expect( (cq->first != c), 0)) {
            cq->first->next = filec;
            chunk_release(c);
        }
    }
}
1516 
1517 
/* Peek at up to *dlen bytes at the front of cq without consuming anything.
 * On input, *data points to a caller buffer of capacity *dlen.  On return,
 * *dlen is the number of bytes made available and *data either still points
 * at the caller buffer (bytes copied in) or points directly into the first
 * MEM_CHUNK (zero-copy reference; copy deferred until data from more than
 * one chunk must be combined).  FILE_CHUNK data is read into the caller
 * buffer.  Returns 0 on success, -1 on error (failed open/seek/read of a
 * FILE_CHUNK, or unknown chunk type). */
int
chunkqueue_peek_data (chunkqueue * const cq,
                      char ** const data, uint32_t * const dlen,
                      log_error_st * const errh)
{
    char * const data_in = *data;
    const uint32_t data_insz = *dlen;
    *dlen = 0;

    for (chunk *c = cq->first; c; ) {
        uint32_t space = data_insz - *dlen;
        switch (c->type) {
          case MEM_CHUNK:
            {
                uint32_t have = buffer_clen(c->mem) - (uint32_t)c->offset;
                if (have > space)
                    have = space;
                if (*dlen)
                    memcpy(data_in + *dlen, c->mem->ptr + c->offset, have);
                else
                    *data = c->mem->ptr + c->offset; /*(reference; defer copy)*/
                *dlen += have;
                break;
            }

          case FILE_CHUNK:
            if (c->file.fd >= 0 || 0 == chunk_open_file_chunk(c, errh)) {
                off_t offset = c->offset;
                off_t len = c->file.length - c->offset;
                if (len > (off_t)space)
                    len = (off_t)space;
                if (0 == len)
                    break;

            #if 0 /* XXX: might improve performance on some system workloads */
              #if defined(_LP64) || defined(__LP64__) || defined(_WIN64)
              #if defined(HAVE_MMAP) || defined(_WIN32) /*see local sys-mmap.h*/
                /* mmap file to access data
                 * (Only consider temp files here since not catching SIGBUS)
                 * (For now, also limit to 64-bit to avoid address space issues)
                 * If temp file is used, data should be large enough that mmap
                 * is worthwhile.  fd need not be kept open for the mmap once
                 * the mmap has been created, but is currently kept open for
                 * other pre-existing logic which checks fd and opens file,
                 * such as the condition for entering this code block above. */
                /* Note: under heavy load (or microbenchmark), system-reported
                 * memory use for RSS can be very, very large, due to presence
                 * of lots and lots of temp file read-only memory maps.
                 * pmap -X and exclude lighttpd temporary files to get a better
                 * view of memory use */
                char *mdata;
                if (c->file.is_temp
                    && (mdata = chunkqueue_mmap_chunk_len(c, len))) {
                    if (*dlen) {
                        if (*data != data_in) {
                            memcpy(data_in, *data, *dlen);
                            *data = data_in;
                        }
                        memcpy(data_in+*dlen, mdata, (size_t)len);
                    }
                    else {
                        *data = mdata;
                    }
                    *dlen += (uint32_t)len;
                    break;
                }
              #endif
              #endif
            #endif

              #ifndef HAVE_PREAD
                if (-1 == lseek(c->file.fd, offset, SEEK_SET)) {
                    log_perror(errh, __FILE__, __LINE__, "lseek(\"%s\")",
                               c->mem->ptr);
                    return -1;
                }
              #endif
                ssize_t rd;
                do {
                  #ifdef HAVE_PREAD
                    rd =pread(c->file.fd, data_in + *dlen, (size_t)len, offset);
                  #else
                    rd = read(c->file.fd, data_in + *dlen, (size_t)len);
                  #endif
                } while (-1 == rd && errno == EINTR);
                if (rd <= 0) { /* -1 error; 0 EOF (unexpected) */
                    log_perror(errh, __FILE__, __LINE__, "read(\"%s\")",
                               c->mem->ptr);
                    return -1;
                }

                *dlen += (uint32_t)rd;
                break;
            }
            return -1; /* chunk_open_file_chunk() failed */

          default:
            return -1;
        }

        if (*dlen == data_insz)
            break; /* caller buffer capacity reached */

        c = c->next;
        if (NULL == c)
            break;

        /* about to combine data from multiple chunks;
         * materialize any deferred zero-copy reference into caller buffer */
        if (*dlen && *data != data_in) {
            memcpy(data_in, *data, *dlen);
            *data = data_in;
        }
    }

    return 0;
}
1633 
1634 
1635 int
chunkqueue_read_data(chunkqueue * const cq,char * const data,const uint32_t dlen,log_error_st * const errh)1636 chunkqueue_read_data (chunkqueue * const cq,
1637                       char * const data, const uint32_t dlen,
1638                       log_error_st * const errh)
1639 {
1640     char *ptr = data;
1641     uint32_t len = dlen;
1642     if (chunkqueue_peek_data(cq, &ptr, &len, errh) < 0 || len != dlen)
1643         return -1;
1644     if (data != ptr) memcpy(data, ptr, len);
1645     chunkqueue_mark_written(cq, len);
1646     return 0;
1647 }
1648 
1649 
1650 buffer *
chunkqueue_read_squash(chunkqueue * const restrict cq,log_error_st * const restrict errh)1651 chunkqueue_read_squash (chunkqueue * const restrict cq, log_error_st * const restrict errh)
1652 {
1653     /* read and replace chunkqueue contents with single MEM_CHUNK.
1654      * cq->bytes_out is not modified */
1655 
1656     off_t cqlen = chunkqueue_length(cq);
1657     if (cqlen >= UINT32_MAX) return NULL;
1658 
1659     if (cq->first && NULL == cq->first->next && cq->first->type == MEM_CHUNK)
1660         return cq->first->mem;
1661 
1662     chunk * const c = chunk_acquire((uint32_t)cqlen+1);
1663     char *data = c->mem->ptr;
1664     uint32_t dlen = (uint32_t)cqlen;
1665     int rc = chunkqueue_peek_data(cq, &data, &dlen, errh);
1666     if (rc < 0) {
1667         chunk_release(c);
1668         return NULL;
1669     }
1670     buffer_truncate(c->mem, dlen);
1671 
1672     chunkqueue_release_chunks(cq);
1673     chunkqueue_append_chunk(cq, c);
1674     return c->mem;
1675 }
1676