1 /*
2  * %CopyrightBegin%
3  *
4  * Copyright Ericsson AB 2017-2018. All Rights Reserved.
5  *
6  * Licensed under the Apache License, Version 2.0 (the "License");
7  * you may not use this file except in compliance with the License.
8  * You may obtain a copy of the License at
9  *
10  *     http://www.apache.org/licenses/LICENSE-2.0
11  *
12  * Unless required by applicable law or agreed to in writing, software
13  * distributed under the License is distributed on an "AS IS" BASIS,
14  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15  * See the License for the specific language governing permissions and
16  * limitations under the License.
17  *
18  * %CopyrightEnd%
19  */
20 
21 #ifdef HAVE_CONFIG_H
22 #  include "config.h"
23 #endif
24 
25 #include "sys.h"
26 #include "global.h"
27 
28 #define ERL_WANT_HIPE_BIF_WRAPPER__
29 #include "bif.h"
30 #undef ERL_WANT_HIPE_BIF_WRAPPER__
31 
32 #include "erl_bits.h"
33 #include "erl_io_queue.h"
34 
35 #define IOL2V_SMALL_BIN_LIMIT (ERL_ONHEAP_BIN_LIMIT * 4)
36 
37 static void free_binary(ErtsIOQBinary *b, int driver);
38 static ErtsIOQBinary *alloc_binary(Uint size, char *source, void **iov_base, int driver);
39 
/*
 * Initialise q as an empty queue backed by the small vectors embedded in
 * the queue structure itself (q->v_small / q->b_small).
 *
 *   q      - queue to initialise
 *   alct   - allocator type remembered in the queue; the other functions in
 *            this file pass it to erts_alloc_fnf()/erts_free()
 *   driver - stored in the queue; the other functions in this file use it to
 *            choose between driver binaries (non-zero) and NIF binaries (0)
 */
void erts_ioq_init(ErtsIOQueue *q, ErtsAlcType_t alct, int driver)
{
    /* Compile-time checks of the layout and size assumptions made here. */
    ERTS_CT_ASSERT(offsetof(ErlNifIOVec,flags) == sizeof(ErtsIOVecCommon));
    ERTS_CT_ASSERT(sizeof(ErlIOVec) == sizeof(ErtsIOVecCommon));
    ERTS_CT_ASSERT(sizeof(size_t) == sizeof(ErlDrvSizeT));
    ERTS_CT_ASSERT(sizeof(size_t) == sizeof(Uint));

    q->size = 0;
    q->driver = driver;
    q->alct = alct;

    /* iovec vector: empty, head and tail both at the start */
    q->v_start = q->v_small;
    q->v_head = q->v_small;
    q->v_tail = q->v_small;
    q->v_end = q->v_small + ERTS_SMALL_IO_QUEUE;

    /* binary vector: kept in lock-step with the iovec vector */
    q->b_start = q->b_small;
    q->b_head = q->b_small;
    q->b_tail = q->b_small;
    q->b_end = q->b_small + ERTS_SMALL_IO_QUEUE;
}
56 
/*
 * Release everything held by q: every non-NULL binary between b_head and
 * b_tail is handed to free_binary(), and both vectors are freed unless they
 * are the small vectors embedded in the queue. Afterwards all vector
 * pointers are NULL and the size is 0.
 */
void erts_ioq_clear(ErtsIOQueue *q)
{
    ErtsIOQBinary **cursor;
    const int is_driver = q->driver;

    if (q->v_start != q->v_small) {
        erts_free(q->alct, (void *) q->v_start);
    }

    for (cursor = q->b_head; cursor < q->b_tail; cursor++) {
        if (*cursor != NULL) {
            free_binary(*cursor, is_driver);
        }
    }

    if (q->b_start != q->b_small) {
        erts_free(q->alct, (void *) q->b_start);
    }

    q->v_start = NULL;
    q->v_end = NULL;
    q->v_head = NULL;
    q->v_tail = NULL;

    q->b_start = NULL;
    q->b_end = NULL;
    q->b_head = NULL;
    q->b_tail = NULL;

    q->size = 0;
}
76 
/*
 * Drop one reference to a queued binary.
 *
 * With driver set the binary is passed to driver_free_binary(). Otherwise
 * the NIF binary's reference count is decremented and the binary is freed
 * once the count reaches zero.
 */
static void free_binary(ErtsIOQBinary *b, int driver)
{
    if (driver) {
        driver_free_binary(&b->driver);
        return;
    }

    if (erts_refc_dectest(&b->nif.intern.refc, 0) == 0) {
        erts_bin_free(&b->nif);
    }
}
84 
alloc_binary(Uint size,char * source,void ** iov_base,int driver)85 static ErtsIOQBinary *alloc_binary(Uint size, char *source, void **iov_base, int driver)
86 {
87     if (driver) {
88         ErlDrvBinary *bin = driver_alloc_binary(size);
89         if (!bin) return NULL;
90         sys_memcpy(bin->orig_bytes, source, size);
91         *iov_base = bin->orig_bytes;
92         return (ErtsIOQBinary *)bin;
93     } else {
94         /* This clause can be triggered in enif_ioq_enq_binary is used */
95         Binary *bin = erts_bin_nrml_alloc(size);
96         if (!bin) return NULL;
97         erts_refc_init(&bin->intern.refc, 1);
98         sys_memcpy(bin->orig_bytes, source, size);
99         *iov_base = bin->orig_bytes;
100         return (ErtsIOQBinary *)bin;
101     }
102 }
103 
erts_ioq_size(ErtsIOQueue * q)104 Uint erts_ioq_size(ErtsIOQueue *q)
105 {
106     return q->size;
107 }
108 
109 /* expand queue to hold n elements in tail or head */
expandq(ErtsIOQueue * q,int n,int tail)110 static int expandq(ErtsIOQueue* q, int n, int tail)
111 /* tail: 0 if make room in head, make room in tail otherwise */
112 {
113     int h_sz;  /* room before header */
114     int t_sz;  /* room after tail */
115     int q_sz;  /* occupied */
116     int nvsz;
117     SysIOVec* niov;
118     ErtsIOQBinary** nbinv;
119 
120     h_sz = q->v_head - q->v_start;
121     t_sz = q->v_end -  q->v_tail;
122     q_sz = q->v_tail - q->v_head;
123 
124     if (tail && (n <= t_sz)) /* do we need to expand tail? */
125 	return 0;
126     else if (!tail && (n <= h_sz))  /* do we need to expand head? */
127 	return 0;
128     else if (n > (h_sz + t_sz)) { /* need to allocate */
129 	/* we may get little extra but it ok */
130 	nvsz = (q->v_end - q->v_start) + n;
131 
132 	niov = erts_alloc_fnf(q->alct, nvsz * sizeof(SysIOVec));
133 	if (!niov)
134 	    return -1;
135 	nbinv = erts_alloc_fnf(q->alct, nvsz * sizeof(ErtsIOQBinary**));
136 	if (!nbinv) {
137 	    erts_free(q->alct, (void *) niov);
138 	    return -1;
139 	}
140 	if (tail) {
141 	    sys_memcpy(niov, q->v_head, q_sz*sizeof(SysIOVec));
142 	    if (q->v_start != q->v_small)
143 		erts_free(q->alct, (void *) q->v_start);
144 	    q->v_start = niov;
145 	    q->v_end = niov + nvsz;
146 	    q->v_head = q->v_start;
147 	    q->v_tail = q->v_head + q_sz;
148 
149 	    sys_memcpy(nbinv, q->b_head, q_sz*sizeof(ErtsIOQBinary*));
150 	    if (q->b_start != q->b_small)
151 		erts_free(q->alct, (void *) q->b_start);
152 	    q->b_start = nbinv;
153 	    q->b_end = nbinv + nvsz;
154 	    q->b_head = q->b_start;
155 	    q->b_tail = q->b_head + q_sz;
156 	}
157 	else {
158 	    sys_memcpy(niov+nvsz-q_sz, q->v_head, q_sz*sizeof(SysIOVec));
159 	    if (q->v_start != q->v_small)
160 		erts_free(q->alct, (void *) q->v_start);
161 	    q->v_start = niov;
162 	    q->v_end = niov + nvsz;
163 	    q->v_tail = q->v_end;
164 	    q->v_head = q->v_tail - q_sz;
165 
166 	    sys_memcpy(nbinv+nvsz-q_sz, q->b_head, q_sz*sizeof(ErtsIOQBinary*));
167 	    if (q->b_start != q->b_small)
168 		erts_free(q->alct, (void *) q->b_start);
169 	    q->b_start = nbinv;
170 	    q->b_end = nbinv + nvsz;
171 	    q->b_tail = q->b_end;
172 	    q->b_head = q->b_tail - q_sz;
173 	}
174     }
175     else if (tail) {  /* move to beginning to make room in tail */
176 	sys_memmove(q->v_start, q->v_head, q_sz*sizeof(SysIOVec));
177 	q->v_head = q->v_start;
178 	q->v_tail = q->v_head + q_sz;
179 	sys_memmove(q->b_start, q->b_head, q_sz*sizeof(ErtsIOQBinary*));
180 	q->b_head = q->b_start;
181 	q->b_tail = q->b_head + q_sz;
182     }
183     else {   /* move to end to make room */
184 	sys_memmove(q->v_end-q_sz, q->v_head, q_sz*sizeof(SysIOVec));
185 	q->v_tail = q->v_end;
186 	q->v_head = q->v_tail-q_sz;
187 	sys_memmove(q->b_end-q_sz, q->b_head, q_sz*sizeof(ErtsIOQBinary*));
188 	q->b_tail = q->b_end;
189 	q->b_head = q->b_tail-q_sz;
190     }
191 
192     return 0;
193 }
194 
195 static
skip(ErtsIOVec * vec,Uint skipbytes,SysIOVec ** iovp,ErtsIOQBinary *** binvp,Uint * lenp)196 int skip(ErtsIOVec* vec, Uint skipbytes,
197          SysIOVec **iovp, ErtsIOQBinary ***binvp,
198          Uint *lenp)
199 {
200     int n;
201     Uint len;
202     SysIOVec* iov;
203     ErtsIOQBinary** binv;
204 
205     if (vec->common.size <= skipbytes)
206 	return -1;
207 
208     iov = vec->common.iov;
209     binv = vec->common.binv;
210     n = vec->common.vsize;
211     /* we use do here to strip iov_len=0 from beginning */
212     do {
213 	len = iov->iov_len;
214 	if (len <= skipbytes) {
215 	    skipbytes -= len;
216 	    iov++;
217 	    binv++;
218 	    n--;
219 	}
220 	else {
221 	    iov->iov_base = ((char *)(iov->iov_base)) + skipbytes;
222 	    iov->iov_len -= skipbytes;
223 	    skipbytes = 0;
224 	}
225     } while(skipbytes > 0);
226 
227     *binvp = binv;
228     *iovp = iov;
229     *lenp = len;
230 
231     return n;
232 }
233 
234 /* Put elements from vec at q tail */
erts_ioq_enqv(ErtsIOQueue * q,ErtsIOVec * eiov,Uint skipbytes)235 int erts_ioq_enqv(ErtsIOQueue *q, ErtsIOVec *eiov, Uint skipbytes)
236 {
237     int n;
238     Uint len;
239     Uint size = eiov->common.size - skipbytes;
240     SysIOVec *iov;
241     ErtsIOQBinary** binv;
242     ErtsIOQBinary*  b;
243 
244     if (q == NULL)
245 	return -1;
246 
247     ASSERT(eiov->common.size >= skipbytes);
248     if (eiov->common.size <= skipbytes)
249 	return 0;
250 
251     n = skip(eiov, skipbytes, &iov, &binv, &len);
252 
253     if (n < 0)
254         return n;
255 
256     if (q->v_tail + n >= q->v_end)
257 	if (expandq(q, n, 1))
258             return -1;
259 
260     /* Queue and reference all binaries (remove zero length items) */
261     while(n--) {
262 	if ((len = iov->iov_len) > 0) {
263 	    if ((b = *binv) == NULL) { /* special case create binary ! */
264 		b = alloc_binary(len, iov->iov_base, (void**)&q->v_tail->iov_base,
265                                  q->driver);
266                 if (!b) return -1;
267 		*q->b_tail++ = b;
268 		q->v_tail->iov_len = len;
269 		q->v_tail++;
270 	    }
271 	    else {
272                 if (q->driver)
273                     driver_binary_inc_refc(&b->driver);
274                 else
275                     erts_refc_inc(&b->nif.intern.refc, 1);
276 		*q->b_tail++ = b;
277 		*q->v_tail++ = *iov;
278 	    }
279 	}
280 	iov++;
281 	binv++;
282     }
283     q->size += size;      /* update total size in queue */
284     return 0;
285 }
286 
287 /* Put elements from vec at q head */
erts_ioq_pushqv(ErtsIOQueue * q,ErtsIOVec * vec,Uint skipbytes)288 int erts_ioq_pushqv(ErtsIOQueue *q, ErtsIOVec* vec, Uint skipbytes)
289 {
290     int n;
291     Uint len;
292     Uint size = vec->common.size - skipbytes;
293     SysIOVec* iov;
294     ErtsIOQBinary** binv;
295     ErtsIOQBinary* b;
296 
297     if (q == NULL)
298 	return -1;
299 
300     ASSERT(vec->common.size >= skipbytes);
301     if (vec->common.size <= skipbytes)
302 	return 0;
303 
304     n = skip(vec, skipbytes, &iov, &binv, &len);
305 
306     if (n < 0)
307         return n;
308 
309     if (q->v_head - n < q->v_start)
310 	if (expandq(q, n, 0))
311             return -1;
312 
313     /* Queue and reference all binaries (remove zero length items) */
314     iov += (n-1);  /* move to end */
315     binv += (n-1); /* move to end */
316     while(n--) {
317 	if ((len = iov->iov_len) > 0) {
318 	    if ((b = *binv) == NULL) { /* special case create binary ! */
319                 if (q->driver) {
320                     ErlDrvBinary *bin = driver_alloc_binary(len);
321                     if (!bin) return -1;
322                     sys_memcpy(bin->orig_bytes, iov->iov_base, len);
323                     b = (ErtsIOQBinary *)bin;
324                     q->v_head->iov_base = bin->orig_bytes;
325                 }
326 		*--q->b_head = b;
327 		q->v_head--;
328 		q->v_head->iov_len = len;
329 	    }
330 	    else {
331                 if (q->driver)
332                     driver_binary_inc_refc(&b->driver);
333                 else
334                     erts_refc_inc(&b->nif.intern.refc, 1);
335 		*--q->b_head = b;
336 		*--q->v_head = *iov;
337 	    }
338 	}
339 	iov--;
340 	binv--;
341     }
342     q->size += size;      /* update total size in queue */
343     return 0;
344 }
345 
346 
347 /*
348 ** Remove size bytes from queue head
349 ** Return number of bytes that remain in queue
350 */
erts_ioq_deq(ErtsIOQueue * q,Uint size)351 int erts_ioq_deq(ErtsIOQueue *q, Uint size)
352 {
353     Uint len;
354 
355     if ((q == NULL) || (q->size < size))
356 	return -1;
357     q->size -= size;
358     while (size > 0) {
359 	ASSERT(q->v_head != q->v_tail);
360 
361 	len = q->v_head->iov_len;
362 	if (len <= size) {
363 	    size -= len;
364             free_binary(*q->b_head, q->driver);
365 	    *q->b_head++ = NULL;
366 	    q->v_head++;
367 	}
368 	else {
369 	    q->v_head->iov_base = ((char *)(q->v_head->iov_base)) + size;
370 	    q->v_head->iov_len -= size;
371 	    size = 0;
372 	}
373     }
374 
375     /* restart pointers (optimised for enq) */
376     if (q->v_head == q->v_tail) {
377 	q->v_head = q->v_tail = q->v_start;
378 	q->b_head = q->b_tail = q->b_start;
379     }
380     return 0;
381 }
382 
383 
erts_ioq_peekqv(ErtsIOQueue * q,ErtsIOVec * ev)384 Uint erts_ioq_peekqv(ErtsIOQueue *q, ErtsIOVec *ev) {
385     ASSERT(ev);
386 
387     if (! q) {
388 	return (Uint) -1;
389     } else {
390 	if ((ev->common.vsize = q->v_tail - q->v_head) == 0) {
391 	    ev->common.size = 0;
392 	    ev->common.iov = NULL;
393 	    ev->common.binv = NULL;
394 	} else {
395 	    ev->common.size = q->size;
396 	    ev->common.iov = q->v_head;
397 	    ev->common.binv = q->b_head;
398 	}
399 	return q->size;
400     }
401 }
402 
erts_ioq_peekq(ErtsIOQueue * q,int * vlenp)403 SysIOVec* erts_ioq_peekq(ErtsIOQueue *q, int* vlenp)  /* length of io-vector */
404 {
405 
406     if (q == NULL) {
407 	*vlenp = -1;
408 	return NULL;
409     }
410     if ((*vlenp = (q->v_tail - q->v_head)) == 0)
411 	return NULL;
412     return q->v_head;
413 }
414 
/* Fills a possibly deep list of chars and binaries into vec
** Small characters are first stored in the buffer buf of length ln
** binaries found are copied and linked into msoh
** Return  vector length on success,
**        -1 on overflow
**        -2 on type error
*/
422 
423 static ERTS_INLINE void
io_list_to_vec_set_vec(SysIOVec ** iov,ErtsIOQBinary *** binv,ErtsIOQBinary * bin,byte * ptr,Uint len,int * vlen)424 io_list_to_vec_set_vec(SysIOVec **iov, ErtsIOQBinary ***binv,
425                        ErtsIOQBinary *bin, byte *ptr, Uint len,
426                        int *vlen)
427 {
428     while (len > MAX_SYSIOVEC_IOVLEN) {
429         (*iov)->iov_base = ptr;
430         (*iov)->iov_len = MAX_SYSIOVEC_IOVLEN;
431         ptr += MAX_SYSIOVEC_IOVLEN;
432         len -= MAX_SYSIOVEC_IOVLEN;
433         (*iov)++;
434         (*vlen)++;
435         *(*binv)++ = bin;
436     }
437     (*iov)->iov_base = ptr;
438     (*iov)->iov_len = len;
439     *(*binv)++ = bin;
440     (*iov)++;
441     (*vlen)++;
442 }
443 
444 int
erts_ioq_iolist_to_vec(Eterm obj,SysIOVec * iov,ErtsIOQBinary ** binv,ErtsIOQBinary * cbin,Uint bin_limit,int driver)445 erts_ioq_iolist_to_vec(Eterm obj,	  /* io-list */
446                        SysIOVec* iov,	  /* io vector */
447                        ErtsIOQBinary** binv,       /* binary reference vector */
448                        ErtsIOQBinary* cbin,        /* binary to store characters */
449                        Uint bin_limit,  /* small binaries limit */
450                        int driver)
451 {
452     DECLARE_ESTACK(s);
453     Eterm* objp;
454     byte *buf  = NULL;
455     Uint len = 0;
456     Uint csize  = 0;
457     int vlen   = 0;
458     byte* cptr;
459 
460     if (cbin) {
461         if (driver) {
462             buf = (byte*)cbin->driver.orig_bytes;
463             len = cbin->driver.orig_size;
464         } else {
465             buf = (byte*)cbin->nif.orig_bytes;
466             len = cbin->nif.orig_size;
467         }
468     }
469     cptr = buf;
470 
471     goto L_jump_start;  /* avoid push */
472 
473     while (!ESTACK_ISEMPTY(s)) {
474 	obj = ESTACK_POP(s);
475     L_jump_start:
476 	if (is_list(obj)) {
477 	L_iter_list:
478 	    objp = list_val(obj);
479 	    obj = CAR(objp);
480 	    if (is_byte(obj)) {
481 		if (len == 0)
482 		    goto L_overflow;
483 		*buf++ = unsigned_val(obj);
484 		csize++;
485 		len--;
486 	    } else if (is_binary(obj)) {
487 		ESTACK_PUSH(s, CDR(objp));
488 		goto handle_binary;
489 	    } else if (is_list(obj)) {
490 		ESTACK_PUSH(s, CDR(objp));
491 		goto L_iter_list;    /* on head */
492 	    } else if (!is_nil(obj)) {
493 		goto L_type_error;
494 	    }
495 	    obj = CDR(objp);
496 	    if (is_list(obj))
497 		goto L_iter_list; /* on tail */
498 	    else if (is_binary(obj)) {
499 		goto handle_binary;
500 	    } else if (!is_nil(obj)) {
501 		goto L_type_error;
502 	    }
503 	} else if (is_binary(obj)) {
504 	    Eterm real_bin;
505 	    Uint offset;
506 	    Eterm* bptr;
507 	    Uint size;
508 	    int bitoffs;
509 	    int bitsize;
510 
511 	handle_binary:
512 	    size = binary_size(obj);
513 	    ERTS_GET_REAL_BIN(obj, real_bin, offset, bitoffs, bitsize);
514 	    ASSERT(bitsize == 0);
515 	    bptr = binary_val(real_bin);
516 	    if (*bptr == HEADER_PROC_BIN) {
517 		ProcBin* pb = (ProcBin *) bptr;
518 		if (bitoffs != 0) {
519 		    if (len < size) {
520 			goto L_overflow;
521 		    }
522 		    erts_copy_bits(pb->bytes+offset, bitoffs, 1,
523 				   (byte *) buf, 0, 1, size*8);
524 		    csize += size;
525 		    buf += size;
526 		    len -= size;
527 		} else if (bin_limit && size < bin_limit) {
528 		    if (len < size) {
529 			goto L_overflow;
530 		    }
531 		    sys_memcpy(buf, pb->bytes+offset, size);
532 		    csize += size;
533 		    buf += size;
534 		    len -= size;
535 		} else {
536                     ErtsIOQBinary *qbin;
537 		    if (csize != 0) {
538                         io_list_to_vec_set_vec(&iov, &binv, cbin,
539                                                cptr, csize, &vlen);
540 			cptr = buf;
541 			csize = 0;
542 		    }
543 		    if (pb->flags) {
544 			erts_emasculate_writable_binary(pb);
545 		    }
546                     if (driver)
547                         qbin = (ErtsIOQBinary*)Binary2ErlDrvBinary(pb->val);
548                     else
549                         qbin = (ErtsIOQBinary*)pb->val;
550 
551                     io_list_to_vec_set_vec(
552                         &iov, &binv, qbin,
553                         pb->bytes+offset, size, &vlen);
554 		}
555 	    } else {
556 		ErlHeapBin* hb = (ErlHeapBin *) bptr;
557 		if (len < size) {
558 		    goto L_overflow;
559 		}
560 		copy_binary_to_buffer(buf, 0,
561 				      ((byte *) hb->data)+offset, bitoffs,
562 				      8*size);
563 		csize += size;
564 		buf += size;
565 		len -= size;
566 	    }
567 	} else if (!is_nil(obj)) {
568 	    goto L_type_error;
569 	}
570     }
571 
572     if (csize != 0) {
573         io_list_to_vec_set_vec(&iov, &binv, cbin, cptr, csize, &vlen);
574     }
575 
576     DESTROY_ESTACK(s);
577     return vlen;
578 
579  L_type_error:
580     DESTROY_ESTACK(s);
581     return -2;
582 
583  L_overflow:
584     DESTROY_ESTACK(s);
585     return -1;
586 }
587 
588 static ERTS_INLINE int
io_list_vec_count(Eterm obj,Uint * v_size,Uint * c_size,Uint * b_size,Uint * in_clist,Uint * p_v_size,Uint * p_c_size,Uint * p_in_clist,Uint blimit)589 io_list_vec_count(Eterm obj, Uint *v_size,
590                   Uint *c_size, Uint *b_size, Uint *in_clist,
591                   Uint *p_v_size, Uint *p_c_size, Uint *p_in_clist,
592                   Uint blimit)
593 {
594     Uint size = binary_size(obj);
595     Eterm real;
596     ERTS_DECLARE_DUMMY(Uint offset);
597     int bitoffs;
598     int bitsize;
599     ERTS_GET_REAL_BIN(obj, real, offset, bitoffs, bitsize);
600     if (bitsize != 0) return 1;
601     if (thing_subtag(*binary_val(real)) == REFC_BINARY_SUBTAG &&
602 	bitoffs == 0) {
603 	*b_size += size;
604         if (*b_size < size) return 2;
605 	*in_clist = 0;
606         ++*v_size;
607         /* If iov_len is smaller then Uint we split the binary into*/
608         /* multiple smaller (2GB) elements in the iolist.*/
609 	*v_size += size / MAX_SYSIOVEC_IOVLEN;
610         if (size >= blimit) {
611             *p_in_clist = 0;
612             ++*p_v_size;
613         } else {
614             *p_c_size += size;
615             if (!*p_in_clist) {
616                 *p_in_clist = 1;
617                 ++*p_v_size;
618             }
619         }
620     } else {
621 	*c_size += size;
622         if (*c_size < size) return 2;
623 	if (!*in_clist) {
624 	    *in_clist = 1;
625 	    ++*v_size;
626 	}
627 	*p_c_size += size;
628 	if (!*p_in_clist) {
629 	    *p_in_clist = 1;
630 	    ++*p_v_size;
631 	}
632     }
633     return 0;
634 }
635 
636 #define IO_LIST_VEC_COUNT(obj)                                          \
637     do {                                                                \
638         switch (io_list_vec_count(obj, &v_size, &c_size,                \
639                                   &b_size, &in_clist,                   \
640                                   &p_v_size, &p_c_size, &p_in_clist,    \
641                                   blimit)) {                            \
642         case 1: goto L_type_error;                                      \
643         case 2: goto L_overflow_error;                                  \
644         default: break;                                                 \
645         }                                                               \
646     } while(0)
647 
648 /*
649  * Returns 0 if successful and a non-zero value otherwise.
650  *
651  * Return values through pointers:
652  *    *vsize      - SysIOVec size needed for a writev
653  *    *csize      - Number of bytes not in binary (in the common binary)
654  *    *pvsize     - SysIOVec size needed if packing small binaries
655  *    *pcsize     - Number of bytes in the common binary if packing
656  *    *total_size - Total size of iolist in bytes
657  */
658 int
erts_ioq_iolist_vec_len(Eterm obj,int * vsize,Uint * csize,Uint * pvsize,Uint * pcsize,size_t * total_size,Uint blimit)659 erts_ioq_iolist_vec_len(Eterm obj, int* vsize, Uint* csize,
660                         Uint* pvsize, Uint* pcsize,
661                         size_t* total_size, Uint blimit)
662 {
663     DECLARE_ESTACK(s);
664     Eterm* objp;
665     Uint v_size = 0;
666     Uint c_size = 0;
667     Uint b_size = 0;
668     Uint in_clist = 0;
669     Uint p_v_size = 0;
670     Uint p_c_size = 0;
671     Uint p_in_clist = 0;
672     size_t total;
673 
674     goto L_jump_start;  /* avoid a push */
675 
676     while (!ESTACK_ISEMPTY(s)) {
677 	obj = ESTACK_POP(s);
678     L_jump_start:
679 	if (is_list(obj)) {
680 	L_iter_list:
681 	    objp = list_val(obj);
682 	    obj = CAR(objp);
683 
684 	    if (is_byte(obj)) {
685 		c_size++;
686 		if (c_size == 0) {
687 		    goto L_overflow_error;
688 		}
689 		if (!in_clist) {
690 		    in_clist = 1;
691 		    v_size++;
692 		}
693 		p_c_size++;
694 		if (!p_in_clist) {
695 		    p_in_clist = 1;
696 		    p_v_size++;
697 		}
698 	    }
699 	    else if (is_binary(obj)) {
700                 IO_LIST_VEC_COUNT(obj);
701 	    }
702 	    else if (is_list(obj)) {
703 		ESTACK_PUSH(s, CDR(objp));
704 		goto L_iter_list;   /* on head */
705 	    }
706 	    else if (!is_nil(obj)) {
707 		goto L_type_error;
708 	    }
709 
710 	    obj = CDR(objp);
711 	    if (is_list(obj))
712 		goto L_iter_list;   /* on tail */
713 	    else if (is_binary(obj)) {  /* binary tail is OK */
714 		IO_LIST_VEC_COUNT(obj);
715 	    }
716 	    else if (!is_nil(obj)) {
717 		goto L_type_error;
718 	    }
719 	}
720 	else if (is_binary(obj)) {
721 	    IO_LIST_VEC_COUNT(obj);
722 	}
723 	else if (!is_nil(obj)) {
724 	    goto L_type_error;
725 	}
726     }
727 
728     total = c_size + b_size;
729     if (total < c_size) {
730 	goto L_overflow_error;
731     }
732     *total_size = total;
733 
734     DESTROY_ESTACK(s);
735     *vsize = v_size;
736     *csize = c_size;
737     *pvsize = p_v_size;
738     *pcsize = p_c_size;
739     return 0;
740 
741  L_type_error:
742  L_overflow_error:
743     DESTROY_ESTACK(s);
744     return 1;
745 }
746 
747 typedef struct {
748     Eterm result_head;
749     Eterm result_tail;
750     Eterm input_list;
751 
752     UWord acc_size;
753     Binary *acc;
754 
755     /* We yield after copying this many bytes into the accumulator (Minus
756      * eating a few on consing etc). Large binaries will only count to the
757      * extent their split (if any) resulted in a copy op. */
758     UWord bytereds_available;
759     UWord bytereds_spent;
760 
761     Process *process;
762     ErtsEStack estack;
763 
764     Eterm magic_reference;
765 } iol2v_state_t;
766 
/*
 * Destructor for the magic binary holding an iol2v_state_t: destroys the
 * saved estack and frees the accumulator binary if one is still held.
 * Always returns 1.
 */
static int iol2v_state_destructor(Binary *data) {
    iol2v_state_t *state = ERTS_MAGIC_BIN_UNALIGNED_DATA(data);
    Binary *leftover_acc;

    DESTROY_SAVED_ESTACK(&state->estack);

    leftover_acc = state->acc;
    if (leftover_acc != NULL) {
        erts_bin_free(leftover_acc);
    }

    return 1;
}
778 
/*
 * Prepare state for converting input on behalf of process: empty result
 * list, no accumulator, no magic reference and a cleared saved estack.
 * The bytereds_* fields are not touched here.
 */
static void iol2v_init(iol2v_state_t *state, Process *process, Eterm input) {
    state->process = process;
    state->input_list = input;

    /* Nothing produced yet. */
    state->result_head = NIL;
    state->result_tail = NIL;

    /* No accumulator yet. */
    state->acc = NULL;
    state->acc_size = 0;

    state->magic_reference = NIL;

    CLEAR_SAVED_ESTACK(&state->estack);
}
792 
/*
 * Build, on the heap of state->process, a sub-binary that covers `size`
 * bytes of bin_term starting `offset` bytes into it. The new sub-binary
 * refers to bin_term's underlying binary, keeps its bit offset, has a bit
 * size of 0 and is not writable.
 */
static Eterm iol2v_make_sub_bin(iol2v_state_t *state, Eterm bin_term,
        UWord offset, UWord size) {
    Uint base_byte_offset, base_bit_offset, base_bit_size;
    Eterm underlying;
    ErlSubBin *sub;

    sub = (ErlSubBin*)HAlloc(state->process, ERL_SUB_BIN_SIZE);

    ERTS_GET_REAL_BIN(bin_term, underlying,
        base_byte_offset, base_bit_offset, base_bit_size);

    ASSERT(base_bit_size == 0);

    sub->thing_word = HEADER_SUB_BIN;
    sub->orig = underlying;
    sub->is_writable = 0;

    /* Position within the underlying binary. */
    sub->offs = base_byte_offset + offset;
    sub->bitoffs = base_bit_offset;

    /* Extent: whole bytes only. */
    sub->size = size;
    sub->bitsize = 0;

    return make_binary(sub);
}
817 
iol2v_promote_acc(iol2v_state_t * state)818 static Eterm iol2v_promote_acc(iol2v_state_t *state) {
819     Eterm bin;
820 
821     bin = erts_build_proc_bin(&MSO(state->process),
822                               HAlloc(state->process, PROC_BIN_SIZE),
823                               erts_bin_realloc(state->acc, state->acc_size));
824     state->acc_size = 0;
825     state->acc = NULL;
826 
827     return bin;
828 }
829 
830 /* Destructively enqueues a term to the result list, saving us the hassle of
831  * having to reverse it later. This is safe since GC is disabled and we never
832  * leak the unfinished term to the outside. */
iol2v_enqueue_result(iol2v_state_t * state,Eterm term)833 static void iol2v_enqueue_result(iol2v_state_t *state, Eterm term) {
834     Eterm prev_tail;
835     Eterm *hp;
836 
837     prev_tail = state->result_tail;
838 
839     hp = HAlloc(state->process, 2);
840     state->result_tail = CONS(hp, term, NIL);
841 
842     if(prev_tail != NIL) {
843         Eterm *prev_cell = list_val(prev_tail);
844         CDR(prev_cell) = state->result_tail;
845     } else {
846         state->result_head = state->result_tail;
847     }
848 
849     state->bytereds_spent += 1;
850 }
851 
852 #ifndef DEBUG
853     #define ACC_REALLOCATION_LIMIT (IOL2V_SMALL_BIN_LIMIT * 32)
854 #else
855     #define ACC_REALLOCATION_LIMIT (IOL2V_SMALL_BIN_LIMIT * 4)
856 #endif
857 
iol2v_expand_acc(iol2v_state_t * state,UWord extra)858 static void iol2v_expand_acc(iol2v_state_t *state, UWord extra) {
859     UWord required_bytes, acc_alloc_size;
860 
861     ERTS_CT_ASSERT(ERTS_UWORD_MAX > ACC_REALLOCATION_LIMIT / 2);
862     ASSERT(extra >= 1);
863 
864     acc_alloc_size = state->acc != NULL ? (state->acc)->orig_size : 0;
865     required_bytes = state->acc_size + extra;
866 
867     if (state->acc == NULL) {
868         UWord new_size = MAX(required_bytes, IOL2V_SMALL_BIN_LIMIT);
869 
870         state->acc = erts_bin_nrml_alloc(new_size);
871     } else if (required_bytes > acc_alloc_size) {
872         Binary *prev_acc;
873         UWord new_size;
874 
875         if (acc_alloc_size >= ACC_REALLOCATION_LIMIT) {
876             /* We skip reallocating once we hit a certain point; it often
877              * results in extra copying and we're very likely to overallocate
878              * on anything other than absurdly long byte/heapbin sequences. */
879             iol2v_enqueue_result(state, iol2v_promote_acc(state));
880             iol2v_expand_acc(state, extra);
881             return;
882         }
883 
884         new_size = MAX(required_bytes, acc_alloc_size * 2);
885         prev_acc = state->acc;
886 
887         state->acc = erts_bin_realloc(prev_acc, new_size);
888 
889         if (prev_acc != state->acc) {
890             state->bytereds_spent += state->acc_size;
891         }
892     }
893 
894     state->bytereds_spent += extra;
895 }
896 
/*
 * Copy a run of small integers, starting at the list cell seq_start, into
 * the accumulator.
 *
 * The run ends at the first cell whose head is not a small integer, at the
 * end of the list, or once twice its length reaches the remaining byte
 * reductions. The caller must ensure the run has at least one element.
 *
 * Returns 1 on success and stores the term following the run in *seq_end.
 * Returns 0 if any value in the run does not fit in a byte; *seq_end is then
 * left untouched.
 */
static int iol2v_append_byte_seq(iol2v_state_t *state, Eterm seq_start, Eterm *seq_end) {
    Eterm scan, walker;
    Uint seen_bits;
    SWord run_length;
    char *dst;

    ASSERT(state->bytereds_available > state->bytereds_spent);

    /* Pass 1: find out how long the run is. */
    scan = seq_start;
    run_length = 0;

    while (is_list(scan)) {
        Eterm *cell = list_val(scan);

        if (!is_small(CAR(cell))) {
            break;
        }

        if (run_length * 2 >= (state->bytereds_available - state->bytereds_spent)) {
            break;
        }

        scan = CDR(cell);
        run_length += 1;
    }

    ASSERT(run_length >= 1);

    iol2v_expand_acc(state, run_length);

    /* Bump a few extra reductions to account for list traversal. */
    state->bytereds_spent += run_length;

    dst = &(state->acc)->orig_bytes[state->acc_size];
    state->acc_size += run_length;

    /* Pass 2: copy the values. All of them are OR-ed together so a single
     * comparison afterwards detects any value outside the byte range. */
    seen_bits = 0;

    for (walker = seq_start; walker != scan; ) {
        Eterm *cell = list_val(walker);
        Uint value = unsigned_val(CAR(cell));

        walker = CDR(cell);
        seen_bits |= value;

        ASSERT(dst < &(state->acc)->orig_bytes[state->acc_size]);
        *(dst++) = value;
    }

    if (seen_bits > UCHAR_MAX) {
        return 0;
    }

    ASSERT(dst == &(state->acc)->orig_bytes[state->acc_size]);
    *seq_end = walker;

    return 1;
}
959 
/*
 * Add the binary bin_term to the result being built in state.
 *
 * Depending on the sizes involved ("small" meaning below
 * IOL2V_SMALL_BIN_LIMIT) the binary is either
 *   - emitted as its own result element (after emitting the accumulator, if
 *     it holds anything), when the binary is not small and the accumulator
 *     is empty or not small;
 *   - copied whole into the accumulator, when the binary is small or the
 *     combined size is below twice the limit; or
 *   - split: enough leading bytes are copied to bring the accumulator up to
 *     the limit, the accumulator is emitted, and the remainder is emitted as
 *     a sub-binary.
 *
 * Returns 1 on success and 0 if bin_term has a non-zero bit size. An empty
 * binary only costs one bytered.
 */
static int iol2v_append_binary(iol2v_state_t *state, Eterm bin_term) {
    Uint byte_offset, bit_offset, bit_size;
    Eterm underlying;
    Eterm *underlying_hdr;
    UWord bin_bytes;

    int acc_is_small, bin_is_small;
    UWord combined_bytes;
    byte *src;

    UWord copy_bytes;
    int split;
    char *dst;

    ASSERT(state->bytereds_available > state->bytereds_spent);

    ERTS_GET_REAL_BIN(bin_term, underlying, byte_offset, bit_offset, bit_size);
    underlying_hdr = binary_val(underlying);
    bin_bytes = binary_size(bin_term);

    if (bit_size != 0) {
        return 0;
    }

    if (bin_bytes == 0) {
        state->bytereds_spent += 1;
        return 1;
    }

    acc_is_small = state->acc_size < IOL2V_SMALL_BIN_LIMIT;
    bin_is_small = bin_bytes < IOL2V_SMALL_BIN_LIMIT;
    combined_bytes = bin_bytes + state->acc_size;

    /* Locate the binary's data. */
    if (thing_subtag(*underlying_hdr) == REFC_BINARY_SUBTAG) {
        ProcBin *pb = (ProcBin*)underlying_hdr;

        if (pb->flags) {
            erts_emasculate_writable_binary(pb);
        }

        src = &((byte*)pb->bytes)[byte_offset];
    } else {
        ErlHeapBin *hb = (ErlHeapBin*)underlying_hdr;

        ASSERT(thing_subtag(*underlying_hdr) == HEAP_BINARY_SUBTAG);
        ASSERT(bin_is_small);

        src = &((byte*)&hb->data)[byte_offset];
    }

    if (!bin_is_small && (state->acc_size == 0 || !acc_is_small)) {
        /* Avoid combining if we encounter an acceptably large binary while
         * the accumulator is either empty or large enough to be returned on
         * its own. */
        if (state->acc_size != 0) {
            iol2v_enqueue_result(state, iol2v_promote_acc(state));
        }

        iol2v_enqueue_result(state, bin_term);
        return 1;
    }

    /* Either copy the whole binary (it is small, or the combination cannot
     * be split in two), or copy just enough for the accumulator to be valid
     * and return the rest as a sub-binary. */
    split = !(bin_is_small || combined_bytes < (IOL2V_SMALL_BIN_LIMIT * 2));
    copy_bytes = split ? (IOL2V_SMALL_BIN_LIMIT - state->acc_size) : bin_bytes;

    iol2v_expand_acc(state, copy_bytes);

    /* Must be computed after the expansion, which may replace the
     * accumulator. */
    dst = &(state->acc)->orig_bytes[state->acc_size];

    if (ERTS_LIKELY(bit_offset == 0)) {
        sys_memcpy(dst, src, copy_bytes);
    } else {
        ASSERT(bin_bytes <= ERTS_UWORD_MAX / 8);

        erts_copy_bits(src, bit_offset, 1,
            (byte*)dst, 0, 1,
            copy_bytes * 8);
    }

    state->acc_size += copy_bytes;

    if (split) {
        Eterm remainder = iol2v_make_sub_bin(state, bin_term, copy_bytes,
            bin_bytes - copy_bytes);

        iol2v_enqueue_result(state, iol2v_promote_acc(state));
        iol2v_enqueue_result(state, remainder);
    }

    return 1;
}
1061 
iol2v_yield(iol2v_state_t * state)1062 static BIF_RETTYPE iol2v_yield(iol2v_state_t *state) {
1063     if (is_nil(state->magic_reference)) {
1064         iol2v_state_t *boxed_state;
1065         Binary *magic_binary;
1066         Eterm *hp;
1067 
1068         magic_binary = erts_create_magic_binary_x(sizeof(*state),
1069             &iol2v_state_destructor, ERTS_ALC_T_BINARY, 1);
1070 
1071         boxed_state = ERTS_MAGIC_BIN_UNALIGNED_DATA(magic_binary);
1072         sys_memcpy(boxed_state, state, sizeof(*state));
1073 
1074         hp = HAlloc(boxed_state->process, ERTS_MAGIC_REF_THING_SIZE);
1075         boxed_state->magic_reference =
1076             erts_mk_magic_ref(&hp, &MSO(boxed_state->process), magic_binary);
1077 
1078         state = boxed_state;
1079     }
1080 
1081     ERTS_BIF_YIELD1(bif_export[BIF_iolist_to_iovec_1],
1082         state->process, state->magic_reference);
1083 }
1084 
iol2v_continue(iol2v_state_t * state)1085 static BIF_RETTYPE iol2v_continue(iol2v_state_t *state) {
1086     Eterm iterator;
1087 
1088     DECLARE_ESTACK(s);
1089     ESTACK_CHANGE_ALLOCATOR(s, ERTS_ALC_T_SAVED_ESTACK);
1090 
1091     state->bytereds_available =
1092         ERTS_BIF_REDS_LEFT(state->process) * IOL2V_SMALL_BIN_LIMIT;
1093     state->bytereds_spent = 0;
1094 
1095     if (state->estack.start) {
1096         ESTACK_RESTORE(s, &state->estack);
1097     }
1098 
1099     iterator = state->input_list;
1100 
1101     for(;;) {
1102         if (state->bytereds_spent >= state->bytereds_available) {
1103             ESTACK_SAVE(s, &state->estack);
1104             state->input_list = iterator;
1105 
1106             return iol2v_yield(state);
1107         }
1108 
1109         while (is_list(iterator)) {
1110             Eterm *cell;
1111             Eterm head;
1112 
1113             cell = list_val(iterator);
1114             head = CAR(cell);
1115 
1116             if (is_binary(head)) {
1117                 if (!iol2v_append_binary(state, head)) {
1118                     goto l_badarg;
1119                 }
1120 
1121                 iterator = CDR(cell);
1122             } else if (is_small(head)) {
1123                 Eterm seq_end;
1124 
1125                 if (!iol2v_append_byte_seq(state, iterator, &seq_end)) {
1126                     goto l_badarg;
1127                 }
1128 
1129                 iterator = seq_end;
1130             } else if (is_list(head) || is_nil(head)) {
1131                 Eterm tail = CDR(cell);
1132 
1133                 if (!is_nil(tail)) {
1134                     ESTACK_PUSH(s, tail);
1135                 }
1136 
1137                 state->bytereds_spent += 1;
1138                 iterator = head;
1139             } else {
1140                 goto l_badarg;
1141             }
1142 
1143             if (state->bytereds_spent >= state->bytereds_available) {
1144                 ESTACK_SAVE(s, &state->estack);
1145                 state->input_list = iterator;
1146 
1147                 return iol2v_yield(state);
1148             }
1149         }
1150 
1151         if (is_binary(iterator)) {
1152             if (!iol2v_append_binary(state, iterator)) {
1153                 goto l_badarg;
1154             }
1155         } else if (!is_nil(iterator)) {
1156             goto l_badarg;
1157         }
1158 
1159         if(ESTACK_ISEMPTY(s)) {
1160             break;
1161         }
1162 
1163         iterator = ESTACK_POP(s);
1164     }
1165 
1166     if (state->acc_size != 0) {
1167         iol2v_enqueue_result(state, iol2v_promote_acc(state));
1168     }
1169 
1170     BUMP_REDS(state->process, state->bytereds_spent / IOL2V_SMALL_BIN_LIMIT);
1171 
1172     CLEAR_SAVED_ESTACK(&state->estack);
1173     DESTROY_ESTACK(s);
1174 
1175     BIF_RET(state->result_head);
1176 
1177 l_badarg:
1178     CLEAR_SAVED_ESTACK(&state->estack);
1179     DESTROY_ESTACK(s);
1180 
1181     if (state->acc != NULL) {
1182         erts_bin_free(state->acc);
1183         state->acc = NULL;
1184     }
1185 
1186     BIF_ERROR(state->process, BADARG);
1187 }
1188 
1189 HIPE_WRAPPER_BIF_DISABLE_GC(iolist_to_iovec, 1)
1190 
iolist_to_iovec_1(BIF_ALIST_1)1191 BIF_RETTYPE iolist_to_iovec_1(BIF_ALIST_1) {
1192     BIF_RETTYPE result;
1193 
1194     if (is_nil(BIF_ARG_1)) {
1195         BIF_RET(NIL);
1196     } else if (is_binary(BIF_ARG_1)) {
1197         if (binary_bitsize(BIF_ARG_1) != 0) {
1198             ASSERT(!(BIF_P->flags & F_DISABLE_GC));
1199             BIF_ERROR(BIF_P, BADARG);
1200         } else if (binary_size(BIF_ARG_1) != 0) {
1201             Eterm *hp = HAlloc(BIF_P, 2);
1202 
1203             BIF_RET(CONS(hp, BIF_ARG_1, NIL));
1204         } else {
1205             BIF_RET(NIL);
1206         }
1207     } else if (is_internal_magic_ref(BIF_ARG_1)) {
1208         iol2v_state_t *state;
1209         Binary *magic;
1210 
1211         magic = erts_magic_ref2bin(BIF_ARG_1);
1212 
1213         if (ERTS_MAGIC_BIN_DESTRUCTOR(magic) != &iol2v_state_destructor) {
1214             ASSERT(!(BIF_P->flags & F_DISABLE_GC));
1215             BIF_ERROR(BIF_P, BADARG);
1216         }
1217 
1218         ASSERT(BIF_P->flags & F_DISABLE_GC);
1219 
1220         state = ERTS_MAGIC_BIN_UNALIGNED_DATA(magic);
1221         result = iol2v_continue(state);
1222     } else if (!is_list(BIF_ARG_1)) {
1223         ASSERT(!(BIF_P->flags & F_DISABLE_GC));
1224         BIF_ERROR(BIF_P, BADARG);
1225     } else {
1226         iol2v_state_t state;
1227 
1228         iol2v_init(&state, BIF_P, BIF_ARG_1);
1229 
1230         erts_set_gc_state(BIF_P, 0);
1231 
1232         result = iol2v_continue(&state);
1233     }
1234 
1235     if (result != THE_NON_VALUE || BIF_P->freason != TRAP) {
1236         erts_set_gc_state(BIF_P, 1);
1237     }
1238 
1239     BIF_RET(result);
1240 }
1241